Skip to content
Snippets Groups Projects
Commit 3931c9bd authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

WIP

parent eea46658
No related branches found
No related tags found
2 merge requests: !100 "WIP: Filling XLSX: Seems to be working.", !93 "Filling XLSX: Everything except multiple choice."
Pipeline #48437 failed
......@@ -26,10 +26,11 @@ import json
import pathlib
from collections import OrderedDict
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Union, TextIO
from typing import Any, Dict, List, Optional, TextIO, Union
from jsonschema import FormatChecker, validate
from openpyxl import load_workbook, Workbook
from jsonschema.exceptions import ValidationError
from openpyxl import Workbook, load_workbook
from openpyxl.worksheet.worksheet import Worksheet
from .table_generator import ColumnType, RowType
......@@ -183,6 +184,7 @@ class TemplateFiller:
self._props[fullpath] = value
def fill_from_data(self, data: Dict[str, Any]):
# TODO recursive for dicts and list?
"""Fill current level with all scalar elements of ``data``."""
for name, value in data.items():
if not isinstance(value, (dict, list)):
......@@ -272,6 +274,9 @@ out: union[dict, None]
insertables: Dict[str, Any] = {}
for name, content in data.items():
# TODO is this the best way to do it????
if name == "file":
continue
path = current_path + [name]
next_context = context.next_level(name)
# preprocessing
......@@ -328,7 +333,11 @@ out: union[dict, None]
# Insert foreign keys
if insert_row is not None and sheet is not None and _is_exploded_sheet(sheet):
foreigns = _get_foreign_key_columns(sheet)
try:
foreigns = _get_foreign_key_columns(sheet)
except ValueError:
print(f"Sheet: {sheet}")
raise
for index, path in ((f.index, f.path) for f in foreigns.values()):
value = context[path]
sheet.cell(row=insert_row+1, column=index+1, value=value)
......@@ -361,7 +370,11 @@ validation_schema: dict, optional
# Validation
if validation_schema is not None:
validation_schema = _read_or_dict(validation_schema)
validate(data, validation_schema, format_checker=FormatChecker())
try:
validate(data, validation_schema, format_checker=FormatChecker())
except ValidationError as ve:
print(ve.message)
raise RuntimeError("Validation failed")
# Filling the data
result_wb = load_workbook(template)
......
......@@ -196,26 +196,28 @@ class TableTemplateGenerator(ABC):
if sheetname in sheets:
raise ValueError("The schema would lead to two sheets with the same name, "
f"which is forbidden: {sheetname}")
sheets[sheetname] = self._treat_schema_element(
col_def = self._treat_schema_element(
schema=schema['items'], sheets=sheets, path=path, foreign_keys=foreign_keys,
level_in_sheet_name=len(path),
array_paths=array_paths+[path] # since this level is an array extend the list
)
# and add the foreign keys that are necessary up to this point
for array_path in array_paths:
foreigns = self._get_foreign_keys(foreign_keys, array_path)
if isinstance(foreigns, str):
raise ValueError("Foreign keys must be a list of strings, but a single "
"string was given:\n"
f"{array_path} -> {foreigns}")
for foreign in foreigns:
internal_key = p2s(array_path + [foreign])
if internal_key in sheets[sheetname]:
raise ValueError("The schema would lead to two columns with the same "
f"name, which is forbidden:\n{foreign} -> {internal_key}")
ref_sheet = p2s(array_path)
sheets[sheetname][internal_key] = (
ColumnType.FOREIGN, f"see sheet '{ref_sheet}'", array_path + [foreign])
if col_def:
sheets[sheetname] = col_def
# and add the foreign keys that are necessary up to this point
for array_path in array_paths:
foreigns = self._get_foreign_keys(foreign_keys, array_path)
if isinstance(foreigns, str):
raise ValueError("Foreign keys must be a list of strings, but a single "
"string was given:\n"
f"{array_path} -> {foreigns}")
for foreign in foreigns:
internal_key = p2s(array_path + [foreign])
if internal_key in sheets[sheetname]:
raise ValueError("The schema would lead to two columns with the same "
f"name, which is forbidden:\n{foreign} -> {internal_key}")
ref_sheet = p2s(array_path)
sheets[sheetname][internal_key] = (
ColumnType.FOREIGN, f"see sheet '{ref_sheet}'", array_path + [foreign])
# Columns are added to the new sheet, thus we do not return any columns for the
# current sheet.
return {}
......@@ -326,6 +328,8 @@ class XLSXTemplateGenerator(TableTemplateGenerator):
del wb['Sheet']
for sheetname, sheetdef in sheets.items():
if not sheetdef:
continue
ws = wb.create_sheet(re.sub(INVALID_TITLE_REGEX, '_', sheetname))
# First row will be the COL_TYPE row.
# First column will be the indicator row with values COL_TYPE, PATH, IGNORE.
......
......@@ -19,7 +19,9 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import json
import os
import re
import tempfile
import jsonschema.exceptions as schema_exc
......@@ -69,6 +71,56 @@ def test_detect():
assert [1, 2] == _get_path_rows(example['Person'])
def test_temporary():
    """Fill local templates with stored form data and open each result for inspection.

    Temporary helper for manual testing; it only does something on a machine
    where the hard-coded data directory below exists.  For every file found
    under ``dd`` whose name matches ``<name>_2024-*.json`` it

    1. skips names listed in ``already_done``,
    2. requires ``template_<name>.xlsx`` to exist in ``di``,
    3. skips files whose name does not start with ``"Art"``,
    4. loads the ``"form_data"`` entry of the JSON file (dropping a
       ``"__version__"`` key if present),
    5. calls ``fill_template`` with ``schema_<name>.json`` as validation schema,
       writing into a temporary directory, and
    6. opens the result with ``libreoffice``.
    """
    # TODO: remove the following after manual testing
    import subprocess  # local import: only this temporary helper needs it

    di = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/crawler/schemas'
    dd = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/django/laforms/persistent/'
    # os.walk() silently yields nothing for a missing directory; make the
    # "nothing to do on this machine" case explicit instead.
    if not os.path.isdir(dd):
        return

    already_done = {
        "Präventionsmaßnahmen",
        "Beratungsstellen",
        "Schutzeinrichtungen",
        "Einzelfallversorgung",
        "Strategiedokumente",
        "Kooperationsvereinbarungen",
        "Gremien",
        "Verwaltungsvorschriften",
        "Gewaltschutzkonzepte und -maßnahmen",
        "Polizeilicher Opferschutz",
        "Feedback",
    }
    # Compiled once; the named group is the part of the file name before "_2024-".
    data_file_pattern = re.compile(r"(?P<teilb>.*)_2024-.*\.json")

    for prefix, _, files in os.walk(dd):
        for fi in files:
            match = data_file_pattern.match(fi)
            if not match:
                continue
            tb = match.group('teilb')
            print(tb)
            if tb in already_done:
                continue

            template = os.path.join(di, "template_"+tb+".xlsx")
            schema = os.path.join(di, "schema_"+tb+".json")
            # A data file without a template is treated as a test failure.
            assert os.path.exists(template), f"Missing template: {template}"

            jfi = os.path.join(prefix, fi)
            print(jfi)
            if not fi.startswith("Art"):
                continue

            with open(jfi, encoding="utf-8") as infile:
                data = json.load(infile)
            data = data["form_data"]
            # Remove the version marker before the data is validated and filled in.
            data.pop("__version__", None)

            with tempfile.TemporaryDirectory() as tmpdir:
                outfile = os.path.join(tmpdir, 'test.xlsx')
                fill_template(data=data, template=template, result=outfile,
                              validation_schema=schema)
                # Must run inside the ``with`` block: the file is deleted together
                # with the temporary directory.  An argument list avoids shell
                # quoting problems with unusual paths.
                try:
                    subprocess.run(["libreoffice", outfile], check=False)
                except FileNotFoundError:
                    # Viewing is best effort, as it was with os.system().
                    print("libreoffice not found; skipping display of the result")
def test_fill_xlsx():
fill_and_compare(json_file=rfp("data/simple_data.json"),
template_file=rfp("data/simple_template.xlsx"),
......@@ -78,6 +130,7 @@ def test_fill_xlsx():
template_file=rfp("data/multiple_refs_template.xlsx"),
known_good=rfp("data/multiple_refs_data.xlsx"),
schema=rfp("data/multiple_refs_schema.json"))
fill_and_compare(json_file=rfp("data/indirect_data.json"),
template_file=rfp("data/indirect_template.xlsx"),
known_good=rfp("data/indirect_data.xlsx"),
......
......@@ -206,21 +206,44 @@ def test_template_generator():
assert ws.column_dimensions['A'].hidden is True
# TODO: remove the following after manual testing
di = '/home/professional/CaosDB/management/external/dimr/eingabemaske/crawler/schemas'
di = '/home/henrik/CaosDB/management/external/dimr/eingabemaske/crawler/schemas'
if not os.path.exists(di):
return
for fi in os.listdir(di):
rp = os.path.join(di, fi)
if not fi.startswith("schema_"):
continue
with open(rp) as sfi:
schema = json.load(sfi)
fk_path = os.path.join(di, "foreign_keys"+fi[len('schema'):])
if not os.path.exists(fk_path):
print(f"No foreign keys file for:\n{rp}")
path =os.path.join(di, "template"+fi[len('schema'):-4]+"xlsx")
allreadydone = [
"Präventionsmaßnahmen" ,
"Beratungsstellen" ,
"Schutzeinrichtungen",
"Einzelfallversorgung" ,
"Strategiedokumente" ,
"Kooperationsvereinbarungen" ,
"Gremien" ,
"Verwaltungsvorschriften" ,
"Gewaltschutzkonzepte und -maßnahmen",
"Polizeilicher Opferschutz",
"Feedback",
]
if any([path.startswith("template_"+k) for k in allreadydone]):
continue
if not os.path.exists(fk_path):
print(f"No foreign keys file for:\n{fk_path}")
assert False
with open(fk_path) as sfi:
fk = json.load(sfi)
generator.generate(schema=schema, foreign_keys=fk, filepath=path)
os.system(f'libreoffice {path}')
generator = XLSXTemplateGenerator()
if not os.path.exists(path):
generator.generate(schema=schema, foreign_keys=fk, filepath=path)
os.system(f'libreoffice {path}')
else:
print(f"Not creating template because it exists:\n{path}")
# TODO test collisions of sheet or colnames
# TODO test escaping of values
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment