STY: Documentation andcode styled.

c14af58f · Daniel Hornung · e1b5fd4a · c14af58f · c14af58f
Verified Commit c14af58f authored 1 year ago by Daniel Hornung
--- a/src/caosadvancedtools/table_json_conversion/table_generator.py
+++ b/src/caosadvancedtools/table_json_conversion/table_generator.py
@@ -54,69 +54,83 @@ class TableTemplateGenerator(ABC):

    @abstractmethod
    def generate(self, schema: dict, foreign_keys: dict, filepath: str):
-        """ generates a sheet definition from a given JSON schema
+        """Generate a sheet definition from a given JSON schema.

        Parameters:
        -----------
-        schema: dict, given JSON schema
-        foreign_keys: dict, a configuration that defines which attributes shall be used to create
-                      additional columns when a list of references exists. Keys of the dict are
-                      elements of the path within the JSON where the key is needed. Suppose we want
-                      to distinguis Persons that are referenced by Trainings, foreign_keys must at
-                      least contain the following: {"Training": {"Person": [key1, key2]}}.
-                      Values wihtin the dicts can be a list representing the keys or
-                      a dict that allows to set further foreign keys at lower depth.
-                      In latter case if foreign keys exist at that level (
-                      e.g. in the above example there might be further levels below "Person".),
-                      then the keys can be set using the special "__this__" key.
-                      Example: {"Training": {"__this__": ["date"], "Person": [key1, key2]}}
-                      Here, "date" is the sole key for Training.
+        schema: dict
+            Given JSON schema.
+
+        foreign_keys: dict
+            A tree-like configuration (nested dict) that defines which attributes shall be used to
+            create additional columns when a list of references exists. The nested dict is
+            structured like the data model, its innermost elements are leaves of the path trees
+            within the JSON, they define the required keys.
+
+            | Suppose we want to distinguish Persons that are referenced by Trainings, then
+              ``foreign_keys`` must at least contain the following:
+            | ``{"Training": {"Person": ["name", "email"]}}``.
+
+            Values within the dicts can be either a list representing the keys (as in the example
+            above) or a dict that allows to set additional foreign keys at higher depths.  In the
+            latter case (dict instead of list) if foreign keys exist at that level (e.g. in the
+            above example there might be further levels below "Person"), then the foreign keys can
+            be set using the special ``__this__`` key.
+
+            Example: ``{"Training": {"__this__": ["date"], "Person": ["name", "email"]}}``
+            Here, ``date`` is the sole foreign key for Training.
        """

    def _generate_sheets_from_schema(self, schema: dict, foreign_keys: Optional[dict] = None
                                     ) -> Dict[str, Dict[str,
                                                         Tuple[ColumnType, Optional[str], list]]]:
-        """ generates a sheet definition from a given JSON schema
+        """Generate a sheet definition from a given JSON schema.

        Parameters
        ----------
        schema: dict
            given JSON schema
-        foreign_keys: dict
+        foreign_keys: dict, optional
            a configuration that defines which attributes shall be used to create
-            additional columns when a list of references exists. See foreign_keys
+            additional columns when a list of references exists. See ``foreign_keys``
            argument of TableTemplateGenerator.generate.

        Returns
        -------
-            dict
-            The structure of the dict, lets call it ``sheets`` is as follows:
-            ``sheets[sheetname][colname]= (COL_TYPE, description, [path])``
-            I.e. the top level dict contains sheet names as keys and dicts as values.
-            The inner dict has column names as keys and tuples as values. The tuples have the
-            column type as the first element, the description of the corresponding property as
-            second and the path as a list as last.
+        sheets: dict
+            A two-level dict which describes columns of template sheets.
+
+            | The structure of this two-level dict is as follows:
+            | ``sheets[sheetname][colname]= (<col_type>, <description>, [<path>, ...])``
+
+            I.e. the outer dict contains sheet names as keys, the inner dict has column names as
+            keys and tuples as values. These tuples consist of:
+            - the column type
+            - the description of the corresponding property
+            - a list representing the path.

        """
        if not ("type" in schema or "anyOf" in schema):
-            raise ValueError("Inappropriate JSON schema: The following part should contain the "
-                             f"'type' key:\n{schema}\n")
+            raise ValueError("Inappropriate JSON schema: The following object must contain the "
+                             f"'type' or 'anyOf' key:\n{schema}\n")
+        if "properties" not in schema:
+            raise ValueError("Inappropriate JSON schema: The following object must contain "
+                             f"the 'properties' key:\n{schema}\n")
+        if "type" in schema:
+            assert schema["type"] == "object"
        if foreign_keys is None:
            foreign_keys = {}
        # here, we treat the top level
        # sheets[sheetname][colname]= (COL_TYPE, description, [path])
        sheets: Dict[str, Dict[str, Tuple[ColumnType, Optional[str], list]]] = {}
-        if "properties" not in schema:
-            raise ValueError("Inappropriate JSON schema: The following part should contain "
-                             f"the 'properties' key:\n{schema}\n")
-        for rt_name in schema["properties"].keys():
-            sheets[rt_name] = self._treat_schema_element(schema["properties"][rt_name],
-                                                         sheets, [rt_name], foreign_keys)
+        for rt_name, rt_def in schema["properties"].items():
+            sheets[rt_name] = self._treat_schema_element(schema=rt_def, sheets=sheets,
+                                                         path=[rt_name], foreign_keys=foreign_keys)
        return sheets

    def _get_foreign_keys(self, keys: dict, path: list) -> list:
-        """ returns the foreign keys that are needed at the location to which path points """
-        msg = f"A foreign key definition is missing for path:\n{path}\n{keys}"
+        """Return the foreign keys that are needed at the location to which path points."""
+        msg = f"A foreign key definition is missing for path:\n{path}\nKeys are:\n{keys}"
        while path:
            if keys is None or path[0] not in keys:
                raise ValueError(msg)
@@ -128,30 +142,35 @@ class TableTemplateGenerator(ABC):
            return keys
        raise ValueError(msg)

-    def _treat_schema_element(self, schema: dict, sheets: dict, path: list,
+    def _treat_schema_element(self, schema: dict, sheets: dict, path: list[str],
                              foreign_keys: Optional[dict] = None, level_in_sheet_name: int = 1,
                              array_paths: Optional[list] = None
                              ) -> Dict[str, Tuple[ColumnType, Optional[str], list]]:
-        """ recursively transforms elements from the schema into column definitions
+        """Recursively transform elements from the schema into column definitions.
+
+        ``sheets`` is modified in place.

-        sheets is modified in place.
        Parameters
        ----------
-            array_paths: list
-                a list of path along the way to the current object, where the json contains arrays
-            schema: dict
-                part of the json schema; it must be the level that contains the type definition
-                (e.g. 'type' or 'oneOf' key)
+        schema: dict
+            Part of the json schema; it must be the level that contains the type definition
+            (e.g. 'type' or 'oneOf' key)
+        sheets: dict
+            All the sheets, indexed by their name.  This is typically modified in place by this
+            method.
+        path: list[str]
+            The relevant (sub) path for this schema part?
+        array_paths: list
+            A list of path along the way to the current object, where the json contains arrays.

        Returns
        -------
-
-            dict
-            describing the columns; see doc string of _generate_sheets_from_schema
+        columns: dict
+            Describing the columns; see doc string of `_generate_sheets_from_schema`_
        """
        if not ("type" in schema or "enum" in schema or "oneOf" in schema or "anyOf" in schema):
-            raise ValueError("Inappropriate JSON schema: The following part should contain the "
-                             f"'type' key:\n{schema}\n")
+            raise ValueError("Inappropriate JSON schema: The following schema part must contain "
+                             f"'type', 'enum', 'oneOf' or 'anyOf':\n{schema}\n")

        if array_paths is None:
            # if this is not set, we are at top level and the top level element may always be an
@@ -163,88 +182,94 @@ class TableTemplateGenerator(ABC):
        ctype = ColumnType.SCALAR

        # if it is an array, value defs are in 'items'
-        if 'type' in schema and schema['type'] == 'array':
-            if ('type' in schema['items'] and schema['items']['type'] == 'object'
+        if schema.get('type') == 'array':
+            if (schema['items'].get('type') == 'object'
                    and len(path) > 1):  # list of references; special treatment
                # we add a new sheet with columns generated from the subtree of the schema
                sheetname = ".".join(path)
                if sheetname in sheets:
-                    raise ValueError(f"The shema would lead to two sheets with the same name which"
-                                     f" is forbidden:{sheetname}")
+                    raise ValueError("The schema would lead to two sheets with the same name, "
+                                     f"which is forbidden: {sheetname}")
                sheets[sheetname] = self._treat_schema_element(
-                    schema['items'], sheets, path, foreign_keys, len(path),
+                    schema=schema['items'], sheets=sheets, path=path, foreign_keys=foreign_keys,
+                    level_in_sheet_name=len(path),
                    array_paths=array_paths+[path]  # since this level is an array extend the list
                )
                # and add the foreign keys that are necessary up to this point
-                for pa in array_paths:
-                    keys = self._get_foreign_keys(foreign_keys, pa)
-                    for ke in keys:
-                        if ke in sheets[sheetname]:
-                            raise ValueError(f"The shema would lead to two columns with the same "
-                                             f"name which is forbidden:{ke}")
-                        sheets[sheetname][ke] = (ColumnType.FOREIGN, f"see sheet '{path[0]}'",
-                                                 pa+[ke])
-                # columns are added to the new sheet, thus we do not return columns
+                for array_path in array_paths:
+                    keys = self._get_foreign_keys(foreign_keys, array_path)
+                    for key in keys:
+                        if key in sheets[sheetname]:
+                            raise ValueError("The schema would lead to two columns with the same "
+                                             f"name which is forbidden: {key}")
+                        sheets[sheetname][key] = (ColumnType.FOREIGN, f"see sheet '{path[0]}'",
+                                                  array_path + [key])
+                # Columns are added to the new sheet, thus we do not return any columns for the
+                # current sheet.
                return {}

-            # it is a list of primitive types -> semi colon separated list
+            # it is a list of primitive types -> semicolon separated list
            schema = schema['items']
            ctype = ColumnType.LIST

-        if 'oneOf' in schema:
-            for el in schema['oneOf']:
-                if 'type' in el:
-                    schema = el
+        # This should only be the case for "new or existing reference".
+        for el in schema.get('oneOf', []):
+            if 'type' in el:
+                schema = el
+                break

-        if "properties" in schema:  # recurse for each property
+        if "properties" in schema:  # recurse for each property, then return
            cols = {}
            for pname in schema["properties"]:
                col_defs = self._treat_schema_element(
                    schema["properties"][pname], sheets, path+[pname], foreign_keys,
                    level_in_sheet_name, array_paths=array_paths)
-                for k in col_defs.keys():
+                for k in col_defs:
                    if k in cols:
-                        raise ValueError(f"The shema would lead to two columns with the same "
-                                         f"name which is forbidden:{k}")
+                        raise ValueError(f"The schema would lead to two columns with the same "
+                                         f"name which is forbidden: {k}")
                cols.update(col_defs)
            return cols
-        else:  # those are the leaves
-            description = schema['description'] if 'description' in schema else None
-            # definition of a single column
-            default_return = {".".join(path[level_in_sheet_name:]): (ctype, description, path)}
-            if 'type' not in schema and 'enum' in schema:
-                return default_return
-            if 'type' not in schema and 'anyOf' in schema:
-                for d in schema['anyOf']:
-                    # currently the only case where this occurs is date formats
-                    assert d['type'] == 'string'
-                    assert d['format'] == 'date' or d['format'] == 'date-time'
-                return default_return
-            if schema["type"] in ['string', 'number', 'integer', 'boolean']:
-                if 'format' in schema and schema['format'] == 'data-url':
-                    return {}  # file; ignore for now
-                return default_return
-            raise ValueError("Inappropriate JSON schema: The following part should define an"
-                             f" object with properties or a primitive type:\n{schema}\n")
-        raise RuntimeError("This should not be reached. Implementation error.")
+
+        # The schema is a leaf.
+        description = schema['description'] if 'description' in schema else None
+        # definition of a single column
+        default_return = {".".join(path[level_in_sheet_name:]): (ctype, description, path)}
+        if 'type' not in schema and 'enum' in schema:
+            return default_return
+        if 'type' not in schema and 'anyOf' in schema:
+            for d in schema['anyOf']:
+                # currently the only case where this occurs is date formats
+                assert d['type'] == 'string'
+                assert d['format'] == 'date' or d['format'] == 'date-time'
+            return default_return
+        if schema["type"] in ['string', 'number', 'integer', 'boolean']:
+            if 'format' in schema and schema['format'] == 'data-url':
+                return {}  # file; ignore for now
+            return default_return
+        raise ValueError("Inappropriate JSON schema: The following part should define an"
+                         f" object with properties or a primitive type:\n{schema}\n")


 class XLSXTemplateGenerator(TableTemplateGenerator):
-    """ class for generating XLSX tables from json schema """
+    """Class for generating XLSX tables from json schema definitions."""

    def __init__(self):
        pass

-    def generate(self, schema: dict, foreign_keys: dict, filepath: str):
-        """ generates a sheet definition from a given JSON schema
+    def generate(self, schema: dict, foreign_keys: dict, filepath: str) -> None:
+        """Generate a sheet definition from a given JSON schema.

        Parameters:
        -----------
-        schema: dict, given JSON schema
-        foreign_keys: dict, a configuration that defines which attributes shall be used to create
-                      additional columns when a list of references exists. See foreign_keys
-                      argument of TableTemplateGenerator.generate.
-        filepath: str, the XLSX file will be stored under this path.
+        schema: dict
+            Given JSON schema
+        foreign_keys: dict
+            A configuration that defines which attributes shall be used to create
+            additional columns when a list of references exists. See ``foreign_keys``
+            argument of :ref:`TableTemplateGenerator.generate` .
+        filepath: str
+            The XLSX file will be stored under this path.
        """
        sheets = self._generate_sheets_from_schema(schema, foreign_keys)
        wb = self._create_workbook_from_sheets_def(sheets)
@@ -279,22 +304,23 @@ class XLSXTemplateGenerator(TableTemplateGenerator):

    def _create_workbook_from_sheets_def(
            self, sheets: Dict[str, Dict[str, Tuple[ColumnType, Optional[str], list]]]):
+        """Create and return a nice workbook for the given sheets."""
        wb = Workbook()
        assert wb.sheetnames == ["Sheet"]
-        for sn, sheetdef in sheets.items():
-            ws = wb.create_sheet(re.sub(INVALID_TITLE_REGEX, '_', sn))
-            # first row will by the COL_TYPE row
-            # first column will be the indicator row with values COL_TYPE, PATH, IGNORE
-            # the COL_TYPE row will be followed by as many PATH rows as needed
+        for sheetname, sheetdef in sheets.items():
+            ws = wb.create_sheet(re.sub(INVALID_TITLE_REGEX, '_', sheetname))
+            # First row will by the COL_TYPE row.
+            # First column will be the indicator row with values COL_TYPE, PATH, IGNORE.
+            # The COL_TYPE row will be followed by as many PATH rows as needed.

            max_path_length = self._get_max_path_length(sheetdef)
-            header_index = 2+max_path_length
-            description_index = 3+max_path_length
+            header_index = 2 + max_path_length
+            description_index = 3 + max_path_length

            # create first column
            ws.cell(1, 1, RowType.COL_TYPE.name)
            for index in range(max_path_length):
-                ws.cell(2+index, 1, RowType.PATH.name)
+                ws.cell(2 + index, 1, RowType.PATH.name)
            ws.cell(header_index, 1, RowType.IGNORE.name)
            ws.cell(description_index, 1, RowType.IGNORE.name)

@@ -302,12 +328,12 @@ class XLSXTemplateGenerator(TableTemplateGenerator):

            # create other columns
            for index, (colname, ct, desc, path) in enumerate(ordered_cols):
-                ws.cell(1, 2+index, ct.name)
-                for pi, el in enumerate(path):
-                    ws.cell(2+pi, 2+index, el)
-                ws.cell(header_index, 2+index, colname)
+                ws.cell(1, 2 + index, ct.name)
+                for path_index, el in enumerate(path):
+                    ws.cell(2 + path_index, 2 + index, el)
+                ws.cell(header_index, 2 + index, colname)
                if desc:
-                    ws.cell(description_index, 2+index, desc)
+                    ws.cell(description_index, 2 + index, desc)

            # hide special rows
            for index, row in enumerate(ws.rows):
@@ -321,10 +347,10 @@ class XLSXTemplateGenerator(TableTemplateGenerator):
        del wb['Sheet']

        # order sheets
-        # for index, sn in enumerate(sorted(wb.sheetnames)):
-        # wb.move_sheet(sn, index-wb.index(wb[sn]))
+        # for index, sheetname in enumerate(sorted(wb.sheetnames)):
+        # wb.move_sheet(sheetname, index-wb.index(wb[sheetname]))
        # reverse sheets
-        for index, sn in enumerate(wb.sheetnames[::-1]):
-            wb.move_sheet(sn, index-wb.index(wb[sn]))
+        for index, sheetname in enumerate(wb.sheetnames[::-1]):
+            wb.move_sheet(sheetname, index-wb.index(wb[sheetname]))

        return wb
--- a/unittests/table_json_conversion/test_table_template_generator.py
+++ b/unittests/table_json_conversion/test_table_template_generator.py
@@ -146,13 +146,14 @@ def test_get_foreign_keys():
    assert ['a'] == generator._get_foreign_keys(fkd, ['Training'])

    fkd = {"Training": {'hallo'}}
-    with pytest.raises(ValueError, match=r"A foreign key definition is missing for path:\n\['Training'\]\n{'Training': \{'hallo'\}\}"):
+    with pytest.raises(ValueError, match=r"A foreign key definition is missing for path:\n\["
+                       r"'Training'\]\nKeys are:\n{'Training': \{'hallo'\}\}"):
        generator._get_foreign_keys(fkd, ['Training'])

    fkd = {"Training": {"__this__": ['a'], 'b': ['c']}}
    assert ['c'] == generator._get_foreign_keys(fkd, ['Training', 'b'])

-    with pytest.raises(ValueError, match=r"A foreign key definition is missing for.*"):
+    with pytest.raises(ValueError, match=r"A foreign key definition is missing for .*"):
        generator._get_foreign_keys({}, ['Training'])