diff --git a/src/caoscrawler/conv_impl/spss.py b/src/caoscrawler/conv_impl/spss.py index 51528cc2c6c292fac2be7b1bf857fe5912bf4201..1643dfc8d4ff9bde73855c072b929840e7a83a1b 100644 --- a/src/caoscrawler/conv_impl/spss.py +++ b/src/caoscrawler/conv_impl/spss.py @@ -19,6 +19,7 @@ """Converter for SAV files (stored by SPSS).""" import argparse +from collections import OrderedDict import pandas as pd import pyreadstat @@ -55,13 +56,72 @@ class SPSSConverter(converters.TableConverter): return self._children_from_dataframe(df) -def spss_to_yaml(savfile: str, yamlfile: str) -> None: +def spss_to_yaml(savfile: str, yamlfile: str, cfood: str = None) -> None: """Parse the *.sav and create basic datamodel in ``yamlfile``. + +Parameters +---------- +cfood: str + If given, also create a cfood skeleton. """ _, meta = pyreadstat.read_sav(savfile, metadataonly=True) + cfood_str = """ +--- +metadata: + macros: + - !defmacro + # Simple column value -> property rule + name: ColumnValue + params: + name: null + type: TextElement + parent: MyParent + definition: + ${name}: + type: ${type} + match_name: ${name} + match_value: (?P<val>.*) + records: + ${parent}: + ${name}: $$val + - !defmacro + # Same as "ColumnValue", but also give name of property. + name: ColumnValuePropname + params: + name: null + propname: null + type: TextElement + parent: MyParent + definition: + ${name}: + type: ${type} + match_name: ${name} + match_value: (?P<val>.*) + records: + ${parent}: + ${propname}: $$val +--- +directory: # corresponds to the directory given to the crawler + type: Directory + match: .* # we do not care how it is named here + subtree: + # This is the file + thisfile: + type: SPSSFile + match: ".*sav" + subtree: + entry: + type: Dict + match: .* # Name is irrelevant + records: + MyParent: + subtree: !macro + ColumnValue: +""" + enums: dict[str, list[str]] = {} - properties = {} + properties = OrderedDict() for name in meta.column_names: prop = { @@ -87,21 +147,22 @@ def spss_to_yaml(savfile: str, yamlfile: str) -> None: # # python3 -m caosadvancedtools.models.parser datamodel.yaml --sync -# -# Code for creating enum records: -# """ - for name, values in enums.items(): - for line in f""" -cont = db.Container() -for value in {repr(values)}: - rec = db.Record(name=value).add_parent(name="{name}") - cont.append(rec) -cont.insert() - """.splitlines(keepends=True): - if line.strip(): - output += f"# {line}" - output += "#\n" +# # +# # Code for creating enum records: +# # +# """ +# for name, values in enums.items(): +# for line in f""" +# cont = db.Container() +# for value in {repr(values)}: +# rec = db.Record(name=value).add_parent(name="{name}") +# cont.append(rec) +# cont.insert() +# """.splitlines(keepends=True): +# if line.strip(): +# output += f"# {line}" +# output += "#\n" # Actual datamodel output += """ @@ -128,15 +189,52 @@ DummyRT: allow_unicode=True, sort_keys=False).splitlines(keepends=True))) + # Experimental: Enum creation + output += """ +############### +# Enum values # +############### +""" + for name, values in enums.items(): + output += f"\n# ### {name} ###\n" + for value in values: + output += f""" +{value}: + role: Record + inherit_from_suggested: + - {name} +""" + with open(yamlfile, encoding="utf-8", mode="w") as myfile: myfile.write(output) + if cfood: + defs = [] + prefix = " " * 12 + for name, propdef in properties.items(): + dtype = None + if propdef["datatype"] == "DOUBLE": + dtype = "FloatElement" + elif propdef["datatype"] == "TEXT": + dtype = None + else: + dtype = propdef["datatype"] + + new_def = prefix + f"- name: {name}\n" + if dtype: + new_def += prefix + f" type: {dtype}\n" + defs.append(new_def) + cfood_str += "".join(defs) + with open(cfood, encoding="utf-8", mode="w") as myfile: + myfile.write(cfood_str) + def _parse_arguments(): """Parse the arguments.""" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-i', '--input', help="The *.sav file.", required=True) parser.add_argument('-o', '--outfile', help="Yaml filename to save the result", required=True) + parser.add_argument('--cfood', help="Yaml filename to create cfood output in", required=False) return parser.parse_args() @@ -144,5 +242,7 @@ def _parse_arguments(): def spss_to_datamodel_main(): """The main function of this script.""" args = _parse_arguments() - spss_to_yaml(savfile=args.input, yamlfile=args.outfile) + spss_to_yaml(savfile=args.input, yamlfile=args.outfile, cfood=args.cfood) print(f"Written datamodel to: {args.outfile}") + if args.cfood: + print(f"Written cfood to: {args.cfood}")