diff --git a/CHANGELOG.md b/CHANGELOG.md index 3488990fda0a75b7169ba82e08c59e1418d55b19..da7250c1d4b1f63473d6cfb689d04293111c68ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - XMLTextNodeConverter for converting text nodes created by XMLTagConverter - XMLAttributeNodeConverter for converting attribute nodes created by XMLTagConverter +- Units for properties. They can be specified by giving the property as a dict in the form + ```yaml + MyRecord: + my_prop: + value: 5 + unit: m + ``` ### Changed ### +- Property values specified by dicts do not have to contain a + `collection_mode` key anymore. If none is given, the + `collection_mode` is determined from the `value` as it is done for + values specified by strings: + - if `value` starts with '+', collection mode is "list". + - if `value` starts with '*', collection mode is "multiproperty". + - in all other cases, collection mode is "single". + ### Deprecated ### ### Removed ### diff --git a/src/caoscrawler/cfood-schema.yml b/src/caoscrawler/cfood-schema.yml index 451cb1c0a4fcb44cd31c5db412a411417936333a..c5e0eaad092c12efbceb5f55b62b3d7cf8afdccf 100644 --- a/src/caoscrawler/cfood-schema.yml +++ b/src/caoscrawler/cfood-schema.yml @@ -135,6 +135,9 @@ cfood: value: description: Dictionary notation for variable values. Values can be given by a variable which is indicated by an initial "$". Use "$$" for setting values actually starting with a dollar sign. type: string + unit: + description: The unit of this property. Units can be given by a variable which is indicated by an initial "$". Use "$$" for setting values actually starting with a dollar sign. + type: string collection_mode: description: The collection mode defines whether the resulting property will be a single property or whether the values of multiple structure elements will be collected either into a list or a multiproperty. 
enum: diff --git a/src/caoscrawler/converters/converters.py b/src/caoscrawler/converters/converters.py index f31a0f4463ea805472044e5bd7697ed1316d1d9b..8b5f9a60c999cb0c2366fb3ce1e385729a39d4dd 100644 --- a/src/caoscrawler/converters/converters.py +++ b/src/caoscrawler/converters/converters.py @@ -169,64 +169,84 @@ Parameters ---------- value: Union[dict, str, list] - - If *str*, the value to be interpreted. E.g. "4", "hello" or "$a" etc. - - If *dict*, must have keys ``value`` and ``collection_mode``. The returned tuple is directly - created from the corresponding values. - - If *list*, each element is checked for replacement and the resulting list will be used - as (list) value for the property + - If *str*, the value to be interpreted. E.g. "4", "hello" or "$a" + etc. No unit is set and collection mode is determined from the + first character: + - '+' corresponds to "list" + - '*' corresponds to "multiproperty" + - everything else is "single" + - If *dict*, it must have a ``value`` key and may have ``unit`` and + ``collection_mode`` keys. The returned tuple is directly created from + the corresponding values if they are given; ``unit`` defaults to + None and ``collection_mode`` is determined from ``value`` as + explained for the str case above, i.e., + - if it starts with '+', collection mode is "list", + - in case of '*', collection mode is "multiproperty", + - and everything else is "single". + - If *list*, each element is checked for variable replacement and the + resulting list will be used as (list) value for the property Returns ------- out: tuple - the final value of the property; variable names contained in `values` are replaced. + - the final unit of the property; variable names contained in `values` are replaced. 
- the collection mode (can be single, list or multiproperty) """ # @review Florian Spreckelsen 2022-05-13 + propunit = None + propvalue = None + collection_mode = None if isinstance(value, dict): if "value" not in value: # TODO: how do we handle this case? Just ignore? # or disallow? raise NotImplementedError(f"This definition has no \"value\": {value}") propvalue = value["value"] + if "unit" in value: + propunit = replace_variables(value["unit"], values) # can be "single", "list" or "multiproperty" - collection_mode = value["collection_mode"] - elif isinstance(value, str): - propvalue = value - collection_mode = "single" - if propvalue.startswith("+"): - collection_mode = "list" - propvalue = propvalue[1:] - elif propvalue.startswith("*"): - collection_mode = "multiproperty" - propvalue = propvalue[1:] - elif isinstance(value, list): - # TODO: (for review) - # This is a bit dirty right now and needed for - # being able to directly set list values. Semantics is, however, a bit - # different from the two cases above. 
- collection_mode = "single" - - # variables replacement: - propvalue = list() - for element in value: - # Do the element-wise replacement only, when its type is string: - if isinstance(element, str): - propvalue.append(replace_variables(element, values)) - else: - propvalue.append(element) - - return (propvalue, collection_mode) + if "collection_mode" in value: + collection_mode = value["collection_mode"] else: - # value is another simple type - collection_mode = "single" propvalue = value - # Return it immediately, otherwise variable substitution would be done and fail: - return (propvalue, collection_mode) + if collection_mode is None: + if isinstance(propvalue, str): + # Determine collection mode from string value + collection_mode = "single" + if propvalue.startswith("+"): + collection_mode = "list" + propvalue = propvalue[1:] + elif propvalue.startswith("*"): + collection_mode = "multiproperty" + propvalue = propvalue[1:] + elif isinstance(propvalue, list): + # TODO: (for review) + # This is a bit dirty right now and needed for + # being able to directly set list values. Semantics is, however, a bit + # different from the two cases above. 
+ collection_mode = "single" + + # variables replacement: + returnvalue = list() + for element in propvalue: + # Do the element-wise replacement only, when its type is string: + if isinstance(element, str): + returnvalue.append(replace_variables(element, values)) + else: + returnvalue.append(element) + + return (returnvalue, propunit, collection_mode) + else: + # value is another simple type + collection_mode = "single" + # Return it immediately, otherwise variable substitution would be done and fail: + return (propvalue, propunit, collection_mode) propvalue = replace_variables(propvalue, values) - return (propvalue, collection_mode) + return (propvalue, propunit, collection_mode) def create_records(values: GeneralStore, records: RecordStore, def_records: dict): @@ -277,7 +297,7 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict key = key_template.safe_substitute(**values.get_storage()) keys_modified.append((name, key)) - propvalue, collection_mode = handle_value(value, values) + propvalue, propunit, collection_mode = handle_value(value, values) if key.lower() in SPECIAL_PROPERTIES: # e.g. description, name, etc. 
@@ -291,17 +311,26 @@ def create_records(values: GeneralStore, records: RecordStore, def_records: dict else: if c_record.get_property(key) is None: if collection_mode == "list": - c_record.add_property(name=key, value=[propvalue]) + c_record.add_property(name=key, value=[propvalue], unit=propunit) elif (collection_mode == "multiproperty" or collection_mode == "single"): - c_record.add_property(name=key, value=propvalue) + c_record.add_property(name=key, value=propvalue, unit=propunit) else: if collection_mode == "list": + if propunit and c_record.get_property(key).unit and propunit != c_record.get_property(key).unit: + raise RuntimeError( + f"Property '{key}' has contradictory units: " + f"{propunit} and {c_record.get_property(key).unit}" + ) c_record.get_property(key).value.append(propvalue) + if propunit and not c_record.get_property(key).unit: + c_record.get_property(key).unit = propunit elif collection_mode == "multiproperty": - c_record.add_property(name=key, value=propvalue) + c_record.add_property(name=key, value=propvalue, unit=propunit) elif collection_mode == "single": c_record.get_property(key).value = propvalue + if propunit: + c_record.get_property(key).unit = propunit # no matter whether the record existed in the record store or not, # parents will be added when they aren't present in the record yet: diff --git a/src/doc/cfood.rst b/src/doc/cfood.rst index 51c392780b44b73964921506ad3764b95e14d5ed..a42d593035bd37d0712986c958fb8ad7ad287968 100644 --- a/src/doc/cfood.rst +++ b/src/doc/cfood.rst @@ -27,17 +27,17 @@ A single document with a converter tree specification: .. _example_1: .. code-block:: yaml - + extroot: type: Directory match: ^extroot$ subtree: DataAnalysis: - type: Directory - match: DataAnalysis - # (...) + type: Directory + match: DataAnalysis + # (...) + - A single document with a converter tree specification, but also including a custom converters section: .. 
_example_2: @@ -50,15 +50,15 @@ A single document with a converter tree specification, but also including a cust CustomConverter_2: package: mypackage.converters converter: CustomConverter2 - + extroot: type: Directory match: ^extroot$ subtree: DataAnalysis: - type: Directory - match: DataAnalysis - # (...) + type: Directory + match: DataAnalysis + # (...) @@ -78,11 +78,11 @@ two custom converters in the second document (**not recommended**, see the recom - !defmacro name: SimulationDatasetFile params: - match: null - recordtype: null - nodename: null + match: null + recordtype: null + nodename: null definition: - # (...) + # (...) --- Converters: CustomConverter_1: @@ -91,15 +91,15 @@ two custom converters in the second document (**not recommended**, see the recom CustomConverter_2: package: mypackage.converters converter: CustomConverter2 - + extroot: type: Directory match: ^extroot$ subtree: DataAnalysis: - type: Directory - match: DataAnalysis - # (...) + type: Directory + match: DataAnalysis + # (...) @@ -118,27 +118,27 @@ The **recommended way** of defining metadata, custom converters, macros and the - !defmacro name: SimulationDatasetFile params: - match: null - recordtype: null - nodename: null + match: null + recordtype: null + nodename: null definition: - # (...) + # (...) Converters: CustomConverter_1: - package: mypackage.converters - converter: CustomConverter1 + package: mypackage.converters + converter: CustomConverter1 CustomConverter_2: - package: mypackage.converters - converter: CustomConverter2 + package: mypackage.converters + converter: CustomConverter2 --- extroot: type: Directory match: ^extroot$ subtree: DataAnalysis: - type: Directory - match: DataAnalysis - # (...) + type: Directory + match: DataAnalysis + # (...) List Mode @@ -148,11 +148,73 @@ Specifying values of properties can make use of two special characters, in order create lists or multi properties instead of single values: .. 
code-block:: yaml - - Experiment1: - Measurement: +Measurement # Element in List (list is cleared before run) - *Measurement # Multi Property (properties are removed before run) - Measurement # Overwrite + + Experiment1: + Measurement: +Measurement # Element in List (list is cleared before run) + *Measurement # Multi Property (properties are removed before run) + Measurement # Overwrite + +Values and units +---------------- + +Property values can be specified as simple strings (as above) or as +dictionaries that may also specify the :ref:`collection mode <List +Mode>`. Strings starting with a "$" will be replaced by a +corresponding variable if there is any. See the :doc:`tutorials +chapter<tutorials/index>` of this documentation for more elaborate +examples on how the variable replacement works exactly. A simple +example could look like the following. + +.. code-block:: yaml + + ValueElt: + type: TextElement + match_name: ^my_prop$ + match_value: "(?P<value>.*)" # Anything in here is stored in the variable "value" + records: + MyRecord: + MyProp: $value # will be replaced by whatever is stored in the "value" variable set above. + +If not given explicitly, the collection mode will be determined from +the first character of the property value as explained above, and the +following three definitions are all equivalent: + +.. code-block:: yaml + + MyProp: +$value + +.. code-block:: yaml + + MyProp: + value: +$value + +and + +.. code-block:: yaml + + MyProp: + value: $value + collection_mode: list + + +Units of numeric values can be set by providing a property value not +as a single string, but as a dictionary with a ``value`` and a +``unit`` key. Within a converter definition this could look like the +following. + +.. 
code-block:: yaml + + ValueWithUnitElt: + type: TextElement + match_name: ^my_prop$ + match_value: "^(?P<number>\\d+\\.?\\d*)\\s+(?P<unit>.+)" # Extract value and unit from a string which + # has a number followed by at least one whitespace + # character followed by a unit. + records: + MyRecord: + MyProp: + value: $number + unit: $unit File Entities @@ -160,7 +222,7 @@ File Entities In order to use File Entities, you must set the appropriate ``role: File``. Additionally, the path and file keys have to be given, with values that set the -paths remotely and locally, respectively. You can use the variable +paths remotely and locally, respectively. You can use the variable ``<converter name>_path`` that is automatically created by converters that deal with file system related StructureElements. The file object itsself is stored in a vairable with the same name (as it is the case for other Records). @@ -169,15 +231,15 @@ in a vairable with the same name (as it is the case for other Records). .. 
code-block:: yaml somefile: - type: SimpleFile - match: ^params.*$ # macht any file that starts with "params" - records: - fileEntity: - role: File # necessary to create a File Entity - path: somefile.path # defines the path in CaosDB - file: somefile.path # path where the file is found locally - SomeRecord: - ParameterFile: $fileEntity # creates a reference to the file + type: SimpleFile + match: ^params.*$ # match any file that starts with "params" + records: + fileEntity: + role: File # necessary to create a File Entity + path: somefile.path # defines the path in CaosDB + file: somefile.path # path where the file is found locally + SomeRecord: + ParameterFile: $fileEntity # creates a reference to the file Transform Functions diff --git a/unittests/test_converters.py b/unittests/test_converters.py index f1b74d89ffd367849b85433e0e456ae8a78134a8..5b3c34cceea4e2be2b24a869cb3fc3de747ad740 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -352,6 +352,8 @@ def test_variable_replacement(): values = GeneralStore() values["a"] = 4 values["b"] = "68" + values["my_unit"] = "m" + values["cm"] = "cm" # basic values stay unchanged assert replace_variables(5, values) is 5 @@ -359,28 +361,38 @@ def test_variable_replacement(): assert replace_variables("$a", values) is 4 assert replace_variables("${b}", values) == "68" - assert handle_value("b", values) == ("b", "single") - assert handle_value("+b", values) == ("b", "list") - assert handle_value("*b", values) == ("b", "multiproperty") - assert handle_value("$b", values) == ("68", "single") - assert handle_value("+$b", values) == ("68", "list") - assert handle_value("*$b", values) == ("68", "multiproperty") + # values given as simple strings never have units + assert handle_value("b", values) == ("b", None, "single") + assert handle_value("+b", values) == ("b", None, "list") + assert handle_value("*b", values) == ("b", None, "multiproperty") + assert handle_value("$b", values) == ("68", None, 
"single") + assert handle_value("+$b", values) == ("68", None, "list") + assert handle_value("*$b", values) == ("68", None, "multiproperty") + # No units in dicts assert handle_value({"value": "b", - "collection_mode": "single"}, values) == ("b", "single") + "collection_mode": "single"}, values) == ("b", None, "single") assert handle_value({"value": "b", - "collection_mode": "list"}, values) == ("b", "list") + "collection_mode": "list"}, values) == ("b", None, "list") assert handle_value({"value": "b", - "collection_mode": "multiproperty"}, values) == ("b", "multiproperty") + "collection_mode": "multiproperty"}, values) == ("b", None, "multiproperty") assert handle_value({"value": "$b", - "collection_mode": "single"}, values) == ("68", "single") + "collection_mode": "single"}, values) == ("68", None, "single") assert handle_value({"value": "$b", - "collection_mode": "list"}, values) == ("68", "list") + "collection_mode": "list"}, values) == ("68", None, "list") assert handle_value({"value": "$b", - "collection_mode": "multiproperty"}, values) == ("68", "multiproperty") - - assert handle_value(["a", "b"], values) == (["a", "b"], "single") - assert handle_value(["$a", "$b"], values) == ([4, "68"], "single") + "collection_mode": "multiproperty"}, values) == ("68", None, "multiproperty") + + # Unit specified in the same way as value: + assert handle_value({"value": 5, "unit": "m"}, values) == (5, "m", "single") + assert handle_value({"value": 5, "unit": "${my_unit}"}, values) == (5, "m", "single") + assert handle_value({"value": "+5", "unit": "${my_unit}"}, values) == ("5", "m", "list") + assert handle_value({"value": "*5", "unit": "${my_unit}"}, + values) == ("5", "m", "multiproperty") + + assert handle_value(["a", "b"], values) == (["a", "b"], None, "single") + assert handle_value(["$a", "$b"], values) == ([4, "68"], None, "single") + assert handle_value({"value": ["$a", "$a"], "unit": "$cm"}, values) == ([4, 4], "cm", "single") def 
test_apply_transformers(converter_registry): diff --git a/unittests/test_scalars_cfood.py b/unittests/test_scalars_cfood.py index ba604fe4f5b695506bf8df9dab79fc23232c546a..4375ba199d64c3a24d07b3ea1cc4d221d967954b 100644 --- a/unittests/test_scalars_cfood.py +++ b/unittests/test_scalars_cfood.py @@ -24,15 +24,15 @@ def test_handle_value(): store = GeneralStore() # This one should work: - assert handle_value("bla", store) == ("bla", "single") + assert handle_value("bla", store) == ("bla", None, "single") # These failed: - assert handle_value(4, store) == (4, "single") - assert handle_value(4.2, store) == (4.2, "single") - assert handle_value(True, store) == (True, "single") + assert handle_value(4, store) == (4, None, "single") + assert handle_value(4.2, store) == (4.2, None, "single") + assert handle_value(True, store) == (True, None, "single") # List test: - assert handle_value([4, 3, 2], store) == ([4, 3, 2], "single") + assert handle_value([4, 3, 2], store) == ([4, 3, 2], None, "single") def test_record_structure_generation(): diff --git a/unittests/test_scanner.py b/unittests/test_scanner.py index 226b5040547f0e003729dba63622edf836552f18..da26af0b9436b622aa9e479dc24f000283cfdc32 100644 --- a/unittests/test_scanner.py +++ b/unittests/test_scanner.py @@ -316,3 +316,57 @@ def test_record_parents(): assert rec.parents[0].name == 'Stuff' # default parent stays if no parent is given on # lower levels assert len(rec.parents) == 1 + + +def test_units(): + """Test the correct setting of units.""" + crawler_definition = load_definition(UNITTESTDIR / "test_unit_cfood.yml") + converter_registry = create_converter_registry(crawler_definition) + + data = { + "value_with_unit": "1.1 m", + "array_with_units": [ + "1.1 cm", + "2.2 cm" + ] + } + records = scan_structure_elements(DictElement(name="", value=data), crawler_definition, + converter_registry) + assert len(records) == 1 + rec = records[0] + # This is hard-coded in cfood: + assert rec.get_property("may_be_overwritten") 
is not None + assert rec.get_property("may_be_overwritten").value == "12" + assert rec.get_property("may_be_overwritten").unit == "K" + # Those are set from data + assert rec.get_property("value_with_unit") is not None + assert rec.get_property("value_with_unit").value == "1.1" + assert rec.get_property("value_with_unit").unit == "m" + assert rec.get_property("list_with_unit") is not None + assert rec.get_property("list_with_unit").value == ["1.1", "2.2"] + assert rec.get_property("list_with_unit").unit == "cm" + + # Contradictory units + data = { + "array_with_units": [ + "1.1 K", + "45 W" + ] + } + with raises(RuntimeError) as rte: + records = scan_structure_elements(DictElement(name="", value=data), crawler_definition, + converter_registry) + assert "Property 'list_with_unit' has contradictory units" in str(rte.value) + + # Overwrite value and unit + data = { + "may_be_overwritten": "400 °C" + } + records = scan_structure_elements(DictElement(name="", value=data), crawler_definition, + converter_registry) + assert len(records) == 1 + rec = records[0] + # Now set from data + assert rec.get_property("may_be_overwritten") is not None + assert rec.get_property("may_be_overwritten").value == "400" + assert rec.get_property("may_be_overwritten").unit == "°C" diff --git a/unittests/test_unit_cfood.yml b/unittests/test_unit_cfood.yml new file mode 100644 index 0000000000000000000000000000000000000000..214aa49adceedce49a162f380ec453fb8597f215 --- /dev/null +++ b/unittests/test_unit_cfood.yml @@ -0,0 +1,43 @@ +--- +metadata: + crawler-version: 0.9.0 +--- +data: + type: Dict + match_name: '.*' + records: + MyRec: + may_be_overwritten: + value: "12" + unit: K + subtree: + ValueWithUnit: + type: TextElement + match_name: ^value_with_unit$ + match_value: "^(?P<number>\\d+\\.?\\d*)\\s+(?P<unit>.+)" + records: + MyRec: + value_with_unit: + value: $number + unit: $unit + MayBeOverwritten: + type: TextElement + match_name: ^may_be_overwritten$ + match_value: 
"^(?P<number>\\d+\\.?\\d*)\\s+(?P<unit>.+)" + records: + MyRec: + may_be_overwritten: + value: $number + unit: $unit + ListOfValues: + type: ListElement + match_name: ^array_with_units$ + subtree: + SingleValueWithUnit: + type: TextElement + match_value: "^(?P<number>\\d+\\.?\\d*)\\s+(?P<unit>.+)" + records: + MyRec: + list_with_unit: + value: +$number + unit: $unit