diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index a0670eb149f82d348a515df4cc143f91e24e3e00..946637d27ee4ed6a89dcd1af1969cb5115cae71b 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -364,12 +364,14 @@ class Crawler(object): break for key in comp[0]["properties"]: for attribute in ("datatype", "importance", "unit"): - if (attribute in comp[0]["properties"][key] and - comp[0]["properties"][key][attribute] is not None and - comp[0]["properties"][key][attribute] != - comp[1]["properties"][key][attribute]): - identical = False - break + + if attribute in comp[0]["properties"][key]: + attr_val = comp[0]["properties"][key][attribute] + other_attr_val = (comp[1]["properties"][key][attribute] + if attribute in comp[1]["properties"][key] else None) + if attr_val is not None and attr_val != other_attr_val: + identical = False + break if "value" in comp[0]["properties"][key]: identical = False diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py index 01eb55bbb3ec4b54a49633ef839b2ed99ab5b398..cc57bb672c4872d72f4011858e0f49200fb77101 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/newcrawler/identifiable_adapters.py @@ -23,9 +23,32 @@ # ** end header # +from datetime import datetime import caosdb as db from abc import abstractmethod -from .utils import get_value, has_parent +from .utils import has_parent + + +def convert_value(value): + """ Returns a string representation of the value that is suitable to be used in the query + looking for the identified record. + + Parameters + ---------- + value : The value that shall be converted to its query string representation. 
+ + Returns + ------- + out : the string representation of the value + + """ + + if isinstance(value, db.Entity): + return str(value.id) + elif isinstance(value, datetime): + return value.isoformat() + else: + return str(value) class IdentifiableAdapter(object): @@ -67,7 +90,6 @@ class IdentifiableAdapter(object): if len(ident.parents) != 1: raise RuntimeError("Multiple parents for identifiables not supported.") - # TODO prevent multiple parents query_string = "FIND Record " + ident.get_parents()[0].name query_string += " WITH " @@ -76,17 +98,16 @@ class IdentifiableAdapter(object): "The identifiable must have features to identify it.") if ident.name is not None: - query_string += "name='{}' AND".format(ident.name) + query_string += "name='{}' AND ".format(ident.name) for p in ident.get_properties(): - # TODO this is badly wrong :-| - - if p.datatype is not None and p.datatype.startswith("LIST<"): + if isinstance(p.value, list): for v in p.value: - query_string += ("references " + str(v.id if isinstance(v, db.Entity) - else v) + " AND ") + query_string += ("'" + p.name + "'='" + + convert_value(v) + "' AND ") else: - query_string += ("'" + p.name + "'='" + str(get_value(p)) + "' AND ") + query_string += ("'" + p.name + "'='" + + convert_value(p.value) + "' AND ") # remove the last AND return query_string[:-4] diff --git a/src/newcrawler/utils.py b/src/newcrawler/utils.py index c60b7f871db32d66c12781e5f0cfb246bc41c8fe..35fefe6719d579bc8e8a39489f8a872c0cca11b8 100644 --- a/src/newcrawler/utils.py +++ b/src/newcrawler/utils.py @@ -40,27 +40,3 @@ def has_parent(entity: db.Entity, name: str): if parent.name == name: return True return False - - -def get_value(prop): - """ Returns the value of a Property - - This function is taken from the old crawler: - caosdb-advanced-user-tools/src/caosadvancedtools/crawler.py - - Parameters - ---------- - prop : The property of which the value shall be returned. 
- - Returns - ------- - out : The value of the property; if the value is an entity, its ID. - - """ - - if isinstance(prop.value, db.Entity): - return prop.value.id - elif isinstance(prop.value, datetime): - return prop.value.isoformat() - else: - return prop.value diff --git a/unittests/test_converters.py b/unittests/test_converters.py index ab8107398a3fbd2a27e3d174d5bc892ec7d8af1e..3ec1764631c4de7b5a7cc247cc559d0dc5f5939c 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -29,6 +29,7 @@ test the converters module from newcrawler.converters import Converter from newcrawler.stores import GeneralStore +from newcrawler.converters import MarkdownFileConverter from newcrawler.structure_elements import Directory from test_tool import rfp @@ -65,3 +66,57 @@ def testDirectoryConverter(): assert len(elements) == 1 assert isinstance(elements[0], Directory) assert elements[0].name == "examples_article" + + +def test_markdown_converter(): + test_readme = File("README.md", rfp( + "test_directories", "examples_article", "DataAnalysis", + "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md")) + + converter = MarkdownFileConverter({ + "match": "(.*)" + }, "TestMarkdownFileConverter") + + m = converter.match(File("test_tool.py", rfp( + "test_tool.py"))) + assert m is None + + m = converter.match(test_readme) + assert m is not None + assert m.__class__ == dict + assert len(m) == 0 + + converter = MarkdownFileConverter({ + "match": "README.md" + }, "TestMarkdownFileConverter") + + m = converter.match(test_readme) + assert m is not None + assert len(m) == 0 + + children = converter.create_children(None, test_readme) + assert len(children) == 5 + assert children[1].__class__ == DictTextElement + assert children[1].name == "description" + assert children[1].value.__class__ == str + + assert children[0].__class__ == DictTextElement + assert children[0].name == "responsible" + assert children[0].value.__class__ == str + + test_readme2 = 
File("README.md", rfp("test_directories", "examples_article", + "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md")) + + m = converter.match(test_readme2) + assert m is not None + assert len(m) == 0 + + children = converter.create_children(None, test_readme2) + assert len(children) == 2 + assert children[1].__class__ == DictTextElement + assert children[1].name == "description" + assert children[1].value.__class__ == str + + assert children[0].__class__ == DictListElement + assert children[0].name == "responsible" + assert children[0].value.__class__ == list diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py new file mode 100644 index 0000000000000000000000000000000000000000..085ec4379fe9a6aca97f6fb2ee8a585c95560ac6 --- /dev/null +++ b/unittests/test_identifiable_adapters.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2021 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2021 Henrik tom Wörden <h.tomwoerden@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +""" +test identifiable_adapters module +""" + +from datetime import datetime +from newcrawler.identifiable_adapters import IdentifiableAdapter +import caosdb as db + + +def test_create_query_for_identifiable(): + query = IdentifiableAdapter.create_query_for_identifiable( + db.Record().add_parent("Person") + .add_property("first_name", value="A") + .add_property("last_name", value="B")) + assert query.lower() == "find record person with 'first_name'='a' and 'last_name'='b' " + + query = IdentifiableAdapter.create_query_for_identifiable( + db.Record(name="A").add_parent("B") + .add_property("c", value="c") + .add_property("d", value=5) + .add_property("e", value=5.5) + .add_property("f", value=datetime(2020, 10, 10)) + .add_property("g", value=True) + .add_property("h", value=db.Record(id=1111)) + .add_property("i", value=[2222, db.Record(id=3333)])) + assert (query.lower() == "find record b with name='a' and 'c'='c' and 'd'='5' and 'e'='5.5'" + " and 'f'='2020-10-10t00:00:00' and 'g'='true' and 'h'='1111' and 'i'='2222' and " + "'i'='3333' ") diff --git a/unittests/test_tool.py b/unittests/test_tool.py index 23912ff133fdb7dceb1805907d24a57578fc63ee..1af55808edfda547f097d5f1d4acd18bf5e56ec3 100755 --- a/unittests/test_tool.py +++ b/unittests/test_tool.py @@ -4,7 +4,6 @@ # A. 
Schlemmer, 06/2021 from newcrawler import Crawler -from newcrawler.converters import MarkdownFileConverter from newcrawler.structure_elements import File, DictTextElement, DictListElement from newcrawler.identifiable_adapters import IdentifiableAdapter, LocalStorageIdentifiableAdapter from functools import partial @@ -69,7 +68,8 @@ def ident(crawler): .add_property(name="identifier")) return ident -def test_crawler(crawler): + +def test_record_structure_generation(crawler): subd = crawler.debug_tree[dircheckstr("DataAnalysis")] subc = crawler.debug_metadata["copied"][dircheckstr("DataAnalysis")] assert len(subd) == 2 @@ -138,59 +138,6 @@ def test_crawler(crawler): assert subc[0]["identifier"] is False -def test_markdown_converter(): - test_readme = File("README.md", rfp( - "test_directories", "examples_article", "DataAnalysis", - "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md")) - - converter = MarkdownFileConverter({ - "match": "(.*)" - }, "TestMarkdownFileConverter") - - m = converter.match(File("test_tool.py", rfp( - "test_tool.py"))) - assert m is None - - m = converter.match(test_readme) - assert m is not None - assert m.__class__ == dict - assert len(m) == 0 - - converter = MarkdownFileConverter({ - "match": "README.md" - }, "TestMarkdownFileConverter") - - m = converter.match(test_readme) - assert m is not None - assert len(m) == 0 - - children = converter.create_children(None, test_readme) - assert len(children) == 5 - assert children[1].__class__ == DictTextElement - assert children[1].name == "description" - assert children[1].value.__class__ == str - - assert children[0].__class__ == DictTextElement - assert children[0].name == "responsible" - assert children[0].value.__class__ == str - - test_readme2 = File("README.md", rfp("test_directories", "examples_article", - "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md")) - - m = converter.match(test_readme2) - assert m is not None - assert len(m) == 0 - 
- children = converter.create_children(None, test_readme2) - assert len(children) == 2 - assert children[1].__class__ == DictTextElement - assert children[1].name == "description" - assert children[1].value.__class__ == str - - assert children[0].__class__ == DictListElement - assert children[0].name == "responsible" - assert children[0].value.__class__ == list - # def prepare_test_record_file(): # ident = LocalStorageIdentifiableAdapter() # crawler = Crawler(debug=True, identifiableAdapter=ident) @@ -217,8 +164,8 @@ def test_ambigious_records(crawler, ident): def test_crawler_update_list(crawler, ident): crawler.copy_attributes = Mock() - # If the following assertions fail, that is a hint, that the test file records.xml is - # incorrect: + # If the following assertions fail, that is a hint that the test file records.xml has changed + # and this test needs to be updated: assert len(ident.get_records()) == 18 assert len([r for r in ident.get_records() if r.parents[0].name == "Person"]) == 5 assert len([r for r in ident.get_records() if r.parents[0].name == "Measurement"]) == 11 @@ -320,14 +267,6 @@ def test_identifiable_update3(crawler, ident): assert len(updl) == 2 -def test_identifiable_adapter(): - query = IdentifiableAdapter.create_query_for_identifiable( - db.Record().add_parent("Person") - .add_property("first_name", value="A") - .add_property("last_name", value="B")) - assert query.lower() == "find record person with 'first_name'='a' and 'last_name'='b' " - - @pytest.mark.xfail def test_identifiable_adapter_no_identifiable(crawler, ident): del ident._registered_identifiables["Person"]