Skip to content
Snippets Groups Projects
Commit f95d8533 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

Merge branch 'dev' into f-integrationtest

parents d2ce1ec9 55034790
No related branches found
No related tags found
2 merge requests: !53 "Release 0.1", !18 "Add integrationtests based on a real world example"
...@@ -122,6 +122,23 @@ def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False): ...@@ -122,6 +122,23 @@ def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False):
return True return True
def _resolve_datatype(prop: db.Property, remote_entity: db.Entity):
    """Set the datatype of ``prop`` based on the entity retrieved for it.

    The datatype is taken from ``remote_entity``:

    - role ``"Property"``: the remote entity's own ``datatype`` is used.
    - role ``"RecordType"``: the property is a reference, so the
      RecordType's ``name`` is used as the datatype.

    If the value of ``prop`` is a Python list and the resolved datatype is
    not already a list datatype, it is wrapped with ``db.LIST``.

    Parameters
    ----------
    prop : db.Property
        The property whose ``datatype`` attribute is set (modified in place).
    remote_entity : db.Entity
        The entity that was retrieved for the name of ``prop``.

    Returns
    -------
    db.Property
        The same ``prop`` object, with ``datatype`` set.

    Raises
    ------
    RuntimeError
        If ``remote_entity`` is neither a Property nor a RecordType.
    """
    if remote_entity.role == "Property":
        datatype = remote_entity.datatype
    elif remote_entity.role == "RecordType":
        datatype = remote_entity.name
    else:
        raise RuntimeError("Cannot set datatype.")

    # Treat lists separately.
    # List datatypes are spelled "LIST<...>". Checking for the "LIST<" prefix
    # (instead of just "LIST") avoids mistaking a RecordType whose name merely
    # begins with "LIST" (e.g. "LISTING") for an already-wrapped list type.
    if isinstance(prop.value, list) and not datatype.startswith("LIST<"):
        datatype = db.LIST(datatype)

    prop.datatype = datatype
    return prop
class Crawler(object): class Crawler(object):
""" """
Crawler class that encapsulates crawling functions. Crawler class that encapsulates crawling functions.
...@@ -465,8 +482,8 @@ class Crawler(object): ...@@ -465,8 +482,8 @@ class Crawler(object):
""" """
for p in record.properties: for p in record.properties:
if (isinstance(p.value, list)): if (isinstance(p.value, list)):
for el in p.value:
lst = [] lst = []
for el in p.value:
if (isinstance(el, db.Entity) and el.id is None): if (isinstance(el, db.Entity) and el.id is None):
cached = self.get_identified_record_from_local_cache( cached = self.get_identified_record_from_local_cache(
el) el)
...@@ -703,6 +720,10 @@ class Crawler(object): ...@@ -703,6 +720,10 @@ class Crawler(object):
@staticmethod @staticmethod
def execute_inserts_in_list(to_be_inserted): def execute_inserts_in_list(to_be_inserted):
for record in to_be_inserted:
for prop in record.properties:
entity = db.Entity(name=prop.name).retrieve()
prop = _resolve_datatype(prop, entity)
print("INSERT") print("INSERT")
print(to_be_inserted) print(to_be_inserted)
if len(to_be_inserted) > 0: if len(to_be_inserted) > 0:
...@@ -719,12 +740,7 @@ class Crawler(object): ...@@ -719,12 +740,7 @@ class Crawler(object):
if prop.id is None: if prop.id is None:
entity = db.Entity(name=prop.name).retrieve() entity = db.Entity(name=prop.name).retrieve()
prop.id = entity.id prop.id = entity.id
if entity.role == "Property": prop = _resolve_datatype(prop, entity)
prop.datatype = entity.datatype
elif entity.role == "RecordType":
prop.datatype = entity.name
else:
raise RuntimeError("Cannot set datatype.")
print("UPDATE") print("UPDATE")
print(to_be_updated) print(to_be_updated)
if len(to_be_updated) > 0: if len(to_be_updated) > 0:
...@@ -996,6 +1012,7 @@ def parse_args(): ...@@ -996,6 +1012,7 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def main(): def main():
args = parse_args() args = parse_args()
return crawler_main( return crawler_main(
...@@ -1009,5 +1026,6 @@ def main(): ...@@ -1009,5 +1026,6 @@ def main():
args.prefix args.prefix
) )
if __name__ == "__main__": if __name__ == "__main__":
sys.exit(main()) sys.exit(main())
...@@ -78,6 +78,7 @@ ...@@ -78,6 +78,7 @@
<Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">TimeOfFlight</Property> <Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">TimeOfFlight</Property>
<Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property> <Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property>
<Property id="249" name="responsible" datatype="LIST&lt;Person&gt;" importance="FIX" flag="inheritance:FIX"> <Property id="249" name="responsible" datatype="LIST&lt;Person&gt;" importance="FIX" flag="inheritance:FIX">
<Value>289</Value>
<Value>288</Value> <Value>288</Value>
</Property> </Property>
</Record> </Record>
......
...@@ -203,9 +203,10 @@ def test_json_converter(converter_registry): ...@@ -203,9 +203,10 @@ def test_json_converter(converter_registry):
assert children[2].name == "archived" assert children[2].name == "archived"
assert children[2].value.__class__ == bool assert children[2].value.__class__ == bool
assert children[3].__class__ == DictDictElement assert children[3].__class__ == DictListElement
assert children[3].name == "coordinator" assert children[3].name == "Person"
assert children[3].value.__class__ == dict assert children[3].value.__class__ == list
assert len(children[3].value) == 2
assert children[4].__class__ == DictTextElement assert children[4].__class__ == DictTextElement
assert children[4].name == "start_date" assert children[4].name == "start_date"
......
...@@ -3,13 +3,56 @@ JSONTest: # name of the converter ...@@ -3,13 +3,56 @@ JSONTest: # name of the converter
type: JSONFile type: JSONFile
match: '(.*)' match: '(.*)'
validate: ./testjson.schema.json validate: ./testjson.schema.json
records:
Project: # this is an identifiable in this case
parents:
- Project # not needed as the name is equivalent
subtree: subtree:
element: # name of the first subtree element which is a converter name_element:
type: DictTextElement
match_name: "name"
match_value: "(?P<name>.*)"
records:
Project:
name: $name
url_element: # name of the first subtree element which is a converter
type: DictTextElement type: DictTextElement
match_value: "(?P<url>.*)" match_value: "(?P<url>.*)"
match_name: "url" match_name: "url"
records: records:
Project: # this is an identifiable in this case Project:
parents:
- Project # not needed as the name is equivalent
url: $url url: $url
persons_element:
type: DictListElement
match_name: "Person"
subtree:
person_element:
type: Dict
records:
Person:
parents:
- Person
Project:
Person: +$Person
subtree:
firstname_element:
type: DictTextElement
match_name: "firstname"
match_value: "(?P<firstname>.*)"
records:
Person:
firstname: $firstname
lastname_element:
type: DictTextElement
match_name: "lastname"
match_value: "(?P<lastname>.*)"
records:
Person:
lastname: $lastname
email_element:
type: DictTextElement
match_name: "email"
match_value: "(?P<email>.*)"
records:
Person:
email: $email
...@@ -2,11 +2,18 @@ ...@@ -2,11 +2,18 @@
"name": "DEMO", "name": "DEMO",
"projectId": 10002, "projectId": 10002,
"archived": false, "archived": false,
"coordinator": { "Person": [
{
"firstname": "Miri", "firstname": "Miri",
"lastname": "Mueller", "lastname": "Mueller",
"email": "miri.mueller@science.de" "email": "miri.mueller@science.de"
}, },
{
"firstname": "Mara",
"lastname": "Mueller",
"email": "mara.mueller@science.de"
}
],
"start_date": "2022-03-01", "start_date": "2022-03-01",
"candidates": ["Mouse", "Penguine"], "candidates": ["Mouse", "Penguine"],
"rvalue": 0.4444, "rvalue": 0.4444,
......
...@@ -11,7 +11,9 @@ ...@@ -11,7 +11,9 @@
"archived": { "archived": {
"type": "boolean" "type": "boolean"
}, },
"coordinator": { "Person": {
"type": "array",
"items": {
"type": "object", "type": "object",
"properties": { "properties": {
"firstname": { "firstname": {
...@@ -30,6 +32,7 @@ ...@@ -30,6 +32,7 @@
"email" "email"
], ],
"additionalProperties": true "additionalProperties": true
}
}, },
"start_date": { "start_date": {
"type": "string", "type": "string",
...@@ -51,7 +54,7 @@ ...@@ -51,7 +54,7 @@
"required": [ "required": [
"name", "name",
"projectId", "projectId",
"coordinator" "Person"
], ],
"additionalProperties": false "additionalProperties": false
} }
...@@ -31,6 +31,8 @@ import os ...@@ -31,6 +31,8 @@ import os
from pytest import raises from pytest import raises
import caosdb as db
from newcrawler.converters import JSONFileConverter, DictConverter from newcrawler.converters import JSONFileConverter, DictConverter
from newcrawler.crawl import Crawler from newcrawler.crawl import Crawler
from newcrawler.structure_elements import File, JSONFile from newcrawler.structure_elements import File, JSONFile
...@@ -47,19 +49,27 @@ def test_json(): ...@@ -47,19 +49,27 @@ def test_json():
# Load and register converter packages: # Load and register converter packages:
converter_registry = crawler.load_converters(crawler_definition) converter_registry = crawler.load_converters(crawler_definition)
crawler.start_crawling( records = crawler.start_crawling(
JSONFile(os.path.basename(json_file_path), json_file_path), JSONFile(os.path.basename(json_file_path), json_file_path),
crawler_definition, crawler_definition,
converter_registry converter_registry
) )
subd = crawler.debug_tree
subc = crawler.debug_metadata rec = [r for r in records if r.name == "DEMO"]
#print(json.dumps(subd, indent=3)) assert len(rec) == 1
print(subd) rec = rec[0]
print(subc) assert len(rec.parents) == 1
assert rec.parents[0].name == "Project"
assert rec.get_property("url") is not None
assert rec.get_property("url").value == "https://site.de/index.php/"
assert rec.get_property("Person") is not None
assert isinstance(rec.get_property("Person").value, list)
assert len(rec.get_property("Person").value) == 2
def test_broken_validation(): def test_broken_validation():
crawler_definition_path = rfp("broken_cfoods", "broken_validation_path.yml") crawler_definition_path = rfp(
"broken_cfoods", "broken_validation_path.yml")
crawler = Crawler() crawler = Crawler()
with raises(FileNotFoundError) as err: with raises(FileNotFoundError) as err:
crawler_definition = crawler.load_definition(crawler_definition_path) crawler_definition = crawler.load_definition(crawler_definition_path)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment