Skip to content
Snippets Groups Projects
Commit f95d8533 authored by Henrik tom Wörden
Browse files

Merge branch 'dev' into f-integrationtest

parents d2ce1ec9 55034790
Branches
Tags
2 merge requests: !53 "Release 0.1", !18 "Add integrationtests based on a real world example"
......@@ -122,6 +122,23 @@ def check_identical(record1: db.Entity, record2: db.Entity, ignore_id=False):
return True
def _resolve_datatype(prop: db.Property, remote_entity: db.Entity):
    """Set the datatype of ``prop`` from the matching entity on the server.

    The datatype is taken from ``remote_entity``:

    - role ``"Property"``: the remote entity's own ``datatype`` is used.
    - role ``"RecordType"``: the remote entity's ``name`` is used, i.e. the
      property becomes a reference to that RecordType.

    If ``prop.value`` is a list and the resolved datatype is not already a
    list datatype (``LIST<...>``), it is wrapped with ``db.LIST``.

    Parameters
    ----------
    prop
        The property whose ``datatype`` attribute is set (modified in place).
    remote_entity
        The entity with the same name as ``prop``, as retrieved by the caller.

    Returns
    -------
    The same ``prop`` object, with ``datatype`` set.

    Raises
    ------
    RuntimeError
        If ``remote_entity.role`` is neither ``"Property"`` nor
        ``"RecordType"``.
    """
    if remote_entity.role == "Property":
        datatype = remote_entity.datatype
    elif remote_entity.role == "RecordType":
        datatype = remote_entity.name
    else:
        raise RuntimeError("Cannot set datatype.")

    # Treat lists separately.
    # Check for the "LIST<" prefix rather than just "LIST": a RecordType whose
    # name merely begins with "LIST" is not a list datatype and still has to be
    # wrapped.  A missing (None) datatype is left untouched instead of failing
    # with an AttributeError on ``startswith``.
    if (isinstance(prop.value, list)
            and datatype is not None
            and not datatype.startswith("LIST<")):
        datatype = db.LIST(datatype)

    prop.datatype = datatype
    return prop
class Crawler(object):
"""
Crawler class that encapsulates crawling functions.
......@@ -465,8 +482,8 @@ class Crawler(object):
"""
for p in record.properties:
if (isinstance(p.value, list)):
lst = []
for el in p.value:
lst = []
if (isinstance(el, db.Entity) and el.id is None):
cached = self.get_identified_record_from_local_cache(
el)
......@@ -481,7 +498,7 @@ class Crawler(object):
lst.append(cached)
else:
lst.append(el)
p.value = lst
p.value = lst
if (isinstance(p.value, db.Entity) and p.value.id is None):
cached = self.get_identified_record_from_local_cache(p.value)
if cached is None:
......@@ -703,6 +720,10 @@ class Crawler(object):
@staticmethod
def execute_inserts_in_list(to_be_inserted):
for record in to_be_inserted:
for prop in record.properties:
entity = db.Entity(name=prop.name).retrieve()
prop = _resolve_datatype(prop, entity)
print("INSERT")
print(to_be_inserted)
if len(to_be_inserted) > 0:
......@@ -719,12 +740,7 @@ class Crawler(object):
if prop.id is None:
entity = db.Entity(name=prop.name).retrieve()
prop.id = entity.id
if entity.role == "Property":
prop.datatype = entity.datatype
elif entity.role == "RecordType":
prop.datatype = entity.name
else:
raise RuntimeError("Cannot set datatype.")
prop = _resolve_datatype(prop, entity)
print("UPDATE")
print(to_be_updated)
if len(to_be_updated) > 0:
......@@ -996,6 +1012,7 @@ def parse_args():
return parser.parse_args()
def main():
args = parse_args()
return crawler_main(
......@@ -1009,5 +1026,6 @@ def main():
args.prefix
)
if __name__ == "__main__":
sys.exit(main())
......@@ -78,6 +78,7 @@
<Property id="248" name="identifier" description="identifier of the experiment" datatype="TEXT" importance="FIX" flag="inheritance:FIX">TimeOfFlight</Property>
<Property id="250" name="project" datatype="Project" importance="FIX" flag="inheritance:FIX">287</Property>
<Property id="249" name="responsible" datatype="LIST&lt;Person&gt;" importance="FIX" flag="inheritance:FIX">
<Value>289</Value>
<Value>288</Value>
</Property>
</Record>
......
......@@ -203,9 +203,10 @@ def test_json_converter(converter_registry):
assert children[2].name == "archived"
assert children[2].value.__class__ == bool
assert children[3].__class__ == DictDictElement
assert children[3].name == "coordinator"
assert children[3].value.__class__ == dict
assert children[3].__class__ == DictListElement
assert children[3].name == "Person"
assert children[3].value.__class__ == list
assert len(children[3].value) == 2
assert children[4].__class__ == DictTextElement
assert children[4].name == "start_date"
......
......@@ -3,13 +3,56 @@ JSONTest: # name of the converter
type: JSONFile
match: '(.*)'
validate: ./testjson.schema.json
subtree:
element: # name of the first subtree element which is a converter
records:
Project: # this is an identifiable in this case
parents:
- Project # not needed as the name is equivalent
subtree:
name_element:
type: DictTextElement
match_name: "name"
match_value: "(?P<name>.*)"
records:
Project:
name: $name
url_element: # name of the first subtree element which is a converter
type: DictTextElement
match_value: "(?P<url>.*)"
match_name: "url"
records:
Project: # this is an identifiable in this case
parents:
- Project # not needed as the name is equivalent
url: $url
Project:
url: $url
persons_element:
type: DictListElement
match_name: "Person"
subtree:
person_element:
type: Dict
records:
Person:
parents:
- Person
Project:
Person: +$Person
subtree:
firstname_element:
type: DictTextElement
match_name: "firstname"
match_value: "(?P<firstname>.*)"
records:
Person:
firstname: $firstname
lastname_element:
type: DictTextElement
match_name: "lastname"
match_value: "(?P<lastname>.*)"
records:
Person:
lastname: $lastname
email_element:
type: DictTextElement
match_name: "email"
match_value: "(?P<email>.*)"
records:
Person:
email: $email
......@@ -2,11 +2,18 @@
"name": "DEMO",
"projectId": 10002,
"archived": false,
"coordinator": {
"firstname": "Miri",
"lastname": "Mueller",
"email": "miri.mueller@science.de"
},
"Person": [
{
"firstname": "Miri",
"lastname": "Mueller",
"email": "miri.mueller@science.de"
},
{
"firstname": "Mara",
"lastname": "Mueller",
"email": "mara.mueller@science.de"
}
],
"start_date": "2022-03-01",
"candidates": ["Mouse", "Penguine"],
"rvalue": 0.4444,
......
......@@ -11,25 +11,28 @@
"archived": {
"type": "boolean"
},
"coordinator": {
"type": "object",
"properties": {
"firstname": {
"type": "string"
},
"lastname": {
"type": "string"
},
"email": {
"type": "string"
"Person": {
"type": "array",
"items": {
"type": "object",
"properties": {
"firstname": {
"type": "string"
},
"lastname": {
"type": "string"
},
"email": {
"type": "string"
}
},
"required": [
"firstname",
"lastname",
"email"
],
"additionalProperties": true
}
},
"required": [
"firstname",
"lastname",
"email"
],
"additionalProperties": true
},
"start_date": {
"type": "string",
......@@ -51,7 +54,7 @@
"required": [
"name",
"projectId",
"coordinator"
"Person"
],
"additionalProperties": false
}
......@@ -31,6 +31,8 @@ import os
from pytest import raises
import caosdb as db
from newcrawler.converters import JSONFileConverter, DictConverter
from newcrawler.crawl import Crawler
from newcrawler.structure_elements import File, JSONFile
......@@ -47,19 +49,27 @@ def test_json():
# Load and register converter packages:
converter_registry = crawler.load_converters(crawler_definition)
crawler.start_crawling(
records = crawler.start_crawling(
JSONFile(os.path.basename(json_file_path), json_file_path),
crawler_definition,
converter_registry
)
subd = crawler.debug_tree
subc = crawler.debug_metadata
#print(json.dumps(subd, indent=3))
print(subd)
print(subc)
rec = [r for r in records if r.name == "DEMO"]
assert len(rec) == 1
rec = rec[0]
assert len(rec.parents) == 1
assert rec.parents[0].name == "Project"
assert rec.get_property("url") is not None
assert rec.get_property("url").value == "https://site.de/index.php/"
assert rec.get_property("Person") is not None
assert isinstance(rec.get_property("Person").value, list)
assert len(rec.get_property("Person").value) == 2
def test_broken_validation():
crawler_definition_path = rfp("broken_cfoods", "broken_validation_path.yml")
crawler_definition_path = rfp(
"broken_cfoods", "broken_validation_path.yml")
crawler = Crawler()
with raises(FileNotFoundError) as err:
crawler_definition = crawler.load_definition(crawler_definition_path)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment