Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
2cbeb584
Commit
2cbeb584
authored
1 year ago
by
Florian Spreckelsen
Browse files
Options
Downloads
Patches
Plain Diff
ENH: Implement property from dict generation
parent
7e7eeddc
Branches
Branches containing commit
Tags
Tags containing commit
2 merge requests
!178
FIX: #96 Better error output for crawl.py script.
,
!163
F dict heuristic
Pipeline
#49210
failed
1 year ago
Stage: info
Stage: setup
Stage: cert
Stage: style
Stage: test
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/caoscrawler/converters.py
+123
-18
123 additions, 18 deletions
src/caoscrawler/converters.py
unittests/test_converters.py
+27
-19
27 additions, 19 deletions
unittests/test_converters.py
with
150 additions
and
37 deletions
src/caoscrawler/converters.py
+
123
−
18
View file @
2cbeb584
...
@@ -811,43 +811,148 @@ class DictElementConverter(Converter):
...
@@ -811,43 +811,148 @@ class DictElementConverter(Converter):
return
match_name_and_value
(
self
.
definition
,
element
.
name
,
element
.
value
)
return
match_name_and_value
(
self
.
definition
,
element
.
name
,
element
.
value
)
class
Heuristic
DictConverter
(
DictElementConverter
):
class
PropertiesFrom
DictConverter
(
DictElementConverter
):
"""
Extend the :py:class:`DictElementConverter` by a heuristic to set
"""
Extend the :py:class:`DictElementConverter` by a heuristic to set
property values from the dictionary keys.
property values from the dictionary keys.
"""
"""
def
_validate_definition
(
self
,
definition
:
dict
,
name
:
str
):
def
_validate_definition
(
self
):
if
"
record_from_dict
"
not
in
definition
or
definition
[
"
record_from_dict
"
]
is
None
:
if
"
record_from_dict
"
not
in
self
.
definition
or
self
.
definition
[
"
record_from_dict
"
]
is
None
:
raise
ValueError
(
raise
ValueError
(
"
You need to specify the (root) record, the properties of
"
"
You need to specify the (root) record, the properties of
"
f
"
which will be set from the dict in converter
{
name
}
.
"
f
"
which will be set from the dict in converter
{
self
.
name
}
.
"
)
)
def
__init__
(
self
,
definition
:
dict
,
name
:
str
,
converter_registry
:
dict
):
if
not
"
variable_name
"
in
self
.
definition
[
"
record_from_dict
"
]
or
not
self
.
definition
[
"
record_from_dict
"
][
"
variable_name
"
]:
raise
ValueError
(
f
"
The root record in converter
{
self
.
name
}
needs to have a
"
"
`variable_name` by which it is accessed in the subtree.
"
)
def
__init__
(
self
,
definition
:
dict
,
name
:
str
,
converter_registry
:
dict
,
referenced_record_callback
:
Optional
[
callable
]
=
None
):
_validate_definition
(
definition
)
super
().
__init__
(
definition
,
name
,
converter_registry
)
super
().
__init__
(
definition
,
name
,
converter_registry
)
self
.
_validate_definition
()
self
.
referenced_record_callback
=
referenced_record_callback
def
_recursively_create_records
(
self
,
subdict
:
dict
,
root_record
:
db
.
Record
,
root_rec_name
:
str
,
values
:
GeneralStore
,
records
:
RecordStore
,
referenced_record_callback
:
callable
,
keys_modified
:
list
=
[]
):
"""
Create a record form the given `subdict` and recursively create referenced records.
"""
blacklisted_keys
=
self
.
definition
[
"
record_from_dict
"
][
"
properties_blacklist
"
]
if
"
properties_blacklist
"
in
self
.
definition
[
"
record_from_dict
"
]
else
[]
special_references
=
self
.
definition
[
"
record_from_dict
"
][
"
references
"
]
if
"
references
"
in
self
.
definition
[
"
record_from_dict
"
]
else
[
]
for
key
,
value
in
subdict
.
items
():
if
key
in
blacklisted_keys
:
# We ignore this in the automated property generation
continue
if
isinstance
(
value
,
list
):
if
not
any
([
isinstance
(
val
,
dict
)
for
val
in
value
]):
# no dict in list, i.e., no references, so this is simple
root_record
.
add_property
(
name
=
key
,
value
=
value
)
else
:
if
not
all
([
isinstance
(
val
,
dict
)
for
val
in
value
]):
# if this is not an error (most probably it is), this
# needs to be handled manually for now.
raise
ValueError
(
f
"
{
key
}
in
{
subdict
}
contains a mixed list of references and scalars.
"
)
ref_recs
=
[]
for
ii
,
ref_dict
in
enumerate
(
value
):
ref_rec
=
db
.
Record
()
ref_var_name
=
f
"
{
root_rec_name
}
.
{
key
}
.
{
ii
}
"
if
key
in
special_references
:
for
par
in
special_references
[
key
][
"
parents
"
]:
ref_rec
.
add_parent
(
par
)
else
:
ref_rec
.
add_parent
(
key
)
records
[
ref_var_name
]
=
ref_rec
values
[
ref_var_name
]
=
ref_rec
keys_modified
,
ref_rec
=
self
.
_recursively_create_records
(
subdict
=
ref_dict
,
root_record
=
ref_rec
,
root_rec_name
=
ref_var_name
,
values
=
values
,
records
=
records
,
referenced_record_callback
=
referenced_record_callback
,
keys_modified
=
keys_modified
,
)
ref_recs
.
append
(
ref_rec
)
root_record
.
add_property
(
name
=
key
,
value
=
ref_recs
)
elif
isinstance
(
value
,
dict
):
ref_rec
=
db
.
Record
()
ref_var_name
=
f
"
{
root_rec_name
}
.
{
key
}
"
if
key
in
special_references
:
for
par
in
special_references
[
key
][
"
parents
"
]:
ref_rec
.
add_parent
(
par
)
else
:
ref_rec
.
add_parent
(
key
)
records
[
ref_var_name
]
=
ref_rec
values
[
ref_var_name
]
=
ref_rec
keys_modified
,
ref_rec
=
self
.
_recursively_create_records
(
subdict
=
value
,
root_record
=
ref_rec
,
root_rec_name
=
ref_var_name
,
values
=
values
,
records
=
records
,
referenced_record_callback
=
referenced_record_callback
,
keys_modified
=
keys_modified
)
root_record
.
add_property
(
key
,
ref_rec
)
else
:
if
key
.
lower
()
in
SPECIAL_PROPERTIES
:
setattr
(
root_record
,
key
.
lower
(),
value
)
else
:
root_record
.
add_property
(
name
=
key
,
value
=
value
)
keys_modified
.
append
((
root_rec_name
,
key
))
def
create_records
(
self
,
values
:
GeneralStore
,
records
:
RecordStore
,
if
referenced_record_callback
:
element
:
StructureElement
,
referenced_record_callback
:
root_record
=
referenced_record_callback
(
root_record
)
Optional
[
callable
]
=
None
):
keys_modified
=
[]
return
keys_modified
,
root_record
def
_insert_into_stores
(
rec
:
db
.
Record
,
rec_name
:
str
):
def
create_records
(
self
,
values
:
GeneralStore
,
records
:
RecordStore
,
element
:
StructureElement
):
records
[
rec_name
]
=
rec
keys_modified
=
[]
values
[
rec_name
]
=
rec
def
_create_or_return_record
(
rec_name
:
str
,
parent_names
:
Optional
[
Union
[
str
,
List
[
str
]]]
=
None
):
rfd
=
self
.
definition
[
"
record_from_dict
"
]
if
rfd
[
"
variable_name
"
]
not
in
records
:
rec
=
db
.
Record
()
if
"
name
"
in
rfd
:
rec
.
name
=
rfd
[
"
name
"
]
if
"
parents
"
in
rfd
:
for
par
in
rfd
[
"
parents
"
]:
rec
.
add_parent
(
par
)
else
:
rec
.
add_parent
(
rfd
[
"
variable_name
"
])
records
[
rfd
[
"
variable_name
"
]]
=
rec
values
[
rfd
[
"
variable_name
"
]]
=
rec
if
parent_names
is
None
:
else
:
parent_names
=
[
rec_name
]
rec
=
records
[
rfd
[
"
variable_name
"
]]
elif
not
isinstance
(
parent_names
,
list
):
parent_names
=
[
parent_names
]
keys_modified
,
rec
=
self
.
_recursively_create_records
(
subdict
=
element
.
value
,
root_record
=
rec
,
root_rec_name
=
rfd
[
"
variable_name
"
],
values
=
values
,
records
=
records
,
referenced_record_callback
=
self
.
referenced_record_callback
,
keys_modified
=
keys_modified
,
)
keys_modified
.
extend
(
super
().
create_records
(
keys_modified
.
extend
(
super
().
create_records
(
values
=
values
,
records
=
records
,
element
=
element
))
values
=
values
,
records
=
records
,
element
=
element
))
...
...
This diff is collapsed.
Click to expand it.
unittests/test_converters.py
+
27
−
19
View file @
2cbeb584
...
@@ -28,12 +28,15 @@ import importlib
...
@@ -28,12 +28,15 @@ import importlib
import
json
import
json
import
logging
import
logging
import
os
import
os
import
pytest
import
sys
import
sys
import
yaml
from
itertools
import
product
from
itertools
import
product
from
pathlib
import
Path
from
pathlib
import
Path
import
pytest
import
linkahead
as
db
import
yaml
from
caoscrawler.converters
import
(
Converter
,
ConverterValidationError
,
from
caoscrawler.converters
import
(
Converter
,
ConverterValidationError
,
DateElementConverter
,
DictElementConverter
,
DateElementConverter
,
DictElementConverter
,
DictIntegerElementConverter
,
DictIntegerElementConverter
,
...
@@ -697,6 +700,7 @@ def test_properties_from_dict_basic(converter_registry):
...
@@ -697,6 +700,7 @@ def test_properties_from_dict_basic(converter_registry):
"
a
"
:
5
,
"
a
"
:
5
,
"
b
"
:
[
"
a
"
,
"
b
"
,
"
c
"
],
"
b
"
:
[
"
a
"
,
"
b
"
,
"
c
"
],
"
scalar_ref
"
:
{
"
scalar_ref
"
:
{
"
name
"
:
"
Scalar Ref
"
,
"
a
"
:
23
,
"
a
"
:
23
,
"
blacklisted_int
"
:
42
"
blacklisted_int
"
:
42
},
},
...
@@ -718,7 +722,7 @@ def test_properties_from_dict_basic(converter_registry):
...
@@ -718,7 +722,7 @@ def test_properties_from_dict_basic(converter_registry):
"
blacklisted_ref
"
:
{
"
blacklisted_ref
"
:
{
"
a
"
:
25
"
a
"
:
25
},
},
"
author
"
:
{
"
author
s
"
:
{
"
full_name
"
:
"
Some Author
"
"
full_name
"
:
"
Some Author
"
}
}
})
})
...
@@ -726,6 +730,9 @@ def test_properties_from_dict_basic(converter_registry):
...
@@ -726,6 +730,9 @@ def test_properties_from_dict_basic(converter_registry):
assert
"
MyRec
"
in
records
assert
"
MyRec
"
in
records
my_rec
=
records
[
"
MyRec
"
]
my_rec
=
records
[
"
MyRec
"
]
assert
isinstance
(
my_rec
,
db
.
Record
)
assert
isinstance
(
my_rec
,
db
.
Record
)
assert
len
(
my_rec
.
parents
)
==
2
assert
"
DictRT1
"
in
[
par
.
name
for
par
in
my_rec
.
parents
]
assert
"
DictRT2
"
in
[
par
.
name
for
par
in
my_rec
.
parents
]
# scalar prop
# scalar prop
assert
my_rec
.
get_property
(
"
a
"
)
is
not
None
assert
my_rec
.
get_property
(
"
a
"
)
is
not
None
...
@@ -741,8 +748,9 @@ def test_properties_from_dict_basic(converter_registry):
...
@@ -741,8 +748,9 @@ def test_properties_from_dict_basic(converter_registry):
assert
my_rec
.
get_property
(
"
scalar_ref
"
)
is
not
None
assert
my_rec
.
get_property
(
"
scalar_ref
"
)
is
not
None
referenced
=
my_rec
.
get_property
(
"
scalar_ref
"
).
value
referenced
=
my_rec
.
get_property
(
"
scalar_ref
"
).
value
assert
isinstance
(
referenced
,
db
.
Record
)
assert
isinstance
(
referenced
,
db
.
Record
)
assert
referenced
.
name
==
"
Scalar Ref
"
assert
len
(
referenced
.
parents
)
==
1
assert
len
(
referenced
.
parents
)
==
1
assert
referenced
.
has_
parent
(
"
scalar_ref
"
)
assert
"
scalar_ref
"
in
[
par
.
name
for
par
in
referenced
.
parent
s
]
assert
referenced
.
get_property
(
"
a
"
)
is
not
None
assert
referenced
.
get_property
(
"
a
"
)
is
not
None
assert
referenced
.
get_property
(
"
a
"
).
value
==
23
assert
referenced
.
get_property
(
"
a
"
).
value
==
23
# blacklisted
# blacklisted
...
@@ -755,9 +763,9 @@ def test_properties_from_dict_basic(converter_registry):
...
@@ -755,9 +763,9 @@ def test_properties_from_dict_basic(converter_registry):
for
rec
in
my_rec
.
get_property
(
"
list_ref
"
).
value
:
for
rec
in
my_rec
.
get_property
(
"
list_ref
"
).
value
:
assert
isinstance
(
rec
,
db
.
Record
)
assert
isinstance
(
rec
,
db
.
Record
)
assert
len
(
rec
.
parents
)
==
1
assert
len
(
rec
.
parents
)
==
1
assert
rec
.
has_parent
(
"
list_ref
"
)
assert
"
list_ref
"
in
[
par
.
name
for
par
in
rec
.
parents
]
assert
rec
.
get_property
(
"
c
"
)
is
not
None
assert
rec
.
get_property
(
"
c
"
)
is
not
None
assert
type
(
rec
.
get_property
(
"
c
"
))
is
bool
assert
type
(
rec
.
get_property
(
"
c
"
)
.
value
)
is
bool
assert
True
in
[
rec
.
get_property
(
"
c
"
).
value
for
rec
in
my_rec
.
get_property
(
"
list_ref
"
).
value
]
assert
True
in
[
rec
.
get_property
(
"
c
"
).
value
for
rec
in
my_rec
.
get_property
(
"
list_ref
"
).
value
]
assert
False
in
[
rec
.
get_property
(
"
c
"
).
value
for
rec
in
my_rec
.
get_property
(
"
list_ref
"
).
value
]
assert
False
in
[
rec
.
get_property
(
"
c
"
).
value
for
rec
in
my_rec
.
get_property
(
"
list_ref
"
).
value
]
...
@@ -766,14 +774,14 @@ def test_properties_from_dict_basic(converter_registry):
...
@@ -766,14 +774,14 @@ def test_properties_from_dict_basic(converter_registry):
outer_rec
=
my_rec
.
get_property
(
"
ref_with_ref
"
).
value
outer_rec
=
my_rec
.
get_property
(
"
ref_with_ref
"
).
value
assert
isinstance
(
outer_rec
,
db
.
Record
)
assert
isinstance
(
outer_rec
,
db
.
Record
)
assert
len
(
outer_rec
.
parents
)
==
1
assert
len
(
outer_rec
.
parents
)
==
1
assert
outer_rec
.
has_
parent
(
"
ref_with_ref
"
)
assert
"
ref_with_ref
"
in
[
par
.
name
for
par
in
outer_rec
.
parent
s
]
assert
outer_rec
.
get_property
(
"
a
"
)
is
not
None
assert
outer_rec
.
get_property
(
"
a
"
)
is
not
None
assert
outer_rec
.
get_property
(
"
a
"
).
value
==
789
assert
outer_rec
.
get_property
(
"
a
"
).
value
==
789
assert
outer_rec
.
get_property
(
"
ref_in_ref
"
)
is
not
None
assert
outer_rec
.
get_property
(
"
ref_in_ref
"
)
is
not
None
inner_rec
=
outer_rec
.
get_property
(
"
ref_in_ref
"
).
value
inner_rec
=
outer_rec
.
get_property
(
"
ref_in_ref
"
).
value
assert
isinstance
(
inner_rec
,
db
.
Record
)
assert
isinstance
(
inner_rec
,
db
.
Record
)
assert
len
(
inner_rec
.
parents
)
==
1
assert
len
(
inner_rec
.
parents
)
==
1
assert
inner_rec
.
has_
parent
(
"
ref_in_ref
"
)
assert
"
ref_in_ref
"
in
[
par
.
name
for
par
in
inner_rec
.
parent
s
]
assert
inner_rec
.
get_property
(
"
b
"
)
is
not
None
assert
inner_rec
.
get_property
(
"
b
"
)
is
not
None
assert
inner_rec
.
get_property
(
"
b
"
).
value
==
"
something
"
assert
inner_rec
.
get_property
(
"
b
"
).
value
==
"
something
"
...
@@ -782,11 +790,11 @@ def test_properties_from_dict_basic(converter_registry):
...
@@ -782,11 +790,11 @@ def test_properties_from_dict_basic(converter_registry):
assert
my_rec
.
get_property
(
"
blacklisted_ref
"
)
is
None
assert
my_rec
.
get_property
(
"
blacklisted_ref
"
)
is
None
# named reference property
# named reference property
assert
my_rec
.
get_property
(
"
author
"
)
is
not
None
assert
my_rec
.
get_property
(
"
author
s
"
)
is
not
None
author_rec
=
my_rec
.
get_property
(
"
author
"
).
value
author_rec
=
my_rec
.
get_property
(
"
author
s
"
).
value
assert
isinstance
(
author_rec
,
db
.
Record
)
assert
isinstance
(
author_rec
,
db
.
Record
)
assert
len
(
author_rec
.
parents
)
==
1
assert
len
(
author_rec
.
parents
)
==
1
assert
author_rec
.
has_
parent
(
"
Person
"
)
assert
"
Person
"
in
[
par
.
name
for
par
in
author_rec
.
parent
s
]
assert
author_rec
.
get_property
(
"
full_name
"
)
is
not
None
assert
author_rec
.
get_property
(
"
full_name
"
)
is
not
None
assert
author_rec
.
get_property
(
"
full_name
"
).
value
==
"
Some Author
"
assert
author_rec
.
get_property
(
"
full_name
"
).
value
==
"
Some Author
"
...
@@ -834,13 +842,13 @@ def test_properties_from_dict_callable(converter_registry):
...
@@ -834,13 +842,13 @@ def test_properties_from_dict_callable(converter_registry):
pdfc
=
PropertiesFromDictConverter
(
pdfc
=
PropertiesFromDictConverter
(
definition
=
{
definition
=
{
"
record_from_dict
"
:
{
"
record_from_dict
"
:
{
"
variable_name
"
:
"
MyRec
"
"
variable_name
"
:
"
MyRec
"
,
"
name
"
:
"
My New Record
"
"
name
"
:
"
My New Record
"
}
,
}
name
=
"
TestConverter
"
,
}
,
converter_registry
=
converter_registry
,
name
=
"
TestConverter
"
,
referenced_record_callback
=
convert_some_values
converter_registry
=
converter_registry
,
}
referenced_record_callback
=
convert_some_values
)
)
values
=
GeneralStore
()
values
=
GeneralStore
()
...
@@ -851,7 +859,7 @@ def test_properties_from_dict_callable(converter_registry):
...
@@ -851,7 +859,7 @@ def test_properties_from_dict_callable(converter_registry):
"
url
"
:
"
referenced
"
"
url
"
:
"
referenced
"
},
},
"
referenced2
"
:
{
"
referenced2
"
:
{
"
nourl
"
:
"
something else
"
"
nourl
"
:
"
something else
"
,
"
url
"
:
"
https://indiscale.com
"
"
url
"
:
"
https://indiscale.com
"
}
}
})
})
...
@@ -860,7 +868,7 @@ def test_properties_from_dict_callable(converter_registry):
...
@@ -860,7 +868,7 @@ def test_properties_from_dict_callable(converter_registry):
my_rec
=
records
[
"
MyRec
"
]
my_rec
=
records
[
"
MyRec
"
]
assert
isinstance
(
my_rec
,
db
.
Record
)
assert
isinstance
(
my_rec
,
db
.
Record
)
assert
len
(
my_rec
.
parents
)
==
1
assert
len
(
my_rec
.
parents
)
==
1
assert
my_rec
.
has_
parent
(
"
MyRec
"
)
assert
"
MyRec
"
in
[
par
.
name
for
par
in
my_rec
.
parent
s
]
assert
my_rec
.
name
==
"
My New Record
"
assert
my_rec
.
name
==
"
My New Record
"
# simple conversion
# simple conversion
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment