Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
c874edf5
Commit
c874edf5
authored
May 29, 2024
by
Alexander Schlemmer
Browse files
Options
Downloads
Patches
Plain Diff
STY: applied black style and removed unused imports
parent
4ac7d9a7
Branches
Branches containing commit
Tags
Tags containing commit
2 merge requests
!178
FIX: #96 Better error output for crawl.py script.
,
!167
Sync Graph
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/caoscrawler/sync_graph.py
+120
-64
120 additions, 64 deletions
src/caoscrawler/sync_graph.py
with
120 additions
and
64 deletions
src/caoscrawler/sync_graph.py
+
120
−
64
View file @
c874edf5
...
...
@@ -30,20 +30,21 @@ import logging
from
typing
import
Any
,
Optional
,
Union
,
Callable
import
linkahead
as
db
from
linkahead.apiutils
import
(
EntityMergeConflictError
,
compare_entities
,
merge_entities
)
from
linkahead.cached
import
cache_clear
,
cached_get_entity_by
from
linkahead.cached
import
cached_get_entity_by
from
linkahead.exceptions
import
EmptyUniqueQueryError
from
.exceptions
import
ImpossibleMergeError
,
MissingReferencingEntityError
from
.identifiable_adapters
import
IdentifiableAdapter
from
.identifiable
import
Identifiable
from
.sync_node
import
SyncNode
,
TempID
import
re
logger
=
logging
.
getLogger
(
__name__
)
def
_set_each_scalar_value
(
node
:
SyncNode
,
condition
:
Callable
[[
Any
],
bool
],
value
:
Any
):
def
_set_each_scalar_value
(
node
:
SyncNode
,
condition
:
Callable
[[
Any
],
bool
],
value
:
Any
):
"""
helper function that conditionally replaces each value element of each property of a node
If the property value is a list, the replacement is done for each list entry.
...
...
@@ -70,7 +71,7 @@ def _set_each_scalar_value(node: SyncNode, condition: Callable[[Any], bool], val
p
.
value
=
value
(
p
.
value
)
class
SyncGraph
()
:
class
SyncGraph
:
"""
A data model class for the graph of entities that shall be created during synchronization of
the crawler.
...
...
@@ -133,7 +134,9 @@ class SyncGraph():
# Note that whenever one node is changed, we check all dependent nodes (see usage of
# `_get_nodes_whose_identity_relies_on`) whether something should be updated. Thus, we cannot
# miss a necessary update.
def
__init__
(
self
,
entities
:
list
[
db
.
Entity
],
identifiableAdapter
:
IdentifiableAdapter
):
def
__init__
(
self
,
entities
:
list
[
db
.
Entity
],
identifiableAdapter
:
IdentifiableAdapter
):
self
.
identifiableAdapter
=
identifiableAdapter
# A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs.
# This dictionary is initially set using _mark_entities_with_path_or_id and later updated
...
...
@@ -192,8 +195,10 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24.
"""
if
node
.
id
is
not
None
:
raise
RuntimeError
(
'
Cannot update ID.
\n
'
f
'
It already is
{
node
.
id
}
and shall be set to
{
node_id
}
.
'
)
raise
RuntimeError
(
"
Cannot update ID.
\n
"
f
"
It already is
{
node
.
id
}
and shall be set to
{
node_id
}
.
"
)
if
node_id
is
None
:
node_id
=
TempID
(
self
.
_get_new_id
())
node
.
id
=
node_id
...
...
@@ -233,9 +238,11 @@ class SyncGraph():
node_map
[
id
(
el
)]
=
entities
[
-
1
]
for
ent
in
entities
:
_set_each_scalar_value
(
ent
,
_set_each_scalar_value
(
ent
,
condition
=
lambda
val
:
isinstance
(
val
,
SyncNode
),
value
=
lambda
val
:
node_map
[
id
(
val
)])
value
=
lambda
val
:
node_map
[
id
(
val
)],
)
missing
=
[
el
for
el
in
entities
if
el
.
id
<
0
]
existing
=
[
el
for
el
in
entities
if
el
.
id
>
0
]
...
...
@@ -253,10 +260,16 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-27.
"""
return
any
([
id
(
ent
)
not
in
self
.
_missing
and
id
(
ent
)
not
in
self
.
_existing
for
ent
in
self
.
forward_references_id_props
[
id
(
node
)]]
+
[
id
(
ent
)
not
in
self
.
_missing
and
id
(
ent
)
not
in
self
.
_existing
for
ent
in
self
.
backward_references_backref
[
id
(
node
)]])
return
any
(
[
id
(
ent
)
not
in
self
.
_missing
and
id
(
ent
)
not
in
self
.
_existing
for
ent
in
self
.
forward_references_id_props
[
id
(
node
)]
]
+
[
id
(
ent
)
not
in
self
.
_missing
and
id
(
ent
)
not
in
self
.
_existing
for
ent
in
self
.
backward_references_backref
[
id
(
node
)]
]
)
def
unchecked_contains_circular_dependency
(
self
):
"""
...
...
@@ -309,9 +322,13 @@ class SyncGraph():
candidate
=
self
.
_path_look_up
[
entity
.
path
]
if
candidate
is
not
entity
:
return
candidate
if
(
entity
.
identifiable
is
not
None
and
entity
.
identifiable
.
get_representation
()
in
self
.
_identifiable_look_up
):
candidate
=
self
.
_identifiable_look_up
[
entity
.
identifiable
.
get_representation
()]
if
(
entity
.
identifiable
is
not
None
and
entity
.
identifiable
.
get_representation
()
in
self
.
_identifiable_look_up
):
candidate
=
self
.
_identifiable_look_up
[
entity
.
identifiable
.
get_representation
()
]
if
candidate
is
not
entity
:
return
candidate
return
None
...
...
@@ -324,8 +341,9 @@ class SyncGraph():
self
.
_remote_missing_counter
-=
1
return
self
.
_remote_missing_counter
def
_set_identifiable_of_node
(
self
,
node
:
SyncNode
,
identifiable
:
Optional
[
Identifiable
]
=
None
):
def
_set_identifiable_of_node
(
self
,
node
:
SyncNode
,
identifiable
:
Optional
[
Identifiable
]
=
None
):
"""
sets the identifiable and checks whether an equivalent node can be found with that new
information. If an equivalent node is found,
'
node
'
is merged into that node.
...
...
@@ -333,12 +351,14 @@ class SyncGraph():
Raises a ValueError if the equivalent node found does not have an identifiable.
Raises a RuntimeError if there is no equivalent node found and
the (unique) string representation of the identifiable of node is already contained in the identifiable_look_up.
the (unique) string representation of the identifiable of node is already contained in
the identifiable_look_up.
"""
if
identifiable
is
None
:
self
.
identifiableAdapter
.
all_identifying_properties_exist
(
node
)
identifiable
=
self
.
identifiableAdapter
.
get_identifiable
(
node
,
self
.
backward_references_backref
[
id
(
node
)])
node
,
self
.
backward_references_backref
[
id
(
node
)]
)
node
.
identifiable
=
identifiable
equivalent_se
=
self
.
get_equivalent
(
node
)
if
equivalent_se
is
not
None
and
equivalent_se
is
not
node
:
...
...
@@ -361,9 +381,13 @@ class SyncGraph():
if
ent
.
role
==
"
Record
"
and
len
(
ent
.
parents
)
==
0
:
raise
ValueError
(
f
"
Records must have a parent.
\n
{
ent
}
"
)
if
isinstance
(
ent
.
id
,
int
)
and
ent
.
id
<
0
:
raise
ValueError
(
f
"
Records must not have negative integers as IDs.
\n
{
ent
}
"
)
raise
ValueError
(
f
"
Records must not have negative integers as IDs.
\n
{
ent
}
"
)
if
isinstance
(
ent
.
id
,
str
)
and
re
.
match
(
r
"
^-\d+$
"
,
ent
.
id
):
raise
ValueError
(
f
"
Records must not have negative integers as IDs.
\n
{
ent
}
"
)
raise
ValueError
(
f
"
Records must not have negative integers as IDs.
\n
{
ent
}
"
)
def
_get_nodes_whose_identity_relies_on
(
self
,
node
:
SyncNode
):
"""
returns a set of nodes that reference the given node as identifying property or are
...
...
@@ -372,11 +396,14 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24.
"""
return
(
self
.
backward_references_id_props
[
id
(
node
)].
union
(
self
.
forward_references_backref
[
id
(
node
)]))
return
self
.
backward_references_id_props
[
id
(
node
)].
union
(
self
.
forward_references_backref
[
id
(
node
)]
)
@staticmethod
def
_create_flat_list
(
ent_list
:
list
[
db
.
Entity
],
flat
:
Optional
[
list
[
db
.
Entity
]]
=
None
):
def
_create_flat_list
(
ent_list
:
list
[
db
.
Entity
],
flat
:
Optional
[
list
[
db
.
Entity
]]
=
None
):
"""
Recursively adds entities and all their properties contained in ent_list to
the output list flat.
...
...
@@ -446,20 +473,35 @@ class SyncGraph():
if
isinstance
(
v
,
SyncNode
):
forward_references
[
id
(
node
)].
add
(
v
)
backward_references
[
id
(
v
)].
add
(
node
)
if
(
node
.
registered_identifiable
is
not
None
and
len
([
el
.
name
for
el
in
node
.
registered_identifiable
.
properties
if
el
.
name
==
p
.
name
])
>
0
):
if
(
node
.
registered_identifiable
is
not
None
and
len
(
[
el
.
name
for
el
in
node
.
registered_identifiable
.
properties
if
el
.
name
==
p
.
name
]
)
>
0
):
forward_references_id_props
[
id
(
node
)].
add
(
v
)
backward_references_id_props
[
id
(
v
)].
add
(
node
)
if
(
v
.
registered_identifiable
is
not
None
and
IdentifiableAdapter
.
referencing_entity_has_appropriate_type
(
node
.
parents
,
v
.
registered_identifiable
)):
if
(
v
.
registered_identifiable
is
not
None
and
IdentifiableAdapter
.
referencing_entity_has_appropriate_type
(
node
.
parents
,
v
.
registered_identifiable
)
):
forward_references_backref
[
id
(
node
)].
add
(
v
)
backward_references_backref
[
id
(
v
)].
add
(
node
)
return
(
forward_references
,
backward_references
,
forward_references_id_props
,
backward_references_id_props
,
forward_references_backref
,
backward_references_backref
,
return
(
forward_references
,
backward_references
,
forward_references_id_props
,
backward_references_id_props
,
forward_references_backref
,
backward_references_backref
,
)
def
_mark_entities_with_path_or_id
(
self
):
...
...
@@ -509,20 +551,22 @@ class SyncGraph():
if
self
.
_id_look_up
[
source
.
id
]
!=
source
:
raise
ValueError
(
"
It is assumed that always only one node exists with a certain ID and that
"
"
node is in the look up
"
)
"
node is in the look up
"
)
if
target
.
path
is
None
and
source
.
path
is
not
None
:
if
self
.
_id_look_up
[
source
.
path
]
!=
source
:
raise
ValueError
(
"
It is assumed that always only one node exists with a certain path and that
"
"
node is in the look up
"
)
"
node is in the look up
"
)
target
.
update
(
source
)
# replace actual reference property values
for
node
in
self
.
backward_references
[
id
(
source
)]:
_set_each_scalar_value
(
node
,
condition
=
lambda
val
:
val
is
source
,
value
=
lambda
val
:
target
)
_set_each_scalar_value
(
node
,
condition
=
lambda
val
:
val
is
source
,
value
=
lambda
val
:
target
)
# update reference mappings
for
node
in
self
.
forward_references
.
pop
(
id
(
source
)):
...
...
@@ -562,10 +606,13 @@ class SyncGraph():
if
target
.
path
is
not
None
:
self
.
_path_look_up
[
target
.
path
]
=
target
if
target
.
identifiable
is
not
None
:
self
.
_identifiable_look_up
[
target
.
identifiable
.
get_representation
()]
=
target
self
.
_identifiable_look_up
[
target
.
identifiable
.
get_representation
()]
=
(
target
)
if
((
id
(
source
)
in
self
.
_existing
and
id
(
target
)
in
self
.
_missing
)
or
(
id
(
target
)
in
self
.
_existing
and
id
(
source
)
in
self
.
_missing
)):
if
(
id
(
source
)
in
self
.
_existing
and
id
(
target
)
in
self
.
_missing
)
or
(
id
(
target
)
in
self
.
_existing
and
id
(
source
)
in
self
.
_missing
):
raise
RuntimeError
(
"
Trying to merge missing and existing
"
)
if
id
(
source
)
in
self
.
_missing
and
id
(
target
)
not
in
self
.
_missing
:
...
...
@@ -595,9 +642,13 @@ class SyncGraph():
Last review by Alexander Schlemmer on 2024-05-24.
"""
return
(
node
.
identifiable
is
None
and
not
self
.
_identity_relies_on_unchecked_entity
(
node
)
return
(
node
.
identifiable
is
None
and
not
self
.
_identity_relies_on_unchecked_entity
(
node
)
and
self
.
identifiableAdapter
.
all_identifying_properties_exist
(
node
,
raise_exception
=
False
))
node
,
raise_exception
=
False
)
)
def
_initialize_nodes
(
self
,
entities
:
list
[
db
.
Entity
]):
"""
create initial set of SyncNodes from provided Entity list
"""
...
...
@@ -608,19 +659,24 @@ class SyncGraph():
# Create new sync nodes from the list of entities, their registered identifiables
# are set from the identifiable adapter.
for
el
in
entities
:
self
.
nodes
.
append
(
SyncNode
(
el
,
self
.
identifiableAdapter
.
get_registered_identifiable
(
el
))
)
self
.
nodes
.
append
(
SyncNode
(
el
,
self
.
identifiableAdapter
.
get_registered_identifiable
(
el
))
)
se_lookup
[
id
(
el
)]
=
self
.
nodes
[
-
1
]
# replace db.Entity objects with SyncNodes in references:
for
node
in
self
.
nodes
:
_set_each_scalar_value
(
node
,
_set_each_scalar_value
(
node
,
condition
=
lambda
val
:
id
(
val
)
in
se_lookup
,
value
=
lambda
val
:
se_lookup
[
id
(
val
)])
value
=
lambda
val
:
se_lookup
[
id
(
val
)],
)
def
_add_identifiables_to_dependend_nodes
(
self
,
node
):
"""
For each dependent node, we check whether this allows an identifiable to be created
"""
"""
For each dependent node, we check whether this allows an identifiable to be created
Last review by Alexander Schlemmer on 2024-05-29.
"""
for
other_node
in
self
.
_get_nodes_whose_identity_relies_on
(
node
):
if
self
.
_identifiable_is_needed
(
other_node
):
self
.
_set_identifiable_of_node
(
other_node
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment