Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
34da5422
Commit
34da5422
authored
1 year ago
by
Henrik tom Wörden
Browse files
Options
Downloads
Patches
Plain Diff
mostly docs
parent
1983fd5f
No related branches found
No related tags found
2 merge requests
!178
FIX: #96 Better error output for crawl.py script.
,
!167
Sync Graph
Pipeline
#50944
failed
1 year ago
Stage: info
Stage: setup
Stage: cert
Stage: style
Stage: test
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/caoscrawler/sync_graph.py
+58
-30
58 additions, 30 deletions
src/caoscrawler/sync_graph.py
with
58 additions
and
30 deletions
src/caoscrawler/sync_graph.py
+
58
−
30
View file @
34da5422
...
...
@@ -193,8 +193,9 @@ class SyncGraph():
one with those that haven
'
t, an error is raised if there are any SyncNodes without an
(possibly negative) ID.
"""
if
len
(
self
.
unchecked
)
>
1
:
self
.
unchecked_contains_circular_dependency
()
# TODO reactivate once the implementation is appropriate
# if len(self.unchecked) > 1:
# self.unchecked_contains_circular_dependency()
for
el
in
self
.
nodes
:
if
el
.
id
is
None
:
...
...
@@ -239,6 +240,10 @@ class SyncGraph():
TODO: for the sake of detecting problems for split_into_inserts_and_updates we should only
consider references that are identifying properties.
"""
raise
NotImplementedError
(
"
This function is not yet properly implemented
"
)
# TODO if the first element is not part of the circle, then
# this will not work
# We must create a better implementation (see also TODO in docstring)
circle
=
[
self
.
unchecked
[
0
]]
closed
=
False
while
not
closed
:
...
...
@@ -258,6 +263,8 @@ class SyncGraph():
Return an equivalent SyncNode.
Equivalent means that ID, path or identifiable are the same.
Returns None if no equivalent node is found.
"""
if
entity
.
id
is
not
None
and
entity
.
id
in
self
.
_id_look_up
:
candidate
=
self
.
_id_look_up
[
entity
.
id
]
...
...
@@ -275,11 +282,17 @@ class SyncGraph():
return
None
def
_get_new_id
(
self
):
"""
returns the next unused temporary ID
"""
self
.
_remote_missing_counter
-=
1
return
self
.
_remote_missing_counter
def
_set_identifiable_of_node
(
self
,
node
:
SyncNode
,
identifiable
:
Optional
[
Identifiable
]
=
None
):
"""
sets the identifiable and checks whether an equivalent node can be found with that new
information.
if no identifiable is given, the identifiable is retrieved from the identifiable adapter
"""
if
identifiable
is
None
:
self
.
identifiableAdapter
.
check_identifying_props
(
node
)
identifiable
=
self
.
identifiableAdapter
.
get_identifiable
(
...
...
@@ -300,6 +313,10 @@ class SyncGraph():
raise
RuntimeError
(
f
"
Records must have a parent.
\n
{
ent
}
"
)
def
_get_nodes_whose_identity_relies_on
(
self
,
node
:
SyncNode
):
"""
returns a set of nodes that reference the given node as identifying property or are
referenced by the given node and the parent of the given node is listed as
"
is_referenced_by
"
"""
return
(
self
.
backward_id_references
[
id
(
node
)].
union
(
self
.
forward_id_referenced_by
[
id
(
node
)]))
...
...
@@ -336,17 +353,17 @@ class SyncGraph():
@staticmethod
def
_create_reference_mapping
(
flat
:
list
[
SyncNode
]):
"""
TODO update docstring
Create a dictionary of dictionaries of the form:
dict[int, dict[str, list[Union[int,None]]]]
- The integ
er i
ndex is the Python id of the value object.
- The string is the name of the first parent of the referencing object
.
Each value objects is taken from the va
lues
of
all
properties from the list flat.
So the returned mapping maps ids of entities to the ids of objects which are referr
ing
to them
.
Create six dictionaries that describe references among SyncNodes. All dictionaries use the
Python ID of SyncNodes as keys.
There is always one dictionary to describe the direction of the reference, i.e.
map[id(node)] -> other where other is a set of SyncNodes that are being referenced by node.
And then there is always one dictionary for the inverse direction. The two dictionaries are
named
"
forward_
"
and
"
backward_
"
, respectively
.
Then there are three kinds of maps being generated: One includes all references
(
"
_references
"
), one includes references that are values of identifying properties
(
"
_id_references
"
) and one includes references that are relevant for identifying backreferences/
"
is_referenced_by
"
(
"
_id_references_by
"
)
.
"""
# TODO we need to treat children of RecordTypes somehow.
forward_references
:
dict
[
int
,
set
[
SyncNode
]]
=
{}
...
...
@@ -425,35 +442,46 @@ class SyncGraph():
self
.
_nonidentifiable
[
id
(
node
)]
=
node
def
_merge_into
(
self
,
source
:
SyncNode
,
target
:
SyncNode
):
"""
FIXME tries to merge record into newrecord
If it fails, record is added to the try_to_merge_later list.
In any case, references are bent to the newrecord object.
"""
tries to merge source into target and performs the necessary updates:
- updates the member variables of target using source (``target.update(source)``).
- replaces reference values to source by target
- updates the reference map
- updates lookup tables
- removes source from node lists
- marks target as missing/existing if source was marked that way
- adds an identifiable if now possible (e.g. merging based on ID might allow creating an
identifiable when neither of the two nodes had sufficient properties on its own before)
- checks whether dependent nodes can now get an identifiable (the merge might have set the
ID such that dependent nodes can now create an identifiable)
"""
# sanity checks
assert
source
is
not
target
if
source
is
target
:
raise
ValueError
(
"
source must not be target
"
)
if
target
.
id
is
None
and
source
.
id
is
not
None
:
assert
self
.
_id_look_up
[
source
.
id
]
==
source
,
(
"
It is assumed that always only one node exists with a certain ID and that node is
"
"
in the look up
"
)
if
self
.
_id_look_up
[
source
.
id
]
!=
source
:
raise
ValueError
(
"
It is assumed that always only one node exists with a certain ID and that
"
"
node is in the look up
"
)
if
target
.
path
is
None
and
source
.
path
is
not
None
:
assert
self
.
_id_look_up
[
source
.
path
]
==
source
,
(
"
It is assumed that always only one node exists with a certain path and that node
"
"
is in the look up
"
)
if
self
.
_id_look_up
[
source
.
path
]
!=
source
:
raise
ValueError
(
"
It is assumed that always only one node exists with a certain path and that
"
"
node is in the look up
"
)
target
.
update
(
source
)
# replace actual reference property values
for
node
in
self
.
backward_references
.
pop
(
id
(
source
)):
_set_each_scalar_value
(
node
,
condition
=
lambda
val
:
val
is
source
,
value
=
lambda
val
:
target
)
# update reference mappings
for
node
in
self
.
forward_references
.
pop
(
id
(
source
)):
self
.
forward_references
[
id
(
target
)].
add
(
node
)
self
.
backward_references
[
id
(
node
)].
remove
(
source
)
self
.
backward_references
[
id
(
node
)].
add
(
target
)
for
node
in
self
.
backward_references
.
pop
(
id
(
source
)):
# replace actual reference property values
_set_each_scalar_value
(
node
,
condition
=
lambda
val
:
val
is
source
,
value
=
lambda
val
:
target
)
self
.
backward_references
[
id
(
target
)].
add
(
node
)
self
.
forward_references
[
id
(
node
)].
remove
(
source
)
self
.
forward_references
[
id
(
node
)].
add
(
target
)
...
...
@@ -515,8 +543,8 @@ class SyncGraph():
def
_initialize_nodes
(
self
,
entities
:
list
[
db
.
Entity
]):
"""
create initial set of SyncNodes from provided Entity list
"""
entities
=
self
.
_create_flat_list
(
entities
)
self
.
_sanity_check
(
entities
)
entities
=
self
.
_create_flat_list
(
entities
)
se_lookup
:
dict
[
int
,
SyncNode
]
=
{}
# lookup: python id -> SyncNode
for
el
in
entities
:
self
.
nodes
.
append
(
SyncNode
(
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment