Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
000f3fe5
Commit
000f3fe5
authored
1 year ago
by
Henrik tom Wörden
Browse files
Options
Downloads
Patches
Plain Diff
typehint and test
parent
2b863ace
No related branches found
No related tags found
2 merge requests
!178
FIX: #96 Better error output for crawl.py script.
,
!167
Sync Graph
Pipeline
#50898
passed with warnings
1 year ago
Stage: info
Stage: setup
Stage: cert
Stage: style
Stage: test
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/caoscrawler/sync_graph.py
+15
-31
15 additions, 31 deletions
src/caoscrawler/sync_graph.py
src/caoscrawler/sync_node.py
+5
-4
5 additions, 4 deletions
src/caoscrawler/sync_node.py
unittests/test_sync_graph.py
+55
-12
55 additions, 12 deletions
unittests/test_sync_graph.py
with
75 additions
and
47 deletions
src/caoscrawler/sync_graph.py
+
15
−
31
View file @
000f3fe5
...
...
@@ -42,7 +42,7 @@ from .sync_node import SyncNode
logger
=
logging
.
getLogger
(
__name__
)
def
_for_each_scalar_value
(
node
,
condition
,
kind
,
value
=
None
):
def
_for_each_scalar_value
(
node
:
SyncNode
,
condition
:
callable
,
kind
:
str
,
value
:
Any
=
None
):
"""
helper function that performs an action on each value element of each property of a node
The action (remove or set) is performed on each property value of each property: in case on
...
...
@@ -68,12 +68,12 @@ def _for_each_scalar_value(node, condition, kind, value=None):
p
.
value
=
value
(
p
.
value
)
def
_remove_each_scalar_value
(
node
,
condition
):
def
_remove_each_scalar_value
(
node
:
SyncNode
,
condition
:
callable
):
"""
"
remove
"
version of _for_each_scalar_value
"""
_for_each_scalar_value
(
node
,
condition
,
"
remove
"
)
def
_set_each_scalar_value
(
node
,
condition
,
value
):
def
_set_each_scalar_value
(
node
:
SyncNode
,
condition
:
callable
,
value
:
Any
):
"""
"
set
"
version of _for_each_scalar_value
"""
_for_each_scalar_value
(
node
,
condition
,
"
set
"
,
value
=
value
)
...
...
@@ -124,7 +124,7 @@ class SyncGraph():
export_record_lists.
"""
def
__init__
(
self
,
entities
:
List
[
db
.
Entity
],
identifiableAdapter
):
def
__init__
(
self
,
entities
:
List
[
db
.
Entity
],
identifiableAdapter
:
IdentifiableAdapter
):
self
.
identifiableAdapter
=
identifiableAdapter
self
.
_id_look_up
:
Dict
[
int
,
SyncNode
]
=
{}
self
.
_path_look_up
:
Dict
[
str
,
SyncNode
]
=
{}
...
...
@@ -150,10 +150,16 @@ class SyncGraph():
self
.
backward_id_referenced_by
,
)
=
self
.
_create_reference_mapping
(
self
.
nodes
)
# remove entities with path or ID from unchecked list
self
.
_mark_entities_with_path_or_id
()
# add identifiables where possible
for
node
in
list
(
self
.
nodes
):
if
self
.
_identifiable_is_needed
(
node
):
self
.
_set_identifiable_of_node
(
node
)
# everything in unchecked has no ID nor path. Thus, it must be possible to create an
# identifiable.
for
node
in
self
.
unchecked
:
self
.
identifiableAdapter
.
check_identifying_props
(
node
)
...
...
@@ -265,28 +271,6 @@ class SyncGraph():
if
candidate
is
not
entity
:
return
candidate
# def get_equivalent_existing(self, se):
# """ Check whether this Record exists on the remote server
# Returns: The stored Record
# """
# treated = self.get_any(se)
# if id(treated) in self._existing:
# return rec
# else:
# return None
# def get_equivalent_missing(self, se):
# """ Check whether this Record is missing on the remote server
# Returns: The stored Record
# """
# treated = self.get_any(record, identifiable)
# if id(treated) in self._missing:
# return rec
# else:
# return None
def
_get_new_id
(
self
):
self
.
_remote_missing_counter
-=
1
return
self
.
_remote_missing_counter
...
...
@@ -522,12 +506,12 @@ class SyncGraph():
if
self
.
_identifiable_is_needed
(
other_node
):
self
.
_set_identifiable_of_node
(
other_node
)
def
_identifiable_is_needed
(
self
,
node
):
def
_identifiable_is_needed
(
self
,
node
:
SyncNode
):
return
(
node
.
identifiable
is
None
and
not
self
.
_identity_relies_on_unchecked_entity
(
node
)
and
self
.
identifiableAdapter
.
check_identifying_props
(
node
,
raise_exception
=
False
))
def
_initialize_nodes
(
self
,
entities
):
"""
create initial set of S
emanticEntiti
es from provided Entity list
"""
def
_initialize_nodes
(
self
,
entities
:
list
[
db
.
Entity
]
):
"""
create initial set of S
yncNod
es from provided Entity list
"""
entities
=
self
.
_create_flat_list
(
entities
)
self
.
_sanity_check
(
entities
)
se_lookup
:
Dict
[
str
,
SyncNode
]
=
{}
# lookup: python id -> SyncNode
...
...
@@ -542,7 +526,7 @@ class SyncGraph():
condition
=
lambda
val
:
id
(
val
)
in
se_lookup
,
value
=
lambda
val
:
se_lookup
[
id
(
val
)])
def
_mark_missing
(
self
,
node
):
def
_mark_missing
(
self
,
node
:
SyncNode
):
self
.
_missing
[
id
(
node
)]
=
node
self
.
unchecked
.
remove
(
node
)
...
...
@@ -560,7 +544,7 @@ class SyncGraph():
print
(
f
"
set
\n
{
other_node
}
\n
to missing due to missing
\n
{
node
}
"
)
self
.
set_id_of_node
(
other_node
)
def
_mark_existing
(
self
,
node
):
def
_mark_existing
(
self
,
node
:
SyncNode
):
if
node
.
id
<=
0
:
raise
ValueError
(
"
ID must be positive for existing entities
"
)
self
.
_existing
[
id
(
node
)]
=
node
...
...
This diff is collapsed.
Click to expand it.
src/caoscrawler/sync_node.py
+
5
−
4
View file @
000f3fe5
...
...
@@ -155,7 +155,7 @@ class SyncNode():
raise
ime
return
ent
def
__repr__
(
self
):
def
__repr__
(
self
)
->
str
:
res
=
f
"
\n
=====================================================
\n
{
self
.
role
}
\n
"
if
hasattr
(
self
,
"
_metadata
"
):
res
+=
f
"
user:
{
self
.
_metadata
[
'
user
'
]
}
\n
"
...
...
@@ -181,11 +181,12 @@ class SyncNode():
return
(
res
+
yaml
.
dump
(
d
,
allow_unicode
=
True
)
+
"
=====================================================
\n
"
)
def
is_unidentifiable
(
self
):
def
is_unidentifiable
(
self
)
->
bool
:
"""
returns whether this is an unidentifiable Node
"""
return
self
.
registered_identifiable
.
get_property
(
"
no-ident
"
)
is
not
None
def
parent_in_list
(
parent
,
plist
)
:
def
parent_in_list
(
parent
:
db
.
Parent
,
plist
:
_ParentList
)
->
bool
:
"""
helper function that checks whether a parent with the same name or ID is in the plist
"""
missing
=
False
if
parent
.
name
is
not
None
:
...
...
@@ -197,7 +198,7 @@ def parent_in_list(parent, plist):
return
not
missing
def
property_in_list
(
prop
,
plist
)
:
def
property_in_list
(
prop
:
db
.
Property
,
plist
:
_Properties
)
->
bool
:
"""
helper function that checks whether a property with the same name or ID is in the plist
"""
missing
=
False
if
prop
.
name
is
not
None
:
...
...
This diff is collapsed.
Click to expand it.
unittests/test_sync_graph.py
+
55
−
12
View file @
000f3fe5
...
...
@@ -119,18 +119,69 @@ def test_create_reference_mapping():
assert
backward_id_referenced_by
[
id
(
ses
[
1
])]
==
set
()
def
test_SyncGraph
():
@patch
(
"
caoscrawler.sync_graph.cached_get_entity_by
"
,
new
=
Mock
(
side_effect
=
mock_get_entity_by
))
def
test_SyncGraph_init
():
# trivial case
a
=
db
.
Record
(
id
=
101
).
add_parent
(
"
A
"
)
ident_a
=
db
.
RecordType
().
add_parent
(
"
A
"
).
add_property
(
"
prop_ident
"
)
ident_adapter
=
CaosDBIdentifiableAdapter
()
ident_adapter
.
register_identifiable
(
"
A
"
,
ident_a
)
st
=
SyncGraph
([
a
],
ident_adapter
)
SyncGraph
([
a
],
ident_adapter
)
SyncGraph
([],
ident_adapter
)
# should not fail either...
entlist
=
[
db
.
Record
(
id
=
101
).
add_parent
(
"
A
"
),
db
.
Record
(
id
=
102
).
add_parent
(
"
A
"
),
db
.
File
(
path
=
'
a
'
).
add_parent
(
"
A
"
),
db
.
File
(
path
=
'
b
'
).
add_parent
(
"
A
"
),
db
.
Record
(
id
=
103
).
add_parent
(
"
A
"
),
db
.
Record
(
id
=
104
).
add_parent
(
"
A
"
).
add_property
(
name
=
'
prop_ident
'
,
value
=
"
MERGEME
"
),
db
.
Record
().
add_parent
(
"
A
"
).
add_property
(
name
=
'
prop_ident
'
,
value
=
"
MERGEME
"
),
db
.
File
(
path
=
'
a
'
,
file
=
'
b
'
).
add_parent
(
"
A
"
),
db
.
Record
(
id
=
101
).
add_parent
(
"
A
"
),
db
.
Record
().
add_parent
(
"
A
"
).
add_property
(
name
=
'
prop_ident
'
,
value
=
"
other
"
),
db
.
Record
().
add_parent
(
"
A
"
).
add_property
(
name
=
'
prop_ident
'
,
value
=
db
.
Record
().
add_parent
(
"
A
"
)
.
add_property
(
name
=
'
prop_ident
'
,
value
=
"
other
"
)),
db
.
File
(
path
=
'
a
'
,
file
=
'
b
'
).
add_parent
(
"
A
"
),
db
.
Record
(
id
=
101
).
add_parent
(
"
A
"
),
]
st
=
SyncGraph
(
entlist
,
ident_adapter
)
# all nodes with ID=101 have been merged
assert
len
([
el
for
el
in
st
.
nodes
if
el
.
id
==
101
])
==
1
# all nodes with path='a' have been merged
assert
len
([
el
for
el
in
st
.
nodes
if
el
.
path
==
'
a
'
])
==
1
# all nodes with ID or path were removed from unchecked
for
el
in
st
.
nodes
:
if
el
.
id
is
not
None
or
el
.
path
is
not
None
:
assert
el
not
in
st
.
unchecked
# all nodes with ID are in the ID lookup
for
el
in
st
.
nodes
:
if
el
.
id
is
not
None
:
assert
st
.
_id_look_up
[
el
.
id
]
is
el
# all nodes with path are in the path lookup
for
el
in
st
.
nodes
:
if
el
.
path
is
not
None
:
assert
st
.
_path_look_up
[
el
.
path
]
is
el
# all nodes with identifiable are in the identifiable lookup
for
el
in
st
.
nodes
:
if
el
.
identifiable
is
not
None
:
assert
st
.
_identifiable_look_up
[
el
.
identifiable
.
get_representation
()]
is
el
# node without ID but with identifiable was merged with other node with ID
assert
len
([
el
for
el
in
st
.
nodes
if
len
(
el
.
properties
)
>
0
and
el
.
properties
[
0
].
value
==
"
MERGEME
"
])
==
1
# every node that does not rely on something unchecked has an identifiable or an ID
for
el
in
st
.
nodes
:
if
not
st
.
_identity_relies_on_unchecked_entity
(
el
):
assert
el
.
identifiable
is
not
None
or
el
.
id
is
not
None
def
test_merge_into_trivial
(
simple_adapter
):
# simplest case: a -> c
# b
# (a reference c; b does not reference anything; a & b have the same target record)
# (a reference c; b does not reference anything; a & b have the same target
# record)
c
=
db
.
Record
(
name
=
'
c
'
).
add_parent
(
"
RT2
"
)
a
=
db
.
Record
(
name
=
'
a
'
).
add_parent
(
"
RT1
"
).
add_property
(
'
RT2
'
,
c
)
b
=
db
.
Record
(
id
=
101
).
add_parent
(
"
RT1
"
)
...
...
@@ -399,6 +450,7 @@ def test_set_id_of_node(simple_adapter):
assert
st
.
nodes
[
0
].
identifiable
is
not
None
assert
len
(
st
.
nodes
)
==
2
@patch
(
"
caoscrawler.sync_graph.cached_get_entity_by
"
,
new
=
Mock
(
side_effect
=
mock_get_entity_by
))
def
test_merging
(
simple_adapter
):
...
...
@@ -481,15 +533,6 @@ def test_something(simple_adapter):
assert
b_prop
.
id
==
101
# TODO create test that tests the assumptions after initialization:
# - no two (or more) nodes with the same id
# - no two (or more) nodes with the same path
# - nodes with id or path are not in unchecked
# - every node with id or path is in the corresponding look ups
# - every node that does not rely on something unchecked has an identifiable
# TODO create test that tests whether a cascading information application is fully applied
# TODO create test that tests whether setting something to missing allows to create an identifiable
# and thus a merge
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment