Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
187f8817
Commit
187f8817
authored
Dec 17, 2021
by
Henrik tom Wörden
Browse files
Options
Downloads
Patches
Plain Diff
TST: add further tests
parent
e4b4f9bb
No related branches found
No related tags found
2 merge requests
!53
Release 0.1
,
!3
F retrieve
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/newcrawler/crawl.py
+14
-5
14 additions, 5 deletions
src/newcrawler/crawl.py
src/newcrawler/identifiable_adapters.py
+1
-1
1 addition, 1 deletion
src/newcrawler/identifiable_adapters.py
unittests/test_tool.py
+72
-35
72 additions, 35 deletions
unittests/test_tool.py
with
87 additions
and
41 deletions
src/newcrawler/crawl.py
+
14
−
5
View file @
187f8817
...
...
@@ -247,6 +247,8 @@ class Crawler(object):
that the same identifiable exists twice)
"""
identifiable
=
self
.
identifiableAdapter
.
get_identifiable
(
record
)
if
identifiable
is
None
:
return
None
if
identifiable
in
self
.
identified_cache
:
return
self
.
identified_cache
[
identifiable
]
else
:
...
...
@@ -261,6 +263,8 @@ class Crawler(object):
that the same identifiable exists twice)
"""
identifiable
=
self
.
identifiableAdapter
.
get_identifiable
(
record
)
if
identifiable
is
None
:
raise
RuntimeError
()
self
.
identified_cache
.
add
(
identifiable
=
identifiable
,
record
=
record
)
def
copy_attributes
(
self
,
fro
:
db
.
Entity
,
to
:
db
.
Entity
):
...
...
@@ -270,9 +274,14 @@ class Crawler(object):
to_be_inserted
=
[]
to_be_updated
=
[]
flat
=
list
(
ent_list
)
# assure all entities are direct members
# assure all entities are direct members
TODO Can this be removed at some point?Check only?
self
.
create_flat_list
(
ent_list
,
flat
)
# TODO: can the following be removed at some point
for
ent
in
flat
:
if
len
(
ent
.
parents
)
==
0
:
raise
RuntimeError
(
"
Records must have a parent.
"
)
resolved_references
=
True
# flat contains Entities which could not yet be checked against the remote server
while
resolved_references
and
len
(
flat
)
>
0
:
...
...
@@ -330,7 +339,7 @@ class Crawler(object):
"""
for
i
in
reversed
(
range
(
len
(
updateList
))):
record
=
updateList
[
i
]
identifiable
=
self
.
get_identifiable
(
record
)
identifiable
=
self
.
identifiableAdapter
.
get_identifiable
(
record
)
comp
=
compare_entities
(
record
,
identifiable
)
identical
=
True
...
...
@@ -385,10 +394,10 @@ class Crawler(object):
self
.
remove_unnecessary_updates
(
to_be_updated
)
# TODO
self
.
execute_inserts_in_list
(
insert
List
)
self
.
execute_updates_in_list
(
update
List
)
#
self.execute_inserts_in_list(
to_be_
insert
ed
)
#
self.execute_updates_in_list(
to_be_
update
d
)
return
(
insertList
,
update
List
)
return
(
to_be_inserted
,
to_be_
update
d
)
@staticmethod
def
debug_build_usage_tree
(
converter
:
Converter
):
...
...
...
...
This diff is collapsed.
Click to expand it.
src/newcrawler/identifiable_adapters.py
+
1
−
1
View file @
187f8817
...
...
@@ -112,7 +112,7 @@ class IdentifiableAdapter(object):
if
registered_identifiable
is
None
:
return
None
identifiable
=
db
.
Record
()
identifiable
=
db
.
Record
(
name
=
record
.
name
)
if
len
(
registered_identifiable
.
parents
)
!=
1
:
raise
RuntimeError
(
"
Multiple parents for identifiables not supported.
"
)
identifiable
.
add_parent
(
registered_identifiable
.
parents
[
0
])
...
...
...
...
This diff is collapsed.
Click to expand it.
unittests/test_tool.py
+
72
−
35
View file @
187f8817
...
...
@@ -353,10 +353,8 @@ def test_provenance_debug_data(crawler):
assert
check_key_count
(
"
Person
"
)
==
14
def
test_split_into_inserts_and_updates
(
crawler
):
# Try trivial argument
crawler
.
split_into_inserts_and_updates
([])
@pytest.fixture
def
mock_retrieve
(
crawler
):
# simulate remote server content by using the names to identify records
def
base_mocked_lookup
(
rec
,
known
):
if
rec
.
name
in
known
:
...
...
@@ -364,31 +362,37 @@ def test_split_into_inserts_and_updates(crawler):
else
:
return
None
cache
=
[]
def
trivial_cache_loockup
(
stuff
):
print
(
"
current cache
"
,
cache
)
if
stuff
.
name
in
cache
:
return
stuff
else
:
return
None
def
trivial_cache_add
(
stuff
):
cache
.
append
(
stuff
.
name
)
crawler
.
get_identified_record_from_local_cache
=
Mock
(
side_effect
=
trivial_cache_loockup
)
crawler
.
add_identified_record_to_local_cache
=
Mock
(
side_effect
=
trivial_cache_add
)
crawler
.
copy_attributes
=
Mock
()
# a record that is found remotely and should be added to the update list and one that is not
# found and should be added to the insert one
remote_known
=
{
"
A
"
:
db
.
Record
(
id
=
1111
,
name
=
"
A
"
)}
entlist
=
[
db
.
Record
(
name
=
"
A
"
),
db
.
Record
(
name
=
"
B
"
)]
crawler
.
identifiableAdapter
.
retrieve_identifiable
=
Mock
(
side_effect
=
partial
(
base_mocked_lookup
,
known
=
remote_known
))
crawler
.
identifiableAdapter
.
get_registered_identifiable
=
Mock
(
side_effect
=
partial
(
base_mocked_lookup
,
known
=
{
"
A
"
:
db
.
Record
(
name
=
"
A
"
).
add_parent
(
"
C
"
),
"
B
"
:
db
.
Record
(
name
=
"
B
"
).
add_parent
(
"
C
"
)}))
return
crawler
def
test_split_into_inserts_and_updates_trivial
(
crawler
):
# Try trivial argument
crawler
.
split_into_inserts_and_updates
([])
def
test_split_into_inserts_and_updates_single
(
mock_retrieve
):
crawler
=
mock_retrieve
entlist
=
[
db
.
Record
(
name
=
"
A
"
).
add_parent
(
"
C
"
),
db
.
Record
(
name
=
"
B
"
).
add_parent
(
"
C
"
)]
assert
crawler
.
get_identified_record_from_local_cache
(
entlist
[
0
])
is
None
assert
crawler
.
get_identified_record_from_local_cache
(
entlist
[
1
])
is
None
assert
crawler
.
can_be_checked_externally
(
entlist
[
0
])
assert
crawler
.
can_be_checked_externally
(
entlist
[
1
])
assert
crawler
.
identifiableAdapter
.
retrieve_identifiable
(
entlist
[
0
]).
id
==
1111
assert
crawler
.
identifiableAdapter
.
retrieve_identifiable
(
entlist
[
1
])
is
None
insert
,
update
=
crawler
.
split_into_inserts_and_updates
(
deepcopy
(
entlist
))
print
(
crawler
.
identifiableAdapter
.
retrieve_identifiable
.
call_args_list
)
print
(
entlist
)
# crawler.identifiableAdapter.retrieve_identifiable.assert_any_call(entlist[0])
# crawler.identifiableAdapter.retrieve_identifiable.assert_any_call(entlist[1])
assert
len
(
insert
)
==
1
...
...
@@ -396,28 +400,26 @@ def test_split_into_inserts_and_updates(crawler):
assert
len
(
update
)
==
1
assert
update
[
0
].
name
==
"
A
"
# reset cache
cache
.
clear
()
def
test_split_into_inserts_and_updates_with_ref
(
mock_retrieve
):
crawler
=
mock_retrieve
# try it with a reference
a
=
db
.
Record
(
name
=
"
A
"
)
b
=
db
.
Record
(
name
=
"
B
"
)
a
=
db
.
Record
(
name
=
"
A
"
)
.
add_parent
(
"
C
"
)
b
=
db
.
Record
(
name
=
"
B
"
)
.
add_parent
(
"
C
"
)
b
.
add_property
(
"
A
"
,
a
)
entlist
=
[
a
,
b
]
crawler
.
identifiableAdapter
.
retrieve_identifiable
=
Mock
(
side_effect
=
partial
(
base_mocked_lookup
,
known
=
remote_known
))
insert
,
update
=
crawler
.
split_into_inserts_and_updates
(
entlist
)
assert
len
(
insert
)
==
1
assert
insert
[
0
].
name
==
"
B
"
assert
len
(
update
)
==
1
assert
update
[
0
].
name
==
"
A
"
# reset cache
cache
.
clear
()
def
test_split_into_inserts_and_updates_with_circ
(
mock_retrieve
):
crawler
=
mock_retrieve
# try circular
a
=
db
.
Record
(
name
=
"
A
"
)
b
=
db
.
Record
(
name
=
"
B
"
)
a
=
db
.
Record
(
name
=
"
A
"
)
.
add_parent
(
"
C
"
)
b
=
db
.
Record
(
name
=
"
B
"
)
.
add_parent
(
"
C
"
)
b
.
add_property
(
"
A
"
,
a
)
a
.
add_property
(
"
B
"
,
b
)
entlist
=
[
a
,
b
]
...
...
@@ -436,5 +438,40 @@ def test_split_into_inserts_and_updates(crawler):
def
test_all_references_are_existing_already
(
crawler
):
pass
# crawler.all_references_are_existing_already(record)
def
base_mocked_lookup
(
rec
,
known
):
if
rec
.
name
in
known
:
return
known
[
rec
.
name
]
else
:
return
None
crawler
.
identifiableAdapter
.
get_registered_identifiable
=
Mock
(
side_effect
=
partial
(
base_mocked_lookup
,
known
=
{
"
A
"
:
db
.
Record
(
name
=
"
A
"
).
add_parent
(
"
C
"
),
"
B
"
:
db
.
Record
(
name
=
"
B
"
).
add_parent
(
"
C
"
)}))
assert
crawler
.
all_references_are_existing_already
(
db
.
Record
().
add_property
(
'
a
'
,
123
))
assert
crawler
.
all_references_are_existing_already
(
db
.
Record
()
.
add_property
(
'
a
'
,
db
.
Record
(
id
=
123
)))
assert
crawler
.
all_references_are_existing_already
(
db
.
Record
()
.
add_property
(
'
a
'
,
123
)
.
add_property
(
'
b
'
,
db
.
Record
(
id
=
123
)))
assert
not
crawler
.
all_references_are_existing_already
(
db
.
Record
()
.
add_property
(
'
a
'
,
123
)
.
add_property
(
'
b
'
,
db
.
Record
(
name
=
"
A
"
)
.
add_parent
(
"
C
"
)))
a
=
db
.
Record
(
name
=
"
A
"
).
add_parent
(
"
C
"
)
crawler
.
add_identified_record_to_local_cache
(
a
)
assert
crawler
.
all_references_are_existing_already
(
db
.
Record
()
.
add_property
(
'
a
'
,
123
)
.
add_property
(
'
b
'
,
a
))
def
test_can_be_checked_externally
(
crawler
):
assert
crawler
.
can_be_checked_externally
(
db
.
Record
().
add_property
(
'
a
'
,
123
))
assert
crawler
.
can_be_checked_externally
(
db
.
Record
()
.
add_property
(
'
a
'
,
db
.
Record
(
id
=
123
)))
assert
crawler
.
can_be_checked_externally
(
db
.
Record
()
.
add_property
(
'
a
'
,
123
)
.
add_property
(
'
b
'
,
db
.
Record
(
id
=
123
)))
assert
not
crawler
.
can_be_checked_externally
(
db
.
Record
()
.
add_property
(
'
a
'
,
123
)
.
add_property
(
'
b
'
,
db
.
Record
()))
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment