Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
f57ee93e
Verified
Commit
f57ee93e
authored
11 months ago
by
Daniel Hornung
Browse files
Options
Downloads
Patches
Plain Diff
MAINT: Renaming split_into_inserts_and_updates.
parent
c22af986
Branches
Branches containing commit
Tags
Tags containing commit
2 merge requests
!178
FIX: #96 Better error output for crawl.py script.
,
!171
sav/spss converter
Pipeline
#51983
failed
11 months ago
Stage: info
Stage: setup
Stage: cert
Stage: style
Stage: test
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/caoscrawler/crawl.py
+9
-3
9 additions, 3 deletions
src/caoscrawler/crawl.py
unittests/test_crawler.py
+8
-8
8 additions, 8 deletions
unittests/test_crawler.py
unittests/test_sync_graph.py
+4
-2
4 additions, 2 deletions
unittests/test_sync_graph.py
with
21 additions
and
13 deletions
src/caoscrawler/crawl.py
+
9
−
3
View file @
f57ee93e
...
...
@@ -295,11 +295,17 @@ class Crawler(object):
self
.
crawled_data
=
data
return
data
def
split_into_inserts_and_updates
(
self
,
st
:
SyncGraph
):
def
_
split_into_inserts_and_updates
(
self
,
st
:
SyncGraph
):
"""
Classify nodes in the SyncGraph ``st`` with respect to their state on the server.
This method iteratively checks whether those nodes exist on the remote server and creates two lists,
one with the entities that need to be updated and the other with entities to be inserted.
.. todo::
Should this be made into a public method of SyncGraph instead? At the moment, this is a
purely static method that only operates on the state of ``st``.
"""
entity_was_treated
=
True
# st.unchecked contains Entities which could not yet be checked against the remote server
...
...
@@ -338,7 +344,7 @@ one with the entities that need to be updated and the other with entities to be
# )
raise
RuntimeError
(
"
Could not finish split_into_inserts_and_updates.
"
"
Could not finish
_
split_into_inserts_and_updates.
"
"
It might be due to a circular dependency
"
)
return
st
.
export_record_lists
()
...
...
@@ -650,7 +656,7 @@ one with the entities that need to be updated and the other with entities to be
"
use for example the Scanner to create this data.
"
))
crawled_data
=
self
.
crawled_data
to_be_inserted
,
to_be_updated
=
self
.
split_into_inserts_and_updates
(
to_be_inserted
,
to_be_updated
=
self
.
_
split_into_inserts_and_updates
(
SyncGraph
(
crawled_data
,
self
.
identifiableAdapter
))
for
el
in
to_be_updated
:
...
...
This diff is collapsed.
Click to expand it.
unittests/test_crawler.py
+
8
−
8
View file @
f57ee93e
...
...
@@ -329,7 +329,7 @@ def test_remove_unnecessary_updates():
def
test_split_into_inserts_and_updates_trivial
():
crawler
=
Crawler
()
st
=
SyncGraph
([],
crawler
.
identifiableAdapter
)
crawler
.
split_into_inserts_and_updates
(
st
)
crawler
.
_
split_into_inserts_and_updates
(
st
)
def
test_split_into_inserts_and_updates_simple
(
crawler_mocked_identifiable_retrieve
):
...
...
@@ -347,7 +347,7 @@ def test_split_into_inserts_and_updates_simple(crawler_mocked_identifiable_retri
assert
crawler
.
identifiableAdapter
.
retrieve_identified_record_for_record
(
identlist
[
1
])
is
None
insert
,
update
=
crawler
.
split_into_inserts_and_updates
(
st
)
insert
,
update
=
crawler
.
_
split_into_inserts_and_updates
(
st
)
assert
len
(
insert
)
==
1
assert
insert
[
0
].
name
==
"
B
"
assert
len
(
update
)
==
1
...
...
@@ -370,7 +370,7 @@ def test_split_into_inserts_and_updates_with_circ(crawler_mocked_identifiable_re
st
=
SyncGraph
([
a
,
b
],
crawler
.
identifiableAdapter
)
with
pytest
.
raises
(
RuntimeError
):
crawler
.
split_into_inserts_and_updates
(
st
)
crawler
.
_
split_into_inserts_and_updates
(
st
)
def
test_split_into_inserts_and_updates_with_complex
(
crawler_mocked_identifiable_retrieve
):
...
...
@@ -389,7 +389,7 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
b
.
add_property
(
"
C
"
,
f
)
entlist
=
[
a
,
b
,
g
]
st
=
SyncGraph
(
entlist
,
crawler
.
identifiableAdapter
)
insert
,
update
=
crawler
.
split_into_inserts_and_updates
(
st
)
insert
,
update
=
crawler
.
_
split_into_inserts_and_updates
(
st
)
assert
len
(
insert
)
==
3
assert
"
B
"
in
[
el
.
name
for
el
in
insert
]
assert
len
(
update
)
==
1
...
...
@@ -487,7 +487,7 @@ a: ([b1, b2])
# The Cs cannot be merged due to different identifying properties
# The Bs cannot be merged due to different references to Cs
with
raises
(
ImpossibleMergeError
)
as
rte
:
crawler
.
split_into_inserts_and_updates
(
st
)
crawler
.
_
split_into_inserts_and_updates
(
st
)
# TODO
# assert not isinstance(rte.value, NotImplementedError), \
# "Exception must not be NotImplementedError, but plain RuntimeError."
...
...
@@ -521,7 +521,7 @@ def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test)
identlist
[
1
])
is
None
# check the split...
insert
,
update
=
crawler
.
split_into_inserts_and_updates
(
st
)
insert
,
update
=
crawler
.
_
split_into_inserts_and_updates
(
st
)
# A was found remotely and is therefore in the update list
assert
len
(
update
)
==
1
assert
update
[
0
].
name
==
"
A
"
...
...
@@ -550,7 +550,7 @@ def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_
assert
len
(
identifiable
.
backrefs
)
==
2
# check the split...
insert
,
update
=
crawler
.
split_into_inserts_and_updates
(
st
)
insert
,
update
=
crawler
.
_
split_into_inserts_and_updates
(
st
)
assert
len
(
update
)
==
2
assert
len
(
insert
)
==
1
...
...
@@ -575,7 +575,7 @@ def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_
assert
len
(
identifiable
.
backrefs
)
==
2
# check the split...
insert
,
update
=
crawler
.
split_into_inserts_and_updates
(
st
)
insert
,
update
=
crawler
.
_
split_into_inserts_and_updates
(
st
)
assert
len
(
update
)
==
2
assert
len
(
insert
)
==
1
...
...
This diff is collapsed.
Click to expand it.
unittests/test_sync_graph.py
+
4
−
2
View file @
f57ee93e
...
...
@@ -18,6 +18,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import
logging
from
functools
import
partial
from
unittest.mock
import
MagicMock
,
Mock
,
patch
...
...
@@ -631,8 +633,8 @@ def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog
assert
Crawler
.
detect_circular_dependency
([
d
])
is
None
st
=
SyncGraph
(
flat
,
crawler
.
identifiableAdapter
)
with
raises
(
RuntimeError
):
_
,
_
=
crawler
.
split_into_inserts_and_updates
(
st
)
with
pytest
.
raises
(
RuntimeError
):
_
,
_
=
crawler
.
_
split_into_inserts_and_updates
(
st
)
caplog
.
set_level
(
logging
.
ERROR
,
logger
=
"
caoscrawler.converters
"
)
assert
"
Found circular dependency
"
in
caplog
.
text
assert
"
\n
--------
\n\n
> Parent: C
\n\n
>> Name: a
\n
[
\'
C
\'
]
"
in
caplog
.
text
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment