Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
caosdb-advanced-user-tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
caosdb-advanced-user-tools
Commits
c6c305c5
Commit
c6c305c5
authored
Mar 22, 2024
by
Daniel Hornung
Browse files
Options
Downloads
Patches
Plain Diff
FIX: Linting.
parent
41b61e37
No related branches found
No related tags found
2 merge requests
!100
WIP: Filling XLSX: Seems to be working.
,
!93
Filling XLSX: Everything except multiple choice.
Pipeline
#49065
failed
Mar 22, 2024
Stage: setup
Stage: cert
Stage: style
Stage: unittest
Stage: integrationtest
Changes
3
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
Makefile
+1
-1
1 addition, 1 deletion
Makefile
pylintrc
+5
-0
5 additions, 0 deletions
pylintrc
src/caosadvancedtools/crawler.py
+34
-32
34 additions, 32 deletions
src/caosadvancedtools/crawler.py
with
40 additions
and
33 deletions
Makefile
+
1
−
1
View file @
c6c305c5
...
@@ -41,5 +41,5 @@ style:
...
@@ -41,5 +41,5 @@ style:
.PHONY
:
style
.PHONY
:
style
lint
:
lint
:
pylint
--unsafe-load-any-extension
=
y
-d
all
-e
E,F
--ignore
=
swagger_client src/caosadvancedtools
pylint
--unsafe-load-any-extension
=
y
-d
R,C
--ignore
=
swagger_client src/caosadvancedtools
.PHONY
:
lint
.PHONY
:
lint
This diff is collapsed.
Click to expand it.
pylintrc
+
5
−
0
View file @
c6c305c5
...
@@ -17,3 +17,8 @@ init-hook=
...
@@ -17,3 +17,8 @@ init-hook=
import
sys
;
sys
.
path
.
extend
([
"src/caosadvancedtools"
]);
import
sys
;
sys
.
path
.
extend
([
"src/caosadvancedtools"
]);
import
astroid
;
astroid
.
context
.
InferenceContext
.
max_inferred
=
500
;
import
astroid
;
astroid
.
context
.
InferenceContext
.
max_inferred
=
500
;
[
MESSAGES
CONTROL
]
disable
=
fixme
,
logging
-
format
-
interpolation
,
logging
-
not
-
lazy
,
This diff is collapsed.
Click to expand it.
src/caosadvancedtools/crawler.py
+
34
−
32
View file @
c6c305c5
...
@@ -41,14 +41,14 @@ match. This occurs in basically three steps:
...
@@ -41,14 +41,14 @@ match. This occurs in basically three steps:
import
logging
import
logging
import
os
import
os
import
subprocess
import
traceback
import
traceback
import
uuid
import
uuid
from
datetime
import
datetime
from
datetime
import
datetime
from
sqlite3
import
IntegrityError
from
sqlite3
import
IntegrityError
from
xml.sax.saxutils
import
escape
import
linkahead
as
db
import
linkahead
as
db
from
linkahead.exceptions
import
BadQueryError
from
linkahead.exceptions
import
BadQueryError
,
TransactionError
from
.cache
import
IdentifiableCache
,
UpdateCache
,
get_pretty_xml
from
.cache
import
IdentifiableCache
,
UpdateCache
,
get_pretty_xml
from
.cfood
import
RowCFood
,
add_files
,
get_ids_for_entities_with_names
from
.cfood
import
RowCFood
,
add_files
,
get_ids_for_entities_with_names
...
@@ -67,7 +67,7 @@ def separated(text):
...
@@ -67,7 +67,7 @@ def separated(text):
return
"
-
"
*
60
+
"
\n
"
+
text
return
"
-
"
*
60
+
"
\n
"
+
text
def
apply_list_of_updates
(
to_be_updated
,
update_flags
=
{}
,
def
apply_list_of_updates
(
to_be_updated
,
update_flags
=
None
,
update_cache
=
None
,
run_id
=
None
):
update_cache
=
None
,
run_id
=
None
):
"""
Updates the `to_be_updated` Container, i.e., pushes the changes to CaosDB
"""
Updates the `to_be_updated` Container, i.e., pushes the changes to CaosDB
after removing possible duplicates. If a chace is provided, uauthorized
after removing possible duplicates. If a chace is provided, uauthorized
...
@@ -87,6 +87,8 @@ def apply_list_of_updates(to_be_updated, update_flags={},
...
@@ -87,6 +87,8 @@ def apply_list_of_updates(to_be_updated, update_flags={},
Id with which the pending updates are cached. Only meaningful if
Id with which the pending updates are cached. Only meaningful if
`update_cache` is provided. Default is None.
`update_cache` is provided. Default is None.
"""
"""
if
update_flags
is
None
:
update_flags
=
{}
if
len
(
to_be_updated
)
==
0
:
if
len
(
to_be_updated
)
==
0
:
return
return
...
@@ -132,7 +134,7 @@ def apply_list_of_updates(to_be_updated, update_flags={},
...
@@ -132,7 +134,7 @@ def apply_list_of_updates(to_be_updated, update_flags={},
)
)
logger
.
debug
(
traceback
.
format_exc
())
logger
.
debug
(
traceback
.
format_exc
())
logger
.
debug
(
e
)
logger
.
debug
(
e
)
except
Exception
as
e
:
except
TransactionError
as
e
:
DataModelProblems
.
evaluate_exception
(
e
)
DataModelProblems
.
evaluate_exception
(
e
)
...
@@ -220,14 +222,16 @@ class Crawler(object):
...
@@ -220,14 +222,16 @@ class Crawler(object):
new_cont
=
db
.
Container
.
from_xml
(
new
)
new_cont
=
db
.
Container
.
from_xml
(
new
)
ids
=
[]
ids
=
[]
tmp
=
db
.
Container
()
tmp
=
db
.
Container
()
update_incomplete
=
False
# TODO what was this for?
# update_incomplete = False
# remove duplicate entities
# remove duplicate entities
for
el
in
new_cont
:
for
el
in
new_cont
:
if
el
.
id
not
in
ids
:
if
el
.
id
not
in
ids
:
ids
.
append
(
el
.
id
)
ids
.
append
(
el
.
id
)
tmp
.
append
(
el
)
tmp
.
append
(
el
)
else
:
else
:
update_incomplete
=
True
pass
# update_incomplete = True
new_cont
=
tmp
new_cont
=
tmp
if
new_cont
[
0
].
version
:
# pylint: disable=no-member
if
new_cont
[
0
].
version
:
# pylint: disable=no-member
valids
=
db
.
Container
()
valids
=
db
.
Container
()
...
@@ -238,7 +242,7 @@ class Crawler(object):
...
@@ -238,7 +242,7 @@ class Crawler(object):
if
ent
.
version
==
remote_ent
.
version
:
# pylint: disable=no-member
if
ent
.
version
==
remote_ent
.
version
:
# pylint: disable=no-member
valids
.
append
(
ent
)
valids
.
append
(
ent
)
else
:
else
:
update_incomplete
=
True
#
update_incomplete = True
nonvalids
.
append
(
remote_ent
)
nonvalids
.
append
(
remote_ent
)
valids
.
update
(
unique
=
False
)
valids
.
update
(
unique
=
False
)
logger
.
info
(
"
Successfully updated {} records!
"
.
format
(
logger
.
info
(
"
Successfully updated {} records!
"
.
format
(
...
@@ -317,10 +321,10 @@ class Crawler(object):
...
@@ -317,10 +321,10 @@ class Crawler(object):
logger
.
debug
(
e
)
logger
.
debug
(
e
)
# TODO: Generally: in which cases should exceptions be raised? When is
# TODO: Generally: in which cases should exceptions be raised? When is
# errors_occured set to True? The expected behavior must be documented.
# errors_occured set to True? The expected behavior must be documented.
except
Exception
as
e
:
except
TransactionError
as
e
:
try
:
try
:
DataModelProblems
.
evaluate_exception
(
e
)
DataModelProblems
.
evaluate_exception
(
e
)
except
BaseException
:
except
BaseException
:
# pylint: disable=broad-exception-caught
pass
pass
logger
.
debug
(
"
Failed during execution of {}!
"
.
format
(
logger
.
debug
(
"
Failed during execution of {}!
"
.
format
(
Cfood
.
__name__
))
Cfood
.
__name__
))
...
@@ -349,13 +353,12 @@ class Crawler(object):
...
@@ -349,13 +353,12 @@ class Crawler(object):
logger
.
info
(
"
Cannot access {}. However, it might be needed for
"
logger
.
info
(
"
Cannot access {}. However, it might be needed for
"
"
the correct execution
"
.
format
(
e
.
filename
))
"
the correct execution
"
.
format
(
e
.
filename
))
remove_cfoods
.
append
(
cfood
)
remove_cfoods
.
append
(
cfood
)
except
Exception
as
e
:
except
TransactionError
as
e
:
try
:
try
:
DataModelProblems
.
evaluate_exception
(
e
)
DataModelProblems
.
evaluate_exception
(
e
)
except
BaseException
:
except
BaseException
:
# pylint: disable=broad-exception-caught
pass
pass
logger
.
debug
(
"
Failed during execution of {}!
"
.
format
(
logger
.
debug
(
"
Failed during execution of {}!
"
.
format
(
cfood
.
__name__
))
Cfood
.
__name__
))
logger
.
debug
(
traceback
.
format_exc
())
logger
.
debug
(
traceback
.
format_exc
())
logger
.
debug
(
e
)
logger
.
debug
(
e
)
remove_cfoods
.
append
(
cfood
)
remove_cfoods
.
append
(
cfood
)
...
@@ -415,8 +418,8 @@ class Crawler(object):
...
@@ -415,8 +418,8 @@ class Crawler(object):
self
.
cache
.
insert_list
(
hashes
,
identifiables
)
self
.
cache
.
insert_list
(
hashes
,
identifiables
)
def
crawl
(
self
,
security_level
=
RETRIEVE
,
path
=
None
):
def
crawl
(
self
,
security_level
=
RETRIEVE
,
path
=
None
):
self
.
run_id
=
uuid
.
uuid1
()
run_id
=
uuid
.
uuid1
()
logger
.
info
(
"
Run Id:
"
+
str
(
self
.
run_id
))
logger
.
info
(
"
Run Id:
"
+
str
(
run_id
))
guard
.
set_level
(
level
=
security_level
)
guard
.
set_level
(
level
=
security_level
)
logger
.
info
(
"
Scanning the objects to be treated...
"
)
logger
.
info
(
"
Scanning the objects to be treated...
"
)
...
@@ -435,17 +438,17 @@ class Crawler(object):
...
@@ -435,17 +438,17 @@ class Crawler(object):
cfood
.
to_be_updated
,
cfood
.
to_be_updated
,
cfood
.
update_flags
,
cfood
.
update_flags
,
update_cache
=
self
.
update_cache
,
update_cache
=
self
.
update_cache
,
run_id
=
self
.
run_id
)
run_id
=
run_id
)
except
FileNotFoundError
as
e
:
except
FileNotFoundError
as
e
:
logger
.
info
(
"
Cannot access {}. However, it might be needed for
"
logger
.
info
(
"
Cannot access {}. However, it might be needed for
"
"
the correct execution
"
.
format
(
e
.
filename
))
"
the correct execution
"
.
format
(
e
.
filename
))
except
DataInconsistencyError
as
e
:
except
DataInconsistencyError
as
e
:
logger
.
debug
(
traceback
.
format_exc
())
logger
.
debug
(
traceback
.
format_exc
())
logger
.
debug
(
e
)
logger
.
debug
(
e
)
except
Exception
as
e
:
except
TransactionError
as
e
:
try
:
try
:
DataModelProblems
.
evaluate_exception
(
e
)
DataModelProblems
.
evaluate_exception
(
e
)
except
Exception
:
except
Exception
:
# pylint: disable=broad-exception-caught
pass
pass
logger
.
info
(
"
Failed during execution of {}!
"
.
format
(
logger
.
info
(
"
Failed during execution of {}!
"
.
format
(
cfood
.
__class__
.
__name__
))
cfood
.
__class__
.
__name__
))
...
@@ -457,7 +460,7 @@ class Crawler(object):
...
@@ -457,7 +460,7 @@ class Crawler(object):
errors_occured
=
True
errors_occured
=
True
tbs
.
append
(
e
)
tbs
.
append
(
e
)
pending_changes
=
self
.
update_cache
.
get_updates
(
self
.
run_id
)
pending_changes
=
self
.
update_cache
.
get_updates
(
run_id
)
if
pending_changes
:
if
pending_changes
:
# Sending an Email with a link to a form to authorize updates is
# Sending an Email with a link to a form to authorize updates is
...
@@ -465,7 +468,7 @@ class Crawler(object):
...
@@ -465,7 +468,7 @@ class Crawler(object):
if
"
SHARED_DIR
"
in
os
.
environ
:
if
"
SHARED_DIR
"
in
os
.
environ
:
filename
=
Crawler
.
save_form
([
el
[
3
]
filename
=
Crawler
.
save_form
([
el
[
3
]
for
el
in
pending_changes
],
path
,
self
.
run_id
)
for
el
in
pending_changes
],
path
,
run_id
)
Crawler
.
send_mail
([
el
[
3
]
for
el
in
pending_changes
],
filename
)
Crawler
.
send_mail
([
el
[
3
]
for
el
in
pending_changes
],
filename
)
for
i
,
el
in
enumerate
(
pending_changes
):
for
i
,
el
in
enumerate
(
pending_changes
):
...
@@ -477,7 +480,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
...
@@ -477,7 +480,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
logger
.
info
(
"
There where unauthorized changes (see above). An
"
logger
.
info
(
"
There where unauthorized changes (see above). An
"
"
email was sent to the curator.
\n
"
"
email was sent to the curator.
\n
"
"
You can authorize the updates by invoking the crawler
"
"
You can authorize the updates by invoking the crawler
"
"
with the run id: {rid}
\n
"
.
format
(
rid
=
self
.
run_id
))
"
with the run id: {rid}
\n
"
.
format
(
rid
=
run_id
))
if
len
(
DataModelProblems
.
missing
)
>
0
:
if
len
(
DataModelProblems
.
missing
)
>
0
:
err_msg
=
(
"
There were problems with one or more RecordType or
"
err_msg
=
(
"
There were problems with one or more RecordType or
"
...
@@ -488,8 +491,9 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
...
@@ -488,8 +491,9 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
logger
.
error
(
err_msg
)
logger
.
error
(
err_msg
)
logger
.
error
(
'
Crawler finished with Datamodel Errors
'
)
logger
.
error
(
'
Crawler finished with Datamodel Errors
'
)
elif
errors_occured
:
elif
errors_occured
:
msg
=
"
There were fatal errors during execution, please
"
msg
=
(
"
There were fatal errors during execution, please
"
"
contact the system administrator!
"
"
contact the system administrator!
"
)
if
self
.
debug_file
:
if
self
.
debug_file
:
msg
+=
"
\n
Please provide the following path:
\n
{}
"
.
format
(
msg
+=
"
\n
Please provide the following path:
\n
{}
"
.
format
(
...
@@ -516,7 +520,6 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
...
@@ -516,7 +520,6 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
path: the path defining the subtree that is crawled
path: the path defining the subtree that is crawled
"""
"""
from
xml.sax.saxutils
import
escape
# TODO move path related stuff to sss_helper
# TODO move path related stuff to sss_helper
form
=
"""
form
=
"""
...
@@ -593,7 +596,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
...
@@ -593,7 +596,7 @@ ____________________\n""".format(i+1, len(pending_changes)) + str(el[3]))
randname
=
os
.
path
.
basename
(
os
.
path
.
abspath
(
directory
))
randname
=
os
.
path
.
basename
(
os
.
path
.
abspath
(
directory
))
filepath
=
os
.
path
.
abspath
(
os
.
path
.
join
(
directory
,
filename
))
filepath
=
os
.
path
.
abspath
(
os
.
path
.
join
(
directory
,
filename
))
filename
=
os
.
path
.
join
(
randname
,
filename
)
filename
=
os
.
path
.
join
(
randname
,
filename
)
with
open
(
filepath
,
"
w
"
)
as
f
:
with
open
(
filepath
,
"
w
"
,
encoding
=
"
utf8
"
)
as
f
:
f
.
write
(
form
)
f
.
write
(
form
)
return
filename
return
filename
...
@@ -673,7 +676,7 @@ carefully and if the changes are ok, click on the following link:
...
@@ -673,7 +676,7 @@ carefully and if the changes are ok, click on the following link:
guard
.
safe_insert
(
missing
,
unique
=
False
,
guard
.
safe_insert
(
missing
,
unique
=
False
,
flags
=
{
"
force-missing-obligatory
"
:
"
ignore
"
})
flags
=
{
"
force-missing-obligatory
"
:
"
ignore
"
})
inserted
.
append
(
ent
)
inserted
.
append
(
ent
)
except
Exception
as
e
:
except
TransactionError
as
e
:
DataModelProblems
.
evaluate_exception
(
e
)
DataModelProblems
.
evaluate_exception
(
e
)
if
len
(
existing
)
>
0
:
if
len
(
existing
)
>
0
:
info
=
"
Identified the following existing entities:
\n
"
info
=
"
Identified the following existing entities:
\n
"
...
@@ -774,14 +777,13 @@ class FileCrawler(Crawler):
...
@@ -774,14 +777,13 @@ class FileCrawler(Crawler):
path
if
path
.
endswith
(
"
/
"
)
else
path
+
"
/
"
)
+
"
**
'"
path
if
path
.
endswith
(
"
/
"
)
else
path
+
"
/
"
)
+
"
**
'"
q_info
=
"
Sending the following query:
'"
+
query_str
+
"'
\n
"
q_info
=
"
Sending the following query:
'"
+
query_str
+
"'
\n
"
files
=
db
.
execute_query
(
query_str
)
files
=
db
.
execute_query
(
query_str
)
logger
.
info
(
logger
.
info
(
q_info
+
f
"
Found
{
len
(
files
)
}
files that need to be processed.
"
)
q_info
+
"
Found {} files that need to be processed.
"
.
format
(
len
(
files
)))
return
files
return
files
class
TableCrawler
(
Crawler
):
class
TableCrawler
(
Crawler
):
"""
Crawler for table like data.
"""
def
__init__
(
self
,
table
,
unique_cols
,
recordtype
,
**
kwargs
):
def
__init__
(
self
,
table
,
unique_cols
,
recordtype
,
**
kwargs
):
"""
"""
...
@@ -796,6 +798,7 @@ class TableCrawler(Crawler):
...
@@ -796,6 +798,7 @@ class TableCrawler(Crawler):
# TODO I do not like this yet, but I do not see a better way so far.
# TODO I do not like this yet, but I do not see a better way so far.
class
ThisRowCF
(
RowCFood
):
class
ThisRowCF
(
RowCFood
):
"""
CFood for rows.
"""
def
__init__
(
self
,
item
):
def
__init__
(
self
,
item
):
super
().
__init__
(
item
,
unique_cols
,
recordtype
)
super
().
__init__
(
item
,
unique_cols
,
recordtype
)
...
@@ -821,7 +824,6 @@ def get_value(prop):
...
@@ -821,7 +824,6 @@ def get_value(prop):
if
isinstance
(
prop
.
value
,
db
.
Entity
):
if
isinstance
(
prop
.
value
,
db
.
Entity
):
return
prop
.
value
.
id
return
prop
.
value
.
id
el
if
isinstance
(
prop
.
value
,
datetime
):
if
isinstance
(
prop
.
value
,
datetime
):
return
prop
.
value
.
isoformat
()
return
prop
.
value
.
isoformat
()
else
:
return
prop
.
value
return
prop
.
value
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment