Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
caosdb-advanced-user-tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
caosdb-advanced-user-tools
Commits
23445e6e
Commit
23445e6e
authored
5 years ago
by
Henrik tom Wörden
Browse files
Options
Downloads
Patches
Plain Diff
ENH: output can now be controlled by verbosity more specifically
parent
8f96c9cb
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/caosadvancedtools/cfood.py
+36
-17
36 additions, 17 deletions
src/caosadvancedtools/cfood.py
src/caosadvancedtools/crawler.py
+61
-31
61 additions, 31 deletions
src/caosadvancedtools/crawler.py
with
97 additions
and
48 deletions
src/caosadvancedtools/cfood.py
+
36
−
17
View file @
23445e6e
...
@@ -38,6 +38,8 @@ import re
...
@@ -38,6 +38,8 @@ import re
import
caosdb
as
db
import
caosdb
as
db
from
.verbosity
import
INFO
,
VERBOSE
ENTITIES
=
{}
ENTITIES
=
{}
...
@@ -60,7 +62,7 @@ class AbstractCFood(object):
...
@@ -60,7 +62,7 @@ class AbstractCFood(object):
# function match()
# function match()
_pattern
=
None
_pattern
=
None
def
__init__
(
self
,
crawled_file
,
access
=
lambda
x
:
x
,
verbos
e
=
True
):
def
__init__
(
self
,
crawled_file
,
access
=
lambda
x
:
x
,
verbos
ity
=
INFO
):
"""
Abstract base class for Crawler food (CFood).
"""
Abstract base class for Crawler food (CFood).
Parameters
Parameters
...
@@ -77,7 +79,7 @@ class AbstractCFood(object):
...
@@ -77,7 +79,7 @@ class AbstractCFood(object):
self
.
match
=
type
(
self
).
match
(
crawled_file
.
path
)
self
.
match
=
type
(
self
).
match
(
crawled_file
.
path
)
self
.
to_be_updated
=
db
.
Container
()
self
.
to_be_updated
=
db
.
Container
()
self
.
identifiables
=
db
.
Container
()
self
.
identifiables
=
db
.
Container
()
self
.
verbos
e
=
verbos
e
self
.
verbos
ity
=
verbos
ity
self
.
attached_ones
=
[]
self
.
attached_ones
=
[]
@staticmethod
@staticmethod
...
@@ -127,7 +129,16 @@ class AbstractCFood(object):
...
@@ -127,7 +129,16 @@ class AbstractCFood(object):
if
len
(
self
.
to_be_updated
)
==
0
:
if
len
(
self
.
to_be_updated
)
==
0
:
return
return
get_ids_for_entities_with_names
(
self
.
to_be_updated
)
get_ids_for_entities_with_names
(
self
.
to_be_updated
)
if
self
.
verbosity
>=
INFO
:
print
(
"
/
"
*
60
)
print
(
"
UPDATE: updating the following entities
"
)
for
el
in
self
.
to_be_updated
:
print
(
el
.
name
if
el
.
name
is
not
None
else
el
.
id
)
print
(
"
/
"
*
60
)
self
.
to_be_updated
.
update
()
self
.
to_be_updated
.
update
()
def
attach
(
self
,
crawled_file
):
def
attach
(
self
,
crawled_file
):
...
@@ -173,7 +184,7 @@ class AbstractCFood(object):
...
@@ -173,7 +184,7 @@ class AbstractCFood(object):
def
assure_object_is_in_list
(
obj
,
containing_object
,
property_name
,
def
assure_object_is_in_list
(
obj
,
containing_object
,
property_name
,
to_be_updated
,
verbos
e
=
True
):
to_be_updated
,
verbos
ity
=
INFO
):
"""
"""
Checks whether `obj` is one of the values in the list property
Checks whether `obj` is one of the values in the list property
`property_name` of the supplied entity containing_object`.
`property_name` of the supplied entity containing_object`.
...
@@ -212,15 +223,17 @@ def assure_object_is_in_list(obj, containing_object, property_name,
...
@@ -212,15 +223,17 @@ def assure_object_is_in_list(obj, containing_object, property_name,
break
break
if
verbose
:
if
contained
:
if
contained
:
if
verbosity
>=
VERBOSE
:
print
(
"
{} is in {} of entity {}
"
.
format
(
print
(
"
{} is in {} of entity {}
"
.
format
(
o
,
property_name
,
containing_object
.
id
))
o
,
property_name
,
containing_object
.
id
))
else
:
print
(
"
Appending {} to {} of entity {}
"
.
format
(
o
,
property_name
,
containing_object
.
id
))
if
not
contained
:
else
:
if
verbosity
>=
INFO
:
print
(
"
/
"
*
60
)
print
(
"
UPDATE: Appending {} to {} of entity {}
"
.
format
(
o
,
property_name
,
containing_object
.
id
))
print
(
"
/
"
*
60
)
current_list
.
append
(
o
)
current_list
.
append
(
o
)
update
=
True
update
=
True
...
@@ -228,7 +241,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
...
@@ -228,7 +241,7 @@ def assure_object_is_in_list(obj, containing_object, property_name,
to_be_updated
.
append
(
containing_object
)
to_be_updated
.
append
(
containing_object
)
def
assure_has_parent
(
entity
,
parent
,
to_be_updated
=
None
,
verbos
e
=
True
):
def
assure_has_parent
(
entity
,
parent
,
to_be_updated
=
None
,
verbos
ity
=
INFO
):
"""
"""
Checks whether `entity` has a parent with name `parent`.
Checks whether `entity` has a parent with name `parent`.
...
@@ -247,13 +260,16 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
...
@@ -247,13 +260,16 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
break
break
if
contained
:
if
contained
:
if
verbos
e
:
if
verbos
ity
>=
VERBOSE
:
print
(
"
entity {} has parent {}
"
.
format
(
entity
.
id
,
parent
))
print
(
"
entity {} has parent {}
"
.
format
(
entity
.
id
,
parent
))
return
return
if
verbose
:
if
verbosity
>=
INFO
:
print
(
"
Adding parent {} to entity {}
"
.
format
(
parent
,
entity
.
id
))
print
(
"
/
"
*
60
)
print
(
"
UPDATE: Adding parent {} to entity {}
"
.
format
(
parent
,
entity
.
id
))
print
(
"
/
"
*
60
)
entity
.
add_parent
(
parent
)
entity
.
add_parent
(
parent
)
if
to_be_updated
is
None
:
if
to_be_updated
is
None
:
...
@@ -262,7 +278,8 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
...
@@ -262,7 +278,8 @@ def assure_has_parent(entity, parent, to_be_updated=None, verbose=True):
to_be_updated
.
append
(
entity
)
to_be_updated
.
append
(
entity
)
def
assure_has_property
(
entity
,
name
,
value
,
to_be_updated
=
None
,
verbose
=
True
):
def
assure_has_property
(
entity
,
name
,
value
,
to_be_updated
=
None
,
verbosity
=
INFO
):
"""
"""
Checks whether `entity` has a property `name` with the value `value`.
Checks whether `entity` has a property `name` with the value `value`.
...
@@ -282,17 +299,19 @@ def assure_has_property(entity, name, value, to_be_updated=None, verbose=True):
...
@@ -282,17 +299,19 @@ def assure_has_property(entity, name, value, to_be_updated=None, verbose=True):
break
break
if
contained
:
if
contained
:
if
verbos
e
:
if
verbos
ity
>=
VERBOSE
:
print
(
"
entity {} has property {} with value {}
"
.
format
(
print
(
"
entity {} has property {} with value {}
"
.
format
(
entity
.
id
,
entity
.
id
,
name
,
value
))
name
,
value
))
return
return
if
verbose
:
if
verbosity
>=
INFO
:
print
(
"
Adding property {} with value {} to entity {}
"
.
format
(
print
(
"
/
"
*
60
)
print
(
"
UPDATE: Adding property {} with value {} to entity {}
"
.
format
(
name
,
name
,
value
,
entity
.
id
))
value
,
entity
.
id
))
print
(
"
/
"
*
60
)
entity
.
add_property
(
name
=
name
,
value
=
value
)
entity
.
add_property
(
name
=
name
,
value
=
value
)
if
to_be_updated
is
None
:
if
to_be_updated
is
None
:
...
...
This diff is collapsed.
Click to expand it.
src/caosadvancedtools/crawler.py
+
61
−
31
View file @
23445e6e
...
@@ -42,10 +42,12 @@ import caosdb as db
...
@@ -42,10 +42,12 @@ import caosdb as db
from
caosdb.exceptions
import
TransactionError
from
caosdb.exceptions
import
TransactionError
from
.cache
import
Cache
from
.cache
import
Cache
from
.verbosity
import
DEBUG
,
INFO
,
VERBOSE
class
Crawler
(
object
):
class
Crawler
(
object
):
def
__init__
(
self
,
food
,
access
=
lambda
x
:
x
,
use_cache
=
False
,
verbose
=
True
):
def
__init__
(
self
,
food
,
access
=
lambda
x
:
x
,
use_cache
=
False
,
verbosity
=
INFO
):
"""
"""
Parameters
Parameters
----------
----------
...
@@ -63,15 +65,23 @@ class Crawler(object):
...
@@ -63,15 +65,23 @@ class Crawler(object):
self
.
access
=
access
self
.
access
=
access
self
.
report
=
db
.
Container
()
self
.
report
=
db
.
Container
()
self
.
use_cache
=
use_cache
self
.
use_cache
=
use_cache
self
.
verbos
e
=
verbos
e
self
.
verbos
ity
=
verbos
ity
if
self
.
use_cache
:
if
self
.
use_cache
:
self
.
cache
=
Cache
()
self
.
cache
=
Cache
()
def
crawl
(
self
,
files
):
def
crawl
(
self
,
files
):
files
=
sorted
(
files
,
key
=
lambda
x
:
x
.
path
)
cfoods
=
[]
cfoods
=
[]
matches
=
{
f
.
path
:
0
for
f
in
files
}
for
crawled_file
in
sorted
(
files
,
key
=
lambda
x
:
x
.
path
):
if
self
.
verbosity
>=
INFO
:
print
(
"
-
"
*
60
)
print
(
"
Matching files against CFoods
"
)
for
crawled_file
in
files
:
if
self
.
verbosity
>=
VERBOSE
:
print
(
"
Matching {}...
"
.
format
(
crawled_file
.
path
))
# if crawled_file.size == 0:
# if crawled_file.size == 0:
# crawled_file.add_message(
# crawled_file.add_message(
# type="Warning", description="This file is empty. Shouldn't we delete it?")
# type="Warning", description="This file is empty. Shouldn't we delete it?")
...
@@ -79,37 +89,47 @@ class Crawler(object):
...
@@ -79,37 +89,47 @@ class Crawler(object):
# continue
# continue
if
self
.
verbose
:
msg
=
"
Matching {}
"
.
format
(
crawled_file
.
path
)
print
(
"
=
"
*
len
(
msg
))
print
(
msg
)
print
(
"
=
"
*
len
(
msg
))
matches
=
0
for
Cfood
in
self
.
food
:
for
Cfood
in
self
.
food
:
if
Cfood
.
match
(
crawled_file
.
path
)
is
not
None
:
if
Cfood
.
match
(
crawled_file
.
path
)
is
not
None
:
matches
+=
1
matches
[
crawled_file
.
path
]
+=
1
if
self
.
verbos
e
:
if
self
.
verbos
ity
>=
VERBOSE
:
print
(
"
{} matched.
"
.
format
(
Cfood
.
__name__
))
print
(
"
{} matched.
"
.
format
(
Cfood
.
__name__
))
try
:
try
:
cfoods
.
append
(
Cfood
(
crawled_file
,
access
=
self
.
access
,
cfoods
.
append
(
Cfood
(
crawled_file
,
access
=
self
.
access
,
verbos
e
=
self
.
verbos
e
))
verbos
ity
=
self
.
verbos
ity
))
except
Exception
as
e
:
except
Exception
as
e
:
traceback
.
print_exc
()
traceback
.
print_exc
()
print
(
e
)
print
(
e
)
if
self
.
verbose
and
matches
==
0
:
if
self
.
verbosity
>=
INFO
:
print
(
"
ATTENTION: No matching cfood!
"
)
print
(
"
-
"
*
60
)
print
(
"
Trying to attach files to created CFoods
"
)
if
self
.
verbose
and
matches
>
1
:
for
crawled_file
in
files
:
print
(
"
Attention: More than one matching cfood!
"
)
if
self
.
verbosity
>=
VERBOSE
:
print
(
"
Matching {}...
"
.
format
(
crawled_file
.
path
))
for
crawled_file
in
sorted
(
files
,
key
=
lambda
x
:
x
.
path
):
for
cfood
in
cfoods
:
for
cfood
in
cfoods
:
if
cfood
.
looking_for
(
crawled_file
):
if
cfood
.
looking_for
(
crawled_file
):
if
self
.
verbosity
>=
VERBOSE
:
print
(
"
{} matched.
"
.
format
(
cfood
.
__class__
.
__name__
))
cfood
.
attach
(
crawled_file
)
cfood
.
attach
(
crawled_file
)
matches
[
crawled_file
.
path
]
+=
1
if
self
.
verbosity
>=
INFO
:
for
crawled_file
in
files
:
if
matches
[
crawled_file
.
path
]
==
0
:
print
(
"
ATTENTION: No matching cfood!
"
)
print
(
"
Tried to match {}
"
.
format
(
crawled_file
.
path
))
if
matches
[
crawled_file
.
path
]
>
1
:
print
(
"
Attention: More than one matching cfood!
"
)
print
(
"
Tried to match {}
"
.
format
(
crawled_file
.
path
))
if
self
.
verbosity
>=
INFO
:
print
(
"
-
"
*
60
)
print
(
"
Creating and updating Identifiables
"
)
for
cfood
in
cfoods
:
for
cfood
in
cfoods
:
try
:
try
:
...
@@ -120,7 +140,7 @@ class Crawler(object):
...
@@ -120,7 +140,7 @@ class Crawler(object):
cfood
.
identifiables
)
cfood
.
identifiables
)
self
.
find_or_insert_identifiables
(
cfood
.
identifiables
,
self
.
find_or_insert_identifiables
(
cfood
.
identifiables
,
self
.
verbos
e
)
self
.
verbos
ity
)
if
self
.
use_cache
:
if
self
.
use_cache
:
self
.
cache
.
insert_list
(
hashes
,
cfood
.
identifiables
)
self
.
cache
.
insert_list
(
hashes
,
cfood
.
identifiables
)
...
@@ -131,8 +151,13 @@ class Crawler(object):
...
@@ -131,8 +151,13 @@ class Crawler(object):
traceback
.
print_exc
()
traceback
.
print_exc
()
print
(
e
)
print
(
e
)
if
self
.
verbosity
>=
INFO
:
print
(
"
-
"
*
60
)
print
(
"
Crawler terminated successfully!
"
)
print
(
"
-
"
*
60
)
@staticmethod
@staticmethod
def
find_or_insert_identifiables
(
identifiables
,
verbos
e
=
True
):
def
find_or_insert_identifiables
(
identifiables
,
verbos
ity
=
INFO
):
"""
Sets the ids of identifiables (that do not have already an id from the
"""
Sets the ids of identifiables (that do not have already an id from the
cache) based on searching CaosDB and retrieves those entities.
cache) based on searching CaosDB and retrieves those entities.
The remaining entities (those which can not be retrieved) have no
The remaining entities (those which can not be retrieved) have no
...
@@ -141,20 +166,20 @@ class Crawler(object):
...
@@ -141,20 +166,20 @@ class Crawler(object):
# looking for matching entities in CaosDB when there is no valid id
# looking for matching entities in CaosDB when there is no valid id
# i.e. there was none set from a cache
# i.e. there was none set from a cache
if
verbos
e
:
if
verbos
ity
>=
VERBOSE
:
print
(
"
-----------------------------------------------------
"
)
print
(
"
-----------------------------------------------------
"
)
for
ent
in
identifiables
:
for
ent
in
identifiables
:
if
ent
.
id
is
None
or
ent
.
id
<
0
:
if
ent
.
id
is
None
or
ent
.
id
<
0
:
if
verbos
e
:
if
verbos
ity
>=
VERBOSE
:
print
(
"
Looking for:
"
)
print
(
"
Looking for:
"
)
print
(
ent
)
print
(
ent
)
existing
=
Crawler
.
find_existing
(
ent
)
existing
=
Crawler
.
find_existing
(
ent
,
verbosity
=
verbosity
)
if
existing
is
not
None
:
if
existing
is
not
None
:
ent
.
id
=
existing
.
id
ent
.
id
=
existing
.
id
else
:
else
:
if
verbos
e
:
if
verbos
ity
>=
DEBUG
:
print
(
"
Id is known of:
"
)
print
(
"
Id is known of:
"
)
print
(
ent
)
print
(
ent
)
...
@@ -167,22 +192,26 @@ class Crawler(object):
...
@@ -167,22 +192,26 @@ class Crawler(object):
for
ent
in
missing_identifiables
:
for
ent
in
missing_identifiables
:
ent
.
id
=
None
ent
.
id
=
None
if
verbos
e
:
if
verbos
ity
>=
INFO
and
len
(
missing_identifiables
)
>
0
:
print
(
"
Going to insert the following entities:
"
)
print
(
"
Going to insert the following entities:
"
)
for
ent
in
missing_identifiables
:
for
ent
in
missing_identifiables
:
print
(
ent
)
print
(
ent
)
if
verbosity
>=
VERBOSE
and
len
(
missing_identifiables
)
==
0
:
print
(
"
No new entities to be inserted.
"
)
missing_identifiables
.
insert
()
missing_identifiables
.
insert
()
if
verbos
e
:
if
verbos
ity
>=
VERBOSE
:
print
(
"
Updat
ing entities from CaosDB...
"
)
print
(
"
Retriev
ing entities from CaosDB...
"
)
identifiables
.
retrieve
(
unique
=
True
,
raise_exception_on_error
=
False
)
identifiables
.
retrieve
(
unique
=
True
,
raise_exception_on_error
=
False
)
if
verbos
e
:
if
verbos
ity
>=
VERBOSE
:
print
(
"
-----------------------------------------------------
"
)
print
(
"
-----------------------------------------------------
"
)
@staticmethod
@staticmethod
def
find_existing
(
entity
):
def
find_existing
(
entity
,
verbosity
=
INFO
):
"""
searches for an entity that matches the identifiable in CaosDB
"""
searches for an entity that matches the identifiable in CaosDB
Characteristics of the identifiable like, properties, name or id are
Characteristics of the identifiable like, properties, name or id are
...
@@ -205,7 +234,8 @@ class Crawler(object):
...
@@ -205,7 +234,8 @@ class Crawler(object):
else
:
else
:
query_string
=
"
FIND
'
{}
'"
.
format
(
entity
.
name
)
query_string
=
"
FIND
'
{}
'"
.
format
(
entity
.
name
)
print
(
query_string
)
if
verbosity
>=
VERBOSE
:
print
(
query_string
)
q
=
db
.
Query
(
query_string
)
q
=
db
.
Query
(
query_string
)
# the identifiable should identify an object uniquely. Thus the query
# the identifiable should identify an object uniquely. Thus the query
# is using the unique keyword
# is using the unique keyword
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment