Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
caosdb-advanced-user-tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
caosdb-advanced-user-tools
Commits
43bc5aec
Commit
43bc5aec
authored
2 years ago
by
Henrik tom Wörden
Browse files
Options
Downloads
Patches
Plain Diff
ENH: allow to store unauthorized inserts
parent
3c8e6ded
No related branches found
No related tags found
2 merge requests
!59
FIX: if multiple updates for one entity exist, the retrieve would result in an...
,
!56
F insert auth
Pipeline
#28055
passed
2 years ago
Stage: setup
Stage: cert
Stage: style
Stage: unittest
Stage: integrationtest
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/caosadvancedtools/cache.py
+45
-18
45 additions, 18 deletions
src/caosadvancedtools/cache.py
src/caosadvancedtools/crawler.py
+8
-0
8 additions, 0 deletions
src/caosadvancedtools/crawler.py
with
53 additions
and
18 deletions
src/caosadvancedtools/cache.py
+
45
−
18
View file @
43bc5aec
...
@@ -72,9 +72,11 @@ class Cache(object):
...
@@ -72,9 +72,11 @@ class Cache(object):
without querying.
without querying.
"""
"""
def
__init__
(
self
,
db_file
=
None
):
def
__init__
(
self
,
db_file
=
None
,
default_name
=
"
cache.db
"
):
if
db_file
is
None
:
if
db_file
is
None
:
self
.
db_file
=
"
cache.db
"
tmppath
=
tempfile
.
gettempdir
()
tmpf
=
os
.
path
.
join
(
tmppath
,
default_name
)
self
.
db_file
=
tmpf
else
:
else
:
self
.
db_file
=
db_file
self
.
db_file
=
db_file
...
@@ -148,18 +150,14 @@ class Cache(object):
...
@@ -148,18 +150,14 @@ class Cache(object):
class
UpdateCache
(
Cache
):
class
UpdateCache
(
Cache
):
"""
"""
stores unauthorized updates
stores unauthorized
inserts and
updates
If the Guard is set to a mode that does not allow an update, the update can
If the Guard is set to a mode that does not allow an
insert or
update, the
insert or
update can
be stored in this cache such that it can be authorized and
done
later.
be stored in this cache such that it can be authorized and
performed
later.
"""
"""
def
__init__
(
self
,
db_file
=
None
):
def
__init__
(
self
,
db_file
=
None
):
if
db_file
is
None
:
super
().
__init__
(
db_file
=
db_file
,
default_name
=
"
crawler_insert_cache.db
"
)
tmppath
=
tempfile
.
gettempdir
()
tmpf
=
os
.
path
.
join
(
tmppath
,
"
crawler_update_cache.db
"
)
db_file
=
tmpf
super
().
__init__
(
db_file
=
db_file
)
@staticmethod
@staticmethod
def
get_previous_version
(
cont
):
def
get_previous_version
(
cont
):
...
@@ -174,23 +172,32 @@ class UpdateCache(Cache):
...
@@ -174,23 +172,32 @@ class UpdateCache(Cache):
return
old_ones
return
old_ones
def
insert
(
self
,
cont
,
run_id
):
def
insert
(
self
,
cont
,
run_id
,
insert
=
False
):
"""
Insert a pending, unauthorized
update
"""
Insert a pending, unauthorized
inserts
Parameters
Parameters
----------
----------
cont: Container with the records to be
upda
ted containing the desired
cont: Container with the records to be
inser
ted containing the desired
version, i.e. the state after the update.
version, i.e. the state after the update.
run_id: int
run_id: int
The id of the crawler run
The id of the crawler run
insert: bool
Whether the entities in the container shall be inserted or updated.
"""
"""
cont
=
put_in_container
(
cont
)
cont
=
put_in_container
(
cont
)
old_ones
=
UpdateCache
.
get_previous_version
(
cont
)
if
insert
:
old_ones
=
""
else
:
old_ones
=
UpdateCache
.
get_previous_version
(
cont
)
new_ones
=
cont
new_ones
=
cont
old_hash
=
Cache
.
hash_entity
(
old_ones
)
if
insert
:
old_hash
=
""
else
:
old_hash
=
Cache
.
hash_entity
(
old_ones
)
new_hash
=
Cache
.
hash_entity
(
new_ones
)
new_hash
=
Cache
.
hash_entity
(
new_ones
)
conn
=
sqlite3
.
connect
(
self
.
db_file
)
conn
=
sqlite3
.
connect
(
self
.
db_file
)
c
=
conn
.
cursor
()
c
=
conn
.
cursor
()
...
@@ -210,20 +217,40 @@ class UpdateCache(Cache):
...
@@ -210,20 +217,40 @@ class UpdateCache(Cache):
conn
.
commit
()
conn
.
commit
()
conn
.
close
()
conn
.
close
()
def
get
_updates
(
self
,
run_id
):
def
get
(
self
,
run_id
,
querystring
):
"""
returns the pending updates for a given run id
"""
returns the pending updates for a given run id
Parameters:
Parameters:
-----------
-----------
run_id: the id of the crawler run
run_id: the id of the crawler run
querystring: the sql query
"""
"""
conn
=
sqlite3
.
connect
(
self
.
db_file
)
conn
=
sqlite3
.
connect
(
self
.
db_file
)
c
=
conn
.
cursor
()
c
=
conn
.
cursor
()
c
.
execute
(
'''
Select * FROM updates WHERE run_id=?
'''
,
c
.
execute
(
querystring
,
(
str
(
run_id
),))
(
str
(
run_id
),))
res
=
c
.
fetchall
()
res
=
c
.
fetchall
()
conn
.
commit
()
conn
.
commit
()
conn
.
close
()
conn
.
close
()
return
res
return
res
def
get_inserts
(
self
,
run_id
):
"""
returns the pending updates for a given run id
Parameters:
-----------
run_id: the id of the crawler run
"""
return
self
.
get
(
run_id
,
'''
Select * FROM updates WHERE olddigest=
''
AND run_id=?
'''
)
def
get_updates
(
self
,
run_id
):
"""
returns the pending updates for a given run id
Parameters:
-----------
run_id: the id of the crawler run
"""
return
self
.
get
(
run_id
,
'''
Select * FROM updates WHERE olddigest!=
''
AND run_id=?
'''
)
This diff is collapsed.
Click to expand it.
src/caosadvancedtools/crawler.py
+
8
−
0
View file @
43bc5aec
...
@@ -208,6 +208,14 @@ class Crawler(object):
...
@@ -208,6 +208,14 @@ class Crawler(object):
run_id: the id of the crawler run
run_id: the id of the crawler run
"""
"""
cache
=
UpdateCache
()
cache
=
UpdateCache
()
inserts
=
cache
.
get_inserts
(
run_id
)
for
_
,
_
,
_
,
new
,
_
in
inserts
:
new_cont
=
db
.
Container
()
new_cont
=
new_cont
.
from_xml
(
new
)
new_cont
.
insert
(
unique
=
False
)
logger
.
info
(
"
Successfully inserted {} records!
"
.
format
(
len
(
new_cont
)))
logger
.
info
(
"
Finished with authorized updates.
"
)
changes
=
cache
.
get_updates
(
run_id
)
changes
=
cache
.
get_updates
(
run_id
)
for
_
,
_
,
old
,
new
,
_
in
changes
:
for
_
,
_
,
old
,
new
,
_
in
changes
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment