Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
caosdb-advanced-user-tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
caosdb-advanced-user-tools
Commits
e14f1784
Commit
e14f1784
authored
4 years ago
by
Alexander Schlemmer
Browse files
Options
Downloads
Patches
Plain Diff
ENH: more tests, documentation and several fixes in code
parent
e59b78bc
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!59
FIX: if multiple updates for one entity exist, the retrieve would result in an...
,
!46
F cache version
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/caosadvancedtools/cache.py
+47
-9
47 additions, 9 deletions
src/caosadvancedtools/cache.py
unittests/test_cache.py
+21
-6
21 additions, 6 deletions
unittests/test_cache.py
with
68 additions
and
15 deletions
src/caosadvancedtools/cache.py
+
47
−
9
View file @
e14f1784
...
...
@@ -87,6 +87,7 @@ class Cache(object):
If it exists, but the schema is outdated, an exception will be raised.
"""
print
(
self
.
db_file
)
if
not
os
.
path
.
exists
(
self
.
db_file
):
self
.
create_cache
()
else
:
...
...
@@ -94,14 +95,17 @@ class Cache(object):
current_schema
=
self
.
get_cache_version
()
except
sqlite3
.
OperationalError
:
current_schema
=
1
# TODO: Write unit tests for too old, too new and non-existent version of cache.
if
current_schema
>
CACHE_SCHEMA_VERSION
:
raise
RuntimeError
(
"
Cache is corrupt or was created with a future version of this program.
"
)
elif
current_schema
<
CACHE_SCHEMA_VERSION
:
raise
RuntimeError
(
"
Cache version too old.
"
)
def
get_cache_version
(
self
):
"""
Return the version of the cache stored in self.db_file.
The version is stored as the only entry in the column "schema" of the table "version".
"""
try
:
conn
=
sqlite3
.
connect
(
self
.
db_file
)
c
=
conn
.
cursor
()
...
...
@@ -114,6 +118,13 @@ class Cache(object):
conn
.
close
()
def
create_cache
(
self
):
"""
Create a new SQLITE cache file in self.db_file.
Two tables will be created:
- identifiables is the actual cache.
- version is a table with version information about the cache.
"""
conn
=
sqlite3
.
connect
(
self
.
db_file
)
c
=
conn
.
cursor
()
c
.
execute
(
...
...
@@ -127,23 +138,41 @@ class Cache(object):
@staticmethod
def
hash_entity
(
ent
):
"""
Format an entity as
"
pretty
"
XML and return the SHA256 hash.
"""
xml
=
get_pretty_xml
(
ent
)
digest
=
sha256
(
xml
.
encode
(
"
utf-8
"
)).
hexdigest
()
return
digest
def
insert
(
self
,
ent_hash
,
ent_id
):
def
insert
(
self
,
ent_hash
,
ent_id
,
ent_version
):
"""
Insert a new cache entry.
ent_hash: Hash of the entity. Should be generated with Cache.hash_entity
ent_id: ID of the entity
ent_version: Version string of the entity
"""
conn
=
sqlite3
.
connect
(
self
.
db_file
)
c
=
conn
.
cursor
()
c
.
execute
(
'''
INSERT INTO identifiables VALUES (?, ?)
'''
,
(
ent_hash
,
ent_id
))
c
.
execute
(
'''
INSERT INTO identifiables VALUES (?,
?,
?)
'''
,
(
ent_hash
,
ent_id
,
ent_version
))
conn
.
commit
()
conn
.
close
()
def
check_existing
(
self
,
ent_hash
):
"""
Check the cache for a hash.
ent_hash: The hash to search for.
Return the ID and the version ID of the hashed entity.
Return None if no entity with that hash is in the cache.
"""
conn
=
sqlite3
.
connect
(
self
.
db_file
)
c
=
conn
.
cursor
()
c
.
execute
(
'''
Select
* FROM identifiables WHERE digest=?
'''
,
c
.
execute
(
'''
Select * FROM identifiables WHERE digest=?
'''
,
(
ent_hash
,))
res
=
c
.
fetchone
()
conn
.
commit
()
...
...
@@ -152,7 +181,7 @@ class Cache(object):
if
res
is
None
:
return
res
else
:
return
res
[
1
]
return
res
[
1
:
]
def
update_ids_from_cache
(
self
,
entities
):
"""
Set the IDs of those entities that are in the cache.
...
...
@@ -167,7 +196,7 @@ class Cache(object):
eid
=
self
.
check_existing
(
ehash
)
if
eid
is
not
None
:
ent
.
id
=
eid
ent
.
id
=
eid
[
0
]
return
hashes
...
...
@@ -177,9 +206,16 @@ class Cache(object):
The hashes must correspond to the entities in the list
"""
# Check whether all entities have IDs and versions:
for
ent
in
entities
:
if
ent
.
id
is
None
:
raise
RuntimeError
(
"
Entity has no ID.
"
)
if
ent
.
version
is
None
or
ent
.
version
.
id
is
None
:
raise
RuntimeError
(
"
Entity has no version ID.
"
)
for
ehash
,
ent
in
zip
(
hashes
,
entities
):
if
self
.
check_existing
(
ehash
)
is
None
:
self
.
insert
(
ehash
,
ent
.
id
)
self
.
insert
(
ehash
,
ent
.
id
,
ent
.
version
.
id
)
class
UpdateCache
(
Cache
):
...
...
@@ -192,6 +228,8 @@ class UpdateCache(Cache):
def
__init__
(
self
,
db_file
=
None
):
if
db_file
is
None
:
# TODO: check whether a hardcoded temp file is really wanted
# Why not crawler_update_cache.db in current working directory?
db_file
=
"
/tmp/crawler_update_cache.db
"
super
().
__init__
(
db_file
=
db_file
)
...
...
This diff is collapsed.
Click to expand it.
unittests/test_cache.py
+
21
−
6
View file @
e14f1784
...
...
@@ -36,7 +36,7 @@ import pytest
class
CacheTest
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
cache
=
Cache
(
db_file
=
NamedTemporaryFile
(
delete
=
False
).
name
,
force_creation
)
force_creation
=
True
)
def
test_hash
(
self
):
ent
=
db
.
Record
()
...
...
@@ -50,8 +50,8 @@ class CacheTest(unittest.TestCase):
ent2
.
add_parent
(
name
=
"
Experiment
"
)
ent_hash
=
Cache
.
hash_entity
(
ent
)
ent2_hash
=
Cache
.
hash_entity
(
ent2
)
self
.
cache
.
insert
(
ent2_hash
,
1235
)
assert
isinstance
(
self
.
cache
.
check_existing
(
ent2_hash
),
int
)
self
.
cache
.
insert
(
ent2_hash
,
1235
,
"
ajkfljadsklf
"
)
assert
isinstance
(
self
.
cache
.
check_existing
(
ent2_hash
)
[
0
]
,
int
)
assert
self
.
cache
.
check_existing
(
ent_hash
)
is
None
def
test_hirarchy
(
self
):
...
...
@@ -67,17 +67,32 @@ class CacheTest(unittest.TestCase):
ent3
=
db
.
Record
()
ent3
.
add_parent
(
name
=
"
Analysis
"
)
test_id
=
2353243
self
.
cache
.
insert
(
Cache
.
hash_entity
(
ent2
),
test_id
)
self
.
cache
.
insert
(
Cache
.
hash_entity
(
ent2
),
test_id
,
"
ajdsklfjadslf
"
)
entities
=
[
ent
,
ent2
,
ent3
]
hashes
=
self
.
cache
.
update_ids_from_cache
(
entities
)
self
.
assertEqual
(
ent2
.
id
,
test_id
)
# TODO: is that wanted?
self
.
assertEqual
(
ent
.
id
,
-
1
)
self
.
assertEqual
(
ent3
.
id
,
-
1
)
# TODO: I expected this instead:
# with pytest.raises(RuntimeError, match=r".*no ID.*"):
# self.cache.insert_list(hashes, entities)
# test
ent
.
id
=
1001
ent3
.
id
=
1003
with
pytest
.
raises
(
RuntimeError
,
match
=
r
"
.*no version ID.*
"
):
self
.
cache
.
insert_list
(
hashes
,
entities
)
ent
.
version
=
db
.
common
.
versioning
.
Version
(
"
jkadsjfldf
"
)
ent2
.
version
=
db
.
common
.
versioning
.
Version
(
"
jkadsjfldf
"
)
ent3
.
version
=
db
.
common
.
versioning
.
Version
(
"
jkadsjfldf
"
)
self
.
cache
.
insert_list
(
hashes
,
entities
)
self
.
assertEqual
(
self
.
cache
.
check_existing
(
hashes
[
0
]),
1001
)
self
.
assertEqual
(
self
.
cache
.
check_existing
(
hashes
[
2
]),
1003
)
self
.
assertEqual
(
self
.
cache
.
check_existing
(
hashes
[
0
])
[
0
]
,
1001
)
self
.
assertEqual
(
self
.
cache
.
check_existing
(
hashes
[
2
])
[
0
]
,
1003
)
def
create_sqlite_file
(
commands
):
"""
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment