Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
caosdb-advanced-user-tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
caosdb-advanced-user-tools
Commits
58bf1590
Commit
58bf1590
authored
6 years ago
by
Henrik tom Wörden
Browse files
Options
Downloads
Patches
Plain Diff
cache
parent
a484051c
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/caosadvancedtools/cfood.py
+64
-1
64 additions, 1 deletion
src/caosadvancedtools/cfood.py
with
64 additions
and
1 deletion
src/caosadvancedtools/cfood.py
+
64
−
1
View file @
58bf1590
...
...
@@ -26,8 +26,10 @@
import
argparse
import
re
from
hashlib
import
sha256
from
argparse
import
RawTextHelpFormatter
from
datetime
import
datetime
import
sqlite3
import
caosdb
as
db
from
caosdb.exceptions
import
TransactionError
...
...
@@ -38,16 +40,77 @@ def get_entity(name):
ent
.
retrieve
()
return
ent
def get_pretty_xml(cont):
    """Return a pretty-printed XML string for the given entities.

    Parameters
    ----------
    cont : list, db.Entity or db.Container
        A list of entities, a single entity, or a container. Lists and
        single entities are wrapped in a fresh ``db.Container`` before
        serialization.

    Returns
    -------
    str
        The result of ``etree.tounicode`` on the container's local XML
        serialization, with ``pretty_print=True``.
    """
    if isinstance(cont, list):
        # Copy the list members into a new container so that ``to_xml``
        # is available below.
        wrapped = db.Container()
        wrapped.extend(cont)
        cont = wrapped

    if isinstance(cont, db.Entity):
        # The original called ``Container().insert(cont)`` here. In
        # caosdb-pylib ``Container.insert`` is the server insert
        # transaction, not a list-style add, so the entity was never put
        # into the container. ``append`` is the local, side-effect free
        # way to wrap a single entity.
        wrapped = db.Container()
        wrapped.append(cont)
        cont = wrapped

    return etree.tounicode(cont.to_xml(local_serialization=True),
                           pretty_print=True)
# TODO this is implementing a cache on client side. Should it be on
# server side?
class Cache(object):
    """Client-side cache that maps entity digests to CaosDB ids.

    The cache is a SQLite database stored at ``Cache.CACHE_DB``. It holds
    one table, ``identifiables``, with the columns ``digest`` (text,
    primary key) and ``caosdb_id`` (integer).

    ``hash_entity``, ``insert`` and ``check_existing`` are static methods,
    so they can be called on the class as well as on an instance.
    """

    # Path of the SQLite file that backs the cache.
    CACHE_DB = "cache.db"

    def __init__(self):
        # Creating the table is idempotent, so it is safe to do this on
        # every instantiation; it also repairs a database file that exists
        # but does not contain the table yet.
        self.create_cache()

    @staticmethod
    def _run(statement, parameters=()):
        """Execute one statement, commit, and return the first result row.

        The connection is always closed, also when the statement raises.
        Returns ``None`` if the statement yields no rows.
        """
        conn = sqlite3.connect(Cache.CACHE_DB)
        try:
            cursor = conn.cursor()
            cursor.execute(statement, parameters)
            row = cursor.fetchone()
            conn.commit()
            return row
        finally:
            conn.close()

    def create_cache(self):
        """Create the ``identifiables`` table if it does not exist yet."""
        Cache._run(
            '''CREATE TABLE IF NOT EXISTS identifiables
               (digest text primary key, caosdb_id integer)''')

    @staticmethod
    def hash_entity(ent):
        """Return the SHA-256 hex digest of the pretty XML of ``ent``."""
        xml = get_pretty_xml(ent)
        return sha256(xml.encode("utf-8")).hexdigest()

    @staticmethod
    def insert(ent):
        """Store the digest of ``ent`` together with ``ent.id``.

        Raises ``sqlite3.IntegrityError`` if the digest is already stored,
        because ``digest`` is the primary key of the table.
        """
        # Use placeholders instead of string formatting: the digest is a
        # text value and must be bound, not pasted into the statement.
        Cache._run('''INSERT INTO identifiables VALUES (?, ?)''',
                   (Cache.hash_entity(ent), ent.id))

    @staticmethod
    def check_existing(ent):
        """Look up ``ent`` in the cache.

        Returns the cached ``caosdb_id`` if the digest of ``ent`` is
        stored, otherwise ``None``.
        """
        # The parameters must be a sequence; a bare string would be
        # treated as one binding per character.
        row = Cache._run(
            '''SELECT caosdb_id FROM identifiables WHERE digest=?''',
            (Cache.hash_entity(ent),))

        if row is None:
            return None

        return row[0]
class
AbstractCFood
(
object
):
def
__init__
(
self
,
pattern
):
# TODO restructure this class such that no instance is needed to check for
# a match
# instances shall be used to keep track of a match; i.e. entities can be
# object variable
def
__init__
(
self
,
pattern
,
use_cache
=
False
):
self
.
pattern
=
re
.
compile
(
pattern
)
self
.
use_cache
=
use_cache
def
treat_match
(
self
,
crawled_file
,
match
):
entities
=
self
.
create_identifiables
(
crawled_file
,
match
)
for
key
,
ent
in
entities
.
items
():
existing
=
None
if
self
.
use_cache
:
c
=
Cache
()
existing_in_cache
=
c
.
check_existing
(
ent
)
if
existing_in_cache
is
not
None
:
existing
=
db
.
Entity
(
id
=
existing_in_cache
).
retrieve
()
existing
=
AbstractCFood
.
find_existing
(
ent
)
if
existing
is
None
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment