Skip to content
Snippets Groups Projects
Commit acc3b20f authored by Florian Spreckelsen
Browse files

Merge branch 'f-rename-module' into 'dev'

F rename module

See merge request !23
parents 15eb6a2d b470246f
No related branches found
No related tags found
2 merge requests: !53 Release 0.1, !23 F rename module
Pipeline #23086 passed
Showing
with 47 additions and 39 deletions
src/newcrawler.egg-info/
src/caoscrawler.egg-info/
.coverage
__pycache__
.tox
......
......@@ -19,7 +19,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
variables:
CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/crawler2.0/testenv:$CI_COMMIT_REF_NAME
CI_REGISTRY_IMAGE: $CI_REGISTRY/caosdb/src/caosdb-crawler/testenv:$CI_COMMIT_REF_NAME
CI_REGISTRY_IMAGE_BASE: $CI_REGISTRY/caosdb/src/caosdb-pyinttest/base:latest
stages:
......
......@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
* Renamed module from `newcrawler` to `caoscrawler`
### Deprecated
### Removed
......
# newcrawler
# caoscrawler
A new crawler for CaosDB.
......
......@@ -33,9 +33,9 @@ from caosdb import EmptyUniqueQueryError
import argparse
import sys
from argparse import RawTextHelpFormatter
from newcrawler import Crawler
from caoscrawler import Crawler
import caosdb as db
from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
import pytest
from caosadvancedtools.models.parser import parse_model_from_yaml
import yaml
......@@ -105,7 +105,8 @@ def crawler_extended(ident):
updateList = cr.updateList
fileList = [r for r in updateList if r.role == "File"]
for f in fileList:
f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file)
f.file = rfp("..", "unittests", "test_directories",
"examples_article", f.file)
return cr
......@@ -201,7 +202,8 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
if (record.parents[0].name == "Measurement" and
record.get_property("date").value == "2020-01-03"):
# maybe a bit weird, but add an email address to a measurement
record.add_property(name="email", value="testperson@testaccount.test")
record.add_property(
name="email", value="testperson@testaccount.test")
print("one change")
break
ins, ups = cr.synchronize()
......@@ -240,7 +242,8 @@ def test_identifiable_update(clear_database, usemodel, ident, crawler):
def test_file_insertion_dry(clear_database, usemodel, ident):
crawler_extended = Crawler(debug=True, identifiableAdapter=ident)
crawl_standard_test_directory(crawler_extended, cfood="scifolder_extended.yml")
crawl_standard_test_directory(
crawler_extended, cfood="scifolder_extended.yml")
updateList = crawler_extended.updateList
fileList = [r for r in updateList if r.role == "File"]
assert len(fileList) == 11
......@@ -281,7 +284,8 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
updateList = cr.updateList
fileList = [r for r in updateList if r.role == "File"]
for f in fileList:
f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file)
f.file = rfp("..", "unittests", "test_directories",
"examples_article", f.file)
ins2, ups2 = cr.synchronize(commit_changes=True)
assert len(ups1) == 0
assert len(ups2) == 0
......@@ -297,7 +301,8 @@ def test_file_update(clear_database, usemodel, ident, crawler_extended):
updateList = cr2.updateList
fileList = [r for r in updateList if r.role == "File"]
for f in fileList:
f.file = rfp("..", "unittests", "test_directories", "examples_article", f.file)
f.file = rfp("..", "unittests", "test_directories",
"examples_article", f.file)
ins3, ups3 = cr2.synchronize(commit_changes=True)
assert len(ups3) == 11
......
......@@ -29,10 +29,10 @@ import os
import caosdb as db
from newcrawler.crawl import Crawler
from newcrawler.converters import JSONFileConverter, DictConverter
from newcrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from newcrawler.structure_elements import File, JSONFile, Directory
from caoscrawler.crawl import Crawler
from caoscrawler.converters import JSONFileConverter, DictConverter
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.structure_elements import File, JSONFile, Directory
import pytest
from caosadvancedtools.models.parser import parse_model_from_json_schema, parse_model_from_yaml
......@@ -86,7 +86,8 @@ def test_dataset(
).add_parent("Person").add_property("full_name"))
crawler = Crawler(debug=True, identifiableAdapter=ident)
crawler_definition = crawler.load_definition(os.path.join(DATADIR, "dataset_cfoods.yml"))
crawler_definition = crawler.load_definition(
os.path.join(DATADIR, "dataset_cfoods.yml"))
# print(json.dumps(crawler_definition, indent=3))
# Load and register converter packages:
converter_registry = crawler.load_converters(crawler_definition)
......
[metadata]
name = newcrawler
name = caoscrawler
version = 0.1
author = Alexander Schlemmer
author_email = alexander.schlemmer@ds.mpg.de
......@@ -35,4 +35,4 @@ per-file-ignores = __init__.py:F401
[options.entry_points]
console_scripts =
crawler2.0 = newcrawler.crawl:main
caosdb-crawler = caoscrawler.crawl:main
File moved
File moved
File moved
......@@ -205,7 +205,7 @@ class Crawler(object):
# tested in the next lines of code:
# Load the cfood schema:
with open(files('newcrawler').joinpath('cfood-schema.yml'), "r") as f:
with open(files('caoscrawler').joinpath('cfood-schema.yml'), "r") as f:
schema = yaml.safe_load(f)
# Add custom converters to converter enum in schema:
......@@ -260,43 +260,43 @@ class Crawler(object):
converter_registry: dict[str, dict[str, str]] = {
"Directory": {
"converter": "DirectoryConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"SimpleFile": {
"converter": "SimpleFileConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"MarkdownFile": {
"converter": "MarkdownFileConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"File": {
"converter": "FileConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"JSONFile": {
"converter": "JSONFileConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"Dict": {
"converter": "DictConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"DictBooleanElement": {
"converter": "DictBooleanElementConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"DictFloatElement": {
"converter": "DictFloatElementConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"DictTextElement": {
"converter": "DictTextElementConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"DictIntegerElement": {
"converter": "DictIntegerElementConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"DictListElement": {
"converter": "DictListElementConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"DictDictElement": {
"converter": "DictDictElementConverter",
"package": "newcrawler.converters"},
"package": "caoscrawler.converters"},
"TextElement": {
"converter": "TextElementConverter",
"package": "newcrawler.converters"}
"package": "caoscrawler.converters"}
}
# More converters from definition file:
......
File moved
File moved
......@@ -29,7 +29,7 @@
SPHINXOPTS ?= -a
SPHINXBUILD ?= sphinx-build
SPHINXAPIDOC ?= sphinx-apidoc
PY_BASEDIR = ../newcrawler
PY_BASEDIR = ../caoscrawler
SOURCEDIR = .
BUILDDIR = ../../build/doc
......
......@@ -28,7 +28,7 @@ import sphinx_rtd_theme # noqa: E402
# -- Project information -----------------------------------------------------
project = 'caosdb-newcrawler'
project = 'caosdb-caoscrawler'
copyright = '2021, MPIDS'
author = 'Alexander Schlemmer'
......@@ -115,7 +115,7 @@ html_static_path = ['_static']
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'caosdb-newcrawlerdoc'
htmlhelp_basename = 'caosdb-caoscrawlerdoc'
# -- Options for LaTeX output ------------------------------------------------
......@@ -142,7 +142,7 @@ latex_elements = {
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'caosdb-newcrawler.tex', 'caosdb-newcrawler Documentation',
(master_doc, 'caosdb-caoscrawler.tex', 'caosdb-caoscrawler Documentation',
'MPIDS', 'manual'),
]
......@@ -152,7 +152,7 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'caosdb-newcrawler', 'caosdb-newcrawler documentation',
(master_doc, 'caosdb-caoscrawler', 'caosdb-caoscrawler documentation',
[author], 1)
]
......@@ -163,8 +163,8 @@ man_pages = [
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'caosdb-newcrawler', 'caosdb-newcrawler documentation',
author, 'caosdb-newcrawler', 'One line description of project.',
(master_doc, 'caosdb-caoscrawler', 'caosdb-caoscrawler documentation',
author, 'caosdb-caoscrawler', 'One line description of project.',
'Miscellaneous'),
]
......
......@@ -5,7 +5,7 @@
import caosdb as db
from pytest import raises
from newcrawler.identified_cache import _create_hashable_string as create_hash_string
from caoscrawler.identified_cache import _create_hashable_string as create_hash_string
def test_normal_hash_creation():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment