Skip to content
Snippets Groups Projects
Commit b4e115f5 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

first tests for base functions

parent 379e7493
No related branches found
No related tags found
No related merge requests found
Metadata-Version: 2.1
Name: newcrawler
Version: 0.1
Summary: A new crawler for caosdb
Home-page: UNKNOWN
Author: Alexander Schlemmer
Author-email: alexander.schlemmer@ds.mpg.de
License: UNKNOWN
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: AGPLv3
Classifier: Operating System :: OS Independent
Requires-Python: >=3.6
Description-Content-Type: text/markdown
License-File: LICENSE
# newcrawler
A new crawler for CaosDB.
This package has yaml-header-tools as a dependency:
https://gitlab.com/salexan/yaml-header-tools
This Python package can be installed using `pip`, e.g.:
```bash
pip install --user .
```
# Usage
work in progress
# Running the tests
After installation of the package run (within the project folder):
```bash
pytest
```
# Contributors
The original authors of this package are:
- Alexander Schlemmer
- Henrik tom Wörden
- Florian Spreckelsen
# License
Copyright (C) 2021 Research Group Biomedical Physics, Max Planck Institute for
Dynamics and Self-Organization Göttingen.
All files in this repository are licensed under a [GNU Affero General Public
License](LICENCE) (version 3 or later).
LICENSE
README.md
pyproject.toml
setup.cfg
setup.py
src/newcrawler/__init__.py
src/newcrawler/crawl.py
src/newcrawler.egg-info/PKG-INFO
src/newcrawler.egg-info/SOURCES.txt
src/newcrawler.egg-info/dependency_links.txt
src/newcrawler.egg-info/top_level.txt
\ No newline at end of file
newcrawler
......@@ -99,6 +99,25 @@ def get_subnode_with_defaults(node: dict,
return subnode
def match_complete(node: dict) -> bool:
    """Determine whether the match is complete.

    This function checks whether this node and, recursively, all of its
    subnodes have a value.

    Parameters
    ----------
    node : dict
        The node to check.  A node is a dict that may carry a "value" key
        and an optional "children" key holding a list of child nodes.

    Returns
    -------
    bool
        True if the match is complete and False otherwise.
    """
    if "value" not in node:
        return False
    if "children" in node:
        # Generator (instead of a materialized list) lets all() short-circuit
        # on the first incomplete child.
        return all(match_complete(child) for child in node["children"])
    return True
def crawl_cfood(dirname: str,
cfood: str):
"""
......@@ -115,25 +134,25 @@ def crawl_cfood(dirname: str,
# NOTE(review): this block is a diff-view extraction — the docstring body is
# cut by the hunk marker above, and BOTH the removed (os.walk-based) and the
# added (os.listdir-based) versions of the matching loop appear below without
# +/- markers.  Do not treat the two loops as sequential code.
# Guard: a cfood must declare exactly one root node.
if len(root_node) != 1:
raise ValueError("Only a single cfood root is allowed.")
current_node = get_subnode_with_defaults(root_node, list(root_node.keys())[0])
current_dir = dirname
# Strategy: keep a list of currently matching candidates...
# --- OLD version of the loop (removed by this commit) ---
for currentpath, dirs, files in os.walk(dirname):
# for current nodes of type dir look in the list of dirs for matches
# dir is the default
if current_node["type"] == "dir":
for dirname in dirs:
match = match_file_object(current_node, dirname)
if match is not None:
print(json.dumps(match, indent=2))
elif current_node["type"] == "file":
for filename in files:
match = match_file_object(current_node, dirname)
if match is not None:
print(match)
else:
# work in progress
pass
# --- NEW version of the loop (added by this commit) ---
matches = []
for element in os.listdir(current_dir):
path = os.path.join(dirname, element)
if current_node["type"] == "dir" and os.path.isdir(path):
# NOTE(review): passing `dirname` here looks wrong — presumably the
# per-entry `element` (or `path`) should be matched; confirm upstream.
match = match_file_object(current_node, dirname)
if match is not None:
matches.append((path, match))
# NOTE(review): "tpye" is almost certainly a typo for "type"; as written
# this branch can never match and file nodes are silently skipped.
elif current_node["tpye"] == "file" and not os.path.isdir(path):
match = match_file_object(current_node, dirname)
if match is not None:
matches.append((path, match))
def crawl(dirname: str,
......
#!/bin/python
# Tests for main functions of crawler
# A. Schlemmer, 07/2021
from newcrawler import match_complete
def test_match_complete():
    """Exercise match_complete on nodes with and without values/children."""
    # A node without a "value" key is incomplete.
    node = {"name": "bla"}
    assert not match_complete(node)

    # All children have values but the parent itself lacks one -> incomplete.
    node = {"name": "bla",
            "children": [{"name": "test", "value": 234},
                         {"name": "test", "value": 234}]}
    assert not match_complete(node)

    # Parent and every child carry a value -> complete.
    node = {"name": "bla",
            "value": "ok",
            "children": [{"name": "test", "value": 234},
                         {"name": "test", "value": 234}]}
    assert match_complete(node)

    # A single child without a value makes the whole match incomplete.
    node = {"name": "bla",
            "value": "ok",
            "children": [{"name": "test"},
                         {"name": "test", "value": 234}]}
    assert not match_complete(node)
......@@ -7,8 +7,6 @@ from newcrawler import crawl
from os.path import join, dirname
def test_examples_article():
# NOTE(review): diff-view extraction — the first three statements below are
# the lines REMOVED by this commit and the last two are the ADDED lines; they
# were never meant to run together.
m = crawl(join(dirname(__file__), "test_directories/examples_article"))
assert len(m) == 14
for r in m:
assert len(r[2]) == 0
# New call signature: crawl(dir, [cfood, ...]) — presumably returns matches;
# confirm against the updated crawl() definition.
m = crawl(join(dirname(__file__), "test_directories/examples_article"),
[join(dirname(__file__), "scifolder_cfood.yml")])
# NOTE(review): `assert True` is a no-op placeholder; the test currently only
# checks that crawl() does not raise.
assert True
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment