From b4e115f5647255cd38692a4950677152a20c1d84 Mon Sep 17 00:00:00 2001 From: Alexander Schlemmer <alexander@mail-schlemmer.de> Date: Fri, 9 Jul 2021 13:03:37 +0200 Subject: [PATCH] first tests for base functions --- src/newcrawler.egg-info/PKG-INFO | 61 ++++++++++++++++++++ src/newcrawler.egg-info/SOURCES.txt | 11 ++++ src/newcrawler.egg-info/dependency_links.txt | 1 + src/newcrawler.egg-info/top_level.txt | 1 + src/newcrawler/crawl.py | 51 +++++++++++----- tests/test_functions.py | 34 +++++++++++ tests/test_tool.py | 8 +-- 7 files changed, 146 insertions(+), 21 deletions(-) create mode 100644 src/newcrawler.egg-info/PKG-INFO create mode 100644 src/newcrawler.egg-info/SOURCES.txt create mode 100644 src/newcrawler.egg-info/dependency_links.txt create mode 100644 src/newcrawler.egg-info/top_level.txt create mode 100644 tests/test_functions.py diff --git a/src/newcrawler.egg-info/PKG-INFO b/src/newcrawler.egg-info/PKG-INFO new file mode 100644 index 00000000..4c6e7241 --- /dev/null +++ b/src/newcrawler.egg-info/PKG-INFO @@ -0,0 +1,61 @@ +Metadata-Version: 2.1 +Name: newcrawler +Version: 0.1 +Summary: A new crawler for caosdb +Home-page: UNKNOWN +Author: Alexander Schlemmer +Author-email: alexander.schlemmer@ds.mpg.de +License: UNKNOWN +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: AGPLv3 +Classifier: Operating System :: OS Independent +Requires-Python: >=3.6 +Description-Content-Type: text/markdown +License-File: LICENSE + +# newcrawler + +A new crawler for CaosDB. + + +This package has yaml-header-tools as a dependency: +https://gitlab.com/salexan/yaml-header-tools + + + +This python package can be installed using `pip`, e.g.: +```bash +pip install --user . 
+``` + +# Usage + +work in progress + +# Running the tests + +After installation of the package run (within the project folder): + +```bash +pytest +``` + + +# Contributors + +The original authors of this package are: + +- Alexander Schlemmer +- Henrik tom Wörden +- Florian Spreckelsen + +# License + +Copyright (C) 2021 Research Group Biomedical Physics, Max Planck Institute for +Dynamics and Self-Organization Göttingen. + +All files in this repository are licensed under a [GNU Affero General Public +License](LICENSE) (version 3 or later). + + diff --git a/src/newcrawler.egg-info/SOURCES.txt b/src/newcrawler.egg-info/SOURCES.txt new file mode 100644 index 00000000..4b7fb6ab --- /dev/null +++ b/src/newcrawler.egg-info/SOURCES.txt @@ -0,0 +1,11 @@ +LICENSE +README.md +pyproject.toml +setup.cfg +setup.py +src/newcrawler/__init__.py +src/newcrawler/crawl.py +src/newcrawler.egg-info/PKG-INFO +src/newcrawler.egg-info/SOURCES.txt +src/newcrawler.egg-info/dependency_links.txt +src/newcrawler.egg-info/top_level.txt \ No newline at end of file diff --git a/src/newcrawler.egg-info/dependency_links.txt b/src/newcrawler.egg-info/dependency_links.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/newcrawler.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/newcrawler.egg-info/top_level.txt b/src/newcrawler.egg-info/top_level.txt new file mode 100644 index 00000000..db6194c5 --- /dev/null +++ b/src/newcrawler.egg-info/top_level.txt @@ -0,0 +1 @@ +newcrawler diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index f441a1a0..5c02ed92 100755 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -99,6 +99,25 @@ def get_subnode_with_defaults(node: dict, return subnode +def match_complete(node: dict): + """Determine whether the match is complete. + + This function checks whether all nodes and subnodes have a value. + + Parameters + ---------- + node : The node to check. 
+ + Returns + ------- + True if the match is complete and False otherwise. + """ + if "value" not in node: + return False + if "children" in node: + return all([match_complete(element) for element in node["children"]]) + return True + def crawl_cfood(dirname: str, cfood: str): """ @@ -115,25 +134,25 @@ def crawl_cfood(dirname: str, if len(root_node) != 1: raise ValueError("Only a single cfood root is allowed.") current_node = get_subnode_with_defaults(root_node, list(root_node.keys())[0]) + current_dir = dirname # Strategy: keep a list of currently matching candidates... - for currentpath, dirs, files in os.walk(dirname): - # for current nodes of type dir look in the list of dirs for matches - # dir is the default - if current_node["type"] == "dir": - for dirname in dirs: - match = match_file_object(current_node, dirname) - if match is not None: - print(json.dumps(match, indent=2)) - elif current_node["type"] == "file": - for filename in files: - match = match_file_object(current_node, dirname) - if match is not None: - print(match) - else: - # work in progress - pass + matches = [] + for element in os.listdir(current_dir): + path = os.path.join(dirname, element) + + + if current_node["type"] == "dir" and os.path.isdir(path): + match = match_file_object(current_node, element) + if match is not None: + matches.append((path, match)) + elif current_node["type"] == "file" and not os.path.isdir(path): + match = match_file_object(current_node, element) + if match is not None: + matches.append((path, match)) + + def crawl(dirname: str, diff --git a/tests/test_functions.py b/tests/test_functions.py new file mode 100644 index 00000000..a33f0d21 --- /dev/null +++ b/tests/test_functions.py @@ -0,0 +1,34 @@ +#!/bin/python +# Tests for main functions of crawler +# A. 
Schlemmer, 07/2021 + +from newcrawler import match_complete + +def test_match_complete(): + node = {"name": "bla"} + assert match_complete(node) == False + + node = {"name": "bla", + "children": [{ + "name": "test", + "value": 234}, { + "name": "test", + "value": 234}]} + assert match_complete(node) == False + + node = {"name": "bla", + "value": "ok", + "children": [{ + "name": "test", + "value": 234}, { + "name": "test", + "value": 234}]} + assert match_complete(node) == True + + node = {"name": "bla", + "value": "ok", + "children": [{ + "name": "test"}, { + "name": "test", + "value": 234}]} + assert match_complete(node) == False diff --git a/tests/test_tool.py b/tests/test_tool.py index 28add00b..3ba782c3 100755 --- a/tests/test_tool.py +++ b/tests/test_tool.py @@ -7,8 +7,6 @@ from newcrawler import crawl from os.path import join, dirname def test_examples_article(): - m = crawl(join(dirname(__file__), "test_directories/examples_article")) - assert len(m) == 14 - - for r in m: - assert len(r[2]) == 0 + m = crawl(join(dirname(__file__), "test_directories/examples_article"), + [join(dirname(__file__), "scifolder_cfood.yml")]) + assert True -- GitLab