Skip to content
Snippets Groups Projects
Commit f4f3a766 authored by Alexander Schlemmer's avatar Alexander Schlemmer
Browse files

added a preliminary cfood for testing

parent 9248267a
No related branches found
No related tags found
No related merge requests found
......@@ -71,7 +71,7 @@ def setup_package():
package_data={},
entry_points={
"console_scripts": [
"crawl = crawl.__main__:main",
"crawl = src.__main__:main",
]
}
)
......
src/crawl.py 100644 → 100755
......@@ -5,8 +5,32 @@
import argparse
import os
import sys
import yaml
import re
def crawl_cfood(dirname: str,
                cfood: str):
    """
    Crawl a single cfood.

    Loads the cfood definition from a yaml file and walks the directory tree
    below ``dirname``. Matching directory and file names against the cfood
    is still work in progress, so the function currently returns nothing.

    Parameters
    ----------
    dirname : path of the directory that is crawled
    cfood : filename of the cfood (yaml) file

    Raises
    ------
    ValueError : if the cfood does not contain a mapping with a root node
    """
    # Load the cfood from a yaml file:
    with open(cfood, "r") as f:
        cf = yaml.load(f, Loader=yaml.SafeLoader)

    if not isinstance(cf, dict) or not cf:
        raise ValueError(f"cfood {cfood!r} does not define a root node")

    # NOTE(review): the node to match against was never assigned before, so
    # the first visited directory raised a NameError. The first top-level
    # entry of the cfood is used as a placeholder -- confirm the intended
    # node selection once the tree traversal is implemented.
    current_node = next(iter(cf.values())) or {}
    if not isinstance(current_node, dict):
        raise ValueError(f"root node of cfood {cfood!r} is not a mapping")

    # dir is the default
    node_type = current_node.get("type", "dir")

    for currentpath, dirs, files in os.walk(dirname):
        # for current nodes of type dir look in the list of dirs for matches
        if node_type == "dir":
            # The loop variable must not reuse the name of the ``dirname``
            # parameter.
            for subdirname in dirs:
                pass
        elif node_type == "file":
            for filename in files:
                pass
        else:
            # work in progress
            pass
def crawl(dirname: str,
cfoods: list[str]):
......@@ -17,10 +41,12 @@ def crawl(dirname: str,
cfoods : a list of filenames of cfood files
"""
pass
# simplified for testing:
for cfood in cfoods:
crawl_cfood(dirname, cfood)
def main():
    """
    Command line entry point.

    Expects two positional command line arguments: the directory to crawl
    and the filename of a single cfood file. Exits with a usage message if
    either of them is missing.
    """
    # The argument vector is sys.argv; the sys module has no attribute "args".
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} DIRNAME CFOOD")
    crawl(sys.argv[1], [sys.argv[2]])
if __name__ == "__main__":
......
DataAnalysis:
children:
project_dir:
re: (?P<date>.*?)_(?P<identifier>.*)
handlers:
- type: identifiable
name: idf_project
children:
single:
re: (?P<date>.*?)_(?P<identifier>.*)
children:
README:
type: file
case: insensitive
re: README\.md
children:
- description:
type: TEXT
- responsible:
children:
type: TEXT
re: (?P<first_name>.+) (?P<last_name>.+)
handlers:
type: identifiable
name: idf_person
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment