Skip to content
Snippets Groups Projects
Commit cd2ff85c authored by Alexander Schlemmer
Browse files

new version with variable information backends

parent 0e743800
No related branches found
No related tags found
No related merge requests found
......@@ -8,89 +8,12 @@ import sys
import yaml
import re
import json
import yaml_header_tools
from abc import abstractmethod
def match_file_object(node: dict,
                      filename: str):
    """
    Try to match a filename with the supplied node.

    This function only uses the current path name specified by filename.
    It does not check whether the file system object behind that path is valid
    and matching the type of the node.

    Parameters
    ----------
    node : A dictionary containing the matcher. The key "re" holds the regular
           expression and the key "case" selects case-insensitive matching
           when it is set to "insensitive".
    filename : A filename to match.

    Returns
    -------
    None if the matcher does not match and otherwise a dict with the values of the matcher.
    """
    # The flags must be handed to re.compile; otherwise the "case" setting of
    # the node has no effect on the match.
    flags = re.IGNORECASE if node["case"] == "insensitive" else 0
    matcher = re.compile(node["re"], flags).match(filename)
    if matcher is None:
        return None

    # Value of node:
    # - Add the numeric groups (0 is the complete match)
    # - Add the dictionary groups as well
    valdict = {0: matcher.group()}
    for index, group in enumerate(matcher.groups(), start=1):
        valdict[index] = group
    valdict.update(matcher.groupdict())
    return valdict
def get_subnode_with_defaults(node: dict,
                              key: str):
    """
    Look up key in node and complete the subnode found there with the
    defaults of the cfood specification.

    The subnode is modified in place. Entries that are missing are set to:
    - "re": the regular-expression-escaped key
    - "type": "dir"
    - "case": "sensitive"
    - "nodeName": the key itself

    Parameters
    ----------
    node : The dictionary containing the subnode as key.
    key : The key of the dictionary.

    Returns
    -------
    The subnode including the defaults (the same object as node[key]).

    Raises
    ------
    ValueError if key is not contained in node.
    """
    if key not in node:
        raise ValueError("Key {} is not in node.".format(key))

    subnode = node[key]
    # The escaped key is only computed when no explicit "re" is given.
    if "re" not in subnode:
        subnode["re"] = re.escape(key)

    remaining_defaults = (("type", "dir"),
                          ("case", "sensitive"),
                          ("nodeName", key))
    for name, default in remaining_defaults:
        if name not in subnode:
            subnode[name] = default
    return subnode
def match_complete(node: dict):
"""Determine whether the match is complete.
......@@ -121,13 +44,84 @@ class InformationBackend(object):
return
@abstractmethod
def sub_matcher(self, current_node, current_element):
def sub_matcher(self, current_node, current_element, subelement):
pass
class DirectoryInformationBackend(InformationBackend):
def __init__(current_dir):
def __init__(self, current_dir):
self.current_dir = current_dir
def add_defaults(self, node: dict):
    """
    Complete node in place with the defaults of the cfood specification.

    Entries that are missing are set to:
    - "re": a regular expression created by escaping node["nodeName"]
    - "type": "dir"
    - "case": "sensitive"

    Parameters
    ----------
    node : The dictionary describing the node. It has to contain "nodeName"
           whenever "re" is not set.

    Returns
    -------
    None. The node is modified in place.
    """
    # "nodeName" is only read when no explicit "re" is given.
    if "re" not in node:
        node["re"] = re.escape(node["nodeName"])
    for name, default in (("type", "dir"), ("case", "sensitive")):
        node.setdefault(name, default)
def match_file_object(self, current_node: dict,
                      current_element: str):
    """
    Try to match a filename with the supplied current_node.

    This function only uses the current path name specified by
    current_element. It does not check whether the file system object behind
    that path is valid and matching the type of the current_node.

    Parameters
    ----------
    current_node : A dictionary containing the matcher. The key "re" holds the
                   regular expression and the key "case" selects
                   case-insensitive matching when it is set to "insensitive".
    current_element : A filename to match.

    Returns
    -------
    None if the matcher does not match and otherwise a dict with the values of the matcher.
    """
    # The flags must be handed to re.compile; otherwise the "case" setting of
    # the node has no effect on the match.
    flags = re.IGNORECASE if current_node["case"] == "insensitive" else 0
    matcher = re.compile(current_node["re"], flags).match(current_element)
    if matcher is None:
        return None

    # Value of current_node:
    # - Add the numeric groups (0 is the complete match)
    # - Add the dictionary groups as well
    valdict = {0: matcher.group()}
    for index, group in enumerate(matcher.groups(), start=1):
        valdict[index] = group
    valdict.update(matcher.groupdict())
    return valdict
def check_type(self, current_node, current_element):
path = os.path.join(self.current_dir, current_element)
......@@ -141,74 +135,82 @@ class DirectoryInformationBackend(InformationBackend):
def list_elements_function(self):
    """Return the names of the entries of self.current_dir as given by os.listdir."""
    entries = os.listdir(self.current_dir)
    return entries
def sub_matcher(self, current_node, subelement):
def sub_matcher(self, current_node, current_element, subelement):
path = os.path.join(self.current_dir, current_element)
if current_node["type"] == "dir":
match_current_dir_node(path, subelement)
match_current_dir_node(subelement,
DirectoryInformationBackend(path))
elif current_node["type"] == "file":
if current_node["representer"] == "markdown":
match_markdown_node(path, subelement)
print("MARKDOWN")
match_current_dir_node(subelement,
MarkdownInformationBackend(path))
else:
raise RuntimeError("Not implemented")
def match_current_dir_node(current_dir, current_node):
"""Do the recursive matching in the file tree.
class MarkdownInformationBackend(InformationBackend):
def __init__(self, filename=None, header=None):
    """
    Set up the backend either from a markdown file or from a header that is
    passed in directly.

    Parameters
    ----------
    filename : str
               The filename of the markdown file. If None, header will be used directly.
    header : dict
             The header dictionary object. Only used when filename is None.

    Raises
    ------
    ValueError if filename and header are both None.
    """
    if filename is None:
        if header is None:
            raise ValueError("filename and header cannot both be None.")
        self.header = header
        return
    # A given filename takes precedence over a header passed alongside it.
    self.header = yaml_header_tools.get_header_from_file(filename, clean=False)
"""
def add_defaults(self, node: dict):
if "re" not in node:
node["re"] = ".*"
for element in os.listdir(current_dir):
path = os.path.join(current_dir, element)
if current_node["type"] == "dir" and not os.path.isdir(path):
continue
elif current_node["type"] == "file" and os.path.isdir(path):
continue
match = match_file_object(current_node, element)
if match is not None:
if "value" not in current_node:
current_node["value"] = []
current_node["value"].append(match)
if "type" not in node:
node["type"] = "LIST"
if "children" in current_node:
match["children"] = []
for subelement_name in current_node["children"]:
subelement = get_subnode_with_defaults(
current_node["children"], subelement_name).copy()
match["children"].append(subelement)
if current_node["type"] == "dir":
match_current_dir_node(path, subelement)
elif current_node["type"] == "file":
if current_node["representer"] == "markdown":
match_markdown_node(path, subelement)
def get_dict_match(node, key, value):
"""
Try to match a dict element with key and value with the information supplied in node.
if "case" not in node:
node["case"] = "sensitive"
This is absolutely work-in-progress also in the specification, e.g.:
- It is currently not possible to match the name with a regexp.
"""
def match_file_object(self, current_node: dict,
current_element: str):
"""
Try to match a filename with the supplied current_node.
This function only uses the current path name specified by filename.
It does not check whether the file system object behind that path is valid
and matching the type of the current_node.
Parameters
----------
current_node : A dictionary containing the matcher.
filename : A filename to match.
Returns
-------
None if the matcher does not match and otherwise a dict with the values of the matcher.
"""
if current_node["nodeName"] != current_element:
return None
if node["type"] == "TEXT":
flags = 0
if node["case"] == "insensitive":
if current_node["case"] == "insensitive":
flags += re.IGNORECASE
if "re" in node:
regexp = node["re"]
else:
regexp = ".*"
regexp = current_node["re"]
pattern = re.compile(regexp)
matcher = re.match(pattern, )
matcher = re.match(pattern, self.header[current_element])
if matcher is None:
return None
# Value of node:
# Value of current_node:
# - Add the numeric groups
# - Add the dictionary groups as well
......@@ -217,22 +219,55 @@ def get_dict_match(node, key, value):
valdict[i+1] = matcher.group(i+1)
for k, v in matcher.groupdict().items():
valdict[k] = v
else:
raise RuntimeError("Only TEXT is supported at the moment.")
return valdict
def match_dict_node(current_dict, current_node):
for key, value in current_dict:
return valdict
def list_elements_function(self):
    """
    Return the header object whose elements are to be matched.

    The keys of the header are printed to stdout before returning.
    """
    header = self.header
    print(list(header.keys()))
    return header
def check_type(self, current_node, current_element):
    """
    Check whether the header value stored under current_element has the type
    requested by current_node.

    Parameters
    ----------
    current_node : A dictionary containing the key "type". Only the types
                   "LIST" and "TEXT" are checked; every other type is accepted
                   without looking at the header.
    current_element : The key of the value in self.header.

    Returns
    -------
    False if the type is "LIST" and the value is not a list, or if the type is
    "TEXT" and the value is not a str. True otherwise.
    """
    expected = {"LIST": list, "TEXT": str}.get(current_node["type"])
    if expected is None:
        return True
    # isinstance also accepts subclasses of list and str, which an exact
    # comparison of type objects would reject.
    return isinstance(self.header[current_element], expected)
def match_markdown_node(current_dir, current_node):
import yaml_header_tools
def sub_matcher(self, current_node, current_element, subelement):
    """
    Continue the matching of subelement inside the header value stored under
    current_element.

    Only nodes of type "LIST" are descended into: a new
    MarkdownInformationBackend is created for self.header[current_element] and
    handed to match_current_dir_node together with subelement. Nodes of any
    other type are left untouched. The current node is printed to stdout.
    """
    print(current_node)
    if current_node["type"] != "LIST":
        # Nothing to descend into for the other node types.
        return
    print("sub ok")
    match_current_dir_node(
        subelement,
        MarkdownInformationBackend(header=self.header[current_element]))
header = yaml_header_tools.get_header_from_file(current_dir)
match_dict_node(header, current_node)
def match_current_dir_node(current_node, information_backend):
    """Do the recursive matching of current_node against the elements of a backend.

    The backend first adds its defaults to current_node. Every element listed
    by the backend that passes check_type and is matched by match_file_object
    gets its match dict appended to current_node["value"]. If current_node has
    "children", a copy of each child specification (with "nodeName" set to its
    key) is appended to match["children"] and passed to the backend's
    sub_matcher.
    """
    information_backend.add_defaults(current_node)

    for candidate in information_backend.list_elements_function():
        if not information_backend.check_type(current_node, candidate):
            continue
        match = information_backend.match_file_object(current_node, candidate)
        if match is None:
            continue

        current_node.setdefault("value", []).append(match)

        if "children" not in current_node:
            continue
        match["children"] = []
        for child_name in current_node["children"]:
            # Work on a copy so that every match gets its own child node.
            child = current_node["children"][child_name].copy()
            child["nodeName"] = child_name
            match["children"].append(child)
            information_backend.sub_matcher(current_node, candidate, child)
def crawl_cfood(dirname: str,
cfood: str):
......@@ -249,12 +284,13 @@ def crawl_cfood(dirname: str,
# Assume root to have a single element (for now):
if len(root_node) != 1:
raise ValueError("Only a single cfood root is allowed.")
current_node = get_subnode_with_defaults(root_node, list(root_node.keys())[0])
current_dir = dirname
match_current_dir_node(current_dir, current_node)
root_node_name = list(root_node.keys())[0]
root_node[root_node_name]["nodeName"] = root_node_name
match_current_dir_node(root_node[root_node_name],
DirectoryInformationBackend(dirname))
return current_node
return root_node
......
......@@ -25,6 +25,7 @@ root:
description:
type: TEXT
responsible:
type: LIST
children:
person:
type: TEXT
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment