diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index 1fb944db76a82351be7e4cfc5f7e850120d4b51f..e79fbcabdb4fe48bc791287fa5551c0ade69e842 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -39,9 +39,8 @@ from abc import ABCMeta, abstractmethod from string import Template import yaml_header_tools -from caosdb.high_level_api import (CaosDBPythonEntity, - create_entity_container) import pandas as pd + import yaml # These are special properties which are (currently) treated differently @@ -452,57 +451,6 @@ class SimpleFileConverter(Converter): return None return m.groupdict() -class YamlFileCaosDBRecord(Converter): - """ - Load a file using pylib high level API and convert the contained - record into caosdb records. - """ - - def typecheck(self, element: StructureElement): - return isinstance(element, File) - - def create_children(self, generalStore: GeneralStore, - element: StructureElement): - return list() - - def match(self, element: StructureElement): - if not isinstance(element, File): - raise RuntimeError("Element must be a file.") - m = re.match(self.definition["match"], element.name) - if m is None: - return None - return m.groupdict() - - def create_records(self, values: GeneralStore, - records: RecordStore, - element: StructureElement): - if not isinstance(element, File): - raise RuntimeError("A yaml file is needed to create children.") - - keys_modified = [] - - with open(element.path, "r") as f: - entries = yaml.safe_load(f) - - entity = CaosDBPythonEntity.deserialize(entries) - entities = create_entity_container(entity) - - for n, ent in enumerate(entities): - name = ent.name - if name is None: - name = "YamlRecord_{}".format(n + 1) - records[name] = ent - values[name] = ent - - for propname in ent.properties: - keys_modified.append((name, propname.name)) - - # Process the records section of the yaml definition: - keys_modified.extend( - super().create_records(values, records, element)) - - return keys_modified - class FileConverter(SimpleFileConverter): def __init__(self, *args, **kwargs): diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 7fed3cb7362ebc077bedfd401bcf5569a9f0bd0c..cccbbdb040c556da8f904f27dd9d03aafb6d4872 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -330,9 +330,6 @@ class Crawler(object): "SimpleFile": { "converter": "SimpleFileConverter", "package": "caoscrawler.converters"}, - "YamlFileCaosDBRecord": { - "converter": "YamlFileCaosDBRecord", - "package": "caoscrawler.converters"}, "MarkdownFile": { "converter": "MarkdownFileConverter", "package": "caoscrawler.converters"}, @@ -1281,8 +1278,8 @@ def crawler_main(crawled_directory_path: str, if dry_run: ins, upd = crawler.synchronize(commit_changes=False) - inserts = [convert_to_python_object(i).serialize() for i in ins] - updates = [convert_to_python_object(i).serialize() for i in upd] + inserts = [str(i) for i in ins] + updates = [str(i) for i in upd] with open("dry.yml", "w") as f: f.write(yaml.dump({ "insert": inserts,