diff --git a/src/newcrawler/converters.py b/src/newcrawler/converters.py index caf1aa667a8cbb26021c2b3feeb347a168fa95b4..14a4020c744ad69ffe6884ad8a317214dd2ad02e 100644 --- a/src/newcrawler/converters.py +++ b/src/newcrawler/converters.py @@ -365,14 +365,12 @@ class MarkdownFileConverter(Converter): class JSONFileConverter(Converter): - @staticmethod - def typecheck(element: StructureElement): - return isinstance(element, JSONFile) + def typecheck(self, element: StructureElement): + print(type(element)) + return isinstance(element, File) def match(self, element: StructureElement): - if not JSONFileConverter.typecheck(element): - # TODO(salexan) Should we be more precise than just raising runtime - # errors here? + if not self.typecheck(element): raise RuntimeError("Element must be a file") m = re.match(self.definition["match"], element.name) if m is None: @@ -383,9 +381,8 @@ class JSONFileConverter(Converter): pass return m.groupdict() - @staticmethod - def create_children(generalStore: GeneralStore, element: StructureElement): - if not JSONFileConverter.typecheck(element): + def create_children(self, generalStore: GeneralStore, element: StructureElement): + if not self.typecheck(element): raise RuntimeError("A JSON file is needed to create children") with open(element.path, 'r') as json_file: json_data = json.load(json_file) @@ -398,9 +395,8 @@ class JSONFileConverter(Converter): class DictConverter(Converter): # TODO use Dict as typecheck? - @staticmethod - def create_children(generalStore: GeneralStore, element: StructureElement): - if not DictConverter.typecheck(element): + def create_children(self, generalStore: GeneralStore, element: StructureElement): + if not self.typecheck(element): raise RuntimeError("A dict is needed to create children") children = [] @@ -419,8 +415,7 @@ class DictConverter(Converter): return children - @staticmethod - def typecheck(element: StructureElement): + def typecheck(self, element: StructureElement): return isinstance(element, Dict) # TODO use Dict as typecheck? diff --git a/unittests/jsontest_cfood.yml b/unittests/test_directories/single_file_test_data/jsontest_cfood.yml similarity index 100% rename from unittests/jsontest_cfood.yml rename to unittests/test_directories/single_file_test_data/jsontest_cfood.yml diff --git a/unittests/test_json.py b/unittests/test_json.py index ee2454eed5b4517638a4acaf209e3fb1050e2447..85635f32d7d5e5044d3f23844782c7cafafa82aa 100644 --- a/unittests/test_json.py +++ b/unittests/test_json.py @@ -38,7 +38,8 @@ from test_tool import rfp, dircheckstr def test_json(): - crawler_definition_path = rfp("jsontest_cfood.yml") + crawler_definition_path = rfp("test_directories", "single_file_test_data", + "jsontest_cfood.yml") json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json") crawler = Crawler(debug=True) @@ -49,12 +50,10 @@ def test_json(): crawler.start_crawling( JSONFile(os.path.basename(json_file_path), json_file_path), crawler_definition, - converter_registry, - JSONFileConverter + converter_registry ) subd = crawler.debug_tree subc = crawler.debug_metadata #print(json.dumps(subd, indent=3)) print(subd) print(subc) - lskdjf