diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py index 9beadc11ef8bb8fe6bc21b8df55cd21240586273..ba4844e15387cd13aa15db88521b2022fa52bfd6 100644 --- a/src/caoscrawler/__init__.py +++ b/src/caoscrawler/__init__.py @@ -1,4 +1,4 @@ -from . import converters, utils, xml_converter +from . import converters, utils from .crawl import Crawler, SecurityMode from .version import CfoodRequiredVersionError, get_caoscrawler_version diff --git a/src/caoscrawler/converters/__init__.py b/src/caoscrawler/converters/__init__.py index b7bae03b6d779d52291c9858a6f7d29df67aef85..540a4cfca9ff19248baab2bc0fe8d10987d4bd1f 100644 --- a/src/caoscrawler/converters/__init__.py +++ b/src/caoscrawler/converters/__init__.py @@ -22,11 +22,11 @@ from .. import utils from .converters import * +from .xml_converter import * try: - from .converters.spss import SPSSConverter + from .spss import SPSSConverter except ImportError as err: SPSSConverter: type = utils.MissingImport( name="SPSSConverter", hint="Try installing with the `spss` extra option.", err=err) - diff --git a/src/caoscrawler/converters/converters.py b/src/caoscrawler/converters/converters.py index d8efe1f35eca8e02282343bb1c3d526c2ab39d35..f31a0f4463ea805472044e5bd7697ed1316d1d9b 100644 --- a/src/caoscrawler/converters/converters.py +++ b/src/caoscrawler/converters/converters.py @@ -43,9 +43,9 @@ from jsonschema import ValidationError, validate from ..stores import GeneralStore, RecordStore from ..structure_elements import (BooleanElement, DictElement, Directory, File, - FloatElement, IntegerElement, JSONFile, - ListElement, NoneElement, StructureElement, - TextElement) + FloatElement, IntegerElement, JSONFile, + ListElement, NoneElement, StructureElement, + TextElement) from ..utils import has_parent # These are special properties which are (currently) treated differently diff --git a/src/caoscrawler/converters/spss.py b/src/caoscrawler/converters/spss.py index 3f1c36628f5e98e03aa84fba2bb3b776c8883f82..b4f03aeaed6663be98487a4780bb96237e72e27e 100644 --- a/src/caoscrawler/converters/spss.py +++ b/src/caoscrawler/converters/spss.py @@ -28,8 +28,7 @@ import pandas as pd import pyreadstat import yaml -import .converters - +from . import converters from ..stores import GeneralStore from ..structure_elements import (File, StructureElement) from typing import Optional, Any diff --git a/src/caoscrawler/xml_converter.py b/src/caoscrawler/converters/xml_converter.py similarity index 98% rename from src/caoscrawler/xml_converter.py rename to src/caoscrawler/converters/xml_converter.py index 6d350c26d467372e65c4acc0fd397d6679279b24..dbdc9d381d0ee1dcf48074b258fe160ea71408db 100644 --- a/src/caoscrawler/xml_converter.py +++ b/src/caoscrawler/converters/xml_converter.py @@ -35,12 +35,12 @@ from typing import Any, Callable, Optional, Union import linkahead as db from jsonschema import ValidationError, validate -from .stores import GeneralStore, RecordStore -from .structure_elements import (BooleanElement, DictElement, Directory, File, +from ..stores import GeneralStore, RecordStore +from ..structure_elements import (BooleanElement, DictElement, Directory, File, FloatElement, IntegerElement, JSONFile, ListElement, NoneElement, StructureElement, TextElement, XMLTagElement, XMLTextNode, XMLAttributeNode) -from .utils import has_parent +from ..utils import has_parent import lxml.etree from .converters import SimpleFileConverter, ConverterValidationError, Converter diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml index cb4a7d8c63489158c15dcf86b83fd940cd608460..a78c1579fc05c2ede424c076e7590d25550ea2f3 100644 --- a/src/caoscrawler/default_converters.yml +++ b/src/caoscrawler/default_converters.yml @@ -102,12 +102,12 @@ XLSXTableConverter: XMLFile: converter: XMLFileConverter - package: caoscrawler.xml_converter + package: caoscrawler.converters XMLTag: converter: XMLTagConverter - package: caoscrawler.xml_converter + package: caoscrawler.converters XMLTextNode: converter: XMLTextNodeConverter - package: caoscrawler.xml_converter + package: caoscrawler.converters diff --git a/unittests/h5_cfood.yml b/unittests/h5_cfood.yml index 4b95a0a31bc43a902eb63dc3aa09b805fc28c2aa..24cbf4a4fd5972ff9a044136f9dd3f02efd87cd2 100644 --- a/unittests/h5_cfood.yml +++ b/unittests/h5_cfood.yml @@ -5,16 +5,16 @@ metadata: Converters: H5Dataset: converter: H5DatasetConverter - package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter H5File: converter: H5FileConverter - package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter H5Group: converter: H5GroupConverter - package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter H5Ndarray: converter: H5NdarrayConverter - package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter # Top-level, we have just the HDF5 file. ParentDirectory: type: Directory diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 3d4d8dd7a1faf02c49febc1a112fab7c3cef4830..530b091bfa340e596c9d332c7b7dc9d3853b061e 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -46,8 +46,8 @@ from caoscrawler.converters import (Converter, ConverterValidationError, ListElementConverter, MarkdownFileConverter, PropertiesFromDictConverter, YAMLFileConverter, - _AbstractScalarValueElementConverter, handle_value, replace_variables) +from caoscrawler.converters.converters import _AbstractScalarValueElementConverter from caoscrawler.crawl import Crawler from caoscrawler.scanner import (_load_definition_from_yaml_dict, create_converter_registry, @@ -648,10 +648,7 @@ def test_load_converters(): # All of them are contained in caoscrawler.converters # except for the xml converters: for conv_key, conv in converter_registry.items(): - if conv_key in ("XMLTag", "XMLFile", "XMLTextNode"): - assert conv["package"] == "caoscrawler.xml_converter" - else: - assert conv["package"] == "caoscrawler.converters" + assert conv["package"] == "caoscrawler.converters" # ... and their names all end in "Converter" assert conv["converter"].endswith("Converter") diff --git a/unittests/test_h5_converter.py b/unittests/test_h5_converter.py index 7f244e2cbdccb0d4eee6a62f59e9cea5684295a6..95060451badb0523cf91c70e5be345e35ec3964d 100644 --- a/unittests/test_h5_converter.py +++ b/unittests/test_h5_converter.py @@ -26,9 +26,9 @@ from pytest import fixture, importorskip import linkahead as db from caoscrawler.debug_tree import DebugTree -from caoscrawler.hdf5_converter import (convert_basic_element_with_nd_array, - convert_h5_element, H5GroupElement, - H5DatasetElement, H5NdarrayElement) +from caoscrawler.converters.hdf5_converter import (convert_basic_element_with_nd_array, + convert_h5_element, H5GroupElement, + H5DatasetElement, H5NdarrayElement) from caoscrawler.scanner import scan_directory from caoscrawler.structure_elements import (FloatElement, ListElement, TextElement) diff --git a/unittests/test_xml_converter.py b/unittests/test_xml_converter.py index 93e4a422d94a9315eadca24b8c799682d7d99964..fb4c7746fa2d0b6c3d4ec95fc1de3139493a703f 100644 --- a/unittests/test_xml_converter.py +++ b/unittests/test_xml_converter.py @@ -21,40 +21,22 @@ # """ -test the converters module +test the XML converters """ -import datetime import importlib import json -import logging -import os +import pytest import sys -from itertools import product +import yaml + +from lxml.etree import fromstring from pathlib import Path -import pytest -import yaml -from caoscrawler.converters import (Converter, ConverterValidationError, - DateElementConverter, DictElementConverter, - DictIntegerElementConverter, - DirectoryConverter, FloatElementConverter, - IntegerElementConverter, JSONFileConverter, - ListElementConverter, - MarkdownFileConverter, YAMLFileConverter, - _AbstractScalarValueElementConverter, - handle_value, replace_variables) -from caoscrawler.crawl import Crawler -from caoscrawler.scanner import (_load_definition_from_yaml_dict, - create_converter_registry, - create_transformer_registry, load_definition) +from caoscrawler.converters import XMLTagConverter +from caoscrawler.scanner import load_definition from caoscrawler.stores import GeneralStore -from caoscrawler.structure_elements import (BooleanElement, DictElement, - Directory, File, FloatElement, - IntegerElement, ListElement, - TextElement, XMLTagElement) -from caoscrawler.xml_converter import XMLTagConverter +from caoscrawler.structure_elements import XMLTagElement -from lxml.etree import fromstring UNITTESTDIR = Path(__file__).parent @@ -62,19 +44,13 @@ UNITTESTDIR = Path(__file__).parent @pytest.fixture def converter_registry(): converter_registry: dict[str, dict[str, str]] = { - "Directory": { - "converter": "DirectoryConverter", - "package": "caoscrawler.converters"}, - "TextElement": { - "converter": "TextElementConverter", - "package": "caoscrawler.converters"}, "XMLTag": { "converter": "XMLTagConverter", - "package": "caoscrawler.xml_converter"}, + "package": "caoscrawler.converters"}, "XMLTextNode": { "converter": "XMLTextNodeConverter", - "package": "caoscrawler.xml_converter"}, + "package": "caoscrawler.converters"}, } for key, value in converter_registry.items():