diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ba6c84749478314882a4131754bf9cc7fc5b184..be582170bc598736a55ca5d38fd06c4477a0eaf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed ### +* Moved the optional `hdf5_converter` to the `converters` + submodule. When updating from 0.8 or below, this means that you have + to adapt the converter package path in your cfood definition from + `caoscrawler.hdf5_converter` to + `caoscrawler.converters.hdf5_converter`. + ### Deprecated ### ### Removed ### diff --git a/src/caoscrawler/__init__.py b/src/caoscrawler/__init__.py index 27bdbfd371e10826d007480b4189bd2cd148344c..ba4844e15387cd13aa15db88521b2022fa52bfd6 100644 --- a/src/caoscrawler/__init__.py +++ b/src/caoscrawler/__init__.py @@ -1,15 +1,5 @@ -from . import converters, utils, xml_converter -try: - from .conv_impl.spss import SPSSConverter -except ImportError as err: - SPSSConverter: type = utils.MissingImport( - name="SPSSConverter", hint="Try installing with the `spss` extra option.", - err=err) +from . 
import converters, utils from .crawl import Crawler, SecurityMode from .version import CfoodRequiredVersionError, get_caoscrawler_version __version__ = get_caoscrawler_version() - -# Convenience members ######################################################### -# mypy: disable-error-code="attr-defined" -converters.SPSSConverter = SPSSConverter diff --git a/src/caoscrawler/conv_impl/__init__.py b/src/caoscrawler/conv_impl/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/caoscrawler/converters/__init__.py b/src/caoscrawler/converters/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..540a4cfca9ff19248baab2bc0fe8d10987d4bd1f --- /dev/null +++ b/src/caoscrawler/converters/__init__.py @@ -0,0 +1,32 @@ +# encoding: utf-8 +# +# This file is a part of the LinkAhead Project. +# +# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com> +# Copyright (C) 2024 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Submodule containing all default and optional converters.""" + +from .. 
import utils +from .converters import * +from .xml_converter import * + +try: + from .spss import SPSSConverter +except ImportError as err: + SPSSConverter: type = utils.MissingImport( + name="SPSSConverter", hint="Try installing with the `spss` extra option.", + err=err) diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters/converters.py similarity index 99% rename from src/caoscrawler/converters.py rename to src/caoscrawler/converters/converters.py index 9805d1103e380f688b40a9bfd4c3d03129dbd591..f31a0f4463ea805472044e5bd7697ed1316d1d9b 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters/converters.py @@ -41,12 +41,12 @@ import yaml import yaml_header_tools from jsonschema import ValidationError, validate -from .stores import GeneralStore, RecordStore -from .structure_elements import (BooleanElement, DictElement, Directory, File, - FloatElement, IntegerElement, JSONFile, - ListElement, NoneElement, StructureElement, - TextElement) -from .utils import has_parent +from ..stores import GeneralStore, RecordStore +from ..structure_elements import (BooleanElement, DictElement, Directory, File, + FloatElement, IntegerElement, JSONFile, + ListElement, NoneElement, StructureElement, + TextElement) +from ..utils import has_parent # These are special properties which are (currently) treated differently # by the converters: diff --git a/src/caoscrawler/hdf5_converter.py b/src/caoscrawler/converters/hdf5_converter.py similarity index 98% rename from src/caoscrawler/hdf5_converter.py rename to src/caoscrawler/converters/hdf5_converter.py index 482d59c12d2d0b8540c01bd04da718d9c514ddc4..a4d974bd53fc4b0e22d155f01a6a47295b79e984 100644 --- a/src/caoscrawler/hdf5_converter.py +++ b/src/caoscrawler/converters/hdf5_converter.py @@ -36,8 +36,8 @@ import linkahead as db from .converters import (convert_basic_element, Converter, DictElementConverter, match_name_and_value, SimpleFileConverter) -from .stores import GeneralStore, RecordStore -from 
.structure_elements import DictElement, File, FloatElement, IntegerElement, StructureElement +from ..stores import GeneralStore, RecordStore +from ..structure_elements import DictElement, File, FloatElement, IntegerElement, StructureElement def convert_attributes(elt: Union[h5py.File, h5py.Group, h5py.Dataset]): diff --git a/src/caoscrawler/conv_impl/spss.py b/src/caoscrawler/converters/spss.py similarity index 99% rename from src/caoscrawler/conv_impl/spss.py rename to src/caoscrawler/converters/spss.py index 5dfad0ff8be55e2ca3ddf0db3397dbac5fc9f2b0..b4f03aeaed6663be98487a4780bb96237e72e27e 100644 --- a/src/caoscrawler/conv_impl/spss.py +++ b/src/caoscrawler/converters/spss.py @@ -28,7 +28,7 @@ import pandas as pd import pyreadstat import yaml -from .. import converters +from . import converters from ..stores import GeneralStore from ..structure_elements import (File, StructureElement) from typing import Optional, Any diff --git a/src/caoscrawler/xml_converter.py b/src/caoscrawler/converters/xml_converter.py similarity index 93% rename from src/caoscrawler/xml_converter.py rename to src/caoscrawler/converters/xml_converter.py index 6d350c26d467372e65c4acc0fd397d6679279b24..d1d8b8871f9dad9762f35ee79e1a9106c259f4a9 100644 --- a/src/caoscrawler/xml_converter.py +++ b/src/caoscrawler/converters/xml_converter.py @@ -22,28 +22,17 @@ from __future__ import annotations -import datetime -import json -import logging -import os +import lxml.etree import re -import warnings -from inspect import signature -from string import Template -from typing import Any, Callable, Optional, Union -import linkahead as db -from jsonschema import ValidationError, validate +from typing import Optional -from .stores import GeneralStore, RecordStore -from .structure_elements import (BooleanElement, DictElement, Directory, File, - FloatElement, IntegerElement, JSONFile, - ListElement, NoneElement, StructureElement, - TextElement, XMLTagElement, XMLTextNode, XMLAttributeNode) -from .utils import 
has_parent +import linkahead as db -import lxml.etree from .converters import SimpleFileConverter, ConverterValidationError, Converter +from ..stores import GeneralStore, RecordStore +from ..structure_elements import (File, StructureElement, + XMLTagElement, XMLTextNode, XMLAttributeNode) class XMLFileConverter(SimpleFileConverter): diff --git a/src/caoscrawler/default_converters.yml b/src/caoscrawler/default_converters.yml index cb4a7d8c63489158c15dcf86b83fd940cd608460..a78c1579fc05c2ede424c076e7590d25550ea2f3 100644 --- a/src/caoscrawler/default_converters.yml +++ b/src/caoscrawler/default_converters.yml @@ -102,12 +102,12 @@ XLSXTableConverter: XMLFile: converter: XMLFileConverter - package: caoscrawler.xml_converter + package: caoscrawler.converters XMLTag: converter: XMLTagConverter - package: caoscrawler.xml_converter + package: caoscrawler.converters XMLTextNode: converter: XMLTextNodeConverter - package: caoscrawler.xml_converter + package: caoscrawler.converters diff --git a/src/doc/converters/cfood_definition.rst b/src/doc/converters/cfood_definition.rst index 13c04fd38df8b00c435192a1c3cf02147f870b4c..ea2f14b23bec04e659aa3166f089c7d274f74811 100644 --- a/src/doc/converters/cfood_definition.rst +++ b/src/doc/converters/cfood_definition.rst @@ -32,13 +32,16 @@ The yaml definition may look like this: The **<NodeName>** is a description of what the current block represents (e.g. ``experiment-folder``) and is used as an identifier. -**<type>** selects the converter that is going to be matched against the current structure -element. 
If the structure element matches (this is a combination of a typecheck and a detailed -match, see the :py:class:`~caoscrawler.converters.Converter` source documentation for details), the -converter will: - -- generate records (with :py:meth:`~caoscrawler.converters.Converter.create_records`) -- possibly process a subtree (with :py:meth:`caoscrawler.converters.Converter.create_children`) +**<type>** selects the converter that is going to be matched against +the current structure element. If the structure element matches (this +is a combination of a typecheck and a detailed match, see the +:py:class:`~caoscrawler.converters.converters.Converter` source +documentation for details), the converter will: + +- generate records (with + :py:meth:`~caoscrawler.converters.converters.Converter.create_records`) +- possibly process a subtree (with + :py:meth:`~caoscrawler.converters.converters.Converter.create_children`) **match** *TODO* diff --git a/src/doc/converters/custom_converters.rst b/src/doc/converters/custom_converters.rst index 573d9714488eaacd2c794b1fa497306a8d110a5f..2738d66c483148fdecb9b189edac45e5b9a55a8b 100644 --- a/src/doc/converters/custom_converters.rst +++ b/src/doc/converters/custom_converters.rst @@ -27,20 +27,20 @@ Details: - **<NameOfTheConverterInYamlFile>**: This is the name of the converter as it is going to be used in the present yaml file. - **<python>.<module>.<name>**: The name of the module where the converter class resides. -- **<PythonClassName>**: Within this specified module there must be a class inheriting from base class :py:class:`caoscrawler.converters.Converter`. +- **<PythonClassName>**: Within this specified module there must be a class inheriting from base class :py:class:`caoscrawler.converters.converters.Converter`. Implementing a custom converter =============================== -Converters inherit from the :py:class:`~caoscrawler.converters.Converter` class. 
+Converters inherit from the :py:class:`~caoscrawler.converters.converters.Converter` class. The following methods are abstract and need to be overwritten by your custom converter to make it work: -:py:meth:`~caoscrawler.converters.Converter.create_children`: +:py:meth:`~caoscrawler.converters.converters.Converter.create_children`: Return a list of child StructureElement objects. -- :py:meth:`~caoscrawler.converters.Converter.match` -- :py:meth:`~caoscrawler.converters.Converter.typecheck` +- :py:meth:`~caoscrawler.converters.converters.Converter.match` +- :py:meth:`~caoscrawler.converters.converters.Converter.typecheck` Example @@ -71,10 +71,10 @@ First we will create our package and module structure, which might be: doc/ unittests/ -Now we need to create a class called "SourceResolver" in the file "sources.py". In this - more advanced - example, we will not inherit our converter directly from :py:class:`~caoscrawler.converters.Converter`, but use :py:class:`~caoscrawler.converters.TextElementConverter`. The latter already implements :py:meth:`~caoscrawler.converters.Converter.match` and :py:meth:`~caoscrawler.converters.Converter.typecheck`, so only an implementation for :py:meth:`~caoscrawler.converters.Converter.create_children` has to be provided by us. -Furthermore we will customize the method :py:meth:`~caoscrawler.converters.Converter.create_records` that allows us to specify a more complex record generation procedure than provided in the standard implementation. One specific limitation of the standard implementation is, that only a fixed -number of records can be generated by the yaml definition. So for any applications - like here - that require an arbitrary number of records to be created, a customized implementation of :py:meth:`~caoscrawler.converters.Converter.create_records` is recommended. 
-In this context it is recommended to make use of the function :func:`caoscrawler.converters.create_records` that implements creation of record objects from python dictionaries of the same structure +Now we need to create a class called "SourceResolver" in the file "sources.py". In this - more advanced - example, we will not inherit our converter directly from :py:class:`~caoscrawler.converters.converters.Converter`, but use :py:class:`~caoscrawler.converters.converters.TextElementConverter`. The latter already implements :py:meth:`~caoscrawler.converters.converters.Converter.match` and :py:meth:`~caoscrawler.converters.converters.Converter.typecheck`, so only an implementation for :py:meth:`~caoscrawler.converters.converters.Converter.create_children` has to be provided by us. +Furthermore we will customize the method :py:meth:`~caoscrawler.converters.converters.Converter.create_records` that allows us to specify a more complex record generation procedure than provided in the standard implementation. One specific limitation of the standard implementation is, that only a fixed +number of records can be generated by the yaml definition. So for any applications - like here - that require an arbitrary number of records to be created, a customized implementation of :py:meth:`~caoscrawler.converters.converters.Converter.create_records` is recommended. +In this context it is recommended to make use of the function :func:`caoscrawler.converters.converters.create_records` that implements creation of record objects from python dictionaries of the same structure that would be given using a yaml definition (see next section below). .. 
code-block:: python @@ -179,7 +179,7 @@ The following yaml block will register the converter in a yaml file: Using the `create_records` API function ======================================= -The function :func:`caoscrawler.converters.create_records` was already mentioned above and it is +The function :func:`caoscrawler.converters.converters.create_records` was already mentioned above and it is the recommended way to create new records from custom converters. Let's have a look at the function signature: @@ -229,14 +229,14 @@ The `dir_name` is set explicitely here, everything else is identical to the yaml The role of `keys_modified` =========================== -You probably have noticed already, that :func:`caoscrawler.converters.create_records` returns +You probably have noticed already, that :func:`caoscrawler.converters.converters.create_records` returns `keys_modified` which is a list of tuples. Each element of `keys_modified` has two elements: - Element 0 is the name of the record that is modified (as used in the record store `records`). - Element 1 is the name of the property that is modified. It is important, that the correct list of modified keys is returned by -:py:meth:`~caoscrawler.converters.Converter.create_records` to make the crawler process work. +:py:meth:`~caoscrawler.converters.converters.Converter.create_records` to make the crawler process work. So, a sketch of a typical implementation within a custom converter could look like this: diff --git a/src/doc/converters/further_converters.rst b/src/doc/converters/further_converters.rst index 539c5159eb1de01765a78e3c04e10fb3f0be9be5..a334c8778f440e108fd141b0fc53ec06765deb8c 100644 --- a/src/doc/converters/further_converters.rst +++ b/src/doc/converters/further_converters.rst @@ -64,26 +64,28 @@ H5FileConverter --------------- This is an extension of the -:py:class:`~caoscrawler.converters.SimpleFileConverter` class. It opens the HDF5 -file and creates children for any contained group or dataset. 
Additionally, the -root-level attributes of the HDF5 file are accessible as children. +:py:class:`~caoscrawler.converters.converters.SimpleFileConverter` +class. It opens the HDF5 file and creates children for any contained +group or dataset. Additionally, the root-level attributes of the HDF5 +file are accessible as children. H5GroupConverter ---------------- This is an extension of the -:py:class:`~caoscrawler.converters.DictElementConverter` class. Children are -created for all subgroups and datasets in this HDF5 group. Additionally, the -group-level attributes are accessible as children. +:py:class:`~caoscrawler.converters.converters.DictElementConverter` +class. Children are created for all subgroups and datasets in this +HDF5 group. Additionally, the group-level attributes are accessible as +children. H5DatasetConverter ------------------ This is an extension of the -:py:class:`~caoscrawler.converters.DictElementConverter` class. Most -importantly, it stores the array data in HDF5 dataset into -:py:class:`~caoscrawler.hdf5_converter.H5NdarrayElement` which is added to its -children, as well as the dataset attributes. +:py:class:`~caoscrawler.converters.converters.DictElementConverter` +class. Most importantly, it stores the array data in HDF5 dataset into +:py:class:`~caoscrawler.converters.hdf5_converter.H5NdarrayElement` +which is added to its children, as well as the dataset attributes. 
H5NdarrayConverter ------------------ diff --git a/src/doc/converters/standard_converters.rst b/src/doc/converters/standard_converters.rst index 3dc3c882e76e10706d030ba0695d498631bf7b28..586b84b48be78f1307298a11ad61a2448c3c3cd7 100644 --- a/src/doc/converters/standard_converters.rst +++ b/src/doc/converters/standard_converters.rst @@ -131,9 +131,9 @@ CSV File → DictElement PropertiesFromDictConverter =========================== -The :py:class:`~caoscrawler.converters.PropertiesFromDictConverter` is +The :py:class:`~caoscrawler.converters.converters.PropertiesFromDictConverter` is a specialization of the -:py:class:`~caoscrawler.converters.DictElementConverter` and offers +:py:class:`~caoscrawler.converters.converters.DictElementConverter` and offers all its functionality. It is meant to operate on dictionaries (e.g., from reading in a json or a table file), the keys of which correspond closely to properties in a LinkAhead datamodel. This is especially @@ -141,7 +141,7 @@ handy in cases where properties may be added to the data model and data sources that are not yet known when writing the cfood definition. The converter definition of the -:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` has an +:py:class:`~caoscrawler.converters.converters.PropertiesFromDictConverter` has an additional required entry ``record_from_dict`` which specifies the Record to which the properties extracted from the dict are attached to. This Record is identified by its ``variable_name`` by which it can @@ -183,7 +183,7 @@ with value "Silvia Scientist": .. image:: ../img/properties-from-dict-records-author.png :height: 210 :alt: A Record "New Name" and an author Record with full_name - "Silvia Scientist" are generated and filled automatically. + "Silvia Scientist" are generated and filled automatically. Note how the different dictionary keys are handled differently depending on their types: scalar and list values are understood @@ -219,7 +219,7 @@ property: .. 
image:: ../img/properties-from-dict-records-person.png :height: 200 :alt: A new Person Record is created which is referenced as an - author. + author. For the time being, only the parents of the referenced record can be set via this option. More complicated treatments can be implemented @@ -228,22 +228,22 @@ via the ``referenced_record_callback`` (see below). Properties can be blacklisted with the ``properties_blacklist`` keyword, i.e., all keys listed under ``properties_blacklist`` will be excluded from automated treatment. Since the -:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` has +:py:class:`~caoscrawler.converters.converters.PropertiesFromDictConverter` has all the functionality of the -:py:class:`~caoscrawler.converters.DictElementConverter`, individual +:py:class:`~caoscrawler.converters.converters.DictElementConverter`, individual properties can still be used in a subtree. Together with ``properties_blacklist`` this can be used to add custom treatment to specific properties by blacklisting them in ``record_from_dict`` and then treating them in the subtree the same as you would do it in the standard -:py:class:`~caoscrawler.converters.DictElementConverter`. Note that +:py:class:`~caoscrawler.converters.converters.DictElementConverter`. Note that the blacklisted keys are excluded on **all** levels of the dictionary, i.e., also when they occur in a referenced entity. For further customization, the -:py:class:`~caoscrawler.converters.PropertiesFromDictConverter` can be -used as a basis for :ref:`custom converters<Custom Converters>` which -can make use of its ``referenced_record_callback`` argument. The +:py:class:`~caoscrawler.converters.converters.PropertiesFromDictConverter` +can be used as a basis for :ref:`custom converters<Custom Converters>` +which can make use of its ``referenced_record_callback`` argument. 
The ``referenced_record_callback`` can be a callable object which takes exactly a Record as an argument and needs to return that Record after doing whatever custom treatment is needed. Additionally, it is given diff --git a/src/doc/how-to-upgrade.md b/src/doc/how-to-upgrade.md index 30d23f8f3a4ad88f6b3f4fca18013e26fbcb1dc1..8af805ea30cc85cdde88d789ee3538b2bbaef7e3 100644 --- a/src/doc/how-to-upgrade.md +++ b/src/doc/how-to-upgrade.md @@ -1,5 +1,45 @@ # How to upgrade + +## 0.8.x to 0.9.0 + +If you were using the optional HDF5 converter classes, you need to +adapt the package path in your cfood definition from the **old** + +```yaml +Converters: + H5Dataset: + converter: H5DatasetConverter + package: caoscrawler.hdf5_converter + H5File: + converter: H5FileConverter + package: caoscrawler.hdf5_converter + H5Group: + converter: H5GroupConverter + package: caoscrawler.hdf5_converter + H5Ndarray: + converter: H5NdarrayConverter + package: caoscrawler.hdf5_converter +``` + +to the **new** paths: + +```yaml +Converters: + H5Dataset: + converter: H5DatasetConverter + package: caoscrawler.converters.hdf5_converter + H5File: + converter: H5FileConverter + package: caoscrawler.converters.hdf5_converter + H5Group: + converter: H5GroupConverter + package: caoscrawler.converters.hdf5_converter + H5Ndarray: + converter: H5NdarrayConverter + package: caoscrawler.converters.hdf5_converter +``` + ## 0.6.x to 0.7.0 If you added Parents to Records at multiple places in the CFood, you must now do this at a single location because this key now overwrites previously set diff --git a/unittests/h5_cfood.yml b/unittests/h5_cfood.yml index 4b95a0a31bc43a902eb63dc3aa09b805fc28c2aa..24cbf4a4fd5972ff9a044136f9dd3f02efd87cd2 100644 --- a/unittests/h5_cfood.yml +++ b/unittests/h5_cfood.yml @@ -5,16 +5,16 @@ metadata: Converters: H5Dataset: converter: H5DatasetConverter - package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter H5File: converter: H5FileConverter - 
package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter H5Group: converter: H5GroupConverter - package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter H5Ndarray: converter: H5NdarrayConverter - package: caoscrawler.hdf5_converter + package: caoscrawler.converters.hdf5_converter # Top-level, we have just the HDF5 file. ParentDirectory: type: Directory diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 3d4d8dd7a1faf02c49febc1a112fab7c3cef4830..530b091bfa340e596c9d332c7b7dc9d3853b061e 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -46,8 +46,8 @@ from caoscrawler.converters import (Converter, ConverterValidationError, ListElementConverter, MarkdownFileConverter, PropertiesFromDictConverter, YAMLFileConverter, - _AbstractScalarValueElementConverter, handle_value, replace_variables) +from caoscrawler.converters.converters import _AbstractScalarValueElementConverter from caoscrawler.crawl import Crawler from caoscrawler.scanner import (_load_definition_from_yaml_dict, create_converter_registry, @@ -648,10 +648,7 @@ def test_load_converters(): # All of them are contained in caoscrawler.converters # except for the xml converters: for conv_key, conv in converter_registry.items(): - if conv_key in ("XMLTag", "XMLFile", "XMLTextNode"): - assert conv["package"] == "caoscrawler.xml_converter" - else: - assert conv["package"] == "caoscrawler.converters" + assert conv["package"] == "caoscrawler.converters" # ... 
and their names all end in "Converter" assert conv["converter"].endswith("Converter") diff --git a/unittests/test_h5_converter.py b/unittests/test_h5_converter.py index 7f244e2cbdccb0d4eee6a62f59e9cea5684295a6..95060451badb0523cf91c70e5be345e35ec3964d 100644 --- a/unittests/test_h5_converter.py +++ b/unittests/test_h5_converter.py @@ -26,9 +26,9 @@ from pytest import fixture, importorskip import linkahead as db from caoscrawler.debug_tree import DebugTree -from caoscrawler.hdf5_converter import (convert_basic_element_with_nd_array, - convert_h5_element, H5GroupElement, - H5DatasetElement, H5NdarrayElement) +from caoscrawler.converters.hdf5_converter import (convert_basic_element_with_nd_array, + convert_h5_element, H5GroupElement, + H5DatasetElement, H5NdarrayElement) from caoscrawler.scanner import scan_directory from caoscrawler.structure_elements import (FloatElement, ListElement, TextElement) diff --git a/unittests/test_xml_converter.py b/unittests/test_xml_converter.py index 93e4a422d94a9315eadca24b8c799682d7d99964..fb4c7746fa2d0b6c3d4ec95fc1de3139493a703f 100644 --- a/unittests/test_xml_converter.py +++ b/unittests/test_xml_converter.py @@ -21,40 +21,22 @@ # """ -test the converters module +test the XML converters """ -import datetime import importlib import json -import logging -import os +import pytest import sys -from itertools import product +import yaml + +from lxml.etree import fromstring from pathlib import Path -import pytest -import yaml -from caoscrawler.converters import (Converter, ConverterValidationError, - DateElementConverter, DictElementConverter, - DictIntegerElementConverter, - DirectoryConverter, FloatElementConverter, - IntegerElementConverter, JSONFileConverter, - ListElementConverter, - MarkdownFileConverter, YAMLFileConverter, - _AbstractScalarValueElementConverter, - handle_value, replace_variables) -from caoscrawler.crawl import Crawler -from caoscrawler.scanner import (_load_definition_from_yaml_dict, - create_converter_registry, - 
create_transformer_registry, load_definition) +from caoscrawler.converters import XMLTagConverter +from caoscrawler.scanner import load_definition from caoscrawler.stores import GeneralStore -from caoscrawler.structure_elements import (BooleanElement, DictElement, - Directory, File, FloatElement, - IntegerElement, ListElement, - TextElement, XMLTagElement) -from caoscrawler.xml_converter import XMLTagConverter +from caoscrawler.structure_elements import XMLTagElement -from lxml.etree import fromstring UNITTESTDIR = Path(__file__).parent @@ -62,19 +44,13 @@ UNITTESTDIR = Path(__file__).parent @pytest.fixture def converter_registry(): converter_registry: dict[str, dict[str, str]] = { - "Directory": { - "converter": "DirectoryConverter", - "package": "caoscrawler.converters"}, - "TextElement": { - "converter": "TextElementConverter", - "package": "caoscrawler.converters"}, "XMLTag": { "converter": "XMLTagConverter", - "package": "caoscrawler.xml_converter"}, + "package": "caoscrawler.converters"}, "XMLTextNode": { "converter": "XMLTextNodeConverter", - "package": "caoscrawler.xml_converter"}, + "package": "caoscrawler.converters"}, } for key, value in converter_registry.items():