diff --git a/src/doc/tutorials/single_file.rst b/src/doc/tutorials/single_file.rst
index 9e0fc90e8a7ab14e7d13d7fe2a194459f788ccec..86dcc793f8e4fb26efc3d4460d798dd2eac10796 100644
--- a/src/doc/tutorials/single_file.rst
+++ b/src/doc/tutorials/single_file.rst
@@ -15,13 +15,13 @@ This tutorial is based on the following simple data model:
    Fish:
      recommended_properties:
        date:
-         datatype: DATETIME
+         datatype: DATETIME
        number:
-         datatype: INTEGER
+         datatype: INTEGER
        weight:
-         datatype: DOUBLE
+         datatype: DOUBLE
        species:
-         datatype: TEXT
+         datatype: TEXT
 
 You can insert this model with the following command:
 
@@ -51,48 +51,48 @@ And we will use the following crawler configuration.
    metadata:
      crawler-version: 0.9.1
    ---
-
+
    fish_data_file:
      type: CSVTableConverter
      match: "^fish_data_.*.csv$" # CSV file with a name that starts with "fish_data_"
      sep: ";"
      subtree:
        table_row: # A row in the file
-         type: DictElement
-         match_name: .* # we want to treat every row
-         match_value: .*
-         records:
-           Fish: # Record for the current row; information from statements below
-                 # are added to this Record
-         subtree:
-           date: # Column containing the date
-             type: TextElement
-             match_name: date # Name of the column in the table file
-             match_value: (?P<column_value>.*)
-             records: # Records edited for each cell
-               Fish:
-                 date: $column_value
-           species:
-             type: TextElement
-             match_name: species
-             match_value: (?P<column_value>.*)
-             records:
-               Fish:
-                 species: $column_value
-           number:
-             type: TextElement
-             match_name: identifier
-             match_value: (?P<column_value>.*)
-             records:
-               Fish:
-                 number: $column_value
-           weight:
-             type: TextElement
-             match_name: weight
-             match_value: (?P<column_value>.*)
-             records:
-               Fish:
-                 weight: $column_value
+         type: DictElement
+         match_name: .* # we want to treat every row
+         match_value: .*
+         records:
+           Fish: # Record for the current row; information from statements below
+                 # are added to this Record
+         subtree:
+           date: # Column containing the date
+             type: TextElement
+             match_name: date # Name of the column in the table file
+             match_value: (?P<column_value>.*)
+             records: # Records edited for each cell
+               Fish:
+                 date: $column_value
+           species:
+             type: TextElement
+             match_name: species
+             match_value: (?P<column_value>.*)
+             records:
+               Fish:
+                 species: $column_value
+           number:
+             type: TextElement
+             match_name: identifier
+             match_value: (?P<column_value>.*)
+             records:
+               Fish:
+                 number: $column_value
+           weight:
+             type: TextElement
+             match_name: weight
+             match_value: (?P<column_value>.*)
+             records:
+               Fish:
+                 weight: $column_value
 
 
 
@@ -105,7 +105,7 @@ Records and synchronize those with LinkAhead.
 .. code-block:: python
 
    #!/usr/bin/env python3
-
+
   # Copyright (C) 2023-2024 IndiScale GmbH <info@indiscale.com>
   #
   # This program is free software: you can redistribute it and/or modify
@@ -120,28 +120,28 @@ Records and synchronize those with LinkAhead.
   #
   # You should have received a copy of the GNU Affero General Public License
   # along with this program. If not, see <https://www.gnu.org/licenses/>.
-
+
   """Crawler for fish data"""
-
+
   import os
   import argparse
   import sys
   import logging
-
+
   from caoscrawler.scanner import load_definition, create_converter_registry, scan_structure_elements
   from caoscrawler.structure_elements import File
   from caoscrawler import Crawler, SecurityMode
   from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
-
-
+
+
   def crawl_file(filename: str, dry_run: bool = False):
       """Read a CSV file into a LinkAhead container.
-
+
       Parameters
       ----------
       filename : str
           The name of the CSV file.
-
+
       dry_run : bool
           If True, do not modify the database.
       """
@@ -149,44 +149,44 @@ Records and synchronize those with LinkAhead.
       logger = logging.getLogger("caoscrawler")
       logger.setLevel(level=(logging.DEBUG))
       logger.addHandler(logging.StreamHandler(stream=sys.stdout))
-
+
       # load crawler configuration
       definition = load_definition("cfood.yml")
       converter_registry = create_converter_registry(definition)
-
+
       # crawl the CSV file
       records = scan_structure_elements(items=File(name=
                                                    os.path.basename(filename), path=filename),
-                                        crawler_definition=definition,
-                                        converter_registry=converter_registry)
+                                        crawler_definition=definition,
+                                        converter_registry=converter_registry)
       logger.debug(records)
-
+
       crawler = Crawler(securityMode=SecurityMode.UPDATE)
       # This defines how Records on the server are identified with the ones we have locally
       ident = CaosDBIdentifiableAdapter()
       ident.load_from_yaml_definition("identifiables.yml")
       crawler.identifiableAdapter = ident
-
+
       # Here we synchronize the data
       inserts, updates = crawler.synchronize(commit_changes=True, unique_names=True,
-                                             crawled_data=records)
-
+                                             crawled_data=records)
+
       #from IPython import embed
       #embed()
-
+
   def _parse_arguments():
       """Parse the arguments."""
       parser = argparse.ArgumentParser(description='Crawler for fish data')
       parser.add_argument('-n', '--dry-run', help="Do not modify the database.", action="store_true")
       parser.add_argument('csv_file', metavar="csv file", help="The csv file to be crawled.")
       return parser.parse_args()
-
-
+
+
   def main():
       """Main function."""
       args = _parse_arguments()
       crawl_file(args.csv_file, dry_run=args.dry_run)
-
-
+
+
   if __name__ == '__main__':
       main()
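The script above loads its identifiable definition from ``identifiables.yml``, which is not shown in these hunks. A minimal sketch of such a file, assuming a ``Fish`` Record is identified by its ``date`` and ``number`` properties, could look like::

   Fish:
     - date
     - number

With a definition along these lines the crawler matches a crawled ``Fish`` against an existing Record that has the same ``date`` and ``number`` rather than inserting a duplicate, which is why re-running the script (see the final hunk below) leaves the data unchanged.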
@@ -212,7 +212,7 @@ If you have created all the files, you can run:
 
    python3 crawl.py fish_data_2.csv
 
 Note, that you can run the same script again and you will not see any changes
-being done to the data in LinkAhead. 
+being done to the data in LinkAhead.
 You may play around with changing data in the data table. Changes will