diff --git a/integrationtests/test.py b/integrationtests/test.py index 809fc3ccb625386190da5e676c6da436e372b6eb..af73c44fe7fff8806eb6f17fb7d3c278d1027f6a 100755 --- a/integrationtests/test.py +++ b/integrationtests/test.py @@ -153,7 +153,6 @@ def test_insertion_and_update(clear_database, usemodel, ident, crawler): # cr.save_debug_data(rfp("provenance.yml")) assert len(cr.updateList) == 3 ins, ups = cr.synchronize() - # breakpoint() assert len(ins) == 0 assert len(ups) == 1 diff --git a/src/doc/index.rst b/src/doc/index.rst index a096741c95804da14a8a9d919ce755a44f0c4841..94862168e5ae809d1e249e93bceef51240980c4e 100644 --- a/src/doc/index.rst +++ b/src/doc/index.rst @@ -153,3 +153,34 @@ The crawler can be considered the main program doing the synchronization in basi Relevant sources in: src/crawl.py + + +Special Cases +============= + +Variable Precedence ++++++++++++++++++++ + +Let's assume the following situation: + +.. code-block:: yaml + description: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description + + +A use of the $description variable could refer to either of two different variables created here: +1. The structure element path. +2. The value of the matched expression. + +The matched expression takes precedence over the structure element path and shadows it. + +If you want to be able to use the structure element path, make sure to give unique names +to the variables, like: + +.. 
code-block:: yaml + description_text_block: + type: DictTextElement + match_value: (?P<description>.*) + match_name: description diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py index 6e6267f4b3dd503a6a2b09c76da5d4b49d8290a6..947aa7503443b50798abcc964fbef536a41b42b5 100644 --- a/src/newcrawler/crawl.py +++ b/src/newcrawler/crawl.py @@ -590,16 +590,14 @@ class Crawler(object): converter.match(element) is not None): generalStore_copy = generalStore.create_scoped_copy() recordStore_copy = recordStore.create_scoped_copy() - # extracts values from structure element and stores them in the converter - # this could e.g. be the storage of a variable from the regexp in the - # converter object or the extraction from values from a file and storage - # in the converter object - # -> rather store it in the variable storage than in the converter? - converter.create_values(generalStore_copy, element) # Create an entry for this matched structure element: generalStore_copy[converter.name] = ( os.path.join(*(structure_elements_path + [element.get_name()]))) + + # extracts values from structure element and stores them in the + # variable store + converter.create_values(generalStore_copy, element) keys_modified = converter.create_records( generalStore_copy, recordStore_copy, element)