From efdb10e3c4ddb741a4b5b474d052ec8649ba8efc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Wed, 31 Jan 2024 18:48:47 +0100
Subject: [PATCH] ENH: allow wildcard is_referenced_by

---
 CHANGELOG.md                             |  2 ++
 src/caoscrawler/identifiable_adapters.py | 19 ++++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8eeed54f..ff413461 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added ###
 * 'transform' sections can be added to a CFood to apply functions to values stored in variables.
 * default transform functions: submatch, split and replace.
+* `*` can now be used as a wildcard value of `is_referenced_by` in the identifiables
+  parameter file to denote that a Record of any type may reference the identified one.
 
 ### Changed ###
 - If the `parents` key is used in a cfood at a lower level for a Record that
diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index d9c9c00b..d0dfd752 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -219,18 +219,23 @@ identifiabel, identifiable and identified record) for a Record.
                 # separate class too
                 if prop.name.lower() == "is_referenced_by":
                     for givenrt in prop.value:
-                        rt_and_children = get_children_of_rt(givenrt)
                         found = False
-                        for rtname in rt_and_children:
-                            if (id(record) in referencing_entities
-                                    and rtname in referencing_entities[id(record)]):
-                                identifiable_backrefs.extend(
-                                    referencing_entities[id(record)][rtname])
+                        if givenrt == "*":
+                            for rec in referencing_entities.get(id(record), {}).values():
+                                identifiable_backrefs.extend(rec)
                                 found = True
+                        else:
+                            rt_and_children = get_children_of_rt(givenrt)
+                            for rtname in rt_and_children:
+                                if (id(record) in referencing_entities
+                                        and (rtname in referencing_entities[id(record)])):
+                                    identifiable_backrefs.extend(
+                                        referencing_entities[id(record)][rtname])
+                                    found = True
                         if not found:
                             # TODO: is this the appropriate error?
                             raise NotImplementedError(
-                                f"The following record is missing an identifying property:"
+                                f"The following record is missing an identifying property:\n"
                                 f"RECORD\n{record}\nIdentifying PROPERTY\n{prop.name}"
                             )
                     continue
-- 
GitLab