Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
3e8b13df
Commit
3e8b13df
authored
8 months ago
by
Alexander Schlemmer
Browse files
Options
Downloads
Patches
Plain Diff
ENH: redesigned structure elements for the xml conversion
parent
10df18a9
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!181
Release 0.9.0
,
!174
XML Converter
Pipeline
#54413
failed
8 months ago
Stage: info
Stage: setup
Stage: cert
Stage: style
Stage: test
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/caoscrawler/structure_elements.py
+3
-6
3 additions, 6 deletions
src/caoscrawler/structure_elements.py
src/caoscrawler/xml_converter.py
+25
-1
25 additions, 1 deletion
src/caoscrawler/xml_converter.py
unittests/test_xml_converter.py
+51
-0
51 additions, 0 deletions
unittests/test_xml_converter.py
with
79 additions
and
7 deletions
src/caoscrawler/structure_elements.py
+
3
−
6
View file @
3e8b13df
...
...
@@ -196,7 +196,7 @@ class XMLTextNode(StructureElement):
- XMLTextNodes only have a text; the name is used solely to identify the structure element.
They can only be matched using the match entry in the XMLTextNodeConverter.
"""
super
().
__init__
(
element
.
getroottree
().
getelementpath
(
element
))
super
().
__init__
(
element
.
getroottree
().
getelementpath
(
element
)
+
"
/text()
"
)
self
.
tag
=
element
self
.
value
=
element
.
text
...
...
@@ -207,17 +207,14 @@ class XMLAttributeNode(StructureElement):
"""
def
__init__
(
self
,
element
:
lxml
.
etree
.
Element
,
key
:
str
,
tree
:
lxml
.
etree
.
ElementTree
):
key
:
str
):
"""
Initializes this XML attribute node.
element: The xml tree element containing the attribute.
key: The key which identifies the attribute in the list of attributes.
tree: The tree containing the element which is used to set the node path.
"""
super
().
__init__
(
element
.
getroottree
().
getelementpath
(
element
))
super
().
__init__
(
element
.
getroottree
().
getelementpath
(
element
)
+
"
@
"
+
key
)
self
.
value
=
element
.
attrib
[
key
]
self
.
key
=
key
# TODO: use getpath instead of getelementpath?
self
.
tag
=
element
This diff is collapsed.
Click to expand it.
src/caoscrawler/xml_converter.py
+
25
−
1
View file @
3e8b13df
...
...
@@ -72,6 +72,28 @@ class XMLFileConverter(SimpleFileConverter):
class
XMLTagConverter
(
Converter
):
def
create_children
(
self
,
generalStore
:
GeneralStore
,
element
:
StructureElement
):
"""
Children that are generated by this function are the result of the xpath query given in
the yaml property
"
xpath
"
. Its default (when not given) is
"
child::*
"
, so the direct children
of the current xml node.
The xpath expression must be designed in a way that it returns xml tags (and no attributes or texts).
That means that the axis
"
attribute::
"
and the function
"
text()
"
must not be used.
The following yaml properties can be used to generate other types of nodes (text nodes and attribute nodes)
as subtree structure elements:
# _*_ marks the default:
attribs_as_children: true # true / _false_
text_as_children: true # true / _false_
tags_as_children: true # _true_ / false
The default is to generate the tags matched by the xpath expression only.
- When text_as_children is set to true, text nodes will be generated that contain the text
contained in the matched tags.
- When attribs_as_children is set to true, attribute nodes will be generated from the attributes
of the matched tags.
"""
if
not
isinstance
(
element
,
XMLTagElement
):
raise
TypeError
(
"
Element must be an instance of XMLTagElement.
"
)
...
...
@@ -89,7 +111,7 @@ class XMLTagConverter(Converter):
el_lst
=
[]
for
el
in
children
:
if
isinstance
(
el
,
str
):
el_lst
.
append
(
XMLTextNode
(
el
)
)
raise
RuntimeError
(
"
Only standard xml nodes are supported as results of xpath queries.
"
)
elif
isinstance
(
el
,
lxml
.
etree
.
_Element
):
el_lst
.
append
(
XMLTagElement
(
el
))
else
:
...
...
@@ -174,4 +196,6 @@ class XMLTextNodeConverter(Converter):
if
not
isinstance
(
element
,
XMLTextNode
):
raise
TypeError
(
"
Element must be an instance of XMLTextNode.
"
)
raise
NotImplementedError
()
return
None
This diff is collapsed.
Click to expand it.
unittests/test_xml_converter.py
+
51
−
0
View file @
3e8b13df
...
...
@@ -230,3 +230,54 @@ def test_nested_simple_xml(basic_xmltag_converter, basic_xpath_xmltag_converter)
assert
isinstance
(
children
[
0
],
XMLTagElement
)
assert
children
[
0
].
name
==
"
img/testnode
"
def
test_namespace_xml
(
converter_registry
):
"""
Test for xml conversion including children.
Nodes have namespaces.
"""
xml_text
=
"""
<root xmlns=
"
default-namespace
"
xmlns:test=
"
alternative-namespace
"
>
<node1>
Bla
</node1>
<node1>
text
<node2 xmlns=
"
sub-namespace
"
>
<node3>
ok
</node3>
</node2>
2
<test:node2>
sep
</test:node2>
3
</node1>
</root>
"""
xpaths
=
(
"
default:node1/text()
"
,
)
for
xpath
in
xpaths
:
converter
=
XMLTagConverter
(
yaml
.
safe_load
(
"""
type: XMLTag
match_tag:
\\
{{default-namespace
\\
}}root
xpath:
"
{}
"
default_namespace: default
subtree:
Text:
type: XMLTextNode
match: (?P<result>.*)
"""
.
format
(
xpath
)),
"
TestXMLTagConverter
"
,
converter_registry
)
tag
=
XMLTagElement
(
fromstring
(
xml_text
))
m
=
converter
.
match
(
tag
)
assert
m
is
not
None
general_store
=
GeneralStore
()
children
=
converter
.
create_children
(
general_store
,
tag
)
assert
len
(
children
)
==
4
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment