Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
CaosDB Crawler
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
CaosDB Crawler
Commits
a6b41c7a
Commit
a6b41c7a
authored
10 months ago
by
Alexander Schlemmer
Browse files
Options
Downloads
Patches
Plain Diff
TST: added test for xml converters that do not match and a stub for nested xml
parent
8619b7b9
Branches
Branches containing commit
Tags
Tags containing commit
2 merge requests
!181
Release 0.9.0
,
!174
XML Converter
Pipeline
#54244
failed
10 months ago
Stage: info
Stage: setup
Stage: cert
Stage: style
Stage: test
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
unittests/test_xml_converter.py
+69
-4
69 additions, 4 deletions
unittests/test_xml_converter.py
with
69 additions
and
4 deletions
unittests/test_xml_converter.py
+
69
−
4
View file @
a6b41c7a
...
@@ -54,7 +54,7 @@ from caoscrawler.structure_elements import (BooleanElement, DictElement,
...
@@ -54,7 +54,7 @@ from caoscrawler.structure_elements import (BooleanElement, DictElement,
TextElement
,
XMLTagElement
)
TextElement
,
XMLTagElement
)
from
caoscrawler.xml_converter
import
XMLTagConverter
from
caoscrawler.xml_converter
import
XMLTagConverter
import
lxml
from
lxml.etree
import
fromstring
UNITTESTDIR
=
Path
(
__file__
).
parent
UNITTESTDIR
=
Path
(
__file__
).
parent
...
@@ -91,7 +91,7 @@ match_tag: a
...
@@ -91,7 +91,7 @@ match_tag: a
match_attrib: # default is the empty dictionary
match_attrib: # default is the empty dictionary
"
(?P<ref>(href|url))
"
:
"
test(?P<number>[0-9])
"
# either the
"
href
"
or the
"
url
"
attribute must be set
"
(?P<ref>(href|url))
"
:
"
test(?P<number>[0-9])
"
# either the
"
href
"
or the
"
url
"
attribute must be set
alt: (.+) # this attribute must be present and contain at least one character
alt: (.+) # this attribute must be present and contain at least one character
match_text:
.* # allow any text, also empty (this is the default)
match_text:
\\
s*(?P<node_text>.+)
\\
s*
# _*_ marks the default:
# _*_ marks the default:
attribs_as_children: true # true / _false_
attribs_as_children: true # true / _false_
...
@@ -106,6 +106,12 @@ subtree:
...
@@ -106,6 +106,12 @@ subtree:
type: TextElement
type: TextElement
match_name: alt
match_name: alt
match_value: ^(?P<text>.*)$
match_value: ^(?P<text>.*)$
img:
type: XMLTag
match_name: img
match_attrib:
src: test2
"""
)
"""
)
return
xml_cfood
return
xml_cfood
...
@@ -127,13 +133,72 @@ def test_simple_xml(basic_xmltag_converter):
...
@@ -127,13 +133,72 @@ def test_simple_xml(basic_xmltag_converter):
</a>
</a>
"""
"""
tag
=
XMLTagElement
(
"
tag
"
,
lxml
.
etree
.
fromstring
(
xml_text
))
tag
=
XMLTagElement
(
"
tag
"
,
fromstring
(
xml_text
))
m
=
basic_xmltag_converter
.
match
(
tag
)
m
=
basic_xmltag_converter
.
match
(
tag
)
assert
m
is
not
None
assert
m
is
not
None
assert
m
[
"
ref
"
]
==
"
href
"
assert
m
[
"
ref
"
]
==
"
href
"
assert
m
[
"
number
"
]
==
"
1
"
assert
m
[
"
number
"
]
==
"
1
"
assert
m
[
"
node_text
"
]
==
"
test
"
def test_not_matching(basic_xmltag_converter):
    """
    Check that the converter returns no match for <a> tags that do not fulfil
    the match_attrib / match_text rules of the basic_xmltag_converter fixture.
    """
    # Each entry pairs an XML snippet with the reason why it must be rejected.
    rejected_snippets = (
        ("""
        <a href="test1">
        test <img src="test2"/>
        </a>
        """, "alt-attribute was missing"),
        ("""
        <a href="test" alt="no link">
        test <img src="test2"/>
        </a>
        """, "href attribute did not match"),
        ("""
        <a href="test1" url="http" alt="no link">
        test <img src="test2"/>
        </a>
        """, "href and url must not be present simultaneously"),
        ("""
        <a href="test1" alt="no link"><img src="test2"/></a>
        """, "text node is empty"),
        ("""
        <a href="test1" alt="no link"/>
        """, "text node is empty"),
    )

    for xml_text, reason in rejected_snippets:
        element = XMLTagElement("tag", fromstring(xml_text))
        assert basic_xmltag_converter.match(element) is None, reason

    # TODO: adapt converter -> empty (==None) text node is equivalent to empty string text node
    # TODO: adapt tests
    # TODO: how to match "  ajskdlfjaldsf ajsdklfjadkl  " without the whitespaces in regexp correctly?
def test_nested_simple_xml(basic_xmltag_converter):
    """
    Test for xml conversion including children.

    Stub: the <a> tag (which contains an <img> child) must match, and
    create_children is then called on it; the result is not inspected yet.
    """
    nested_xml = """
    <a href="test1" alt="no link">
    test <img src="test2"/>
    </a>
    """
    root_element = XMLTagElement("tag", fromstring(nested_xml))

    # The outer tag has to match before its children can be created.
    assert basic_xmltag_converter.match(root_element) is not None

    store = GeneralStore()
    child_elements = basic_xmltag_converter.create_children(store, root_element)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment