Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
caosdb-advanced-user-tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
caosdb-advanced-user-tools
Commits
54b402d1
Verified
Commit
54b402d1
authored
1 year ago
by
Daniel Hornung
Browse files
Options
Downloads
Patches
Plain Diff
WIP: Filling XLSX
parent
7ba88b81
No related branches found
Branches containing commit
No related tags found
Tags containing commit
2 merge requests
!100
WIP: Filling XLSX: Seems to be working.
,
!93
Filling XLSX: Everything except multiple choice.
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+103
-53
103 additions, 53 deletions
src/caosadvancedtools/table_json_conversion/fill_xlsx.py
unittests/table_json_conversion/example_template.xlsx
+0
-0
0 additions, 0 deletions
unittests/table_json_conversion/example_template.xlsx
with
103 additions
and
53 deletions
src/caosadvancedtools/table_json_conversion/fill_xlsx.py
+
103
−
53
View file @
54b402d1
...
...
@@ -21,10 +21,12 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import
json
from
collections
import
OrderedDict
from
types
import
SimpleNamespace
from
typing
import
List
,
Union
,
TextIO
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Union
,
TextIO
from
openpyxl
import
load_workbook
from
openpyxl
import
load_workbook
,
Workbook
from
openpyxl.worksheet.worksheet
import
Worksheet
from
.table_generator
import
ColumnType
,
RowType
...
...
@@ -39,7 +41,32 @@ def _fill_leaves(json_doc: dict, workbook):
workbook
.
cell
(
1
,
2
,
el
)
def _is_exploded_sheet(sheet: Worksheet) -> bool:
    """Return True if this is an "exploded" sheet.

    An exploded sheet is a sheet whose data entries are LIST valued properties of entries in another
    sheet.  A sheet is detected as exploded iff it has FOREIGN columns.

    Parameters
    ----------
    sheet: Worksheet
      The sheet to inspect.

    Returns
    -------
    out: bool
      True if at least one column type of the sheet is FOREIGN, else False.
    """
    column_types = _get_column_types(sheet)
    # The type cells hold the *names* of ``ColumnType`` members (they are validated with
    # ``hasattr(ColumnType, ...)`` and compared against ``.name`` elsewhere in this module), so the
    # comparison must use ``.name`` as well, not the enum's ``.value``.
    return ColumnType.FOREIGN.name in column_types.values()
def _get_column_types(sheet: Worksheet) -> OrderedDict:
    """Return an OrderedDict: column index -> column type for the sheet.

    Parameters
    ----------
    sheet: Worksheet
      The sheet whose columns shall be inspected.

    Returns
    -------
    out: OrderedDict
      Maps the 0-based column index (as enumerated from ``sheet.columns``) to the content of that
      column's type cell.  The content is either the name of a ``ColumnType`` member or None for an
      empty cell.

    Raises
    ------
    AssertionError
      If a type cell holds something that is neither None nor the name of a ``ColumnType`` member.
    """
    result = OrderedDict()
    # NOTE(review): the helper is documented as returning a 1-based *column* index, yet the result
    # is used below to pick a cell *within* each column, i.e. as a row position.  Presumably the
    # templates keep the type marker on the diagonal (e.g. in the first cell) -- TODO confirm.
    type_row_index = _get_row_type_column_index(sheet) - 1
    for idx, col in enumerate(sheet.columns):
        value = col[type_row_index].value
        # Check for None (and non-strings) first: ``hasattr`` raises a TypeError when the attribute
        # name is not a string, which would hide the intended assertion message for empty cells.
        assert value is None or (isinstance(value, str) and hasattr(ColumnType, value)), (
            f"Unexpected column type value: {value}")
        result[idx] = value
    return result
def
_get_row_type_column_index
(
worksheet
):
"""
Return the column index (1-indexed) of the column which defines the row types.
"""
for
col
in
worksheet
.
columns
:
for
cell
in
col
:
if
cell
.
value
==
RowType
.
COL_TYPE
.
name
:
...
...
@@ -48,6 +75,7 @@ def _get_row_type_column_index(worksheet):
def
_get_path_rows
(
worksheet
):
"""
Return the 1-based indices of the rows which represent paths.
"""
rows
=
[]
rt_col
=
_get_row_type_column_index
(
worksheet
)
for
cell
in
list
(
worksheet
.
columns
)[
rt_col
-
1
]:
...
...
@@ -60,8 +88,8 @@ def _get_path_rows(worksheet):
def _next_row_index(sheet) -> int:
    """Return the index for the next data row.

    This is defined as the first row without any content.

    The returned value is ``sheet.max_row``, unmodified.  Callers in this module add 1 to the result
    before passing it as ``row=`` to ``sheet.cell``, so the returned index is presumably meant to be
    0-based -- TODO confirm.

    Parameters
    ----------
    sheet
      The worksheet to inspect; only its ``max_row`` attribute is read.

    Returns
    -------
    out: int
      The value of ``sheet.max_row``.
    """
    return sheet.max_row
...
...
@@ -74,11 +102,11 @@ class TemplateFiller:
"""
Fill the data into the workbook.
"""
self
.
_handle_data
(
data
=
data
,
current_path
=
[])
def
_create_index
(
self
,
):
def
_create_index
(
self
):
"""
Create a sheet index for the workbook.
Index the sheets by
their relevant path array
. Also create a simple column index by
column
type and path.
Index the sheets by
all path arrays leading to them
. Also create a simple column index by
column
type and path.
"""
self
.
_sheet_index
=
{}
...
...
@@ -92,8 +120,6 @@ class TemplateFiller:
# Get the paths, use without the leaf component for sheet indexing, with type prefix and
# leaf for column indexing.
paths
=
[]
col_index
=
{}
for
col_idx
,
col
in
enumerate
(
sheet
.
columns
):
if
col
[
coltype_idx
].
value
==
RowType
.
COL_TYPE
.
name
:
continue
...
...
@@ -101,37 +127,47 @@ class TemplateFiller:
for
path_idx
in
path_indices
:
if
col
[
path_idx
].
value
is
not
None
:
path
.
append
(
col
[
path_idx
].
value
)
col_key
=
"
.
"
.
join
([
col
[
coltype_idx
].
value
]
+
path
)
col_index
[
col_key
]
=
SimpleNamespace
(
column
=
col
,
col_index
=
col_idx
)
#
col_key = ".".join([col[coltype_idx].value] + path)
#
col_index[col_key] = SimpleNamespace(column=col, col_index=col_idx)
if
col
[
coltype_idx
].
value
not
in
[
ColumnType
.
SCALAR
.
name
,
ColumnType
.
LIST
.
name
]:
continue
paths
.
append
(
path
[:
-
1
])
# Find common components:
common_path
=
[]
for
idx
,
component
in
enumerate
(
paths
[
0
]):
for
path
in
paths
:
if
not
path
[
idx
]
==
component
:
break
else
:
common_path
.
append
(
component
)
assert
len
(
common_path
)
>=
1
self
.
_sheet_index
[
"
.
"
.
join
(
common_path
)]
=
SimpleNamespace
(
common_path
=
common_path
,
sheetname
=
sheetname
,
sheet
=
sheet
,
col_index
=
col_index
)
def
_handle_data
(
self
,
data
:
dict
,
current_path
:
List
[
str
]
=
None
):
path_str
=
"
.
"
.
join
(
path
)
assert
path_str
not
in
self
.
_sheet_index
self
.
_sheet_index
[
path_str
]
=
SimpleNamespace
(
sheetname
=
sheetname
,
sheet
=
sheet
,
col_index
=
col_idx
,
col_type
=
col
[
coltype_idx
].
value
)
def
_handle_data
(
self
,
data
:
dict
,
current_path
:
List
[
str
]
=
None
,
only_collect_insertables
:
bool
=
False
,
)
->
Optional
[
Dict
[
str
,
Any
]]:
"""
Handle the data and write it into ``workbook``.
Parameters
----------
data: dict
The data at the current path position. Elements may be dicts, lists or simple scalar values.
current_path: list[str], optional
If this is None or empty, we are at the top level. This means that all children shall be entered
into their respective sheets and not into a sheet at this level.
only_collect_insertables: bool, optional
If True, do not insert anything on this level, but return a dict with entries to be inserted.
Returns
-------
out: union[dict, None]
If ``only_collect_insertables`` is True, return a dict (path string -> value)
"""
if
current_path
is
None
:
current_path
=
[]
insertables
:
Dict
[
str
,
Any
]
=
{}
for
name
,
content
in
data
.
items
():
path
=
current_path
+
[
name
]
# preprocessing
if
isinstance
(
content
,
list
):
if
not
content
:
continue
...
...
@@ -142,34 +178,48 @@ data: dict
for
entry
in
content
:
self
.
_handle_data
(
data
=
entry
,
current_path
=
path
)
continue
self
.
_handle_simple_data
(
data
=
content
,
current_path
=
path
)
def
_handle_simple_data
(
self
,
data
,
current_path
:
List
[
str
]):
"""
Enter this single data item into the workbook.
Parameters
----------
data: dict
The data at the current path position. Must be single items (dict or simple scalar) or lists of
simple values.
"""
sheet_meta
=
self
.
_sheet_index
[
"
.
"
.
join
(
current_path
)]
sheet
=
sheet_meta
.
sheet
next_row
=
_next_row_index
(
sheet
)
for
name
,
content
in
data
.
items
():
if
isinstance
(
content
,
list
):
# TODO handle later
# scalar elements: semicolon separated
# nested dicts: recurse
pass
elif
isinstance
(
content
,
dict
):
pass
# scalars
if
not
current_path
:
# Special handling for top level
self
.
_handle_data
(
content
,
current_path
=
path
)
continue
insert
=
self
.
_handle_data
(
content
,
current_path
=
path
,
only_collect_insertables
=
True
)
assert
isinstance
(
insert
,
dict
)
assert
not
any
(
key
in
insertables
for
key
in
insert
)
insertables
.
update
(
insert
)
continue
else
:
# scalars
content
=
[
content
]
# collecting the data
assert
isinstance
(
content
,
list
)
if
len
(
content
)
==
1
:
value
=
content
[
0
]
else
:
path
=
current_path
+
[
name
]
path_str
=
"
.
"
.
join
([
ColumnType
.
SCALAR
.
name
]
+
path
)
col_index
=
sheet_meta
.
col_index
[
path_str
].
col_index
sheet
.
cell
(
row
=
next_row
+
1
,
column
=
col_index
+
1
,
value
=
content
)
value
=
"
;
"
.
join
(
content
)
path_str
=
"
.
"
.
join
(
path
)
assert
path_str
not
in
insertables
insertables
[
path_str
]
=
value
if
only_collect_insertables
:
return
insertables
if
not
current_path
:
return
# actual data insertion
insert_row
=
None
sheet
=
None
for
path_str
,
value
in
insertables
.
items
():
sheet_meta
=
self
.
_sheet_index
[
path_str
]
if
sheet
is
None
:
sheet
=
sheet_meta
.
sheet
assert
sheet
is
sheet_meta
.
sheet
,
"
All entries must be in the same sheet.
"
col_index
=
sheet_meta
.
col_index
if
insert_row
is
None
:
insert_row
=
_next_row_index
(
sheet
)
sheet
.
cell
(
row
=
insert_row
+
1
,
column
=
col_index
+
1
,
value
=
value
)
# self._handle_simple_data(data=content, current_path=path)
return
None
def
fill_template
(
data
:
Union
[
dict
,
str
,
TextIO
],
template
:
str
,
result
:
str
)
->
None
:
...
...
This diff is collapsed.
Click to expand it.
unittests/table_json_conversion/example_template.xlsx
+
0
−
0
View file @
54b402d1
No preview for this file type
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment