Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
caosdb-advanced-user-tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
caosdb
Software
caosdb-advanced-user-tools
Commits
596dfe47
Commit
596dfe47
authored
5 years ago
by
Alexander Schlemmer
Browse files
Options
Downloads
Patches
Plain Diff
Replaced the old read md by up to date pandoc header tools
parent
40d1d858
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/caosadvancedtools/pandoc_header_tools.py
+240
-0
240 additions, 0 deletions
src/caosadvancedtools/pandoc_header_tools.py
src/caosadvancedtools/read_md_header.py
+86
-80
86 additions, 80 deletions
src/caosadvancedtools/read_md_header.py
with
326 additions
and
80 deletions
src/caosadvancedtools/pandoc_header_tools.py
0 → 100644
+
240
−
0
View file @
596dfe47
#!/usr/bin/env python3
# This is taken from the file manage_header.py
# in a CaosDB management repository. The file manage_header.py
# is not released yet, but creating a library might be useful.
# A. Schlemmer, 04/2019
# ** header v3.0
# This file is a part of the CaosDB Project.
# Copyright (C) 2018 Research Group Biomedical Physics,
# Max-Planck-Institute for Dynamics and Self-Organization Göttingen
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ** end header
# Tool to manage yaml header in markdown document
# A. Schlemmer, 01/2019
# D. Hornung 2019-02
# T. Fitschen 2019-02
import
argparse
import
glob
import
os
import
re
import
yaml
class NoValidHeader(Exception):
    """Error signalling that a file carries no usable YAML header.

    The offending path is kept on the instance as ``filename`` and is also
    embedded in the exception message, together with a hint on how to fix
    the file.
    """

    def __init__(self, filename, *args, **kwargs):
        template = ("Header missing in {}\n"
                    "Fix this with the modify subcommand using -f option")
        self.filename = filename
        super().__init__(template.format(filename), *args, **kwargs)
class MetadataFileMissing(Exception):
    """Error signalling that no README.md metadata file could be found.

    The path that was looked up is kept on the instance as ``filename`` and
    appended to the exception message.
    """

    def __init__(self, filename, *args, **kwargs):
        self.filename = filename
        prefix = "Metadata file README.md missing in "
        super().__init__(prefix + filename, *args, **kwargs)
class ParseErrorsInHeader(Exception):
    """Error signalling that a header was found but could not be parsed.

    Both the file path (``filename``) and the underlying cause (``reason``)
    are stored on the instance and rendered into the exception message.
    """

    def __init__(self, filename, reason, *args, **kwargs):
        self.filename, self.reason = filename, reason
        text = "Invalid header in {}. Reason: {}".format(filename, reason)
        super().__init__(text, *args, **kwargs)
# Default header text that add_header() writes when it is called without a
# header_dict: a YAML block opened by "---" and closed by "..." that holds
# the two empty keys "responsible" and "description".
TEMPLATEHEADER = """
---
responsible:
description:
...
"""
def _add_template_header(filename):
    """Call the module-level ``add_header`` function for ``filename``.

    ``get_header`` has a boolean parameter that is also named ``add_header``
    and therefore hides the function of the same name inside its own body.
    This wrapper looks the name up at module scope instead.
    """
    add_header(filename)


def get_header(filename, add_header=False):
    """Open an md file identified by filename and read out the yaml header.

    filename can also be a folder. In this case folder/README.md (or, if that
    does not exist, folder/readme.md) will be used for getting the header.

    If a header is found a tuple is returned: (first yaml header line index,
    last+1 yaml header line index, header)

    Otherwise, if `add_header` is True, a header is added and the function is
    called again.

    The header is normalized in the following way:

    - If the value to a key is a string, a list with that string as only
      element is returned.

    From https://pandoc.org/MANUAL.html:

    A YAML metadata block is a valid YAML object, delimited by a line of three
    hyphens (---) at the top and a line of three hyphens (---) or three dots
    (...) at the bottom. A YAML metadata block may occur anywhere in the
    document, but if it is not at the beginning, it must be preceded by a
    blank line.

    Raises
    ------
    MetadataFileMissing
        If filename is a folder containing neither README.md nor readme.md.
    ParseErrorsInHeader
        If the delimited block is not parseable YAML.
    NoValidHeader
        If the parsed block is not a mapping, or if no header block exists
        and `add_header` is False.
    """
    if os.path.isdir(filename):
        directory = filename
        filename = os.path.join(directory, "README.md")
        if not os.path.exists(filename):
            filename = os.path.join(directory, "readme.md")
            if not os.path.exists(filename):
                raise MetadataFileMissing(filename)

    with open(filename) as f:
        textlines = f.readlines()

    # Small state machine over the lines of the file:
    #    0: an opening "---" is allowed here (start of file or after a blank
    #       line)
    #    1: inside the header block, waiting for the closing delimiter
    #    2: closing delimiter found
    #   -1: inside ordinary text; a blank line is needed before a header
    state = 0
    header_start = -1
    header_end = -1
    for index, line in enumerate(textlines):
        stripped = line.rstrip()
        if len(line) == 1 and state in {-1, 0}:
            state = 0
            continue
        if stripped == "---" and state == 0:
            header_start = index + 1
            state = 1
            continue
        if stripped in ("...", "---") and state == 1:
            header_end = index
            state = 2
            break
        # Any other line: stay inside an open header, otherwise fall back to
        # "ordinary text".
        if state != 1:
            state = -1

    # If a header section was found:
    if state == 2:
        # YAML does not allow tabs for indentation, so they are replaced
        # before parsing; trailing whitespace is dropped as well.
        headerlines = [raw.replace("\t", " ").rstrip()
                       for raw in textlines[header_start:header_end]]
        try:
            # BaseLoader only builds plain strings, lists and dicts.
            yaml_part = yaml.load("\n".join(headerlines),
                                  Loader=yaml.BaseLoader)
        except yaml.error.MarkedYAMLError as e:
            # Covers scanner as well as parser errors.
            raise ParseErrorsInHeader(filename, e) from e
        if not isinstance(yaml_part, dict):
            raise NoValidHeader(filename)
        return (header_start, header_end, clean_header(yaml_part))

    if not add_header:
        raise NoValidHeader(filename)

    print("Adding header in: {fn}".format(fn=filename))
    _add_template_header(filename)
    return get_header(filename)
def save_header(filename, header_data):
    """Save a header identified by the tuple header_data to the file
    identified by filename.

    filename can also be a folder. In this case folder/README.md (or, if only
    that one exists, folder/readme.md) will be used.

    header_data is a tuple as returned by get_header: (first yaml header line
    index, last+1 yaml header line index, header). The lines between the two
    indices are replaced by a YAML dump of the header; the delimiter lines
    around them are left untouched. Values that are one-element lists are
    written as plain scalars.

    Raises
    ------
    ValueError
        If the line indices in header_data do not fit the file content.
    """
    if os.path.isdir(filename):
        directory = filename
        filename = os.path.join(directory, "README.md")
        lowercase = os.path.join(directory, "readme.md")
        if not os.path.exists(filename) and os.path.exists(lowercase):
            filename = lowercase

    with open(filename) as f:
        textlines = f.readlines()

    start = header_data[0]
    end = header_data[1]
    if not 0 <= start <= end <= len(textlines):
        raise ValueError(
            "Header line range {}-{} does not fit {}".format(
                start, end, filename))

    # Remove exactly the old header body. Using the recorded end index works
    # for every closing delimiter ("...", "---", with or without trailing
    # whitespace) and can never run past the header into the document text.
    del textlines[start:end]

    # Undo the list normalization for single values; empty lists and
    # non-list values are written unchanged.
    data = {
        key: val[0] if isinstance(val, (list, tuple)) and len(val) == 1
        else val
        for key, val in header_data[2].items()
    }
    textlines.insert(start,
                     yaml.dump(data,
                               default_flow_style=False,
                               allow_unicode=True))

    with open(filename, "w") as f:
        f.writelines(textlines)
def add_header(filename, header_dict=None):
    """Prepend a yaml header to an md file.

    A folder may be passed as filename; folder/README.md is used then. A file
    that does not exist yet is created. Without header_dict the module's
    TEMPLATEHEADER is written; otherwise the header is a YAML dump of
    header_dict enclosed in "---" and "..." lines. Existing file content is
    written back unchanged after the header.
    """
    if os.path.isdir(filename):
        target = os.path.join(filename, "README.md")
    else:
        target = filename

    existing_lines = ""
    if os.path.exists(target):
        with open(target) as handle:
            existing_lines = handle.readlines()

    if header_dict is not None:
        dumped = yaml.dump(header_dict,
                           default_flow_style=False,
                           allow_unicode=True)
        header_text = "---\n" + dumped + "...\n"
    else:
        header_text = TEMPLATEHEADER

    with open(target, "w") as handle:
        handle.write(header_text)
        handle.writelines(existing_lines)
def clean_header(header):
    """Normalize the values of a parsed header in place and return it.

    - ``None`` and the string "null" are replaced by the empty string.
    - Every string value (including the empty strings from the previous
      rule) is wrapped into a one-element list.

    All other values are left untouched. The dictionary passed in is
    modified and is also the return value.
    """
    # Only values of existing keys are reassigned, so the set of keys does
    # not change while iterating.
    for key in header:
        value = header[key]
        # Fill empty fields with empty string
        if value is None or value == "null":
            value = ""
        # Plain string is put into list
        if isinstance(value, str):
            header[key] = [value]

    return header
def kw_present(header, kw):
    """
    Tell whether the keyword kw is present in the header with a value that is
    neither None nor empty.
    """
    if kw not in header:
        return False
    value = header[kw]
    if value is None:
        return False
    return len(value) > 0
This diff is collapsed.
Click to expand it.
src/caosadvancedtools/read_md_header.py
+
86
−
80
View file @
596dfe47
...
@@ -25,119 +25,125 @@
...
@@ -25,119 +25,125 @@
# ** end header
# ** end header
#
#
import
os
from
.
import
pandoc_header_tools
import
re
import
caosdb
as
db
def
get_header
(
fn
):
import
yaml
return
pandoc_header_tools
.
get_header
(
fn
)[
2
]
from
.cfood
import
AbstractCFood
,
get_entity
# import os
from
.utils
import
string_to_person
# import re
# import caosdb as db
# import yaml
def
_clean_header
(
header
):
# from .cfood import AbstractCFood, get_entity
# Fill empty fields with empty string
# from .utils import string_to_person
for
k
,
v
in
header
.
items
():
# TODO: I have an improved version of this tool in filesystemspecification.
if
v
==
"
null
"
:
header
[
k
]
=
""
if
v
is
None
:
# def _clean_header(header)
:
header
[
k
]
=
""
#
# Fill empty fields with empty string
for
k
,
v
in
header
.
items
():
# for k, v in header.items():
# Plain string is put into list
# if v == "null":
# header[k] = ""
if
type
(
v
)
==
str
:
#
if
v is None
:
header
[
k
]
=
[
v
]
#
header[k] =
""
return
header
# for k, v in header.items():
# # Plain string is put into list
# if type(v) == str:
# header[k] = [v]
class
NoValidHeader
(
Exception
):
# return header
pass
def
get_header
(
filename
):
# class NoValidHeader(Exception):
"""
Open an md file identified by filename and read out the yaml
# pass
header.
filename can also be a folder. In this case folder/readme.md will be used for
getting the header.
If a header is found a tuple is returned: (first yaml header line index, last+1
# def get_header(filename):
yaml header line index, header)
# """Open an md file identified by filename and read out the yaml
# header.
Otherwise, if `add_header` is True, a header is added and the function is called
# filename can also be a folder. In this case folder/readme.md will be used for
again
.
# getting the header
.
The header is normalized in the following way:
# If a header is found a tuple is returned: (first yaml header line index, last+1
# yaml header line index, header)
- If the value to a key is a string, a list with that string as only element is
# Otherwise, if `add_header` is True, a header is added and the function is called
returned
.
# again
.
From https://pandoc.org/MANUAL.html
:
# The header is normalized in the following way
:
A YAML metadata block is a valid YAML object, delimited by a line of three
# - If the value to a key is a string, a list with that string as only element is
hyphens (---) at the top and a line of three hyphens (---) or three dots (...)
# returned.
at the bottom. A YAML metadata block may occur anywhere in the document, but if
it is not at the beginning, it must be preceded by a blank line.
"""
# From https://pandoc.org/MANUAL.html:
if
os
.
path
.
isdir
(
filename
):
# A YAML metadata block is a valid YAML object, delimited by a line of three
filename
=
os
.
path
.
join
(
filename
,
"
readme.md
"
)
# hyphens (---) at the top and a line of three hyphens (---) or three dots (...)
# at the bottom. A YAML metadata block may occur anywhere in the document, but if
# it is not at the beginning, it must be preceded by a blank line.
with
open
(
filename
)
as
f
:
# """
textlines
=
f
.
readlines
()
state
=
0
# if os.path.isdir(filename):
found_0
=
-
1
# filename = os.path.join(filename, "readme.md")
found_1
=
-
1
found_2
=
-
1
for
i
,
line
in
enumerate
(
textlines
):
# with open(filename) as f:
if
len
(
line
)
==
1
and
state
in
{
-
1
,
0
}:
# textlines = f.readlines()
found_0
=
i
state
=
0
continue
# state = 0
# found_0 = -1
# found_1 = -1
# found_2 = -1
if
line
.
rstrip
()
==
"
---
"
and
state
==
0
:
# for i, line in enumerate(textlines):
found_1
=
i
+
1
# if len(line) == 1 and state in {-1, 0}:
state
=
1
# found_0 = i
# state = 0
continue
#
continue
if
line
.
rstrip
()
==
"
...
"
and
state
==
1
:
#
if line.rstrip() == "
---
" and state ==
0
:
found_
2
=
i
#
found_
1
= i
+1
state
=
2
#
state =
1
break
# continue
# Else: reset state to -1, unless it is 1 (in this case, leave it
# untouched
if
state
==
1
:
# if line.rstrip() == "..." and state == 1:
pass
# found_2 = i
else
:
# state = 2
state
=
-
1
# If a header section was found:
# break
# # Else: reset state to -1, unless it is 1 (in this case, leave it
# # untouched
if
state
==
2
:
# if state == 1:
headerlines
=
[]
# pass
# else:
# state = -1
for
l
in
textlines
[
found_1
:
found_2
]:
# # If a header section was found:
l
=
l
.
replace
(
"
\t
"
,
"
"
)
l
=
l
.
rstrip
()
headerlines
.
append
(
l
)
try
:
yaml_part
=
yaml
.
load
(
"
\n
"
.
join
(
headerlines
))
except
yaml
.
error
.
MarkedYAMLError
as
e
:
# print("Error in file {}:".format(filename))
# print(headerlines)
raise
NoValidHeader
(
filename
)
return
_clean_header
(
yaml_part
)
# if state == 2:
# headerlines = []
raise
NoValidHeader
(
filename
)
# for l in textlines[found_1:found_2]:
# l = l.replace("\t", " ")
# l = l.rstrip()
# headerlines.append(l)
# try:
# yaml_part = yaml.load("\n".join(headerlines))
# except yaml.error.MarkedYAMLError as e:
# # print("Error in file {}:".format(filename))
# # print(headerlines)
# raise NoValidHeader(filename)
# return _clean_header(yaml_part)
# raise NoValidHeader(filename)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment