From 99bb9dac1be361b3e2849a3efbe5d5d583009f9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org>
Date: Thu, 16 May 2019 10:49:48 +0200
Subject: [PATCH] read md

---
 src/caosadvancedtools/read_md_header.py | 195 ++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 src/caosadvancedtools/read_md_header.py

diff --git a/src/caosadvancedtools/read_md_header.py b/src/caosadvancedtools/read_md_header.py
new file mode 100644
index 00000000..f185a38f
--- /dev/null
+++ b/src/caosadvancedtools/read_md_header.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# ** header v3.0
+# This file is a part of the CaosDB Project.
+#
+# Copyright (C)
+# A. Schlemmer, 01/2019
+# D. Hornung 2019-02
+# T. Fitschen 2019-02
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# ** end header
+#
+
+import os
+import re
+
+import yaml
+
+TEMPLATEHEADER = """
+---
+title: {title}
+location:
+author:
+participants:
+date: {date}
+keywords:
+tags:
+category:
+...
+
+"""  # str.format template of a new header; placeholders: {title}, {date}
+
+
+def _clean_header(header):
+    """Normalize a parsed YAML header in place and return it.
+
+    Empty values (``None`` or the literal string ``"null"``) become ``""``
+    and every string value, including those, is wrapped in a one-element
+    list.  Values of any other type are left untouched.  An entirely empty
+    header (``None``) yields an empty dict.
+    """
+    if header is None:
+        return {}
+
+    for key, value in header.items():
+        if value is None or value == "null":
+            value = ""
+        if isinstance(value, str):
+            header[key] = [value]
+    return header
+
+
+class NoValidHeader(Exception):
+    """Raised when a markdown file has no usable YAML header block."""
+    def __init__(self, filename, *args, **kwargs):
+        super().__init__("Header missing in {}\nFix this with the modify "
+            "subcommand using -f option".format(filename), *args, **kwargs)
+
+
+def get_header(filename, add_header=True):
+    """Open an md file identified by filename and read out the yaml header.
+
+    A header starts with a line of three hyphens (---) and ends with a line
+    of three dots (...).  The opening line has to be the first line of the
+    file or directly follow an empty line.
+
+    NOTE(review): pandoc (https://pandoc.org/MANUAL.html) also accepts three
+    hyphens as the closing line; this parser does not.
+
+    Parameters:
+        filename: path of the md file.  It can also be a folder; in this
+            case folder/readme.md is used for getting the header.
+        add_header: if True (default) and no header is found, a template
+            header is added to the file and the file is read again.
+
+    Returns:
+        A tuple (first yaml header line index, last+1 yaml header line
+        index, header).  The header is normalized in the following way:
+
+        - If the value to a key is a string, a list with that string as
+          only element is returned.
+
+    Raises:
+        NoValidHeader: if no header is found and `add_header` is False, or
+            if the header section cannot be parsed as YAML.
+    """
+
+    if os.path.isdir(filename):
+        filename = os.path.join(filename, "readme.md")
+
+    with open(filename) as f:
+        textlines = f.readlines()
+
+    # State machine over the lines of the file:
+    #   -1: ordinary text; a header cannot start on the next line
+    #    0: start of file or after an empty line; a header may start
+    #    1: inside the header
+    #    2: header complete
+    state = 0
+    # The YAML content is textlines[found_1:found_2].
+    found_1 = -1
+    found_2 = -1
+
+    for i, line in enumerate(textlines):
+        if len(line) == 1 and state in {-1, 0}:
+            # A one-character line (normally just the newline) allows a
+            # header to start on the next line.
+            state = 0
+        elif line.rstrip() == "---" and state == 0:
+            # Opening delimiter; the YAML content starts on the next line.
+            found_1 = i + 1
+            state = 1
+        elif line.rstrip() == "..." and state == 1:
+            # Closing delimiter; it is not part of the YAML content.
+            found_2 = i
+            state = 2
+            break
+        elif state != 1:
+            # Ordinary text outside of a header.  Lines inside the header
+            # (state 1) leave the state untouched.
+            state = -1
+
+    # If a header section was found:
+    if state == 2:
+        # YAML forbids tabs for indentation, so tabs become two spaces.
+        headerlines = [raw.replace("\t", "  ").rstrip()
+                       for raw in textlines[found_1:found_2]]
+
+        try:
+            # safe_load suffices because the header is plain data; calling
+            # yaml.load without an explicit Loader is deprecated and fails
+            # with PyYAML >= 6.
+            yaml_part = yaml.safe_load("\n".join(headerlines))
+        except yaml.error.MarkedYAMLError as e:
+            raise NoValidHeader(filename) from e
+
+        return (found_1, found_2, _clean_header(yaml_part))
+
+    # No header section was found.
+    if not add_header:
+        raise NoValidHeader(filename)
+
+    print("Adding header in: {fn}".format(fn=filename))
+    _add_header(filename)
+
+    # Read the file again, but never add a second header: the template
+    # written by _add_header is expected to be found now.
+    return get_header(filename, add_header=False)
+
+
+def get_date_and_title_from_name(filename):
+    """Guess date and title from a name of the form <title>.<date>[.md].
+
+    For a readme.md the first component of its absolute path that matches
+    <title>.<date> is used; any other `filename` (including its directory
+    part) is matched against <title>.<date>.md.  Returns the tuple
+    (date, title), which is ("", "") if nothing matches.
+    """
+    if os.path.basename(filename).lower() == "readme.md":
+        dir_re = re.compile(r'(?P<title>.+)\.(?P<date>[0-9\-]+)')
+        parts = os.path.abspath(filename).split(os.path.sep)
+        m = next((hit for hit in map(dir_re.match, parts) if hit), None)
+    else:
+        # NOTE(review): matching the basename only may be intended here.
+        m = re.match(r'(?P<title>.+)\.(?P<date>[0-9\-]+)\.md', filename)
+    return (m.group("date"), m.group("title")) if m else ("", "")
+
+
+def _add_header(filename):
+    """Prepend the template header to the md file `filename`.
+
+    Date and title for the header are guessed from the filename; the
+    existing content of the file is kept below the new header.
+    """
+    with open(filename) as source:
+        body = source.read()
+
+    day, name = get_date_and_title_from_name(filename)
+    header = TEMPLATEHEADER.format(title=name, date=str(day))
+    with open(filename, "w") as target:
+        target.write(header + body)
-- 
GitLab