#!/usr/bin/env python
# encoding: utf-8
#
# ** header v3.0
# This file is a part of the CaosDB Project.
#
# Copyright (C)
# A. Schlemmer, 01/2019
# D. Hornung 2019-02
# T. Fitschen 2019-02
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# ** end header
#
# Provenance: this module is the payload of the GitLab-generated patch
# "[PATCH] read md" (commit 99bb9dac1be361b3e2849a3efbe5d5d583009f9e) by
# Henrik tom Wörden <henrik@trineo.org>, dated Thu, 16 May 2019 10:49:48 +0200,
# which created src/caosadvancedtools/read_md_header.py.
"""Read -- and, if missing, add -- the YAML metadata header of a markdown file."""

import os
import re

# Header written in front of markdown files that lack one.  It starts with a
# blank line followed by ``---`` and ends with ``...`` so that ``get_header``
# recognises it on the next read.
TEMPLATEHEADER = """
---
title: {title}
location:
author:
participants:
date: {date}
keywords:
tags:
category:
...

"""

# "<title>.<date>" as a directory name and "<title>.<date>.md" as a file name,
# where <date> consists of digits and hyphens only.
_DIR_NAME_RE = re.compile(r'(?P<title>.+)\.(?P<date>[0-9\-]+)')
_FILE_NAME_RE = re.compile(r'(?P<title>.+)\.(?P<date>[0-9\-]+)\.md')


def _clean_header(header):
    """Normalize a parsed header mapping in place and return it.

    - Empty values (``None`` or the literal string ``"null"``) are replaced by
      the empty string.
    - Every string value, including the ones just emptied, is wrapped in a
      one-element list.

    Values of any other type (lists, numbers, dates, ...) are left untouched.
    """
    # Only the values of existing keys are replaced, so assigning while
    # iterating over ``items()`` does not change the size of the dict.
    for key, value in header.items():
        if value is None or value == "null":
            value = ""

        if isinstance(value, str):
            value = [value]

        header[key] = value

    return header


class NoValidHeader(Exception):
    """Raised when a markdown file has no usable YAML header."""

    def __init__(self, filename, *args, **kwargs):
        msg = ("Header missing in {}\nFix this with the modify subcommand "
               "using -f option".format(filename))
        super().__init__(msg, *args, **kwargs)


def _find_header_span(textlines):
    """Locate the YAML header block in the lines of a markdown file.

    Returns ``(start, stop)`` where ``start`` is the index of the first line
    after the opening ``---`` and ``stop`` is the index of the closing ``...``
    line, or ``None`` if no complete header block exists.

    The opening ``---`` is only accepted at the very beginning of the file or
    directly after a blank line.  Only ``...`` is recognised as the closing
    delimiter.
    """
    # States: 0 = at start of file or right after a blank line,
    #         1 = inside the header block,
    #        -1 = inside ordinary text.
    state = 0
    start = -1

    for index, line in enumerate(textlines):
        # A line consisting of the newline character only is a blank line.
        if len(line) == 1 and state in {-1, 0}:
            state = 0

            continue

        stripped = line.rstrip()

        if stripped == "---" and state == 0:
            start = index + 1
            state = 1

            continue

        if stripped == "..." and state == 1:
            return start, index

        # Any other line outside of the header block is ordinary text; lines
        # inside the header block leave the state untouched.
        if state != 1:
            state = -1

    return None


def get_header(filename, add_header=True):
    """Open an md file identified by filename and read out the yaml
header.

filename can also be a folder. In this case folder/readme.md will be used for
getting the header.

If a header is found a tuple is returned: (first yaml header line index, last+1
yaml header line index, header)

Otherwise, if `add_header` is True, a header is added and the function is called
again.  If `add_header` is False, NoValidHeader is raised.

NoValidHeader is also raised if a header block is found but it is not valid
YAML or does not contain a mapping (e.g. it is empty).

The header is normalized in the following way:

- If the value to a key is empty, it is replaced by an empty string.
- If the value to a key is a string, a list with that string as only element is
  returned.

From https://pandoc.org/MANUAL.html:

A YAML metadata block is a valid YAML object, delimited by a line of three
hyphens (---) at the top and a line of three hyphens (---) or three dots (...)
at the bottom. A YAML metadata block may occur anywhere in the document, but if
it is not at the beginning, it must be preceded by a blank line.

Note that this implementation only recognises three dots (...) as the closing
delimiter.

    """

    if os.path.isdir(filename):
        filename = os.path.join(filename, "readme.md")

    with open(filename) as f:
        textlines = f.readlines()

    span = _find_header_span(textlines)

    if span is not None:
        # PyYAML is only needed once a header block was actually found, so it
        # is imported at the point of use.
        import yaml

        first, last = span
        # Tabs are not allowed as indentation in YAML; replace them by spaces.
        headerlines = [line.replace("\t", " ").rstrip()
                       for line in textlines[first:last]]

        try:
            # safe_load instead of load: the header comes from a user-edited
            # file and must not be able to construct arbitrary Python objects.
            # (yaml.load without an explicit Loader fails on PyYAML >= 6.)
            yaml_part = yaml.safe_load("\n".join(headerlines))
        except yaml.YAMLError as err:
            raise NoValidHeader(filename) from err

        # An empty block parses to None, a bare scalar to a str/int/...;
        # neither is a usable header.
        if not isinstance(yaml_part, dict):
            raise NoValidHeader(filename)

        return (first, last, _clean_header(yaml_part))

    if not add_header:
        raise NoValidHeader(filename)

    print("Adding header in: {fn}".format(fn=filename))
    _add_header(filename)

    # The template header is now at the top of the file and will be found; do
    # not allow the retry to add yet another header.
    return get_header(filename, add_header=False)


def get_date_and_title_from_name(filename):
    """Guess date and title of a markdown file from its path.

    For a file called ``readme.md`` (case-insensitive) the components of its
    absolute path are searched, from the root downwards, for the first one
    starting with ``<title>.<date>``.  For any other file the base name is
    expected to start with ``<title>.<date>.md``.  ``<date>`` consists of
    digits and hyphens only.

    Returns the tuple ``(date, title)``; both are empty strings if the name
    does not follow the pattern.
    """
    basename = os.path.basename(filename)
    match = None

    if basename.lower() == "readme.md":
        for part in os.path.abspath(filename).split(os.path.sep):
            match = _DIR_NAME_RE.match(part)

            if match is not None:
                break
    else:
        # Match the base name only, so that leading directories do not end up
        # in the title.
        match = _FILE_NAME_RE.match(basename)

    if match is None:
        return "", ""

    return match.group("date"), match.group("title")


def _add_header(filename):
    """
    Add a header to an md file. Date and title are guessed from the filename.

    The file is rewritten with TEMPLATEHEADER in front of its previous content.
    """
    with open(filename) as f:
        textlines = f.readlines()

    date, title = get_date_and_title_from_name(filename)
    localheader = TEMPLATEHEADER.format(date=str(date), title=title)

    with open(filename, "w") as f:
        f.write(localheader)
        f.writelines(textlines)