view abcfield.py @ 1026:e4d31e094d24

Changes to get Beginners booke building. The web pages include prev and next links, so to get those right we have to make the tune filenames distinct from the main booke names. So fall back to taking the easy way, and just prefix all tune names with @ or _.
author Jim Hague <jim.hague@acm.org>
date Tue, 18 Feb 2020 15:13:38 +0000
parents fc2b75f6c284
children
line wrap: on
line source

#!/usr/bin/env python3
#
# Extract a text field (title, by default) from a .abc file, and print it out
# with any ABC accented characters converted to HTML (default) or Latex.
#
# Optionally rearrange a field into display format:
# * In Title fields, change 'sort' form such as 'Exploding Potato, The'
#   to display format 'The Exploding Potato'.
# * In Key fields, translate the ABC key representation to full text,
#   e.g. G#dor becomes G# Dorian.
#
# Recognise continuation header fields and print those too. The ABC standard
# defines continuation fields as starting '+:'. Regrettably none of the tools
# I am using for the Booke recognise that syntax, so I am adopting a Booke
# convention of '<header>:+' *also* being a continuation. Note that a
# continuation is a distinct line in the field value; the value has a line
# break between it and the previous line.
#

import argparse
import pathlib
import re
import subprocess
import sys

# Map from the two characters that follow a backslash in ABC text to a
# (HTML, LaTeX) pair of replacements. Element 0 of each value is the HTML
# entity, element 1 the LaTeX equivalent.
#
# Every backslash destined for the LaTeX output is written as '\\' so that
# no value relies on Python passing an unrecognised escape (such as '\i')
# through unchanged.
accentedletters = {
    # Acute accents
    "'A" : ("&Aacute;", "\\'{A}"),
    "'E" : ("&Eacute;", "\\'{E}"),
    "'I" : ("&Iacute;", "\\'{I}"),
    "'O" : ("&Oacute;", "\\'{O}"),
    "'U" : ("&Uacute;", "\\'{U}"),
    "'Y" : ("&Yacute;", "\\'{Y}"),
    "'a" : ("&aacute;", "\\'{a}"),
    "'e" : ("&eacute;", "\\'{e}"),
    "'i" : ("&iacute;", "\\'{i}"),
    "'o" : ("&oacute;", "\\'{o}"),
    "'u" : ("&uacute;", "\\'{u}"),
    "'y" : ("&yacute;", "\\'{y}"),

    # Grave accents
    "`A" : ("&Agrave;", "\\`{A}"),
    "`E" : ("&Egrave;", "\\`{E}"),
    "`I" : ("&Igrave;", "\\`{I}"),
    "`O" : ("&Ograve;", "\\`{O}"),
    "`U" : ("&Ugrave;", "\\`{U}"),
    "`a" : ("&agrave;", "\\`{a}"),
    "`e" : ("&egrave;", "\\`{e}"),
    "`i" : ("&igrave;", "\\`{i}"),
    "`o" : ("&ograve;", "\\`{o}"),
    "`u" : ("&ugrave;", "\\`{u}"),

    # Umlauts
    "\"A" : ("&Auml;", "\\\"{A}"),
    "\"E" : ("&Euml;", "\\\"{E}"),
    "\"I" : ("&Iuml;", "\\\"{I}"),
    "\"O" : ("&Ouml;", "\\\"{O}"),
    "\"U" : ("&Uuml;", "\\\"{U}"),
    "\"Y" : ("&Yuml;", "\\\"{Y}"),
    "\"a" : ("&auml;", "\\\"{a}"),
    "\"e" : ("&euml;", "\\\"{e}"),
    # LaTeX dotless i; the backslash must be escaped in the Python literal.
    "\"i" : ("&iuml;", "\\\"{\\i}"),
    "\"o" : ("&ouml;", "\\\"{o}"),
    "\"u" : ("&uuml;", "\\\"{u}"),
    "\"y" : ("&yuml;", "\\\"{y}"),

    # Circumflexes
    "^A" : ("&Acirc;", "\\^{A}"),
    "^E" : ("&Ecirc;", "\\^{E}"),
    "^I" : ("&Icirc;", "\\^{I}"),
    "^O" : ("&Ocirc;", "\\^{O}"),
    "^U" : ("&Ucirc;", "\\^{U}"),
    "^a" : ("&acirc;", "\\^{a}"),
    "^e" : ("&ecirc;", "\\^{e}"),
    # LaTeX dotless i; the backslash must be escaped in the Python literal.
    "^i" : ("&icirc;", "\\^{\\i}"),
    "^o" : ("&ocirc;", "\\^{o}"),
    "^u" : ("&ucirc;", "\\^{u}"),

    # Tilde
    "~A" : ("&Atilde;", "\\~{A}"),
    "~N" : ("&Ntilde;", "\\~{N}"),
    "~O" : ("&Otilde;", "\\~{O}"),
    "~a" : ("&atilde;", "\\~{a}"),
    "~n" : ("&ntilde;", "\\~{n}"),
    "~o" : ("&otilde;", "\\~{o}"),

    # Cedilla
    ",C" : ("&Ccedil;", "\\c{C}"),
    ",c" : ("&ccedil;", "\\c{c}"),

    # Slash
    "/O" : ("&Oslash;", "\\O"),
    "/o" : ("&oslash;", "\\o"),

    # Ring
    "AA" : ("&Aring;", "\\r{A}"),
    "aa" : ("&aring;", "\\r{a}"),

    # Ligatures
    "AE" : ("&AElig;", "\\AE"),
    "ae" : ("&aelig;", "\\ae"),
    "ss" : ("&szlig;", "\\ss"),

    # Quote marks
    "''" : ("&apos;", "'"),
    "'`" : ("&ldquo;", "``"),
    "'\"" : ("&rdquo;", "''"),
}

# Map from the (lower-case) ABC mode abbreviation to the mode name used in
# display text. Modes not listed here are displayed as "Major" by
# convertKeyToDisplay(), so the Major spellings are listed only for
# completeness; Aeolian must be listed or it would display as Major.
abckeys = {
    "m":   "Minor",
    "min": "Minor",
    "aeo": "Minor",
    "maj": "Major",
    "ion": "Major",
    "mix": "Mixolydian",
    "dor": "Dorian",
    "phr": "Phrygian",
    "lyd": "Lydian",
    "loc": "Locrian",
}

# Convert ABC accented chars to HTML entities or LaTeX.
#
# Each backslash in t, together with the (up to) two characters after it,
# is looked up in accentedletters. A known sequence is replaced by its HTML
# entity, or by its LaTeX form when latex is true. An unknown sequence is
# copied through unchanged, backslash included. Scanning resumes after the
# two characters, so they are never re-examined.
def convertAccents(t, latex=False):
    column = 1 if latex else 0
    pieces = []
    pos = 0
    while True:
        slash = t.find('\\', pos)
        if slash < 0:
            # No more escapes; keep whatever text is left.
            pieces.append(t[pos:])
            break
        pieces.append(t[pos:slash])
        code = t[slash + 1:slash + 3]
        pos = slash + 3
        entry = accentedletters.get(code)
        if entry is None:
            pieces.append("\\" + code)
        else:
            pieces.append(entry[column])
    return "".join(pieces)

# Convert Title fields from sort to display, so Bat, The->The Bat.
# This only happens for the main title, i.e. the first title in a tune.
# Subtitles are not affected, as they don't influence sort order.
#
# The text after the last comma is moved to the front. If it is purely
# alphanumeric (e.g. 'The') a space is inserted after it; otherwise
# (e.g. "L'") it is joined directly to the rest of the title.
#
# A title is only treated as being in sort form when the text after the
# last comma is a single word. A title that merely contains a comma, such
# as 'Oh Dear, What Can the Matter Be', is returned unchanged rather than
# being rearranged.
def convertTitleToDisplay(t):
    head, comma, tail = t.rpartition(',')
    if not comma:
        return t
    article = tail.strip()
    if len(article.split()) > 1:
        # Several words after the comma: not a trailing article.
        return t
    body = head.strip()
    if article.isalnum():
        return article + " " + body
    return article + body
# Convert Key field from ABC to display, so G#dor->G# Dorian.
#
# The first character is the key letter, optionally followed by '#' or 'b'.
# The first word after that is the mode; it is lower-cased and cut to its
# first three letters before being looked up in abckeys, so that 'Am',
# 'Amin', 'A minor' and 'A Minor' all give 'A Minor'. Anything following
# the mode word is ignored. A missing or unrecognised mode is displayed
# as Major.
#
# An empty (or all-whitespace) value is returned as an empty string.
def convertKeyToDisplay(t):
    t = t.strip()
    if not t:
        return t
    letter = t[0].upper()
    # Slicing rather than indexing, so a bare key letter needs no special case.
    accidental = t[1:2]
    if accidental in ('#', 'b'):
        rest = t[2:]
    else:
        accidental = ""
        rest = t[1:]
    words = rest.split()
    mode = words[0].lower()[:3] if words else ""
    return letter + accidental + ' ' + abckeys.get(mode, "Major")

# Convert input string from Markdown to HTML or LaTeX. Fix up link
# targets so any 'foo.abc' target links to the tune with that name.
#
# The conversion is done by running pandoc, which must be on the PATH;
# subprocess.check_output() raises CalledProcessError if pandoc exits with
# a non-zero status.
#
# In LaTeX output '\href{foo.abc}' becomes '\hyperlink{foo}'; in HTML
# output 'href="foo.abc"' becomes 'href="../foo/index.html"'. The patterns
# require a literal '.abc' and do not let the target name run past the
# closing quote or brace, so other links in the same text are left alone.
def convertMarkdown(t, latex):
    if latex:
        target = "--to=latex"
    else:
        target = "--to=html"
    res = subprocess.check_output(['pandoc', '--from=markdown', target], input=t, universal_newlines=True)
    if latex:
        res = re.sub(r'\\href\{([^{}]*?)\.abc\}', r'\\hyperlink{\1}', res)
    else:
        res = re.sub(r'href="([^"]*?)\.abc"', r'href="../\1/index.html"', res)
    return res.strip()

# Implement a custom Markdown shorthand for referencing ABC files.
# <foo.abc> will expand to ['title of foo'](foo.abc).
def expandCustomMarkdown(t, dir):
    # Given a match to (foo.abc), return a markdown link to the tune with the
    # title (and subtitle, if present) of the tune as the text of the link.
    # Because we're going through Markdown, character entities must be
    # HTML. Pandoc will convert them to UTF-8.
    def getTitleLink(m):
        fname = m.group(1) + ".abc"
        path = pathlib.Path(dir, fname)
        if not path.exists():
            path = pathlib.Path(dir, '@' + fname)
        if not path.exists():
            path = pathlib.Path(dir, '_' + fname)
        with path.open() as f:
            lines = f.readlines()
            return "[" + getFullTitle(lines, dir) + "](" + fname + ")"
    return re.sub(r'<(.*?).abc>', getTitleLink, t)

# Return the raw text for a given field. Optionally the nth field is taken,
# or the field data must start with a designated string to be recognised.
#
# Only lines of the form 'X:...' with at least one character after the
# colon are considered; all other lines are skipped. Once a value has been
# found, '+:' and '<field>:+' lines are appended to it, each on a new line,
# and the next ordinary header line ends the search. Returns "" if no
# matching field is found.
def getFieldText(lines, field, n = 1, starts = None):
    value = ""
    remaining = n
    for raw in lines:
        text = raw.strip()
        if len(text) <= 2 or text[1] != ':':
            continue
        tag = text[0]

        plainContinuation = tag == "+"
        if plainContinuation or (tag == field and text[2] == "+"):
            # Continuations only count once a value has been started.
            if value:
                payload = text[2:] if plainContinuation else text[3:]
                value = value + '\n' + payload.strip()
            continue

        # Any other header line terminates a value already collected.
        if value:
            break
        if tag != field:
            continue

        payload = text[2:].strip()
        if starts:
            if not payload.startswith(starts):
                continue
            payload = payload[len(starts):].strip()
        if remaining > 1:
            # Not yet at the requested occurrence.
            remaining -= 1
            continue
        value = payload
    return value

# Return display text for a given field.
#
# The raw field text is fetched with getFieldText() and, if non-empty, has
# its ABC accents converted. Then, depending on the (upper-cased) field:
# * T, first occurrence only: converted from sort to display form.
# * K: converted to a full key name.
# * H and N: custom shorthand expanded, then run through Markdown.
# Any other field is returned with just its accents converted.
def getFieldDisplayText(lines, dir, field, n = 1, starts = None, latex = False):
    text = getFieldText(lines, field, n, starts)
    if not text:
        return text

    kind = field.upper()
    viaMarkdown = kind in ('H', 'N')
    # Fields that go through Markdown must have HTML entities.
    if viaMarkdown:
        text = convertAccents(text, False)
    else:
        text = convertAccents(text, latex)

    if kind == "T" and n == 1:
        return convertTitleToDisplay(text)
    if kind == "K":
        return convertKeyToDisplay(text)
    if viaMarkdown:
        return convertMarkdown(expandCustomMarkdown(text, dir), latex)
    return text

# Return full title: the display title, followed by the display subtitle
# (the second T field) in parentheses when there is one.
def getFullTitle(lines, dir, starts = None, latex = False):
    main = getFieldDisplayText(lines, dir, "T", starts=starts, latex=latex)
    sub = getFieldDisplayText(lines, dir, "T", n=2, starts=starts, latex=latex)
    if sub:
        return main + " (" + sub + ")"
    return main

if __name__ == "__main__":
    # Extract the requested field from the open ABC file f and print it.
    # dir is the directory used to resolve <foo.abc> references in notes
    # and history. Returns True if something was printed, False if the
    # field was not found (or was empty).
    def process(f, dir, args):
        lines = f.readlines()
        if args.display:
            if args.field.upper() == "FT":
                line = getFullTitle(lines, dir, args.starts, args.latex)
            else:
                line = getFieldDisplayText(lines, dir, args.field, args.index, args.starts, args.latex)
        else:
            # There is no raw 'full title'; fall back to the plain title.
            if args.field.upper() == "FT":
                args.field = "T"
            line = getFieldText(lines, args.field, args.index, args.starts)
        if line:
            print(line)
            return True
        else:
            return False

    # execute only if run as a script
    parser = argparse.ArgumentParser(description="Extract field data from ABC file.")
    parser.add_argument("-f", "--field", dest="field", default="T",
                        help=("extract the given field [default: %(default)s]. "
                              "Field FT is special; it returns the full title "
                              "- the title followed by subtitle in () if "
                              "present - for display text, or just the title "
                              "for non-display text."))
    parser.add_argument("-l", "--latex", dest="latex",
                        action="store_true", default=False,
                        help="convert special characters for LaTeX (default HTML)")
    parser.add_argument("-d", "--display", dest="display",
                        action="store_true", default=False,
                        help=("convert to display text. Convert accents to "
                              "LaTeX or HTML, in titles convert 'Tune, The' to "
                              "'The Tune', convert keys to full key name, "
                              "and expand Markdown in notes and history."))
    parser.add_argument("-n", "--index", dest="index",
                        action="store", type=int, default=1,
                        help="report INDEXth value [default: %(default)s]")
    parser.add_argument("-s", "--starts", dest="starts",
                        action="store", default=None,
                        help=("report only if line starts with CONTENT "
                              "and remove CONTENT"),
                        metavar="CONTENT")
    parser.add_argument('input', type=argparse.FileType('r'),
                        help='input ABC file')
    args = parser.parse_args()

    # argparse has already opened the input, so read from that handle and
    # close it when done rather than opening the file a second time by name.
    # This also lets '-' (standard input) work; references are then resolved
    # relative to the current directory.
    with args.input as f:
        res = process(f, pathlib.Path(f.name).parent, args)
    # Exit status 0 if a value was printed, 1 otherwise.
    sys.exit(int(not res))