view abcfield.py @ 650:9982077ac9b2

Update tune format files. In the main one, just remove a couple of now deprecated settings that did nothing anyway. For the web format, which is actually the format used to generate the downloadable PDFs, bring it into line with the main format but leave the margins at the default settings. Also remove the deprecated settings.
author Jim Hague <jim.hague@acm.org>
date Thu, 10 Nov 2016 00:47:55 +0000
parents 82e818c41e81
children 972d3dab1142
line wrap: on
line source

#!/usr/bin/env python3
#
# Extract a text field (title, by default) from a .abc file, and print it out
# with any ABC accented characters converted to HTML (default) or LaTeX.
#
# Optionally rearrange a field into display format:
# * In Title fields, change 'sort' form such as 'Exploding Potato, The'
#   to display format 'The Exploding Potato'.
# * In Key fields, translate the ABC key representation to full text,
#   e.g. G#dor becomes G# Dorian.
#
# Recognise continuation header fields and print those too. The ABC standard
# defines continuation fields as starting '+:'. Regrettably none of the tools
# I am using for the Booke recognise that syntax, so I am adopting a Booke
# convention of '<header>:+' *also* being a continuation. Note that a
# continuation is a distinct line in the field value; the value has a line
# break between it and the previous line.
#

import optparse
import pathlib
import re
import subprocess
import sys

# Map from the two characters that follow a backslash in ABC text to a
# (HTML entity, LaTeX) pair. Index 0 is the HTML form, index 1 the LaTeX form.
#
# LaTeX control words (\O, \o, \AE, \ae, \ss) are wrapped in braces so that
# a letter following the accent in the text is not absorbed into the command
# name (e.g. 'Stra\sse' must not become the undefined command '\sse').
accentedletters = {
    # Acute accents
    "'A" : ("&Aacute;", "\\'{A}"),
    "'E" : ("&Eacute;", "\\'{E}"),
    "'I" : ("&Iacute;", "\\'{I}"),
    "'O" : ("&Oacute;", "\\'{O}"),
    "'U" : ("&Uacute;", "\\'{U}"),
    "'Y" : ("&Yacute;", "\\'{Y}"),
    "'a" : ("&aacute;", "\\'{a}"),
    "'e" : ("&eacute;", "\\'{e}"),
    "'i" : ("&iacute;", "\\'{i}"),
    "'o" : ("&oacute;", "\\'{o}"),
    "'u" : ("&uacute;", "\\'{u}"),
    "'y" : ("&yacute;", "\\'{y}"),

    # Grave accents
    "`A" : ("&Agrave;", "\\`{A}"),
    "`E" : ("&Egrave;", "\\`{E}"),
    "`I" : ("&Igrave;", "\\`{I}"),
    "`O" : ("&Ograve;", "\\`{O}"),
    "`U" : ("&Ugrave;", "\\`{U}"),
    "`a" : ("&agrave;", "\\`{a}"),
    "`e" : ("&egrave;", "\\`{e}"),
    "`i" : ("&igrave;", "\\`{i}"),
    "`o" : ("&ograve;", "\\`{o}"),
    "`u" : ("&ugrave;", "\\`{u}"),

    # Umlauts
    "\"A" : ("&Auml;", "\\\"{A}"),
    "\"E" : ("&Euml;", "\\\"{E}"),
    "\"I" : ("&Iuml;", "\\\"{I}"),
    "\"O" : ("&Ouml;", "\\\"{O}"),
    "\"U" : ("&Uuml;", "\\\"{U}"),
    "\"Y" : ("&Yuml;", "\\\"{Y}"),
    "\"a" : ("&auml;", "\\\"{a}"),
    "\"e" : ("&euml;", "\\\"{e}"),
    # The backslash before 'i' is doubled: a lone '\i' is not a valid Python
    # string escape. The resulting string is unchanged.
    "\"i" : ("&iuml;", "\\\"{\\i}"),
    "\"o" : ("&ouml;", "\\\"{o}"),
    "\"u" : ("&uuml;", "\\\"{u}"),
    "\"y" : ("&yuml;", "\\\"{y}"),

    # Circumflexes
    "^A" : ("&Acirc;", "\\^{A}"),
    "^E" : ("&Ecirc;", "\\^{E}"),
    "^I" : ("&Icirc;", "\\^{I}"),
    "^O" : ("&Ocirc;", "\\^{O}"),
    "^U" : ("&Ucirc;", "\\^{U}"),
    "^a" : ("&acirc;", "\\^{a}"),
    "^e" : ("&ecirc;", "\\^{e}"),
    "^i" : ("&icirc;", "\\^{\\i}"),
    "^o" : ("&ocirc;", "\\^{o}"),
    "^u" : ("&ucirc;", "\\^{u}"),

    # Tilde
    "~A" : ("&Atilde;", "\\~{A}"),
    "~N" : ("&Ntilde;", "\\~{N}"),
    "~O" : ("&Otilde;", "\\~{O}"),
    "~a" : ("&atilde;", "\\~{a}"),
    "~n" : ("&ntilde;", "\\~{n}"),
    "~o" : ("&otilde;", "\\~{o}"),

    # Cedilla
    ",C" : ("&Ccedil;", "\\c{C}"),
    ",c" : ("&ccedil;", "\\c{c}"),

    # Slash
    "/O" : ("&Oslash;", "{\\O}"),
    "/o" : ("&oslash;", "{\\o}"),

    # Ring
    "AA" : ("&Aring;", "\\r{A}"),
    "aa" : ("&aring;", "\\r{a}"),

    # Ligatures
    "AE" : ("&AElig;", "{\\AE}"),
    "ae" : ("&aelig;", "{\\ae}"),
    "ss" : ("&szlig;", "{\\ss}"),
}

# Map from lower-case ABC mode abbreviation to the mode name used for
# display. Lookups that miss this table are shown as "Major" by
# convertKeyToDisplay, which is right for 'maj' and 'ion' (Ionian) but not
# for Aeolian, so that mode is listed explicitly.
abckeys = {
    "m":   "Minor",
    "min": "Minor",
    "aeo": "Aeolian",
    "mix": "Mixolydian",
    "dor": "Dorian",
    "phr": "Phrygian",
    "lyd": "Lydian",
    "loc": "Locrian",
}

# Convert ABC accented chars to HTML entities or LaTeX.
def convertAccents(t, latex=False):
    """Replace ABC backslash accent codes in t.

    Each backslash is taken together with the two characters that follow it.
    If that pair is a key of accentedletters it is replaced by the HTML
    entity (default) or, when latex is true, by the LaTeX form. Any other
    pair is copied through unchanged, backslash included.
    """
    column = 1 if latex else 0
    pieces = []
    remaining = t
    while True:
        before, backslash, after = remaining.partition('\\')
        pieces.append(before)
        if backslash == "":
            # No further backslash: everything has been copied.
            break
        # The two characters after the backslash are always consumed,
        # whether or not they form a known accent code.
        code = after[0:2]
        remaining = after[2:]
        if code in accentedletters:
            pieces.append(accentedletters[code][column])
        else:
            pieces.append("\\" + code)
    return "".join(pieces)

# Convert Title fields from sort to display, so Bat, The->The Bat.
def convertTitleToDisplay(t):
    """Move the text after the last comma of t to the front.

    The two parts are stripped and joined with a single space. A title
    without a comma is returned exactly as given.
    """
    head, comma, tail = t.rpartition(',')
    if not comma:
        return t
    return "{} {}".format(tail.strip(), head.strip())

# Convert Key field from ABC to display, so G#dor->G# Dorian.
def convertKeyToDisplay(t):
    """Return a display form of the ABC key t, e.g. 'G#dor' -> 'G# Dorian'.

    The first character is the key letter (upper-cased), optionally followed
    by '#' or 'b'. Whatever follows is the mode. Only its first three
    letters, lower-cased, are looked up in abckeys, so 'D minor', 'Dmin' and
    'Dm' all give 'D Minor' and 'G mixolydian' gives 'G Mixolydian'. A
    missing or unrecognised mode is shown as 'Major'.

    An empty (or all-whitespace) t yields an empty string.
    """
    t = t.strip()
    if not t:
        return ""
    letter = t[0].upper()
    rest = t[1:]
    accidental = ""
    # rest[:1] is '' when there is nothing after the key letter.
    if rest[:1] in ("#", "b"):
        accidental = rest[0]
        rest = rest[1:]
    # The ABC standard parses only the first three letters of a mode name,
    # so truncate before the lookup ('m' is shorter and stays as it is).
    mode = rest.strip().lower()[:3]
    return letter + accidental + ' ' + abckeys.get(mode, "Major")

# Convert input string from Markdown to HTML or LaTeX. Fix up link
# targets so any 'foo.abc' target links to the tune with that name.
def convertMarkdown(t, latex):
    """Run t through pandoc and rewrite links whose target ends in '.abc'.

    t is fed to 'pandoc --from=markdown' with output format LaTeX when latex
    is true, otherwise HTML. In LaTeX output '\\href{foo.abc}' becomes
    '\\hyperlink{foo}'; in HTML output 'href="foo.abc"' becomes
    'href="foo.html"'. The result is returned with surrounding whitespace
    stripped.

    Raises subprocess.CalledProcessError if pandoc exits with an error, and
    OSError if pandoc cannot be started.
    """
    target = "--to=latex" if latex else "--to=html"
    res = subprocess.check_output(['pandoc', '--from=markdown', target], input=t, universal_newlines=True)
    # The link target is matched with a character class that cannot cross the
    # closing delimiter, so an earlier non-.abc link on the same line is not
    # swallowed into the match, and the dot before 'abc' is a literal dot.
    if latex:
        res = re.sub(r'\\href\{([^{}]*)\.abc\}', r'\\hyperlink{\1}', res)
    else:
        res = re.sub(r'href="([^"]*)\.abc"', r'href="\1.html"', res)
    return res.strip()

# Implement a custom Markdown shorthand for referencing ABC files.
# <foo.abc> will expand to [title of foo](foo.abc).
def expandCustomMarkdown(t, dir, latex):
    """Expand every '<name.abc>' in t into a Markdown link to that tune.

    The link text is the display title read from the file name.abc in
    directory dir; the link target is 'name.abc'. Text without such a
    reference is returned unchanged.

    Raises OSError (e.g. FileNotFoundError) if a referenced file cannot be
    opened.
    """
    # Given a match to <foo.abc>, return a markdown link to the tune with the
    # title of the tune as the text of the link.
    def getTitle(m):
        fname = m.group(1) + ".abc"
        path = pathlib.Path(dir, fname)
        with path.open() as f:
            # NOTE(review): the original call passed `latex` positionally,
            # where it landed in getFieldDisplayText's `n` parameter, so the
            # title was always rendered with HTML entities. That effective
            # behaviour is kept and made explicit here. Presumably it is
            # also what is wanted, since the text is Markdown that is still
            # to be converted - confirm before passing latex=latex.
            title = getFieldDisplayText(f, dir, "T")
        return "[" + title + "](" + fname + ")"
    # The name may not contain '<' or '>' so a match cannot start at an
    # earlier '<...>' construct, and the dot before 'abc' is a literal dot.
    return re.sub(r'<([^<>]*)\.abc>', getTitle, t)

# Return the raw text for a given field. Optionally the nth field is taken,
# or the field data must start with a designated string to be recognised.
def getFieldText(inf, field, n = 1, starts = None):
    """Return the raw value of a header field read from the lines of inf.

    inf is iterated line by line. Only stripped lines longer than two
    characters whose second character is ':' are considered. The nth line
    whose first character is field provides the value. If starts is given,
    only lines whose data begins with starts count, and starts is removed
    from the value. Continuation lines ('+:text' or '<field>:+text') that
    follow are appended, each after a newline. Reading stops at the next
    other header line. Returns '' if nothing was found.
    """
    collected = ""
    for raw in inf:
        text = raw.strip()
        # Anything not shaped like '<char>:<data>' is ignored entirely.
        if len(text) <= 2 or text[1] != ':':
            continue

        tag = text[0]
        plusForm = tag == "+"
        if plusForm or (tag == field and text[2] == "+"):
            # Continuation line: only meaningful once a value has started.
            if collected:
                extra = text[2:] if plusForm else text[3:]
                collected = collected + '\n' + extra.strip()
            continue

        # Any other header line ends a value that is already under way.
        if collected:
            break
        if tag != field:
            continue

        value = text[2:].strip()
        if starts:
            if not value.startswith(starts):
                continue
            value = value[len(starts):].strip()
        if n > 1:
            # Not yet the requested occurrence; count it and move on.
            n -= 1
            continue
        collected = value
    return collected

# Return display text for a given field.
def getFieldDisplayText(inf, dir, field, n = 1, starts = None, latex = False):
    """Return the value of a field converted for display.

    The raw value is obtained with getFieldText(inf, field, n, starts). A
    non-empty value has its ABC accents converted (to LaTeX when latex is
    true, otherwise HTML) and is then post-processed by field type, compared
    case-insensitively:
      T     - sort form to display form (convertTitleToDisplay)
      K     - key to full text (convertKeyToDisplay)
      H, N  - custom Markdown expanded relative to dir, then converted
              with convertMarkdown
    Other fields get the accent conversion only. An empty value is
    returned as it is.
    """
    text = getFieldText(inf, field, n, starts)
    if not text:
        return text

    text = convertAccents(text, latex)
    kind = field.upper()
    if kind == "T":
        return convertTitleToDisplay(text)
    if kind == "K":
        return convertKeyToDisplay(text)
    if kind in ["H", "N"]:
        return convertMarkdown(expandCustomMarkdown(text, dir, latex), latex)
    return text

if __name__ == "__main__":
    # Print the requested field of one input, in raw or display form
    # according to the options. Returns True if a value was found and
    # printed, False otherwise.
    def process(inf, dir, options):
        if options.display:
            line = getFieldDisplayText(inf, dir, options.field, options.index, options.starts, options.latex)
        else:
            line = getFieldText(inf, options.field, options.index, options.starts)
        if line:
            print(line)
            return True
        else:
            return False

    # execute only if run as a script
    parser = optparse.OptionParser(usage="usage: %prog [options] [filename]\n\n"
                                   "  Extract field data from ABC file.")
    parser.add_option("-f", "--field", dest="field", default="T",
                      help="extract the field FIELD", metavar="FIELD")
    parser.add_option("-l", "--latex", dest="latex",
                      action="store_true", default=False,
                      help="convert special characters for LaTeX")
    parser.add_option("-d", "--display", dest="display",
                      action="store_true", default=False,
                      help="convert to display text")
    parser.add_option("-n", "--index", dest="index",
                      action="store", type="int", default=1,
                      help="report INDEXth value [default: %default]",
                      metavar="INDEX")
    parser.add_option("-s", "--starts", dest="starts",
                      action="store", type="string", default=None,
                      help="report only if line starts CONTENT and remove CONTENT",
                      metavar="CONTENT")
    (options, args) = parser.parse_args()

    # Each named file is processed in turn; with no file arguments, standard
    # input is read instead. The exit status is 0 if any input yielded a
    # value and 1 otherwise.
    res = False
    if args:
        for arg in args:
            path = pathlib.Path(arg)
            with path.open() as f:
                # process() must be the left operand: 'res or process(...)'
                # short-circuits once res is True, so every file after the
                # first match would be skipped without being printed.
                res = process(f, path.parent, options) or res
    else:
        res = process(sys.stdin, ".", options)
    sys.exit(int(not res))