view abcfield.py @ 522:92a3a10847fb build-default-219

And another 'The' to ', The'.
author Jim Hague <jim.hague@acm.org>
date Thu, 14 Aug 2014 14:33:44 +0100
parents 27f29e8aafea
children 760d0ae5acea
line wrap: on
line source

#!/usr/bin/env python
#
# Extract a text field (title, by default) from a .abc file, and print it out
# formatted for use in LaTeX or HTML.
#

import optparse
import sys

# Maps the two-character ABC mnemonic that follows a backslash in a text
# field to a pair (HTML entity, LaTeX markup).  convertTitle() picks index 0
# for HTML output and index 1 for LaTeX output.
#
# LaTeX control words (\O, \o, \AE, \ae, \ss) are wrapped in a brace group so
# that a letter following the replacement is not absorbed into the control
# sequence name and a following space is not swallowed.
accentedletters = {
    # Acute accents
    "'A" : ("&Aacute;", "\\'{A}"),
    "'E" : ("&Eacute;", "\\'{E}"),
    "'I" : ("&Iacute;", "\\'{I}"),
    "'O" : ("&Oacute;", "\\'{O}"),
    "'U" : ("&Uacute;", "\\'{U}"),
    "'Y" : ("&Yacute;", "\\'{Y}"),
    "'a" : ("&aacute;", "\\'{a}"),
    "'e" : ("&eacute;", "\\'{e}"),
    "'i" : ("&iacute;", "\\'{i}"),
    "'o" : ("&oacute;", "\\'{o}"),
    "'u" : ("&uacute;", "\\'{u}"),
    "'y" : ("&yacute;", "\\'{y}"),

    # Grave accents
    "`A" : ("&Agrave;", "\\`{A}"),
    "`E" : ("&Egrave;", "\\`{E}"),
    "`I" : ("&Igrave;", "\\`{I}"),
    "`O" : ("&Ograve;", "\\`{O}"),
    "`U" : ("&Ugrave;", "\\`{U}"),
    "`a" : ("&agrave;", "\\`{a}"),
    "`e" : ("&egrave;", "\\`{e}"),
    "`i" : ("&igrave;", "\\`{i}"),
    "`o" : ("&ograve;", "\\`{o}"),
    "`u" : ("&ugrave;", "\\`{u}"),

    # Umlauts
    "\"A" : ("&Auml;", "\\\"{A}"),
    "\"E" : ("&Euml;", "\\\"{E}"),
    "\"I" : ("&Iuml;", "\\\"{I}"),
    "\"O" : ("&Ouml;", "\\\"{O}"),
    "\"U" : ("&Uuml;", "\\\"{U}"),
    "\"Y" : ("&Yuml;", "\\\"{Y}"),
    "\"a" : ("&auml;", "\\\"{a}"),
    "\"e" : ("&euml;", "\\\"{e}"),
    # The backslash before 'i' (LaTeX dotless i) must itself be escaped;
    # a bare "\i" is an invalid Python escape sequence.
    "\"i" : ("&iuml;", "\\\"{\\i}"),
    "\"o" : ("&ouml;", "\\\"{o}"),
    "\"u" : ("&uuml;", "\\\"{u}"),
    "\"y" : ("&yuml;", "\\\"{y}"),

    # Circumflexes
    "^A" : ("&Acirc;", "\\^{A}"),
    "^E" : ("&Ecirc;", "\\^{E}"),
    "^I" : ("&Icirc;", "\\^{I}"),
    "^O" : ("&Ocirc;", "\\^{O}"),
    "^U" : ("&Ucirc;", "\\^{U}"),
    "^a" : ("&acirc;", "\\^{a}"),
    "^e" : ("&ecirc;", "\\^{e}"),
    "^i" : ("&icirc;", "\\^{\\i}"),
    "^o" : ("&ocirc;", "\\^{o}"),
    "^u" : ("&ucirc;", "\\^{u}"),

    # Tilde
    "~A" : ("&Atilde;", "\\~{A}"),
    "~N" : ("&Ntilde;", "\\~{N}"),
    "~O" : ("&Otilde;", "\\~{O}"),
    "~a" : ("&atilde;", "\\~{a}"),
    "~n" : ("&ntilde;", "\\~{n}"),
    "~o" : ("&otilde;", "\\~{o}"),

    # Cedilla
    ",C" : ("&Ccedil;", "\\c{C}"),
    ",c" : ("&ccedil;", "\\c{c}"),

    # Slash
    "/O" : ("&Oslash;", "{\\O}"),
    "/o" : ("&oslash;", "{\\o}"),

    # Ring
    "AA" : ("&Aring;", "\\r{A}"),
    "aa" : ("&aring;", "\\r{a}"),

    # Ligatures
    "AE" : ("&AElig;", "{\\AE}"),
    "ae" : ("&aelig;", "{\\ae}"),
    "ss" : ("&szlig;", "{\\ss}"),
}

def convertTitle(t, options):
    """Return t with its backslash escapes rendered for the chosen output.

    Each backslash in t is taken together with the (up to) two characters
    that follow it.  When options.html or options.latex is set and those
    characters are a key of accentedletters, the escape is replaced by the
    HTML entity (options.html) or the LaTeX markup (otherwise).  In every
    other case the backslash and the characters after it are copied through
    unchanged.
    """
    pieces = []
    remaining = t
    while True:
        before, backslash, after = remaining.partition('\\')
        pieces.append(before)
        if not backslash:
            # No further escapes; 'before' was the tail of the text.
            break
        code = after[:2]
        remaining = after[2:]
        wants_markup = options.html or options.latex
        if not (wants_markup and code in accentedletters):
            # Plain output, or an escape we do not know: keep it verbatim.
            pieces.append("\\" + code)
            continue
        html_form, latex_form = accentedletters[code]
        pieces.append(html_form if options.html else latex_form)
    return "".join(pieces)

def process(inf, options):
    """Print one field value found in the lines of inf.

    A line counts as a match when, after stripping surrounding whitespace,
    it is longer than two characters, starts with options.field followed
    by ':', and (if options.contains is non-empty) contains that text.
    The first options.index - 1 matches are skipped; the next match has
    its value passed through convertTitle() and printed, and scanning
    stops.  Nothing is printed if there are too few matches.
    """
    to_skip = options.index
    for raw in inf:
        entry = raw.strip()
        if len(entry) <= 2:
            continue
        if entry[0] != options.field or entry[1] != ':':
            continue
        wanted = options.contains
        if len(wanted) != 0 and wanted not in entry:
            continue
        if to_skip > 1:
            # Not yet the requested occurrence.
            to_skip -= 1
            continue
        value = entry[2:].strip()
        print(convertTitle(value, options))
        break

# Command-line interface.  Positional arguments are the names of the files
# to read; when none are given the script reads standard input.
parser = optparse.OptionParser(usage="usage: %prog [options] [filename]\n\n"
                                     "  Extract field data from ABC file.")
# Single-character field letter to look for at the start of a line.
parser.add_option("-f", "--field", dest="field", default="T",
                  help="extract the field FIELD", metavar="FIELD")
# Output format selectors; with neither set the text is printed unconverted.
parser.add_option("-m", "--html", dest="html",
                  action="store_true", default=False,
                  help="format output for HTML")
parser.add_option("-l", "--latex", dest="latex",
                  action="store_true", default=False,
                  help="format output for LaTeX")
# Which matching line to report (1 = first).
parser.add_option("-n", "--index", dest="index",
                  action="store", type="int", default=1,
                  help="report INDEXth value [default: %default]",
                  metavar="INDEX")
# Optional substring a field line must contain to be considered a match.
parser.add_option("-c", "--contains", dest="contains",
                  action="store", type="string", default="",
                  help="report only if line contains CONTENT",
                  metavar="CONTENT")
(options, args) = parser.parse_args()

# HTML and LaTeX output are mutually exclusive.
if options.html and options.latex:
    sys.exit("You must choose one of HTML or LaTeX output")

if args:
    for arg in args:
        # 'with' closes the file even if process() raises.  If open() itself
        # fails, the I/O error propagates as-is instead of being masked by a
        # NameError from closing a file handle that was never bound.
        with open(arg, "r") as inf:
            process(inf, options)
else:
    # No filenames given: read the ABC text from standard input.
    process(sys.stdin, options)
sys.exit(0)