view abcfield.py @ 359:2a7d03d6a89f

Use txt2tags to format intro text. That way we can include basic markup in the intro text and use it in both LaTeX and HTML. Take advantage of this to expand the wording of the intro text in all books.
author Jim Hague <jim.hague@acm.org>
date Wed, 21 Aug 2013 10:28:10 +0100
parents 27f29e8aafea
children 760d0ae5acea
line wrap: on
line source

#!/usr/bin/env python
#
# Extract a text field (title, by default) from a .abc file, and print it out
# formatted for use in LaTeX or HTML.
#

import optparse
import sys

# Translation table for the two-character accent/ligature codes that follow a
# backslash in ABC text fields. Each key is the two characters after the
# backslash; each value is a pair (HTML entity, LaTeX markup).
#
# LaTeX control words that take no argument (\O, \o, \AE, \ae, \ss) are
# wrapped in braces: emitted bare, a following letter would be absorbed into
# the command name (e.g. "\ss" + "e" becomes the undefined "\sse") and a
# following space would be swallowed.
accentedletters = {
    # Acute accents
    "'A" : ("&Aacute;", "\\'{A}"),
    "'E" : ("&Eacute;", "\\'{E}"),
    "'I" : ("&Iacute;", "\\'{I}"),
    "'O" : ("&Oacute;", "\\'{O}"),
    "'U" : ("&Uacute;", "\\'{U}"),
    "'Y" : ("&Yacute;", "\\'{Y}"),
    "'a" : ("&aacute;", "\\'{a}"),
    "'e" : ("&eacute;", "\\'{e}"),
    "'i" : ("&iacute;", "\\'{i}"),
    "'o" : ("&oacute;", "\\'{o}"),
    "'u" : ("&uacute;", "\\'{u}"),
    "'y" : ("&yacute;", "\\'{y}"),

    # Grave accents
    "`A" : ("&Agrave;", "\\`{A}"),
    "`E" : ("&Egrave;", "\\`{E}"),
    "`I" : ("&Igrave;", "\\`{I}"),
    "`O" : ("&Ograve;", "\\`{O}"),
    "`U" : ("&Ugrave;", "\\`{U}"),
    "`a" : ("&agrave;", "\\`{a}"),
    "`e" : ("&egrave;", "\\`{e}"),
    "`i" : ("&igrave;", "\\`{i}"),
    "`o" : ("&ograve;", "\\`{o}"),
    "`u" : ("&ugrave;", "\\`{u}"),

    # Umlauts
    "\"A" : ("&Auml;", "\\\"{A}"),
    "\"E" : ("&Euml;", "\\\"{E}"),
    "\"I" : ("&Iuml;", "\\\"{I}"),
    "\"O" : ("&Ouml;", "\\\"{O}"),
    "\"U" : ("&Uuml;", "\\\"{U}"),
    "\"Y" : ("&Yuml;", "\\\"{Y}"),
    "\"a" : ("&auml;", "\\\"{a}"),
    "\"e" : ("&euml;", "\\\"{e}"),
    # The backslash before "i" is doubled so Python does not see the invalid
    # escape sequence "\i"; the resulting LaTeX is the dotless i, \i.
    "\"i" : ("&iuml;", "\\\"{\\i}"),
    "\"o" : ("&ouml;", "\\\"{o}"),
    "\"u" : ("&uuml;", "\\\"{u}"),
    "\"y" : ("&yuml;", "\\\"{y}"),

    # Circumflexes
    "^A" : ("&Acirc;", "\\^{A}"),
    "^E" : ("&Ecirc;", "\\^{E}"),
    "^I" : ("&Icirc;", "\\^{I}"),
    "^O" : ("&Ocirc;", "\\^{O}"),
    "^U" : ("&Ucirc;", "\\^{U}"),
    "^a" : ("&acirc;", "\\^{a}"),
    "^e" : ("&ecirc;", "\\^{e}"),
    "^i" : ("&icirc;", "\\^{\\i}"),
    "^o" : ("&ocirc;", "\\^{o}"),
    "^u" : ("&ucirc;", "\\^{u}"),

    # Tilde
    "~A" : ("&Atilde;", "\\~{A}"),
    "~N" : ("&Ntilde;", "\\~{N}"),
    "~O" : ("&Otilde;", "\\~{O}"),
    "~a" : ("&atilde;", "\\~{a}"),
    "~n" : ("&ntilde;", "\\~{n}"),
    "~o" : ("&otilde;", "\\~{o}"),

    # Cedilla
    ",C" : ("&Ccedil;", "\\c{C}"),
    ",c" : ("&ccedil;", "\\c{c}"),

    # Slash
    "/O" : ("&Oslash;", "{\\O}"),
    "/o" : ("&oslash;", "{\\o}"),

    # Ring
    "AA" : ("&Aring;", "\\r{A}"),
    "aa" : ("&aring;", "\\r{a}"),

    # Ligatures
    "AE" : ("&AElig;", "{\\AE}"),
    "ae" : ("&aelig;", "{\\ae}"),
    "ss" : ("&szlig;", "{\\ss}"),
}

def convertTitle(t, options):
    """Return t with its backslash codes translated for the chosen output.

    The text is scanned for backslashes. The two characters following each
    backslash are looked up in accentedletters; when options.html or
    options.latex is set and the pair is known, the HTML entity or the LaTeX
    markup respectively is substituted. Otherwise the backslash and the
    (up to two) characters after it are copied through unchanged.
    """
    pieces = []
    rest = t
    while True:
        head, slash, tail = rest.partition('\\')
        pieces.append(head)
        if slash == "":
            # No further backslash: everything has been consumed.
            return "".join(pieces)

        code, rest = tail[:2], tail[2:]

        # Pick the tuple slot for the requested format; HTML takes priority.
        if options.html:
            slot = 0
        elif options.latex:
            slot = 1
        else:
            slot = None

        if slot is not None and code in accentedletters:
            pieces.append(accentedletters[code][slot])
        else:
            # Unknown code, or plain output: keep the original text.
            pieces.append("\\" + code)

def process(inf, options):
    """Print the selected field value from the lines of inf.

    A line matches when, after stripping whitespace, it is longer than two
    characters, starts with the character options.field and has ':' as its
    second character. If options.contains is non-empty, the stripped line
    must also contain that text. The options.index-th matching line has its
    value (the text after the colon, stripped) converted with convertTitle
    and printed; scanning then stops. Nothing is printed if there are too
    few matches.
    """
    remaining = options.index
    for raw in inf:
        entry = raw.strip()

        is_field = len(entry) > 2 and entry[0] == options.field and entry[1] == ':'
        if not is_field:
            continue

        wanted = options.contains
        if len(wanted) > 0 and wanted not in entry:
            continue

        if remaining > 1:
            # Not the requested occurrence yet; keep counting down.
            remaining -= 1
            continue

        print(convertTitle(entry[2:].strip(), options))
        return

# Command-line interface: choose the field to extract, the output format,
# which occurrence to report and an optional substring filter.
parser = optparse.OptionParser(usage="usage: %prog [options] [filename]\n\n"
                                     "  Extract field data from ABC file.")
parser.add_option("-f", "--field", dest="field", default="T",
                  help="extract the field FIELD", metavar="FIELD")
parser.add_option("-m", "--html", dest="html",
                  action="store_true", default=False,
                  help="format output for HTML")
parser.add_option("-l", "--latex", dest="latex",
                  action="store_true", default=False,
                  help="format ouput for LaTeX")
parser.add_option("-n", "--index", dest="index",
                  action="store", type="int", default=1,
                  help="report INDEXth value [default: %default]",
                  metavar="INDEX")
parser.add_option("-c", "--contains", dest="contains",
                  action="store", type="string", default="",
                  help="report only if line contains CONTENT",
                  metavar="CONTENT")
(options, args) = parser.parse_args()

# The two output formats are mutually exclusive.
if options.html and options.latex:
    sys.exit("You must choose one of HTML or LaTeX output")

if len(args) > 0:
    # Each named file is processed independently with the same options.
    for arg in args:
        # The context manager closes the file even if process() raises, and
        # a failing open() now reports its own error instead of tripping
        # over an unbound (or stale) file variable during cleanup.
        with open(arg, "r") as inf:
            process(inf, options)
else:
    # No filenames given: read the ABC text from standard input.
    process(sys.stdin, options)
sys.exit(0)