diff abcfield.py @ 581:760d0ae5acea

Revise abcfield.py to recognise continuation fields. Also default to HTML entity output, and replace --contains with --starts, which, instead of matching anywhere in the line, matches only at the start of the field text and removes the matched prefix from the output.
author Jim Hague <jim.hague@acm.org>
date Sat, 29 Oct 2016 19:32:53 +0100
parents 27f29e8aafea
children 696c461c8dc0
line wrap: on
line diff
--- a/abcfield.py	Sun Oct 16 17:41:56 2016 +0100
+++ b/abcfield.py	Sat Oct 29 19:32:53 2016 +0100
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 #
 # Extact a text field (title, by default) from a .abc file, and print it out
-# formatted for use in LaTeX or HTML.
+# with any ABC accented characters converted to HTML (default) or LaTeX.
+# Recognise continuation fields and print those too.
 #
 
 import optparse
@@ -86,7 +87,7 @@
     "ss" : ("&szlig;", "\\ss"),
 }
 
-def convertTitle(t, options):
+def convertField(t, options):
     res = ""
     while True:
         p = t.partition('\\')
@@ -95,7 +96,7 @@
             break
         abc = p[2][0:2]
         t = p[2][2:]
-        if (options.html or options.latex) and abc in accentedletters:
+        if abc in accentedletters:
             if options.html:
                 res += accentedletters[abc][0]
             else:
@@ -106,41 +107,47 @@
 
 def process(inf, options):
     n = options.index
+    found = False
     for line in inf:
         line = line.strip()
-        if len(line) > 2 and line[0] == options.field and line[1] == ':':
-            if len(options.contains) > 0:
-                if line.find(options.contains) < 0:
+        if len(line) > 2 and line[1] == ':':
+            if found:
+                if line[0] != '+':
+                    break
+                line = line[2:].strip()
+            elif line[0] == options.field:
+                if n > 1:
+                    n = n - 1
                     continue
-            if n > 1:
-                n = n - 1
+                else:
+                    line = line[2:].strip()
+                    if len(options.starts) > 0:
+                        if line.find(options.starts) == 0:
+                            line = line[len(options.starts):].strip()
+                        else:
+                            continue
             else:
-                print(convertTitle(line[2:].strip(), options))
-                break
+                continue
+            found = True
+            print(convertField(line, options))
 
 parser = optparse.OptionParser(usage="usage: %prog [options] [filename]\n\n"
                                      "  Extract field data from ABC file.")
 parser.add_option("-f", "--field", dest="field", default="T",
                   help="extract the field FIELD", metavar="FIELD")
-parser.add_option("-m", "--html", dest="html",
-                  action="store_true", default=False,
-                  help="format output for HTML")
 parser.add_option("-l", "--latex", dest="latex",
                   action="store_true", default=False,
-                  help="format ouput for LaTeX")
+                  help="convert special characters for LaTeX")
 parser.add_option("-n", "--index", dest="index",
                   action="store", type="int", default=1,
                   help="report INDEXth value [default: %default]",
                   metavar="INDEX")
-parser.add_option("-c", "--contains", dest="contains",
+parser.add_option("-s", "--starts", dest="starts",
                   action="store", type="string", default="",
-                  help="report only if line contains CONTENT",
+                  help="report only if line starts CONTENT and remove CONTENT",
                   metavar="CONTENT")
 (options, args) = parser.parse_args()
 
-if options.html and options.latex:
-    sys.exit("You must choose one of HTML or LaTeX output")
-
 if len(args) > 0:
     for arg in args:
         try: