comparison abc2xml/pyparsing.py @ 1084:b1dbb76f4eb9 build-default-404

Update abc2xml to latest - Python3 friendly.
author Jim Hague <jim.hague@acm.org>
date Fri, 18 Nov 2022 21:42:55 +0000
parents 4fab69a1027d
children
comparison
equal deleted inserted replaced
1083:b66bc498220d 1084:b1dbb76f4eb9
1 # module pyparsing.py 1 # module pyparsing.py
2 # 2 #
3 # Copyright (c) 2003-2011 Paul T. McGuire 3 # Copyright (c) 2003-2013 Paul T. McGuire
4 # 4 #
5 # Permission is hereby granted, free of charge, to any person obtaining 5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the 6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including 7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, 8 # without limitation the rights to use, copy, modify, merge, publish,
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 # 23 #
24 #from __future__ import generators
25 24
26 __doc__ = \ 25 __doc__ = \
27 """ 26 """
28 pyparsing module - Classes and methods to define and execute parsing grammars 27 pyparsing module - Classes and methods to define and execute parsing grammars
29 28
38 37
39 # define grammar of a greeting 38 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!" 39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41 40
42 hello = "Hello, World!" 41 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello ) 42 print (hello, "->", greet.parseString( hello ))
44 43
45 The program outputs the following:: 44 The program outputs the following::
46 45
47 Hello, World! -> ['Hello', ',', 'World', '!'] 46 Hello, World! -> ['Hello', ',', 'World', '!']
48 47
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings 56 - quoted strings
58 - embedded comments 57 - embedded comments
59 """ 58 """
60 59
61 __version__ = "1.5.6" 60 __version__ = "2.0.1"
62 __versionTime__ = "26 June 2011 10:53" 61 __versionTime__ = "16 July 2013 22:22"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64 63
65 import string 64 import string
66 from weakref import ref as wkref 65 from weakref import ref as wkref
67 import copy 66 import copy
68 import sys 67 import sys
69 import warnings 68 import warnings
70 import re 69 import re
71 import sre_constants 70 import sre_constants
71 import collections
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73 73
74 __all__ = [ 74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock', 'originalTextFor', 91 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation',
92 ] 92 ]
93 93
94 """ 94 PY_3 = sys.version.startswith('3')
95 Detect if we are running version 3.X and make appropriate changes 95 if PY_3:
96 Robert A. Clark
97 """
98 _PY3K = sys.version_info[0] > 2
99 if _PY3K:
100 _MAX_INT = sys.maxsize 96 _MAX_INT = sys.maxsize
101 basestring = str 97 basestring = str
102 unichr = chr 98 unichr = chr
103 _ustr = str 99 _ustr = str
104 alphas = string.ascii_lowercase + string.ascii_uppercase 100
101 # build list of single arg builtins, that can be used as parse actions
102 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
103
105 else: 104 else:
106 _MAX_INT = sys.maxint 105 _MAX_INT = sys.maxint
107 range = xrange 106 range = xrange
108 set = lambda s : dict( [(c,0) for c in s] )
109 alphas = string.lowercase + string.uppercase
110 107
111 def _ustr(obj): 108 def _ustr(obj):
112 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
113 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
114 then < returns the unicode object | encodes it with the default encoding | ... >. 111 then < returns the unicode object | encodes it with the default encoding | ... >.
132 # Replace unprintables with escape codes? 129 # Replace unprintables with escape codes?
133 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 130 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
134 # Replace unprintables with question marks? 131 # Replace unprintables with question marks?
135 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 132 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
136 # ... 133 # ...
137 134
138 alphas = string.lowercase + string.uppercase 135 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
139 136 singleArgBuiltins = []
140 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 137 import __builtin__
141 singleArgBuiltins = [] 138 for fname in "sum len sorted reversed list tuple set any all min max".split():
142 import __builtin__ 139 try:
143 for fname in "sum len enumerate sorted reversed list tuple set any all".split(): 140 singleArgBuiltins.append(getattr(__builtin__,fname))
144 try: 141 except AttributeError:
145 singleArgBuiltins.append(getattr(__builtin__,fname)) 142 continue
146 except AttributeError: 143
147 continue
148 144
149 def _xml_escape(data): 145 def _xml_escape(data):
150 """Escape &, <, >, ", ', etc. in a string of data.""" 146 """Escape &, <, >, ", ', etc. in a string of data."""
151 147
152 # ampersand must be replaced first 148 # ampersand must be replaced first
153 from_symbols = '&><"\'' 149 from_symbols = '&><"\''
154 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 150 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
155 for from_,to_ in zip(from_symbols, to_symbols): 151 for from_,to_ in zip(from_symbols, to_symbols):
156 data = data.replace(from_, to_) 152 data = data.replace(from_, to_)
157 return data 153 return data
158 154
159 class _Constants(object): 155 class _Constants(object):
160 pass 156 pass
161 157
162 nums = string.digits 158 alphas = string.ascii_lowercase + string.ascii_uppercase
159 nums = "0123456789"
163 hexnums = nums + "ABCDEFabcdef" 160 hexnums = nums + "ABCDEFabcdef"
164 alphanums = alphas + nums 161 alphanums = alphas + nums
165 _bslash = chr(92) 162 _bslash = chr(92)
166 printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) 163 printables = "".join(c for c in string.printable if c not in string.whitespace)
167 164
168 class ParseBaseException(Exception): 165 class ParseBaseException(Exception):
169 """base exception class for all parsing runtime exceptions""" 166 """base exception class for all parsing runtime exceptions"""
170 # Performance tuning: we construct a *lot* of these, so keep this 167 # Performance tuning: we construct a *lot* of these, so keep this
171 # constructor as small and fast as possible 168 # constructor as small and fast as possible
204 the location of the exception with a special symbol. 201 the location of the exception with a special symbol.
205 """ 202 """
206 line_str = self.line 203 line_str = self.line
207 line_column = self.column - 1 204 line_column = self.column - 1
208 if markerString: 205 if markerString:
209 line_str = "".join( [line_str[:line_column], 206 line_str = "".join(line_str[:line_column],
210 markerString, line_str[line_column:]]) 207 markerString, line_str[line_column:])
211 return line_str.strip() 208 return line_str.strip()
212 def __dir__(self): 209 def __dir__(self):
213 return "loc msg pstr parserElement lineno col line " \ 210 return "loc msg pstr parserElement lineno col line " \
214 "markInputLine __str__ __repr__".split() 211 "markInputline __str__ __repr__".split()
215 212
216 class ParseException(ParseBaseException): 213 class ParseException(ParseBaseException):
217 """exception thrown when parse expressions don't match class; 214 """exception thrown when parse expressions don't match class;
218 supported attributes by name are: 215 supported attributes by name are:
219 - lineno - returns the line number of the exception text 216 - lineno - returns the line number of the exception text
226 """user-throwable exception thrown when inconsistent parse content 223 """user-throwable exception thrown when inconsistent parse content
227 is found; stops all parsing immediately""" 224 is found; stops all parsing immediately"""
228 pass 225 pass
229 226
230 class ParseSyntaxException(ParseFatalException): 227 class ParseSyntaxException(ParseFatalException):
231 """just like C{ParseFatalException}, but thrown internally when an 228 """just like C{L{ParseFatalException}}, but thrown internally when an
232 C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because 229 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
233 an unbacktrackable syntax error has been found""" 230 an unbacktrackable syntax error has been found"""
234 def __init__(self, pe): 231 def __init__(self, pe):
235 super(ParseSyntaxException, self).__init__( 232 super(ParseSyntaxException, self).__init__(
236 pe.pstr, pe.loc, pe.msg, pe.parserElement) 233 pe.pstr, pe.loc, pe.msg, pe.parserElement)
237 234
442 439
443 def __repr__( self ): 440 def __repr__( self ):
444 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 441 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
445 442
446 def __str__( self ): 443 def __str__( self ):
447 out = "[" 444 out = []
448 sep = ""
449 for i in self.__toklist: 445 for i in self.__toklist:
450 if isinstance(i, ParseResults): 446 if isinstance(i, ParseResults):
451 out += sep + _ustr(i) 447 out.append(_ustr(i))
452 else: 448 else:
453 out += sep + repr(i) 449 out.append(repr(i))
454 sep = ", " 450 return '[' + ', '.join(out) + ']'
455 out += "]"
456 return out
457 451
458 def _asStringList( self, sep='' ): 452 def _asStringList( self, sep='' ):
459 out = [] 453 out = []
460 for item in self.__toklist: 454 for item in self.__toklist:
461 if out and sep: 455 if out and sep:
491 485
492 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 486 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
493 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" 487 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
494 nl = "\n" 488 nl = "\n"
495 out = [] 489 out = []
496 namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() 490 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
497 for v in vlist ] ) 491 for v in vlist)
498 nextLevelIndent = indent + " " 492 nextLevelIndent = indent + " "
499 493
500 # collapse out indents if formatting is not desired 494 # collapse out indents if formatting is not desired
501 if not formatted: 495 if not formatted:
502 indent = "" 496 indent = ""
614 self.__parent = wkref(par) 608 self.__parent = wkref(par)
615 else: 609 else:
616 self.__parent = None 610 self.__parent = None
617 611
618 def __dir__(self): 612 def __dir__(self):
619 return dir(super(ParseResults,self)) + self.keys() 613 return dir(super(ParseResults,self)) + list(self.keys())
614
615 if hasattr (collections, 'MutableMapping'):
616 collections.MutableMapping.register(ParseResults)
617 else:
618 from collections.abc import MutableMapping
619 MutableMapping.register (ParseResults)
620 620
621 def col (loc,strg): 621 def col (loc,strg):
622 """Returns current column within a string, counting newlines as line separators. 622 """Returns current column within a string, counting newlines as line separators.
623 The first column is number 1. 623 The first column is number 1.
624 624
625 Note: the default parsing behavior is to expand tabs in the input string 625 Note: the default parsing behavior is to expand tabs in the input string
626 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 626 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
627 on parsing strings containing <TAB>s, and suggested methods to maintain a 627 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
628 consistent view of the parsed string, the parse location, and line and column 628 consistent view of the parsed string, the parse location, and line and column
629 positions within the parsed string. 629 positions within the parsed string.
630 """ 630 """
631 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) 631 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
632 632
634 """Returns current line number within a string, counting newlines as line separators. 634 """Returns current line number within a string, counting newlines as line separators.
635 The first line is number 1. 635 The first line is number 1.
636 636
637 Note: the default parsing behavior is to expand tabs in the input string 637 Note: the default parsing behavior is to expand tabs in the input string
638 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 638 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
639 on parsing strings containing <TAB>s, and suggested methods to maintain a 639 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
640 consistent view of the parsed string, the parse location, and line and column 640 consistent view of the parsed string, the parse location, and line and column
641 positions within the parsed string. 641 positions within the parsed string.
642 """ 642 """
643 return strg.count("\n",0,loc) + 1 643 return strg.count("\n",0,loc) + 1
644 644
651 return strg[lastCR+1:nextCR] 651 return strg[lastCR+1:nextCR]
652 else: 652 else:
653 return strg[lastCR+1:] 653 return strg[lastCR+1:]
654 654
655 def _defaultStartDebugAction( instring, loc, expr ): 655 def _defaultStartDebugAction( instring, loc, expr ):
656 print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 656 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
657 657
658 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): 658 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
659 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 659 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
660 660
661 def _defaultExceptionDebugAction( instring, loc, expr, exc ): 661 def _defaultExceptionDebugAction( instring, loc, expr, exc ):
663 663
664 def nullDebugAction(*args): 664 def nullDebugAction(*args):
665 """'Do-nothing' debug action, to suppress debugging output during parsing.""" 665 """'Do-nothing' debug action, to suppress debugging output during parsing."""
666 pass 666 pass
667 667
668 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
669 #~ 'decorator to trim function calls to match the arity of the target'
670 #~ def _trim_arity(func, maxargs=3):
671 #~ if func in singleArgBuiltins:
672 #~ return lambda s,l,t: func(t)
673 #~ limit = 0
674 #~ foundArity = False
675 #~ def wrapper(*args):
676 #~ nonlocal limit,foundArity
677 #~ while 1:
678 #~ try:
679 #~ ret = func(*args[limit:])
680 #~ foundArity = True
681 #~ return ret
682 #~ except TypeError:
683 #~ if limit == maxargs or foundArity:
684 #~ raise
685 #~ limit += 1
686 #~ continue
687 #~ return wrapper
688
689 # this version is Python 2.x-3.x cross-compatible
668 'decorator to trim function calls to match the arity of the target' 690 'decorator to trim function calls to match the arity of the target'
669 if not _PY3K: 691 def _trim_arity(func, maxargs=2):
670 def _trim_arity(func, maxargs=2): 692 if func in singleArgBuiltins:
671 limit = [0] 693 return lambda s,l,t: func(t)
672 def wrapper(*args): 694 limit = [0]
673 while 1: 695 foundArity = [False]
674 try: 696 def wrapper(*args):
675 return func(*args[limit[0]:]) 697 while 1:
676 except TypeError: 698 try:
677 if limit[0] <= maxargs: 699 ret = func(*args[limit[0]:])
678 limit[0] += 1 700 foundArity[0] = True
679 continue 701 return ret
680 raise 702 except TypeError:
681 return wrapper 703 if limit[0] <= maxargs and not foundArity[0]:
682 else: 704 limit[0] += 1
683 def _trim_arity(func, maxargs=2): 705 continue
684 limit = maxargs 706 raise
685 def wrapper(*args): 707 return wrapper
686 #~ nonlocal limit 708
687 while 1:
688 try:
689 return func(*args[limit:])
690 except TypeError:
691 if limit:
692 limit -= 1
693 continue
694 raise
695 return wrapper
696
697 class ParserElement(object): 709 class ParserElement(object):
698 """Abstract base level parser element class.""" 710 """Abstract base level parser element class."""
699 DEFAULT_WHITE_CHARS = " \n\t\r" 711 DEFAULT_WHITE_CHARS = " \n\t\r"
700 verbose_stacktrace = False 712 verbose_stacktrace = False
701 713
702 def setDefaultWhitespaceChars( chars ): 714 def setDefaultWhitespaceChars( chars ):
703 """Overrides the default whitespace chars 715 """Overrides the default whitespace chars
704 """ 716 """
705 ParserElement.DEFAULT_WHITE_CHARS = chars 717 ParserElement.DEFAULT_WHITE_CHARS = chars
706 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 718 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
719
720 def inlineLiteralsUsing(cls):
721 """
722 Set class to be used for inclusion of string literals into a parser.
723 """
724 ParserElement.literalStringClass = cls
725 inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
707 726
708 def __init__( self, savelist=False ): 727 def __init__( self, savelist=False ):
709 self.parseAction = list() 728 self.parseAction = list()
710 self.failAction = None 729 self.failAction = None
711 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 730 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
787 """Define action to perform when successfully matching parse element definition. 806 """Define action to perform when successfully matching parse element definition.
788 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 807 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
789 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 808 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
790 - s = the original string being parsed (see note below) 809 - s = the original string being parsed (see note below)
791 - loc = the location of the matching substring 810 - loc = the location of the matching substring
792 - toks = a list of the matched tokens, packaged as a ParseResults object 811 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
793 If the functions in fns modify the tokens, they can return them as the return 812 If the functions in fns modify the tokens, they can return them as the return
794 value from fn, and the modified list of tokens will replace the original. 813 value from fn, and the modified list of tokens will replace the original.
795 Otherwise, fn does not need to return any value. 814 Otherwise, fn does not need to return any value.
796 815
797 Note: the default parsing behavior is to expand tabs in the input string 816 Note: the default parsing behavior is to expand tabs in the input string
798 before starting the parsing process. See L{I{parseString}<parseString>} for more information 817 before starting the parsing process. See L{I{parseString}<parseString>} for more information
799 on parsing strings containing <TAB>s, and suggested methods to maintain a 818 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
800 consistent view of the parsed string, the parse location, and line and column 819 consistent view of the parsed string, the parse location, and line and column
801 positions within the parsed string. 820 positions within the parsed string.
802 """ 821 """
803 self.parseAction = list(map(_trim_arity, list(fns))) 822 self.parseAction = list(map(_trim_arity, list(fns)))
804 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 823 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
816 C{fn(s,loc,expr,err)} where: 835 C{fn(s,loc,expr,err)} where:
817 - s = string being parsed 836 - s = string being parsed
818 - loc = location where expression match was attempted and failed 837 - loc = location where expression match was attempted and failed
819 - expr = the parse expression that failed 838 - expr = the parse expression that failed
820 - err = the exception thrown 839 - err = the exception thrown
821 The function returns no value. It may throw C{ParseFatalException} 840 The function returns no value. It may throw C{L{ParseFatalException}}
822 if it is desired to stop parsing immediately.""" 841 if it is desired to stop parsing immediately."""
823 self.failAction = fn 842 self.failAction = fn
824 return self 843 return self
825 844
826 def _skipIgnorables( self, instring, loc ): 845 def _skipIgnorables( self, instring, loc ):
870 try: 889 try:
871 try: 890 try:
872 loc,tokens = self.parseImpl( instring, preloc, doActions ) 891 loc,tokens = self.parseImpl( instring, preloc, doActions )
873 except IndexError: 892 except IndexError:
874 raise ParseException( instring, len(instring), self.errmsg, self ) 893 raise ParseException( instring, len(instring), self.errmsg, self )
875 except ParseBaseException: 894 except ParseBaseException as err:
876 #~ print ("Exception raised:", err) 895 #~ print ("Exception raised:", err)
877 err = None
878 if self.debugActions[2]: 896 if self.debugActions[2]:
879 err = sys.exc_info()[1]
880 self.debugActions[2]( instring, tokensStart, self, err ) 897 self.debugActions[2]( instring, tokensStart, self, err )
881 if self.failAction: 898 if self.failAction:
882 if err is None:
883 err = sys.exc_info()[1]
884 self.failAction( instring, tokensStart, self, err ) 899 self.failAction( instring, tokensStart, self, err )
885 raise 900 raise
886 else: 901 else:
887 if callPreParse and self.callPreparse: 902 if callPreParse and self.callPreparse:
888 preloc = self.preParse( instring, loc ) 903 preloc = self.preParse( instring, loc )
908 if tokens is not None: 923 if tokens is not None:
909 retTokens = ParseResults( tokens, 924 retTokens = ParseResults( tokens,
910 self.resultsName, 925 self.resultsName,
911 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 926 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
912 modal=self.modalResults ) 927 modal=self.modalResults )
913 except ParseBaseException: 928 except ParseBaseException as err:
914 #~ print "Exception raised in user parse action:", err 929 #~ print "Exception raised in user parse action:", err
915 if (self.debugActions[2] ): 930 if (self.debugActions[2] ):
916 err = sys.exc_info()[1]
917 self.debugActions[2]( instring, tokensStart, self, err ) 931 self.debugActions[2]( instring, tokensStart, self, err )
918 raise 932 raise
919 else: 933 else:
920 for fn in self.parseAction: 934 for fn in self.parseAction:
921 tokens = fn( instring, tokensStart, retTokens ) 935 tokens = fn( instring, tokensStart, retTokens )
950 else: 964 else:
951 try: 965 try:
952 value = self._parseNoCache( instring, loc, doActions, callPreParse ) 966 value = self._parseNoCache( instring, loc, doActions, callPreParse )
953 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) 967 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
954 return value 968 return value
955 except ParseBaseException: 969 except ParseBaseException as pe:
956 pe = sys.exc_info()[1] 970 pe.__traceback__ = None
957 ParserElement._exprArgCache[ lookup ] = pe 971 ParserElement._exprArgCache[ lookup ] = pe
958 raise 972 raise
959 973
960 _parse = _parseNoCache 974 _parse = _parseNoCache
961 975
992 This is the main interface to the client code, once the complete 1006 This is the main interface to the client code, once the complete
993 expression has been built. 1007 expression has been built.
994 1008
995 If you want the grammar to require that the entire input string be 1009 If you want the grammar to require that the entire input string be
996 successfully parsed, then set C{parseAll} to True (equivalent to ending 1010 successfully parsed, then set C{parseAll} to True (equivalent to ending
997 the grammar with C{StringEnd()}). 1011 the grammar with C{L{StringEnd()}}).
998 1012
999 Note: C{parseString} implicitly calls C{expandtabs()} on the input string, 1013 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1000 in order to report proper column numbers in parse actions. 1014 in order to report proper column numbers in parse actions.
1001 If the input string contains tabs and 1015 If the input string contains tabs and
1002 the grammar uses parse actions that use the C{loc} argument to index into the 1016 the grammar uses parse actions that use the C{loc} argument to index into the
1021 loc, tokens = self._parse( instring, 0 ) 1035 loc, tokens = self._parse( instring, 0 )
1022 if parseAll: 1036 if parseAll:
1023 loc = self.preParse( instring, loc ) 1037 loc = self.preParse( instring, loc )
1024 se = Empty() + StringEnd() 1038 se = Empty() + StringEnd()
1025 se._parse( instring, loc ) 1039 se._parse( instring, loc )
1026 except ParseBaseException: 1040 except ParseBaseException as exc:
1027 if ParserElement.verbose_stacktrace: 1041 if ParserElement.verbose_stacktrace:
1028 raise 1042 raise
1029 else: 1043 else:
1030 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1044 # catch and re-raise exception from here, clears out pyparsing internal stack trace
1031 exc = sys.exc_info()[1]
1032 raise exc 1045 raise exc
1033 else: 1046 else:
1034 return tokens 1047 return tokens
1035 1048
1036 def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): 1049 def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
1074 loc += 1 1087 loc += 1
1075 else: 1088 else:
1076 loc = nextLoc 1089 loc = nextLoc
1077 else: 1090 else:
1078 loc = preloc+1 1091 loc = preloc+1
1079 except ParseBaseException: 1092 except ParseBaseException as exc:
1080 if ParserElement.verbose_stacktrace: 1093 if ParserElement.verbose_stacktrace:
1081 raise 1094 raise
1082 else: 1095 else:
1083 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1096 # catch and re-raise exception from here, clears out pyparsing internal stack trace
1084 exc = sys.exc_info()[1]
1085 raise exc 1097 raise exc
1086 1098
1087 def transformString( self, instring ): 1099 def transformString( self, instring ):
1088 """Extension to C{scanString}, to modify matching text with modified tokens that may 1100 """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
1089 be returned from a parse action. To use C{transformString}, define a grammar and 1101 be returned from a parse action. To use C{transformString}, define a grammar and
1090 attach a parse action to it that modifies the returned token list. 1102 attach a parse action to it that modifies the returned token list.
1091 Invoking C{transformString()} on a target string will then scan for matches, 1103 Invoking C{transformString()} on a target string will then scan for matches,
1092 and replace the matched text patterns according to the logic in the parse 1104 and replace the matched text patterns according to the logic in the parse
1093 action. C{transformString()} returns the resulting transformed string.""" 1105 action. C{transformString()} returns the resulting transformed string."""
1108 out.append(t) 1120 out.append(t)
1109 lastE = e 1121 lastE = e
1110 out.append(instring[lastE:]) 1122 out.append(instring[lastE:])
1111 out = [o for o in out if o] 1123 out = [o for o in out if o]
1112 return "".join(map(_ustr,_flatten(out))) 1124 return "".join(map(_ustr,_flatten(out)))
1113 except ParseBaseException: 1125 except ParseBaseException as exc:
1114 if ParserElement.verbose_stacktrace: 1126 if ParserElement.verbose_stacktrace:
1115 raise 1127 raise
1116 else: 1128 else:
1117 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1129 # catch and re-raise exception from here, clears out pyparsing internal stack trace
1118 exc = sys.exc_info()[1]
1119 raise exc 1130 raise exc
1120 1131
1121 def searchString( self, instring, maxMatches=_MAX_INT ): 1132 def searchString( self, instring, maxMatches=_MAX_INT ):
1122 """Another extension to C{scanString}, simplifying the access to the tokens found 1133 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1123 to match the given parse expression. May be called with optional 1134 to match the given parse expression. May be called with optional
1124 C{maxMatches} argument, to clip searching after 'n' matches are found. 1135 C{maxMatches} argument, to clip searching after 'n' matches are found.
1125 """ 1136 """
1126 try: 1137 try:
1127 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 1138 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1128 except ParseBaseException: 1139 except ParseBaseException as exc:
1129 if ParserElement.verbose_stacktrace: 1140 if ParserElement.verbose_stacktrace:
1130 raise 1141 raise
1131 else: 1142 else:
1132 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1143 # catch and re-raise exception from here, clears out pyparsing internal stack trace
1133 exc = sys.exc_info()[1]
1134 raise exc 1144 raise exc
1135 1145
1136 def __add__(self, other ): 1146 def __add__(self, other ):
1137 """Implementation of + operator - returns And""" 1147 """Implementation of + operator - returns C{L{And}}"""
1138 if isinstance( other, basestring ): 1148 if isinstance( other, basestring ):
1139 other = Literal( other ) 1149 other = ParserElement.literalStringClass( other )
1140 if not isinstance( other, ParserElement ): 1150 if not isinstance( other, ParserElement ):
1141 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1151 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1142 SyntaxWarning, stacklevel=2) 1152 SyntaxWarning, stacklevel=2)
1143 return None 1153 return None
1144 return And( [ self, other ] ) 1154 return And( [ self, other ] )
1145 1155
1146 def __radd__(self, other ): 1156 def __radd__(self, other ):
1147 """Implementation of + operator when left operand is not a C{ParserElement}""" 1157 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1148 if isinstance( other, basestring ): 1158 if isinstance( other, basestring ):
1149 other = Literal( other ) 1159 other = ParserElement.literalStringClass( other )
1150 if not isinstance( other, ParserElement ): 1160 if not isinstance( other, ParserElement ):
1151 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1161 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1152 SyntaxWarning, stacklevel=2) 1162 SyntaxWarning, stacklevel=2)
1153 return None 1163 return None
1154 return other + self 1164 return other + self
1155 1165
1156 def __sub__(self, other): 1166 def __sub__(self, other):
1157 """Implementation of - operator, returns C{And} with error stop""" 1167 """Implementation of - operator, returns C{L{And}} with error stop"""
1158 if isinstance( other, basestring ): 1168 if isinstance( other, basestring ):
1159 other = Literal( other ) 1169 other = ParserElement.literalStringClass( other )
1160 if not isinstance( other, ParserElement ): 1170 if not isinstance( other, ParserElement ):
1161 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1171 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1162 SyntaxWarning, stacklevel=2) 1172 SyntaxWarning, stacklevel=2)
1163 return None 1173 return None
1164 return And( [ self, And._ErrorStop(), other ] ) 1174 return And( [ self, And._ErrorStop(), other ] )
1165 1175
1166 def __rsub__(self, other ): 1176 def __rsub__(self, other ):
1167 """Implementation of - operator when left operand is not a C{ParserElement}""" 1177 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1168 if isinstance( other, basestring ): 1178 if isinstance( other, basestring ):
1169 other = Literal( other ) 1179 other = ParserElement.literalStringClass( other )
1170 if not isinstance( other, ParserElement ): 1180 if not isinstance( other, ParserElement ):
1171 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1181 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1172 SyntaxWarning, stacklevel=2) 1182 SyntaxWarning, stacklevel=2)
1173 return None 1183 return None
1174 return other - self 1184 return other - self
1177 """Implementation of * operator, allows use of C{expr * 3} in place of 1187 """Implementation of * operator, allows use of C{expr * 3} in place of
1178 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1188 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
1179 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1189 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1180 may also include C{None} as in: 1190 may also include C{None} as in:
1181 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1191 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1182 to C{expr*n + ZeroOrMore(expr)} 1192 to C{expr*n + L{ZeroOrMore}(expr)}
1183 (read as "at least n instances of C{expr}") 1193 (read as "at least n instances of C{expr}")
1184 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1194 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1185 (read as "0 to n instances of C{expr}") 1195 (read as "0 to n instances of C{expr}")
1186 - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} 1196 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1187 - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} 1197 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1188 1198
1189 Note that C{expr*(None,n)} does not raise an exception if 1199 Note that C{expr*(None,n)} does not raise an exception if
1190 more than n exprs exist in the input stream; that is, 1200 more than n exprs exist in the input stream; that is,
1191 C{expr*(None,n)} does not enforce a maximum number of expr 1201 C{expr*(None,n)} does not enforce a maximum number of expr
1192 occurrences. If this behavior is desired, then write 1202 occurrences. If this behavior is desired, then write
1243 1253
1244 def __rmul__(self, other): 1254 def __rmul__(self, other):
1245 return self.__mul__(other) 1255 return self.__mul__(other)
1246 1256
1247 def __or__(self, other ): 1257 def __or__(self, other ):
1248 """Implementation of | operator - returns C{MatchFirst}""" 1258 """Implementation of | operator - returns C{L{MatchFirst}}"""
1249 if isinstance( other, basestring ): 1259 if isinstance( other, basestring ):
1250 other = Literal( other ) 1260 other = ParserElement.literalStringClass( other )
1251 if not isinstance( other, ParserElement ): 1261 if not isinstance( other, ParserElement ):
1252 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1262 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1253 SyntaxWarning, stacklevel=2) 1263 SyntaxWarning, stacklevel=2)
1254 return None 1264 return None
1255 return MatchFirst( [ self, other ] ) 1265 return MatchFirst( [ self, other ] )
1256 1266
1257 def __ror__(self, other ): 1267 def __ror__(self, other ):
1258 """Implementation of | operator when left operand is not a C{ParserElement}""" 1268 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1259 if isinstance( other, basestring ): 1269 if isinstance( other, basestring ):
1260 other = Literal( other ) 1270 other = ParserElement.literalStringClass( other )
1261 if not isinstance( other, ParserElement ): 1271 if not isinstance( other, ParserElement ):
1262 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1272 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1263 SyntaxWarning, stacklevel=2) 1273 SyntaxWarning, stacklevel=2)
1264 return None 1274 return None
1265 return other | self 1275 return other | self
1266 1276
1267 def __xor__(self, other ): 1277 def __xor__(self, other ):
1268 """Implementation of ^ operator - returns C{Or}""" 1278 """Implementation of ^ operator - returns C{L{Or}}"""
1269 if isinstance( other, basestring ): 1279 if isinstance( other, basestring ):
1270 other = Literal( other ) 1280 other = ParserElement.literalStringClass( other )
1271 if not isinstance( other, ParserElement ): 1281 if not isinstance( other, ParserElement ):
1272 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1282 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1273 SyntaxWarning, stacklevel=2) 1283 SyntaxWarning, stacklevel=2)
1274 return None 1284 return None
1275 return Or( [ self, other ] ) 1285 return Or( [ self, other ] )
1276 1286
1277 def __rxor__(self, other ): 1287 def __rxor__(self, other ):
1278 """Implementation of ^ operator when left operand is not a C{ParserElement}""" 1288 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1279 if isinstance( other, basestring ): 1289 if isinstance( other, basestring ):
1280 other = Literal( other ) 1290 other = ParserElement.literalStringClass( other )
1281 if not isinstance( other, ParserElement ): 1291 if not isinstance( other, ParserElement ):
1282 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1292 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1283 SyntaxWarning, stacklevel=2) 1293 SyntaxWarning, stacklevel=2)
1284 return None 1294 return None
1285 return other ^ self 1295 return other ^ self
1286 1296
1287 def __and__(self, other ): 1297 def __and__(self, other ):
1288 """Implementation of & operator - returns C{Each}""" 1298 """Implementation of & operator - returns C{L{Each}}"""
1289 if isinstance( other, basestring ): 1299 if isinstance( other, basestring ):
1290 other = Literal( other ) 1300 other = ParserElement.literalStringClass( other )
1291 if not isinstance( other, ParserElement ): 1301 if not isinstance( other, ParserElement ):
1292 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1302 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1293 SyntaxWarning, stacklevel=2) 1303 SyntaxWarning, stacklevel=2)
1294 return None 1304 return None
1295 return Each( [ self, other ] ) 1305 return Each( [ self, other ] )
1296 1306
1297 def __rand__(self, other ): 1307 def __rand__(self, other ):
1298 """Implementation of & operator when left operand is not a C{ParserElement}""" 1308 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1299 if isinstance( other, basestring ): 1309 if isinstance( other, basestring ):
1300 other = Literal( other ) 1310 other = ParserElement.literalStringClass( other )
1301 if not isinstance( other, ParserElement ): 1311 if not isinstance( other, ParserElement ):
1302 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1312 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1303 SyntaxWarning, stacklevel=2) 1313 SyntaxWarning, stacklevel=2)
1304 return None 1314 return None
1305 return other & self 1315 return other & self
1306 1316
1307 def __invert__( self ): 1317 def __invert__( self ):
1308 """Implementation of ~ operator - returns C{NotAny}""" 1318 """Implementation of ~ operator - returns C{L{NotAny}}"""
1309 return NotAny( self ) 1319 return NotAny( self )
1310 1320
1311 def __call__(self, name): 1321 def __call__(self, name):
1312 """Shortcut for C{setResultsName}, with C{listAllMatches=default}:: 1322 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1313 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1323 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1314 could be written as:: 1324 could be written as::
1315 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1325 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1316 1326
1317 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 1327 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1401 the entire file is opened, read, and closed before parsing. 1411 the entire file is opened, read, and closed before parsing.
1402 """ 1412 """
1403 try: 1413 try:
1404 file_contents = file_or_filename.read() 1414 file_contents = file_or_filename.read()
1405 except AttributeError: 1415 except AttributeError:
1406 f = open(file_or_filename, "rb") 1416 f = open(file_or_filename, "r")
1407 file_contents = f.read() 1417 file_contents = f.read()
1408 f.close() 1418 f.close()
1409 try: 1419 try:
1410 return self.parseString(file_contents, parseAll) 1420 return self.parseString(file_contents, parseAll)
1411 except ParseBaseException: 1421 except ParseBaseException as exc:
1412 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1422 if ParserElement.verbose_stacktrace:
1413 exc = sys.exc_info()[1] 1423 raise
1414 raise exc 1424 else:
1415 1425 # catch and re-raise exception from here, clears out pyparsing internal stack trace
1416 def getException(self): 1426 raise exc
1417 return ParseException("",0,self.errmsg,self)
1418
1419 def __getattr__(self,aname):
1420 if aname == "myException":
1421 self.myException = ret = self.getException();
1422 return ret;
1423 else:
1424 raise AttributeError("no such attribute " + aname)
1425 1427
1426 def __eq__(self,other): 1428 def __eq__(self,other):
1427 if isinstance(other, ParserElement): 1429 if isinstance(other, ParserElement):
1428 return self is other or self.__dict__ == other.__dict__ 1430 return self is other or self.__dict__ == other.__dict__
1429 elif isinstance(other, basestring): 1431 elif isinstance(other, basestring):
1476 self.mayReturnEmpty = True 1478 self.mayReturnEmpty = True
1477 self.mayIndexError = False 1479 self.mayIndexError = False
1478 self.errmsg = "Unmatchable token" 1480 self.errmsg = "Unmatchable token"
1479 1481
1480 def parseImpl( self, instring, loc, doActions=True ): 1482 def parseImpl( self, instring, loc, doActions=True ):
1481 exc = self.myException 1483 raise ParseException(instring, loc, self.errmsg, self)
1482 exc.loc = loc
1483 exc.pstr = instring
1484 raise exc
1485 1484
1486 1485
1487 class Literal(Token): 1486 class Literal(Token):
1488 """Token to exactly match a specified string.""" 1487 """Token to exactly match a specified string."""
1489 def __init__( self, matchString ): 1488 def __init__( self, matchString ):
1507 #~ @profile 1506 #~ @profile
1508 def parseImpl( self, instring, loc, doActions=True ): 1507 def parseImpl( self, instring, loc, doActions=True ):
1509 if (instring[loc] == self.firstMatchChar and 1508 if (instring[loc] == self.firstMatchChar and
1510 (self.matchLen==1 or instring.startswith(self.match,loc)) ): 1509 (self.matchLen==1 or instring.startswith(self.match,loc)) ):
1511 return loc+self.matchLen, self.match 1510 return loc+self.matchLen, self.match
1512 #~ raise ParseException( instring, loc, self.errmsg ) 1511 raise ParseException(instring, loc, self.errmsg, self)
1513 exc = self.myException
1514 exc.loc = loc
1515 exc.pstr = instring
1516 raise exc
1517 _L = Literal 1512 _L = Literal
1513 ParserElement.literalStringClass = Literal
1518 1514
1519 class Keyword(Token): 1515 class Keyword(Token):
1520 """Token to exactly match a specified string as a keyword, that is, it must be 1516 """Token to exactly match a specified string as a keyword, that is, it must be
1521 immediately followed by a non-keyword character. Compare with C{Literal}:: 1517 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1522 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. 1518 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1523 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} 1519 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1524 Accepts two optional constructor arguments in addition to the keyword string: 1520 Accepts two optional constructor arguments in addition to the keyword string:
1525 C{identChars} is a string of characters that would be valid identifier characters, 1521 C{identChars} is a string of characters that would be valid identifier characters,
1526 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive 1522 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1557 if (instring[loc] == self.firstMatchChar and 1553 if (instring[loc] == self.firstMatchChar and
1558 (self.matchLen==1 or instring.startswith(self.match,loc)) and 1554 (self.matchLen==1 or instring.startswith(self.match,loc)) and
1559 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 1555 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
1560 (loc == 0 or instring[loc-1] not in self.identChars) ): 1556 (loc == 0 or instring[loc-1] not in self.identChars) ):
1561 return loc+self.matchLen, self.match 1557 return loc+self.matchLen, self.match
1562 #~ raise ParseException( instring, loc, self.errmsg ) 1558 raise ParseException(instring, loc, self.errmsg, self)
1563 exc = self.myException
1564 exc.loc = loc
1565 exc.pstr = instring
1566 raise exc
1567 1559
1568 def copy(self): 1560 def copy(self):
1569 c = super(Keyword,self).copy() 1561 c = super(Keyword,self).copy()
1570 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS 1562 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
1571 return c 1563 return c
1589 self.errmsg = "Expected " + self.name 1581 self.errmsg = "Expected " + self.name
1590 1582
1591 def parseImpl( self, instring, loc, doActions=True ): 1583 def parseImpl( self, instring, loc, doActions=True ):
1592 if instring[ loc:loc+self.matchLen ].upper() == self.match: 1584 if instring[ loc:loc+self.matchLen ].upper() == self.match:
1593 return loc+self.matchLen, self.returnString 1585 return loc+self.matchLen, self.returnString
1594 #~ raise ParseException( instring, loc, self.errmsg ) 1586 raise ParseException(instring, loc, self.errmsg, self)
1595 exc = self.myException
1596 exc.loc = loc
1597 exc.pstr = instring
1598 raise exc
1599 1587
1600 class CaselessKeyword(Keyword): 1588 class CaselessKeyword(Keyword):
1601 def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): 1589 def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
1602 super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) 1590 super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
1603 1591
1604 def parseImpl( self, instring, loc, doActions=True ): 1592 def parseImpl( self, instring, loc, doActions=True ):
1605 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 1593 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
1606 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): 1594 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
1607 return loc+self.matchLen, self.match 1595 return loc+self.matchLen, self.match
1608 #~ raise ParseException( instring, loc, self.errmsg ) 1596 raise ParseException(instring, loc, self.errmsg, self)
1609 exc = self.myException
1610 exc.loc = loc
1611 exc.pstr = instring
1612 raise exc
1613 1597
1614 class Word(Token): 1598 class Word(Token):
1615 """Token for matching words composed of allowed character sets. 1599 """Token for matching words composed of allowed character sets.
1616 Defined with string containing all allowed initial characters, 1600 Defined with string containing all allowed initial characters,
1617 an optional string containing allowed body characters (if omitted, 1601 an optional string containing allowed body characters (if omitted,
1624 except for one or two characters, for instance. 1608 except for one or two characters, for instance.
1625 """ 1609 """
1626 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): 1610 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
1627 super(Word,self).__init__() 1611 super(Word,self).__init__()
1628 if excludeChars: 1612 if excludeChars:
1629 initChars = ''.join([c for c in initChars if c not in excludeChars]) 1613 initChars = ''.join(c for c in initChars if c not in excludeChars)
1630 if bodyChars: 1614 if bodyChars:
1631 bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) 1615 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
1632 self.initCharsOrig = initChars 1616 self.initCharsOrig = initChars
1633 self.initChars = set(initChars) 1617 self.initChars = set(initChars)
1634 if bodyChars : 1618 if bodyChars :
1635 self.bodyCharsOrig = bodyChars 1619 self.bodyCharsOrig = bodyChars
1636 self.bodyChars = set(bodyChars) 1620 self.bodyChars = set(bodyChars)
1679 1663
1680 def parseImpl( self, instring, loc, doActions=True ): 1664 def parseImpl( self, instring, loc, doActions=True ):
1681 if self.re: 1665 if self.re:
1682 result = self.re.match(instring,loc) 1666 result = self.re.match(instring,loc)
1683 if not result: 1667 if not result:
1684 exc = self.myException 1668 raise ParseException(instring, loc, self.errmsg, self)
1685 exc.loc = loc
1686 exc.pstr = instring
1687 raise exc
1688 1669
1689 loc = result.end() 1670 loc = result.end()
1690 return loc, result.group() 1671 return loc, result.group()
1691 1672
1692 if not(instring[ loc ] in self.initChars): 1673 if not(instring[ loc ] in self.initChars):
1693 #~ raise ParseException( instring, loc, self.errmsg ) 1674 raise ParseException(instring, loc, self.errmsg, self)
1694 exc = self.myException 1675
1695 exc.loc = loc
1696 exc.pstr = instring
1697 raise exc
1698 start = loc 1676 start = loc
1699 loc += 1 1677 loc += 1
1700 instrlen = len(instring) 1678 instrlen = len(instring)
1701 bodychars = self.bodyChars 1679 bodychars = self.bodyChars
1702 maxloc = start + self.maxLen 1680 maxloc = start + self.maxLen
1712 if self.asKeyword: 1690 if self.asKeyword:
1713 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 1691 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
1714 throwException = True 1692 throwException = True
1715 1693
1716 if throwException: 1694 if throwException:
1717 #~ raise ParseException( instring, loc, self.errmsg ) 1695 raise ParseException(instring, loc, self.errmsg, self)
1718 exc = self.myException
1719 exc.loc = loc
1720 exc.pstr = instring
1721 raise exc
1722 1696
1723 return loc, instring[start:loc] 1697 return loc, instring[start:loc]
1724 1698
1725 def __str__( self ): 1699 def __str__( self ):
1726 try: 1700 try:
1785 self.mayReturnEmpty = True 1759 self.mayReturnEmpty = True
1786 1760
1787 def parseImpl( self, instring, loc, doActions=True ): 1761 def parseImpl( self, instring, loc, doActions=True ):
1788 result = self.re.match(instring,loc) 1762 result = self.re.match(instring,loc)
1789 if not result: 1763 if not result:
1790 exc = self.myException 1764 raise ParseException(instring, loc, self.errmsg, self)
1791 exc.loc = loc
1792 exc.pstr = instring
1793 raise exc
1794 1765
1795 loc = result.end() 1766 loc = result.end()
1796 d = result.groupdict() 1767 d = result.groupdict()
1797 ret = ParseResults(result.group()) 1768 ret = ParseResults(result.group())
1798 if d: 1769 if d:
1819 """ 1790 """
1820 Defined with the following parameters: 1791 Defined with the following parameters:
1821 - quoteChar - string of one or more characters defining the quote delimiting string 1792 - quoteChar - string of one or more characters defining the quote delimiting string
1822 - escChar - character to escape quotes, typically backslash (default=None) 1793 - escChar - character to escape quotes, typically backslash (default=None)
1823 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 1794 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
1824 - multiline - boolean indicating whether quotes can span multiple lines (default=False) 1795 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
1825 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) 1796 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
1826 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) 1797 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
1827 """ 1798 """
1828 super(QuotedString,self).__init__() 1799 super(QuotedString,self).__init__()
1829 1800
1830 # remove white space from quote chars - wont work anyway 1801 # remove white space from quote chars - wont work anyway
1831 quoteChar = quoteChar.strip() 1802 quoteChar = quoteChar.strip()
1862 ( re.escape(self.quoteChar), 1833 ( re.escape(self.quoteChar),
1863 _escapeRegexRangeChars(self.endQuoteChar[0]), 1834 _escapeRegexRangeChars(self.endQuoteChar[0]),
1864 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 1835 (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
1865 if len(self.endQuoteChar) > 1: 1836 if len(self.endQuoteChar) > 1:
1866 self.pattern += ( 1837 self.pattern += (
1867 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 1838 '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
1868 _escapeRegexRangeChars(self.endQuoteChar[i])) 1839 _escapeRegexRangeChars(self.endQuoteChar[i]))
1869 for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' 1840 for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
1870 ) 1841 )
1871 if escQuote: 1842 if escQuote:
1872 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 1843 self.pattern += (r'|(?:%s)' % re.escape(escQuote))
1873 if escChar: 1844 if escChar:
1874 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 1845 self.pattern += (r'|(?:%s.)' % re.escape(escChar))
1890 self.mayReturnEmpty = True 1861 self.mayReturnEmpty = True
1891 1862
1892 def parseImpl( self, instring, loc, doActions=True ): 1863 def parseImpl( self, instring, loc, doActions=True ):
1893 result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 1864 result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
1894 if not result: 1865 if not result:
1895 exc = self.myException 1866 raise ParseException(instring, loc, self.errmsg, self)
1896 exc.loc = loc
1897 exc.pstr = instring
1898 raise exc
1899 1867
1900 loc = result.end() 1868 loc = result.end()
1901 ret = result.group() 1869 ret = result.group()
1902 1870
1903 if self.unquoteResults: 1871 if self.unquoteResults:
1959 self.mayReturnEmpty = ( self.minLen == 0 ) 1927 self.mayReturnEmpty = ( self.minLen == 0 )
1960 self.mayIndexError = False 1928 self.mayIndexError = False
1961 1929
1962 def parseImpl( self, instring, loc, doActions=True ): 1930 def parseImpl( self, instring, loc, doActions=True ):
1963 if instring[loc] in self.notChars: 1931 if instring[loc] in self.notChars:
1964 #~ raise ParseException( instring, loc, self.errmsg ) 1932 raise ParseException(instring, loc, self.errmsg, self)
1965 exc = self.myException
1966 exc.loc = loc
1967 exc.pstr = instring
1968 raise exc
1969 1933
1970 start = loc 1934 start = loc
1971 loc += 1 1935 loc += 1
1972 notchars = self.notChars 1936 notchars = self.notChars
1973 maxlen = min( start+self.maxLen, len(instring) ) 1937 maxlen = min( start+self.maxLen, len(instring) )
1974 while loc < maxlen and \ 1938 while loc < maxlen and \
1975 (instring[loc] not in notchars): 1939 (instring[loc] not in notchars):
1976 loc += 1 1940 loc += 1
1977 1941
1978 if loc - start < self.minLen: 1942 if loc - start < self.minLen:
1979 #~ raise ParseException( instring, loc, self.errmsg ) 1943 raise ParseException(instring, loc, self.errmsg, self)
1980 exc = self.myException
1981 exc.loc = loc
1982 exc.pstr = instring
1983 raise exc
1984 1944
1985 return loc, instring[start:loc] 1945 return loc, instring[start:loc]
1986 1946
1987 def __str__( self ): 1947 def __str__( self ):
1988 try: 1948 try:
2001 class White(Token): 1961 class White(Token):
2002 """Special matching class for matching whitespace. Normally, whitespace is ignored 1962 """Special matching class for matching whitespace. Normally, whitespace is ignored
2003 by pyparsing grammars. This class is included when some whitespace structures 1963 by pyparsing grammars. This class is included when some whitespace structures
2004 are significant. Define with a string containing the whitespace characters to be 1964 are significant. Define with a string containing the whitespace characters to be
2005 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, 1965 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2006 as defined for the C{Word} class.""" 1966 as defined for the C{L{Word}} class."""
2007 whiteStrs = { 1967 whiteStrs = {
2008 " " : "<SPC>", 1968 " " : "<SPC>",
2009 "\t": "<TAB>", 1969 "\t": "<TAB>",
2010 "\n": "<LF>", 1970 "\n": "<LF>",
2011 "\r": "<CR>", 1971 "\r": "<CR>",
2012 "\f": "<FF>", 1972 "\f": "<FF>",
2013 } 1973 }
2014 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 1974 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
2015 super(White,self).__init__() 1975 super(White,self).__init__()
2016 self.matchWhite = ws 1976 self.matchWhite = ws
2017 self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) 1977 self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
2018 #~ self.leaveWhitespace() 1978 #~ self.leaveWhitespace()
2019 self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) 1979 self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
2020 self.mayReturnEmpty = True 1980 self.mayReturnEmpty = True
2021 self.errmsg = "Expected " + self.name 1981 self.errmsg = "Expected " + self.name
2022 1982
2023 self.minLen = min 1983 self.minLen = min
2024 1984
2031 self.maxLen = exact 1991 self.maxLen = exact
2032 self.minLen = exact 1992 self.minLen = exact
2033 1993
2034 def parseImpl( self, instring, loc, doActions=True ): 1994 def parseImpl( self, instring, loc, doActions=True ):
2035 if not(instring[ loc ] in self.matchWhite): 1995 if not(instring[ loc ] in self.matchWhite):
2036 #~ raise ParseException( instring, loc, self.errmsg ) 1996 raise ParseException(instring, loc, self.errmsg, self)
2037 exc = self.myException
2038 exc.loc = loc
2039 exc.pstr = instring
2040 raise exc
2041 start = loc 1997 start = loc
2042 loc += 1 1998 loc += 1
2043 maxloc = start + self.maxLen 1999 maxloc = start + self.maxLen
2044 maxloc = min( maxloc, len(instring) ) 2000 maxloc = min( maxloc, len(instring) )
2045 while loc < maxloc and instring[loc] in self.matchWhite: 2001 while loc < maxloc and instring[loc] in self.matchWhite:
2046 loc += 1 2002 loc += 1
2047 2003
2048 if loc - start < self.minLen: 2004 if loc - start < self.minLen:
2049 #~ raise ParseException( instring, loc, self.errmsg ) 2005 raise ParseException(instring, loc, self.errmsg, self)
2050 exc = self.myException
2051 exc.loc = loc
2052 exc.pstr = instring
2053 raise exc
2054 2006
2055 return loc, instring[start:loc] 2007 return loc, instring[start:loc]
2056 2008
2057 2009
2058 class _PositionToken(Token): 2010 class _PositionToken(Token):
2100 2052
2101 def parseImpl( self, instring, loc, doActions=True ): 2053 def parseImpl( self, instring, loc, doActions=True ):
2102 if not( loc==0 or 2054 if not( loc==0 or
2103 (loc == self.preParse( instring, 0 )) or 2055 (loc == self.preParse( instring, 0 )) or
2104 (instring[loc-1] == "\n") ): #col(loc, instring) != 1: 2056 (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
2105 #~ raise ParseException( instring, loc, "Expected start of line" ) 2057 raise ParseException(instring, loc, self.errmsg, self)
2106 exc = self.myException
2107 exc.loc = loc
2108 exc.pstr = instring
2109 raise exc
2110 return loc, [] 2058 return loc, []
2111 2059
2112 class LineEnd(_PositionToken): 2060 class LineEnd(_PositionToken):
2113 """Matches if current position is at the end of a line within the parse string""" 2061 """Matches if current position is at the end of a line within the parse string"""
2114 def __init__( self ): 2062 def __init__( self ):
2119 def parseImpl( self, instring, loc, doActions=True ): 2067 def parseImpl( self, instring, loc, doActions=True ):
2120 if loc<len(instring): 2068 if loc<len(instring):
2121 if instring[loc] == "\n": 2069 if instring[loc] == "\n":
2122 return loc+1, "\n" 2070 return loc+1, "\n"
2123 else: 2071 else:
2124 #~ raise ParseException( instring, loc, "Expected end of line" ) 2072 raise ParseException(instring, loc, self.errmsg, self)
2125 exc = self.myException
2126 exc.loc = loc
2127 exc.pstr = instring
2128 raise exc
2129 elif loc == len(instring): 2073 elif loc == len(instring):
2130 return loc+1, [] 2074 return loc+1, []
2131 else: 2075 else:
2132 exc = self.myException 2076 raise ParseException(instring, loc, self.errmsg, self)
2133 exc.loc = loc
2134 exc.pstr = instring
2135 raise exc
2136 2077
2137 class StringStart(_PositionToken): 2078 class StringStart(_PositionToken):
2138 """Matches if current position is at the beginning of the parse string""" 2079 """Matches if current position is at the beginning of the parse string"""
2139 def __init__( self ): 2080 def __init__( self ):
2140 super(StringStart,self).__init__() 2081 super(StringStart,self).__init__()
2142 2083
2143 def parseImpl( self, instring, loc, doActions=True ): 2084 def parseImpl( self, instring, loc, doActions=True ):
2144 if loc != 0: 2085 if loc != 0:
2145 # see if entire string up to here is just whitespace and ignoreables 2086 # see if entire string up to here is just whitespace and ignoreables
2146 if loc != self.preParse( instring, 0 ): 2087 if loc != self.preParse( instring, 0 ):
2147 #~ raise ParseException( instring, loc, "Expected start of text" ) 2088 raise ParseException(instring, loc, self.errmsg, self)
2148 exc = self.myException
2149 exc.loc = loc
2150 exc.pstr = instring
2151 raise exc
2152 return loc, [] 2089 return loc, []
2153 2090
2154 class StringEnd(_PositionToken): 2091 class StringEnd(_PositionToken):
2155 """Matches if current position is at the end of the parse string""" 2092 """Matches if current position is at the end of the parse string"""
2156 def __init__( self ): 2093 def __init__( self ):
2157 super(StringEnd,self).__init__() 2094 super(StringEnd,self).__init__()
2158 self.errmsg = "Expected end of text" 2095 self.errmsg = "Expected end of text"
2159 2096
2160 def parseImpl( self, instring, loc, doActions=True ): 2097 def parseImpl( self, instring, loc, doActions=True ):
2161 if loc < len(instring): 2098 if loc < len(instring):
2162 #~ raise ParseException( instring, loc, "Expected end of text" ) 2099 raise ParseException(instring, loc, self.errmsg, self)
2163 exc = self.myException
2164 exc.loc = loc
2165 exc.pstr = instring
2166 raise exc
2167 elif loc == len(instring): 2100 elif loc == len(instring):
2168 return loc+1, [] 2101 return loc+1, []
2169 elif loc > len(instring): 2102 elif loc > len(instring):
2170 return loc, [] 2103 return loc, []
2171 else: 2104 else:
2172 exc = self.myException 2105 raise ParseException(instring, loc, self.errmsg, self)
2173 exc.loc = loc
2174 exc.pstr = instring
2175 raise exc
2176 2106
2177 class WordStart(_PositionToken): 2107 class WordStart(_PositionToken):
2178 """Matches if the current position is at the beginning of a Word, and 2108 """Matches if the current position is at the beginning of a Word, and
2179 is not preceded by any character in a given set of C{wordChars} 2109 is not preceded by any character in a given set of C{wordChars}
2180 (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 2110 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2188 2118
2189 def parseImpl(self, instring, loc, doActions=True ): 2119 def parseImpl(self, instring, loc, doActions=True ):
2190 if loc != 0: 2120 if loc != 0:
2191 if (instring[loc-1] in self.wordChars or 2121 if (instring[loc-1] in self.wordChars or
2192 instring[loc] not in self.wordChars): 2122 instring[loc] not in self.wordChars):
2193 exc = self.myException 2123 raise ParseException(instring, loc, self.errmsg, self)
2194 exc.loc = loc
2195 exc.pstr = instring
2196 raise exc
2197 return loc, [] 2124 return loc, []
2198 2125
2199 class WordEnd(_PositionToken): 2126 class WordEnd(_PositionToken):
2200 """Matches if the current position is at the end of a Word, and 2127 """Matches if the current position is at the end of a Word, and
2201 is not followed by any character in a given set of C{wordChars} 2128 is not followed by any character in a given set of C{wordChars}
2212 def parseImpl(self, instring, loc, doActions=True ): 2139 def parseImpl(self, instring, loc, doActions=True ):
2213 instrlen = len(instring) 2140 instrlen = len(instring)
2214 if instrlen>0 and loc<instrlen: 2141 if instrlen>0 and loc<instrlen:
2215 if (instring[loc] in self.wordChars or 2142 if (instring[loc] in self.wordChars or
2216 instring[loc-1] not in self.wordChars): 2143 instring[loc-1] not in self.wordChars):
2217 #~ raise ParseException( instring, loc, "Expected end of word" ) 2144 raise ParseException(instring, loc, self.errmsg, self)
2218 exc = self.myException
2219 exc.loc = loc
2220 exc.pstr = instring
2221 raise exc
2222 return loc, [] 2145 return loc, []
2223 2146
2224 2147
2225 class ParseExpression(ParserElement): 2148 class ParseExpression(ParserElement):
2226 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 2149 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
2329 May be constructed using the C{'+'} operator. 2252 May be constructed using the C{'+'} operator.
2330 """ 2253 """
2331 2254
2332 class _ErrorStop(Empty): 2255 class _ErrorStop(Empty):
2333 def __init__(self, *args, **kwargs): 2256 def __init__(self, *args, **kwargs):
2334 super(Empty,self).__init__(*args, **kwargs) 2257 super(And._ErrorStop,self).__init__(*args, **kwargs)
2258 self.name = '-'
2335 self.leaveWhitespace() 2259 self.leaveWhitespace()
2336 2260
2337 def __init__( self, exprs, savelist = True ): 2261 def __init__( self, exprs, savelist = True ):
2338 super(And,self).__init__(exprs, savelist) 2262 super(And,self).__init__(exprs, savelist)
2339 self.mayReturnEmpty = True 2263 self.mayReturnEmpty = True
2357 if errorStop: 2281 if errorStop:
2358 try: 2282 try:
2359 loc, exprtokens = e._parse( instring, loc, doActions ) 2283 loc, exprtokens = e._parse( instring, loc, doActions )
2360 except ParseSyntaxException: 2284 except ParseSyntaxException:
2361 raise 2285 raise
2362 except ParseBaseException: 2286 except ParseBaseException as pe:
2363 pe = sys.exc_info()[1] 2287 pe.__traceback__ = None
2364 raise ParseSyntaxException(pe) 2288 raise ParseSyntaxException(pe)
2365 except IndexError: 2289 except IndexError:
2366 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) 2290 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
2367 else: 2291 else:
2368 loc, exprtokens = e._parse( instring, loc, doActions ) 2292 loc, exprtokens = e._parse( instring, loc, doActions )
2385 def __str__( self ): 2309 def __str__( self ):
2386 if hasattr(self,"name"): 2310 if hasattr(self,"name"):
2387 return self.name 2311 return self.name
2388 2312
2389 if self.strRepr is None: 2313 if self.strRepr is None:
2390 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2314 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2391 2315
2392 return self.strRepr 2316 return self.strRepr
2393 2317
2394 2318
2395 class Or(ParseExpression): 2319 class Or(ParseExpression):
2410 maxMatchLoc = -1 2334 maxMatchLoc = -1
2411 maxException = None 2335 maxException = None
2412 for e in self.exprs: 2336 for e in self.exprs:
2413 try: 2337 try:
2414 loc2 = e.tryParse( instring, loc ) 2338 loc2 = e.tryParse( instring, loc )
2415 except ParseException: 2339 except ParseException as err:
2416 err = sys.exc_info()[1] 2340 err.__traceback__ = None
2417 if err.loc > maxExcLoc: 2341 if err.loc > maxExcLoc:
2418 maxException = err 2342 maxException = err
2419 maxExcLoc = err.loc 2343 maxExcLoc = err.loc
2420 except IndexError: 2344 except IndexError:
2421 if len(instring) > maxExcLoc: 2345 if len(instring) > maxExcLoc:
2434 2358
2435 return maxMatchExp._parse( instring, loc, doActions ) 2359 return maxMatchExp._parse( instring, loc, doActions )
2436 2360
2437 def __ixor__(self, other ): 2361 def __ixor__(self, other ):
2438 if isinstance( other, basestring ): 2362 if isinstance( other, basestring ):
2439 other = Literal( other ) 2363 other = ParserElement.literalStringClass( other )
2440 return self.append( other ) #Or( [ self, other ] ) 2364 return self.append( other ) #Or( [ self, other ] )
2441 2365
2442 def __str__( self ): 2366 def __str__( self ):
2443 if hasattr(self,"name"): 2367 if hasattr(self,"name"):
2444 return self.name 2368 return self.name
2445 2369
2446 if self.strRepr is None: 2370 if self.strRepr is None:
2447 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2371 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2448 2372
2449 return self.strRepr 2373 return self.strRepr
2450 2374
2451 def checkRecursion( self, parseElementList ): 2375 def checkRecursion( self, parseElementList ):
2452 subRecCheckList = parseElementList[:] + [ self ] 2376 subRecCheckList = parseElementList[:] + [ self ]
2475 maxException = None 2399 maxException = None
2476 for e in self.exprs: 2400 for e in self.exprs:
2477 try: 2401 try:
2478 ret = e._parse( instring, loc, doActions ) 2402 ret = e._parse( instring, loc, doActions )
2479 return ret 2403 return ret
2480 except ParseException, err: 2404 except ParseException as err:
2481 if err.loc > maxExcLoc: 2405 if err.loc > maxExcLoc:
2482 maxException = err 2406 maxException = err
2483 maxExcLoc = err.loc 2407 maxExcLoc = err.loc
2484 except IndexError: 2408 except IndexError:
2485 if len(instring) > maxExcLoc: 2409 if len(instring) > maxExcLoc:
2493 else: 2417 else:
2494 raise ParseException(instring, loc, "no defined alternatives to match", self) 2418 raise ParseException(instring, loc, "no defined alternatives to match", self)
2495 2419
2496 def __ior__(self, other ): 2420 def __ior__(self, other ):
2497 if isinstance( other, basestring ): 2421 if isinstance( other, basestring ):
2498 other = Literal( other ) 2422 other = ParserElement.literalStringClass( other )
2499 return self.append( other ) #MatchFirst( [ self, other ] ) 2423 return self.append( other ) #MatchFirst( [ self, other ] )
2500 2424
2501 def __str__( self ): 2425 def __str__( self ):
2502 if hasattr(self,"name"): 2426 if hasattr(self,"name"):
2503 return self.name 2427 return self.name
2504 2428
2505 if self.strRepr is None: 2429 if self.strRepr is None:
2506 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2430 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2507 2431
2508 return self.strRepr 2432 return self.strRepr
2509 2433
2510 def checkRecursion( self, parseElementList ): 2434 def checkRecursion( self, parseElementList ):
2511 subRecCheckList = parseElementList[:] + [ self ] 2435 subRecCheckList = parseElementList[:] + [ self ]
2560 tmpOpt.remove(e) 2484 tmpOpt.remove(e)
2561 if len(failed) == len(tmpExprs): 2485 if len(failed) == len(tmpExprs):
2562 keepMatching = False 2486 keepMatching = False
2563 2487
2564 if tmpReqd: 2488 if tmpReqd:
2565 missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) 2489 missing = ", ".join(_ustr(e) for e in tmpReqd)
2566 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 2490 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
2567 2491
2568 # add any unmatched Optionals, in case they have default values defined 2492 # add any unmatched Optionals, in case they have default values defined
2569 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 2493 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
2570 2494
2589 def __str__( self ): 2513 def __str__( self ):
2590 if hasattr(self,"name"): 2514 if hasattr(self,"name"):
2591 return self.name 2515 return self.name
2592 2516
2593 if self.strRepr is None: 2517 if self.strRepr is None:
2594 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2518 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2595 2519
2596 return self.strRepr 2520 return self.strRepr
2597 2521
2598 def checkRecursion( self, parseElementList ): 2522 def checkRecursion( self, parseElementList ):
2599 subRecCheckList = parseElementList[:] + [ self ] 2523 subRecCheckList = parseElementList[:] + [ self ]
2704 try: 2628 try:
2705 self.expr.tryParse( instring, loc ) 2629 self.expr.tryParse( instring, loc )
2706 except (ParseException,IndexError): 2630 except (ParseException,IndexError):
2707 pass 2631 pass
2708 else: 2632 else:
2709 #~ raise ParseException(instring, loc, self.errmsg ) 2633 raise ParseException(instring, loc, self.errmsg, self)
2710 exc = self.myException
2711 exc.loc = loc
2712 exc.pstr = instring
2713 raise exc
2714 return loc, [] 2634 return loc, []
2715 2635
2716 def __str__( self ): 2636 def __str__( self ):
2717 if hasattr(self,"name"): 2637 if hasattr(self,"name"):
2718 return self.name 2638 return self.name
2876 failParse = False 2796 failParse = False
2877 if self.ignoreExpr is not None: 2797 if self.ignoreExpr is not None:
2878 while 1: 2798 while 1:
2879 try: 2799 try:
2880 loc = self.ignoreExpr.tryParse(instring,loc) 2800 loc = self.ignoreExpr.tryParse(instring,loc)
2881 # print "found ignoreExpr, advance to", loc 2801 # print("found ignoreExpr, advance to", loc)
2882 except ParseBaseException: 2802 except ParseBaseException:
2883 break 2803 break
2884 expr._parse( instring, loc, doActions=False, callPreParse=False ) 2804 expr._parse( instring, loc, doActions=False, callPreParse=False )
2885 skipText = instring[startLoc:loc] 2805 skipText = instring[startLoc:loc]
2886 if self.includeMatch: 2806 if self.includeMatch:
2896 except (ParseException,IndexError): 2816 except (ParseException,IndexError):
2897 if failParse: 2817 if failParse:
2898 raise 2818 raise
2899 else: 2819 else:
2900 loc += 1 2820 loc += 1
2901 exc = self.myException 2821 raise ParseException(instring, loc, self.errmsg, self)
2902 exc.loc = loc
2903 exc.pstr = instring
2904 raise exc
2905 2822
2906 class Forward(ParseElementEnhance): 2823 class Forward(ParseElementEnhance):
2907 """Forward declaration of an expression to be defined later - 2824 """Forward declaration of an expression to be defined later -
2908 used for recursive grammars, such as algebraic infix notation. 2825 used for recursive grammars, such as algebraic infix notation.
2909 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. 2826 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
2914 will actually be evaluated as:: 2831 will actually be evaluated as::
2915 (fwdExpr << a) | b | c 2832 (fwdExpr << a) | b | c
2916 thereby leaving b and c out as parseable alternatives. It is recommended that you 2833 thereby leaving b and c out as parseable alternatives. It is recommended that you
2917 explicitly group the values inserted into the C{Forward}:: 2834 explicitly group the values inserted into the C{Forward}::
2918 fwdExpr << (a | b | c) 2835 fwdExpr << (a | b | c)
2836 Converting to use the '<<=' operator instead will avoid this problem.
2919 """ 2837 """
2920 def __init__( self, other=None ): 2838 def __init__( self, other=None ):
2921 super(Forward,self).__init__( other, savelist=False ) 2839 super(Forward,self).__init__( other, savelist=False )
2922 2840
2923 def __lshift__( self, other ): 2841 def __ilshift__( self, other ):
2924 if isinstance( other, basestring ): 2842 if isinstance( other, basestring ):
2925 other = Literal(other) 2843 other = ParserElement.literalStringClass(other)
2926 self.expr = other 2844 self.expr = other
2927 self.mayReturnEmpty = other.mayReturnEmpty 2845 self.mayReturnEmpty = other.mayReturnEmpty
2928 self.strRepr = None 2846 self.strRepr = None
2929 self.mayIndexError = self.expr.mayIndexError 2847 self.mayIndexError = self.expr.mayIndexError
2930 self.mayReturnEmpty = self.expr.mayReturnEmpty 2848 self.mayReturnEmpty = self.expr.mayReturnEmpty
2931 self.setWhitespaceChars( self.expr.whiteChars ) 2849 self.setWhitespaceChars( self.expr.whiteChars )
2932 self.skipWhitespace = self.expr.skipWhitespace 2850 self.skipWhitespace = self.expr.skipWhitespace
2933 self.saveAsList = self.expr.saveAsList 2851 self.saveAsList = self.expr.saveAsList
2934 self.ignoreExprs.extend(self.expr.ignoreExprs) 2852 self.ignoreExprs.extend(self.expr.ignoreExprs)
2853 return self
2854
2855 def __lshift__(self, other):
2856 warnings.warn("Operator '<<' is deprecated, use '<<=' instead",
2857 DeprecationWarning,stacklevel=2)
2858 self <<= other
2935 return None 2859 return None
2936 2860
2937 def leaveWhitespace( self ): 2861 def leaveWhitespace( self ):
2938 self.skipWhitespace = False 2862 self.skipWhitespace = False
2939 return self 2863 return self
2940 2864
2941 def streamline( self ): 2865 def streamline( self ):
2991 super(Upcase,self).__init__(*args) 2915 super(Upcase,self).__init__(*args)
2992 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 2916 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
2993 DeprecationWarning,stacklevel=2) 2917 DeprecationWarning,stacklevel=2)
2994 2918
2995 def postParse( self, instring, loc, tokenlist ): 2919 def postParse( self, instring, loc, tokenlist ):
2996 return list(map( string.upper, tokenlist )) 2920 return list(map( str.upper, tokenlist ))
2997 2921
2998 2922
2999 class Combine(TokenConverter): 2923 class Combine(TokenConverter):
3000 """Converter to concatenate all matching tokens to a single string. 2924 """Converter to concatenate all matching tokens to a single string.
3001 By default, the matching patterns must also be contiguous in the input string; 2925 By default, the matching patterns must also be contiguous in the input string;
3027 return [ retToks ] 2951 return [ retToks ]
3028 else: 2952 else:
3029 return retToks 2953 return retToks
3030 2954
3031 class Group(TokenConverter): 2955 class Group(TokenConverter):
3032 """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" 2956 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3033 def __init__( self, expr ): 2957 def __init__( self, expr ):
3034 super(Group,self).__init__( expr ) 2958 super(Group,self).__init__( expr )
3035 self.saveAsList = True 2959 self.saveAsList = True
3036 2960
3037 def postParse( self, instring, loc, tokenlist ): 2961 def postParse( self, instring, loc, tokenlist ):
3103 if len(paArgs)>3: 3027 if len(paArgs)>3:
3104 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc 3028 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3105 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) 3029 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3106 try: 3030 try:
3107 ret = f(*paArgs) 3031 ret = f(*paArgs)
3108 except Exception: 3032 except Exception as exc:
3109 exc = sys.exc_info()[1]
3110 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) 3033 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3111 raise 3034 raise
3112 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) ) 3035 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3113 return ret 3036 return ret
3114 try: 3037 try:
3122 # 3045 #
3123 def delimitedList( expr, delim=",", combine=False ): 3046 def delimitedList( expr, delim=",", combine=False ):
3124 """Helper to define a delimited list of expressions - the delimiter defaults to ','. 3047 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
3125 By default, the list elements and delimiters can have intervening whitespace, and 3048 By default, the list elements and delimiters can have intervening whitespace, and
3126 comments, but this can be overridden by passing C{combine=True} in the constructor. 3049 comments, but this can be overridden by passing C{combine=True} in the constructor.
3127 If C{combine} is set to True, the matching tokens are returned as a single token 3050 If C{combine} is set to C{True}, the matching tokens are returned as a single token
3128 string, with the delimiters included; otherwise, the matching tokens are returned 3051 string, with the delimiters included; otherwise, the matching tokens are returned
3129 as a list of tokens, with the delimiters suppressed. 3052 as a list of tokens, with the delimiters suppressed.
3130 """ 3053 """
3131 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." 3054 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
3132 if combine: 3055 if combine:
3224 return _ustr(s) 3147 return _ustr(s)
3225 3148
3226 def oneOf( strs, caseless=False, useRegex=True ): 3149 def oneOf( strs, caseless=False, useRegex=True ):
3227 """Helper to quickly define a set of alternative Literals, and makes sure to do 3150 """Helper to quickly define a set of alternative Literals, and makes sure to do
3228 longest-first testing when there is a conflict, regardless of the input order, 3151 longest-first testing when there is a conflict, regardless of the input order,
3229 but returns a C{MatchFirst} for best performance. 3152 but returns a C{L{MatchFirst}} for best performance.
3230 3153
3231 Parameters: 3154 Parameters:
3232 - strs - a string of space-delimited literals, or a list of string literals 3155 - strs - a string of space-delimited literals, or a list of string literals
3233 - caseless - (default=False) - treat all literals as caseless 3156 - caseless - (default=False) - treat all literals as caseless
3234 - useRegex - (default=True) - as an optimization, will generate a Regex 3157 - useRegex - (default=True) - as an optimization, will generate a Regex
3269 3192
3270 if not caseless and useRegex: 3193 if not caseless and useRegex:
3271 #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) 3194 #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
3272 try: 3195 try:
3273 if len(symbols)==len("".join(symbols)): 3196 if len(symbols)==len("".join(symbols)):
3274 return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) 3197 return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
3275 else: 3198 else:
3276 return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) 3199 return Regex( "|".join(re.escape(sym) for sym in symbols) )
3277 except: 3200 except:
3278 warnings.warn("Exception creating Regex for oneOf, building MatchFirst", 3201 warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
3279 SyntaxWarning, stacklevel=2) 3202 SyntaxWarning, stacklevel=2)
3280 3203
3281 3204
3282 # last resort, just use MatchFirst 3205 # last resort, just use MatchFirst
3283 return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) 3206 return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3284 3207
3285 def dictOf( key, value ): 3208 def dictOf( key, value ):
3286 """Helper to easily and clearly define a dictionary by specifying the respective patterns 3209 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3287 for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens 3210 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
3288 in the proper order. The key pattern can include delimiting markers or punctuation, 3211 in the proper order. The key pattern can include delimiting markers or punctuation,
3289 as long as they are suppressed, thereby leaving the significant key text. The value 3212 as long as they are suppressed, thereby leaving the significant key text. The value
3290 pattern can include named results, so that the C{Dict} results can include named token 3213 pattern can include named results, so that the C{Dict} results can include named token
3291 fields. 3214 fields.
3292 """ 3215 """
3299 input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not 3222 input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
3300 require the inspect module to chase up the call stack. By default, returns a 3223 require the inspect module to chase up the call stack. By default, returns a
3301 string containing the original parsed text. 3224 string containing the original parsed text.
3302 3225
3303 If the optional C{asString} argument is passed as C{False}, then the return value is a 3226 If the optional C{asString} argument is passed as C{False}, then the return value is a
3304 C{ParseResults} containing any results names that were originally matched, and a 3227 C{L{ParseResults}} containing any results names that were originally matched, and a
3305 single token containing the original matched text from the input string. So if 3228 single token containing the original matched text from the input string. So if
3306 the expression passed to C{L{originalTextFor}} contains expressions with defined 3229 the expression passed to C{L{originalTextFor}} contains expressions with defined
3307 results names, you must set C{asString} to C{False} if you want to preserve those 3230 results names, you must set C{asString} to C{False} if you want to preserve those
3308 results name values.""" 3231 results name values."""
3309 locMarker = Empty().setParseAction(lambda s,loc,t: loc) 3232 locMarker = Empty().setParseAction(lambda s,loc,t: loc)
3332 lineEnd = LineEnd().setName("lineEnd") 3255 lineEnd = LineEnd().setName("lineEnd")
3333 stringStart = StringStart().setName("stringStart") 3256 stringStart = StringStart().setName("stringStart")
3334 stringEnd = StringEnd().setName("stringEnd") 3257 stringEnd = StringEnd().setName("stringEnd")
3335 3258
3336 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) 3259 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
3337 _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) 3260 _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
3338 _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16)))
3339 _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) 3261 _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
3340 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) 3262 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1)
3341 _charRange = Group(_singleChar + Suppress("-") + _singleChar) 3263 _charRange = Group(_singleChar + Suppress("-") + _singleChar)
3342 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" 3264 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3343 3265
3344 _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) 3266 _expanded = lambda p: (isinstance(p,ParseResults) and ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) or p)
3345 3267
3346 def srange(s): 3268 def srange(s):
3347 r"""Helper to easily define string ranges for use in Word construction. Borrows 3269 r"""Helper to easily define string ranges for use in Word construction. Borrows
3348 syntax from regexp '[]' string range definitions:: 3270 syntax from regexp '[]' string range definitions::
3349 srange("[0-9]") -> "0123456789" 3271 srange("[0-9]") -> "0123456789"
3359 an escaped octal character with a leading '\0' (\041, which is a '!' character) 3281 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3360 a range of any of the above, separated by a dash ('a-z', etc.) 3282 a range of any of the above, separated by a dash ('a-z', etc.)
3361 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) 3283 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3362 """ 3284 """
3363 try: 3285 try:
3364 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) 3286 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
3365 except: 3287 except:
3366 return "" 3288 return ""
3367 3289
3368 def matchOnlyAtCol(n): 3290 def matchOnlyAtCol(n):
3369 """Helper method for defining parse actions that require matching at a specific 3291 """Helper method for defining parse actions that require matching at a specific
3374 raise ParseException(strg,locn,"matched token not at column %d" % n) 3296 raise ParseException(strg,locn,"matched token not at column %d" % n)
3375 return verifyCol 3297 return verifyCol
3376 3298
3377 def replaceWith(replStr): 3299 def replaceWith(replStr):
3378 """Helper method for common parse actions that simply return a literal value. Especially 3300 """Helper method for common parse actions that simply return a literal value. Especially
3379 useful when used with C{transformString()}. 3301 useful when used with C{L{transformString<ParserElement.transformString>}()}.
3380 """ 3302 """
3381 def _replFunc(*args): 3303 def _replFunc(*args):
3382 return [replStr] 3304 return [replStr]
3383 return _replFunc 3305 return _replFunc
3384 3306
3396 def downcaseTokens(s,l,t): 3318 def downcaseTokens(s,l,t):
3397 """Helper parse action to convert tokens to lower case.""" 3319 """Helper parse action to convert tokens to lower case."""
3398 return [ tt.lower() for tt in map(_ustr,t) ] 3320 return [ tt.lower() for tt in map(_ustr,t) ]
3399 3321
3400 def keepOriginalText(s,startLoc,t): 3322 def keepOriginalText(s,startLoc,t):
3401 """DEPRECATED - use new helper method C{originalTextFor}. 3323 """DEPRECATED - use new helper method C{L{originalTextFor}}.
3402 Helper parse action to preserve original parsed text, 3324 Helper parse action to preserve original parsed text,
3403 overriding any nested parse actions.""" 3325 overriding any nested parse actions."""
3404 try: 3326 try:
3405 endloc = getTokensEndLoc() 3327 endloc = getTokensEndLoc()
3406 except ParseException: 3328 except ParseException:
3438 tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) 3360 tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
3439 openTag = Suppress("<") + tagStr("tag") + \ 3361 openTag = Suppress("<") + tagStr("tag") + \
3440 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ 3362 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
3441 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") 3363 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
3442 else: 3364 else:
3443 printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) 3365 printablesLessRAbrack = "".join(c for c in printables if c not in ">")
3444 tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) 3366 tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
3445 openTag = Suppress("<") + tagStr("tag") + \ 3367 openTag = Suppress("<") + tagStr("tag") + \
3446 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ 3368 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
3447 Optional( Suppress("=") + tagAttrValue ) ))) + \ 3369 Optional( Suppress("=") + tagAttrValue ) ))) + \
3448 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") 3370 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
3462 """Helper to construct opening and closing tag expressions for XML, given a tag name""" 3384 """Helper to construct opening and closing tag expressions for XML, given a tag name"""
3463 return _makeTags( tagStr, True ) 3385 return _makeTags( tagStr, True )
3464 3386
3465 def withAttribute(*args,**attrDict): 3387 def withAttribute(*args,**attrDict):
3466 """Helper to create a validating parse action to be used with start tags created 3388 """Helper to create a validating parse action to be used with start tags created
3467 with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag 3389 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
3468 with a required attribute value, to avoid false matches on common tags such as 3390 with a required attribute value, to avoid false matches on common tags such as
3469 C{<TD>} or C{<DIV>}. 3391 C{<TD>} or C{<DIV>}.
3470 3392
3471 Call C{withAttribute} with a series of attribute names and values. Specify the list 3393 Call C{withAttribute} with a series of attribute names and values. Specify the list
3472 of filter attributes names and values as: 3394 of filter attributes names and values as:
3497 3419
3498 opAssoc = _Constants() 3420 opAssoc = _Constants()
3499 opAssoc.LEFT = object() 3421 opAssoc.LEFT = object()
3500 opAssoc.RIGHT = object() 3422 opAssoc.RIGHT = object()
3501 3423
3502 def operatorPrecedence( baseExpr, opList ): 3424 def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
3503 """Helper method for constructing grammars of expressions made up of 3425 """Helper method for constructing grammars of expressions made up of
3504 operators working in a precedence hierarchy. Operators may be unary or 3426 operators working in a precedence hierarchy. Operators may be unary or
3505 binary, left- or right-associative. Parse actions can also be attached 3427 binary, left- or right-associative. Parse actions can also be attached
3506 to operator expressions. 3428 to operator expressions.
3507 3429
3516 two operators separating the 3 terms 3438 two operators separating the 3 terms
3517 - numTerms is the number of terms for this operator (must 3439 - numTerms is the number of terms for this operator (must
3518 be 1, 2, or 3) 3440 be 1, 2, or 3)
3519 - rightLeftAssoc is the indicator whether the operator is 3441 - rightLeftAssoc is the indicator whether the operator is
3520 right or left associative, using the pyparsing-defined 3442 right or left associative, using the pyparsing-defined
3521 constants opAssoc.RIGHT and opAssoc.LEFT. 3443 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
3522 - parseAction is the parse action to be associated with 3444 - parseAction is the parse action to be associated with
3523 expressions matching this operator expression (the 3445 expressions matching this operator expression (the
3524 parse action tuple member may be omitted) 3446 parse action tuple member may be omitted)
3447 - lpar - expression for matching left-parentheses (default=Suppress('('))
3448 - rpar - expression for matching right-parentheses (default=Suppress(')'))
3525 """ 3449 """
3526 ret = Forward() 3450 ret = Forward()
3527 lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) 3451 lastExpr = baseExpr | ( lpar + ret + rpar )
3528 for i,operDef in enumerate(opList): 3452 for i,operDef in enumerate(opList):
3529 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 3453 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3530 if arity == 3: 3454 if arity == 3:
3531 if opExpr is None or len(opExpr) != 2: 3455 if opExpr is None or len(opExpr) != 2:
3532 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 3456 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3567 matchExpr.setParseAction( pa ) 3491 matchExpr.setParseAction( pa )
3568 thisExpr << ( matchExpr | lastExpr ) 3492 thisExpr << ( matchExpr | lastExpr )
3569 lastExpr = thisExpr 3493 lastExpr = thisExpr
3570 ret << lastExpr 3494 ret << lastExpr
3571 return ret 3495 return ret
3496 operatorPrecedence = infixNotation
3572 3497
3573 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") 3498 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
3574 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") 3499 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
3575 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") 3500 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
3576 unicodeString = Combine(_L('u') + quotedString.copy()) 3501 unicodeString = Combine(_L('u') + quotedString.copy())
3695 dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") 3620 dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
3696 cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment") 3621 cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
3697 3622
3698 javaStyleComment = cppStyleComment 3623 javaStyleComment = cppStyleComment
3699 pythonStyleComment = Regex(r"#.*").setName("Python style comment") 3624 pythonStyleComment = Regex(r"#.*").setName("Python style comment")
3700 _noncomma = "".join( [ c for c in printables if c != "," ] ) 3625 _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
3701 _commasepitem = Combine(OneOrMore(Word(_noncomma) +
3702 Optional( Word(" \t") + 3626 Optional( Word(" \t") +
3703 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") 3627 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
3704 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") 3628 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3705 3629
3706 3630
3713 print (teststring + "->" + str(tokenlist)) 3637 print (teststring + "->" + str(tokenlist))
3714 print ("tokens = " + str(tokens)) 3638 print ("tokens = " + str(tokens))
3715 print ("tokens.columns = " + str(tokens.columns)) 3639 print ("tokens.columns = " + str(tokens.columns))
3716 print ("tokens.tables = " + str(tokens.tables)) 3640 print ("tokens.tables = " + str(tokens.tables))
3717 print (tokens.asXML("SQL",True)) 3641 print (tokens.asXML("SQL",True))
3718 except ParseBaseException: 3642 except ParseBaseException as err:
3719 err = sys.exc_info()[1]
3720 print (teststring + "->") 3643 print (teststring + "->")
3721 print (err.line) 3644 print (err.line)
3722 print (" "*(err.column-1) + "^") 3645 print (" "*(err.column-1) + "^")
3723 print (err) 3646 print (err)
3724 print() 3647 print()