Mercurial > dottes
comparison abc2xml/pyparsing.py @ 1084:b1dbb76f4eb9 build-default-404
Update abc2xml to latest - Python3 friendly.
author | Jim Hague <jim.hague@acm.org> |
---|---|
date | Fri, 18 Nov 2022 21:42:55 +0000 |
parents | 4fab69a1027d |
children |
comparison
equal
deleted
inserted
replaced
1083:b66bc498220d | 1084:b1dbb76f4eb9 |
---|---|
1 # module pyparsing.py | 1 # module pyparsing.py |
2 # | 2 # |
3 # Copyright (c) 2003-2011 Paul T. McGuire | 3 # Copyright (c) 2003-2013 Paul T. McGuire |
4 # | 4 # |
5 # Permission is hereby granted, free of charge, to any person obtaining | 5 # Permission is hereby granted, free of charge, to any person obtaining |
6 # a copy of this software and associated documentation files (the | 6 # a copy of this software and associated documentation files (the |
7 # "Software"), to deal in the Software without restriction, including | 7 # "Software"), to deal in the Software without restriction, including |
8 # without limitation the rights to use, copy, modify, merge, publish, | 8 # without limitation the rights to use, copy, modify, merge, publish, |
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | 19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | 20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | 21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | 22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 # | 23 # |
24 #from __future__ import generators | |
25 | 24 |
26 __doc__ = \ | 25 __doc__ = \ |
27 """ | 26 """ |
28 pyparsing module - Classes and methods to define and execute parsing grammars | 27 pyparsing module - Classes and methods to define and execute parsing grammars |
29 | 28 |
38 | 37 |
39 # define grammar of a greeting | 38 # define grammar of a greeting |
40 greet = Word( alphas ) + "," + Word( alphas ) + "!" | 39 greet = Word( alphas ) + "," + Word( alphas ) + "!" |
41 | 40 |
42 hello = "Hello, World!" | 41 hello = "Hello, World!" |
43 print hello, "->", greet.parseString( hello ) | 42 print (hello, "->", greet.parseString( hello )) |
44 | 43 |
45 The program outputs the following:: | 44 The program outputs the following:: |
46 | 45 |
47 Hello, World! -> ['Hello', ',', 'World', '!'] | 46 Hello, World! -> ['Hello', ',', 'World', '!'] |
48 | 47 |
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) | 55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) |
57 - quoted strings | 56 - quoted strings |
58 - embedded comments | 57 - embedded comments |
59 """ | 58 """ |
60 | 59 |
61 __version__ = "1.5.6" | 60 __version__ = "2.0.1" |
62 __versionTime__ = "26 June 2011 10:53" | 61 __versionTime__ = "16 July 2013 22:22" |
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" | 62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" |
64 | 63 |
65 import string | 64 import string |
66 from weakref import ref as wkref | 65 from weakref import ref as wkref |
67 import copy | 66 import copy |
68 import sys | 67 import sys |
69 import warnings | 68 import warnings |
70 import re | 69 import re |
71 import sre_constants | 70 import sre_constants |
71 import collections | |
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) | 72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) |
73 | 73 |
74 __all__ = [ | 74 __all__ = [ |
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', | 75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', |
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', | 76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', |
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', | 79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', |
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', | 80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', |
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', | 81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', |
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', | 82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', |
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', | 83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', |
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', | 84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', |
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', | 85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', |
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', | 86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', |
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', | 87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', |
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', | 88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', |
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', | 89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', |
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', | 90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', |
91 'indentedBlock', 'originalTextFor', | 91 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', |
92 ] | 92 ] |
93 | 93 |
94 """ | 94 PY_3 = sys.version.startswith('3') |
95 Detect if we are running version 3.X and make appropriate changes | 95 if PY_3: |
96 Robert A. Clark | |
97 """ | |
98 _PY3K = sys.version_info[0] > 2 | |
99 if _PY3K: | |
100 _MAX_INT = sys.maxsize | 96 _MAX_INT = sys.maxsize |
101 basestring = str | 97 basestring = str |
102 unichr = chr | 98 unichr = chr |
103 _ustr = str | 99 _ustr = str |
104 alphas = string.ascii_lowercase + string.ascii_uppercase | 100 |
101 # build list of single arg builtins, that can be used as parse actions | |
102 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] | |
103 | |
105 else: | 104 else: |
106 _MAX_INT = sys.maxint | 105 _MAX_INT = sys.maxint |
107 range = xrange | 106 range = xrange |
108 set = lambda s : dict( [(c,0) for c in s] ) | |
109 alphas = string.lowercase + string.uppercase | |
110 | 107 |
111 def _ustr(obj): | 108 def _ustr(obj): |
112 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries | 109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries |
113 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It | 110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It |
114 then < returns the unicode object | encodes it with the default encoding | ... >. | 111 then < returns the unicode object | encodes it with the default encoding | ... >. |
132 # Replace unprintables with escape codes? | 129 # Replace unprintables with escape codes? |
133 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') | 130 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') |
134 # Replace unprintables with question marks? | 131 # Replace unprintables with question marks? |
135 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') | 132 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') |
136 # ... | 133 # ... |
137 | 134 |
138 alphas = string.lowercase + string.uppercase | 135 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions |
139 | 136 singleArgBuiltins = [] |
140 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions | 137 import __builtin__ |
141 singleArgBuiltins = [] | 138 for fname in "sum len sorted reversed list tuple set any all min max".split(): |
142 import __builtin__ | 139 try: |
143 for fname in "sum len enumerate sorted reversed list tuple set any all".split(): | 140 singleArgBuiltins.append(getattr(__builtin__,fname)) |
144 try: | 141 except AttributeError: |
145 singleArgBuiltins.append(getattr(__builtin__,fname)) | 142 continue |
146 except AttributeError: | 143 |
147 continue | |
148 | 144 |
149 def _xml_escape(data): | 145 def _xml_escape(data): |
150 """Escape &, <, >, ", ', etc. in a string of data.""" | 146 """Escape &, <, >, ", ', etc. in a string of data.""" |
151 | 147 |
152 # ampersand must be replaced first | 148 # ampersand must be replaced first |
153 from_symbols = '&><"\'' | 149 from_symbols = '&><"\'' |
154 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] | 150 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) |
155 for from_,to_ in zip(from_symbols, to_symbols): | 151 for from_,to_ in zip(from_symbols, to_symbols): |
156 data = data.replace(from_, to_) | 152 data = data.replace(from_, to_) |
157 return data | 153 return data |
158 | 154 |
159 class _Constants(object): | 155 class _Constants(object): |
160 pass | 156 pass |
161 | 157 |
162 nums = string.digits | 158 alphas = string.ascii_lowercase + string.ascii_uppercase |
159 nums = "0123456789" | |
163 hexnums = nums + "ABCDEFabcdef" | 160 hexnums = nums + "ABCDEFabcdef" |
164 alphanums = alphas + nums | 161 alphanums = alphas + nums |
165 _bslash = chr(92) | 162 _bslash = chr(92) |
166 printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) | 163 printables = "".join(c for c in string.printable if c not in string.whitespace) |
167 | 164 |
168 class ParseBaseException(Exception): | 165 class ParseBaseException(Exception): |
169 """base exception class for all parsing runtime exceptions""" | 166 """base exception class for all parsing runtime exceptions""" |
170 # Performance tuning: we construct a *lot* of these, so keep this | 167 # Performance tuning: we construct a *lot* of these, so keep this |
171 # constructor as small and fast as possible | 168 # constructor as small and fast as possible |
204 the location of the exception with a special symbol. | 201 the location of the exception with a special symbol. |
205 """ | 202 """ |
206 line_str = self.line | 203 line_str = self.line |
207 line_column = self.column - 1 | 204 line_column = self.column - 1 |
208 if markerString: | 205 if markerString: |
209 line_str = "".join( [line_str[:line_column], | 206 line_str = "".join(line_str[:line_column], |
210 markerString, line_str[line_column:]]) | 207 markerString, line_str[line_column:]) |
211 return line_str.strip() | 208 return line_str.strip() |
212 def __dir__(self): | 209 def __dir__(self): |
213 return "loc msg pstr parserElement lineno col line " \ | 210 return "loc msg pstr parserElement lineno col line " \ |
214 "markInputLine __str__ __repr__".split() | 211 "markInputline __str__ __repr__".split() |
215 | 212 |
216 class ParseException(ParseBaseException): | 213 class ParseException(ParseBaseException): |
217 """exception thrown when parse expressions don't match class; | 214 """exception thrown when parse expressions don't match class; |
218 supported attributes by name are: | 215 supported attributes by name are: |
219 - lineno - returns the line number of the exception text | 216 - lineno - returns the line number of the exception text |
226 """user-throwable exception thrown when inconsistent parse content | 223 """user-throwable exception thrown when inconsistent parse content |
227 is found; stops all parsing immediately""" | 224 is found; stops all parsing immediately""" |
228 pass | 225 pass |
229 | 226 |
230 class ParseSyntaxException(ParseFatalException): | 227 class ParseSyntaxException(ParseFatalException): |
231 """just like C{ParseFatalException}, but thrown internally when an | 228 """just like C{L{ParseFatalException}}, but thrown internally when an |
232 C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because | 229 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because |
233 an unbacktrackable syntax error has been found""" | 230 an unbacktrackable syntax error has been found""" |
234 def __init__(self, pe): | 231 def __init__(self, pe): |
235 super(ParseSyntaxException, self).__init__( | 232 super(ParseSyntaxException, self).__init__( |
236 pe.pstr, pe.loc, pe.msg, pe.parserElement) | 233 pe.pstr, pe.loc, pe.msg, pe.parserElement) |
237 | 234 |
442 | 439 |
443 def __repr__( self ): | 440 def __repr__( self ): |
444 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) | 441 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) |
445 | 442 |
446 def __str__( self ): | 443 def __str__( self ): |
447 out = "[" | 444 out = [] |
448 sep = "" | |
449 for i in self.__toklist: | 445 for i in self.__toklist: |
450 if isinstance(i, ParseResults): | 446 if isinstance(i, ParseResults): |
451 out += sep + _ustr(i) | 447 out.append(_ustr(i)) |
452 else: | 448 else: |
453 out += sep + repr(i) | 449 out.append(repr(i)) |
454 sep = ", " | 450 return '[' + ', '.join(out) + ']' |
455 out += "]" | |
456 return out | |
457 | 451 |
458 def _asStringList( self, sep='' ): | 452 def _asStringList( self, sep='' ): |
459 out = [] | 453 out = [] |
460 for item in self.__toklist: | 454 for item in self.__toklist: |
461 if out and sep: | 455 if out and sep: |
491 | 485 |
492 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): | 486 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): |
493 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" | 487 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" |
494 nl = "\n" | 488 nl = "\n" |
495 out = [] | 489 out = [] |
496 namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() | 490 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() |
497 for v in vlist ] ) | 491 for v in vlist) |
498 nextLevelIndent = indent + " " | 492 nextLevelIndent = indent + " " |
499 | 493 |
500 # collapse out indents if formatting is not desired | 494 # collapse out indents if formatting is not desired |
501 if not formatted: | 495 if not formatted: |
502 indent = "" | 496 indent = "" |
614 self.__parent = wkref(par) | 608 self.__parent = wkref(par) |
615 else: | 609 else: |
616 self.__parent = None | 610 self.__parent = None |
617 | 611 |
618 def __dir__(self): | 612 def __dir__(self): |
619 return dir(super(ParseResults,self)) + self.keys() | 613 return dir(super(ParseResults,self)) + list(self.keys()) |
614 | |
615 if hasattr (collections, 'MutableMapping'): | |
616 collections.MutableMapping.register(ParseResults) | |
617 else: | |
618 from collections.abc import MutableMapping | |
619 MutableMapping.register (ParseResults) | |
620 | 620 |
621 def col (loc,strg): | 621 def col (loc,strg): |
622 """Returns current column within a string, counting newlines as line separators. | 622 """Returns current column within a string, counting newlines as line separators. |
623 The first column is number 1. | 623 The first column is number 1. |
624 | 624 |
625 Note: the default parsing behavior is to expand tabs in the input string | 625 Note: the default parsing behavior is to expand tabs in the input string |
626 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information | 626 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information |
627 on parsing strings containing <TAB>s, and suggested methods to maintain a | 627 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a |
628 consistent view of the parsed string, the parse location, and line and column | 628 consistent view of the parsed string, the parse location, and line and column |
629 positions within the parsed string. | 629 positions within the parsed string. |
630 """ | 630 """ |
631 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) | 631 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) |
632 | 632 |
634 """Returns current line number within a string, counting newlines as line separators. | 634 """Returns current line number within a string, counting newlines as line separators. |
635 The first line is number 1. | 635 The first line is number 1. |
636 | 636 |
637 Note: the default parsing behavior is to expand tabs in the input string | 637 Note: the default parsing behavior is to expand tabs in the input string |
638 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information | 638 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information |
639 on parsing strings containing <TAB>s, and suggested methods to maintain a | 639 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a |
640 consistent view of the parsed string, the parse location, and line and column | 640 consistent view of the parsed string, the parse location, and line and column |
641 positions within the parsed string. | 641 positions within the parsed string. |
642 """ | 642 """ |
643 return strg.count("\n",0,loc) + 1 | 643 return strg.count("\n",0,loc) + 1 |
644 | 644 |
651 return strg[lastCR+1:nextCR] | 651 return strg[lastCR+1:nextCR] |
652 else: | 652 else: |
653 return strg[lastCR+1:] | 653 return strg[lastCR+1:] |
654 | 654 |
655 def _defaultStartDebugAction( instring, loc, expr ): | 655 def _defaultStartDebugAction( instring, loc, expr ): |
656 print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) | 656 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) |
657 | 657 |
658 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): | 658 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): |
659 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) | 659 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) |
660 | 660 |
661 def _defaultExceptionDebugAction( instring, loc, expr, exc ): | 661 def _defaultExceptionDebugAction( instring, loc, expr, exc ): |
663 | 663 |
664 def nullDebugAction(*args): | 664 def nullDebugAction(*args): |
665 """'Do-nothing' debug action, to suppress debugging output during parsing.""" | 665 """'Do-nothing' debug action, to suppress debugging output during parsing.""" |
666 pass | 666 pass |
667 | 667 |
668 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs | |
669 #~ 'decorator to trim function calls to match the arity of the target' | |
670 #~ def _trim_arity(func, maxargs=3): | |
671 #~ if func in singleArgBuiltins: | |
672 #~ return lambda s,l,t: func(t) | |
673 #~ limit = 0 | |
674 #~ foundArity = False | |
675 #~ def wrapper(*args): | |
676 #~ nonlocal limit,foundArity | |
677 #~ while 1: | |
678 #~ try: | |
679 #~ ret = func(*args[limit:]) | |
680 #~ foundArity = True | |
681 #~ return ret | |
682 #~ except TypeError: | |
683 #~ if limit == maxargs or foundArity: | |
684 #~ raise | |
685 #~ limit += 1 | |
686 #~ continue | |
687 #~ return wrapper | |
688 | |
689 # this version is Python 2.x-3.x cross-compatible | |
668 'decorator to trim function calls to match the arity of the target' | 690 'decorator to trim function calls to match the arity of the target' |
669 if not _PY3K: | 691 def _trim_arity(func, maxargs=2): |
670 def _trim_arity(func, maxargs=2): | 692 if func in singleArgBuiltins: |
671 limit = [0] | 693 return lambda s,l,t: func(t) |
672 def wrapper(*args): | 694 limit = [0] |
673 while 1: | 695 foundArity = [False] |
674 try: | 696 def wrapper(*args): |
675 return func(*args[limit[0]:]) | 697 while 1: |
676 except TypeError: | 698 try: |
677 if limit[0] <= maxargs: | 699 ret = func(*args[limit[0]:]) |
678 limit[0] += 1 | 700 foundArity[0] = True |
679 continue | 701 return ret |
680 raise | 702 except TypeError: |
681 return wrapper | 703 if limit[0] <= maxargs and not foundArity[0]: |
682 else: | 704 limit[0] += 1 |
683 def _trim_arity(func, maxargs=2): | 705 continue |
684 limit = maxargs | 706 raise |
685 def wrapper(*args): | 707 return wrapper |
686 #~ nonlocal limit | 708 |
687 while 1: | |
688 try: | |
689 return func(*args[limit:]) | |
690 except TypeError: | |
691 if limit: | |
692 limit -= 1 | |
693 continue | |
694 raise | |
695 return wrapper | |
696 | |
697 class ParserElement(object): | 709 class ParserElement(object): |
698 """Abstract base level parser element class.""" | 710 """Abstract base level parser element class.""" |
699 DEFAULT_WHITE_CHARS = " \n\t\r" | 711 DEFAULT_WHITE_CHARS = " \n\t\r" |
700 verbose_stacktrace = False | 712 verbose_stacktrace = False |
701 | 713 |
702 def setDefaultWhitespaceChars( chars ): | 714 def setDefaultWhitespaceChars( chars ): |
703 """Overrides the default whitespace chars | 715 """Overrides the default whitespace chars |
704 """ | 716 """ |
705 ParserElement.DEFAULT_WHITE_CHARS = chars | 717 ParserElement.DEFAULT_WHITE_CHARS = chars |
706 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) | 718 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) |
719 | |
720 def inlineLiteralsUsing(cls): | |
721 """ | |
722 Set class to be used for inclusion of string literals into a parser. | |
723 """ | |
724 ParserElement.literalStringClass = cls | |
725 inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) | |
707 | 726 |
708 def __init__( self, savelist=False ): | 727 def __init__( self, savelist=False ): |
709 self.parseAction = list() | 728 self.parseAction = list() |
710 self.failAction = None | 729 self.failAction = None |
711 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall | 730 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall |
787 """Define action to perform when successfully matching parse element definition. | 806 """Define action to perform when successfully matching parse element definition. |
788 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, | 807 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, |
789 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: | 808 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: |
790 - s = the original string being parsed (see note below) | 809 - s = the original string being parsed (see note below) |
791 - loc = the location of the matching substring | 810 - loc = the location of the matching substring |
792 - toks = a list of the matched tokens, packaged as a ParseResults object | 811 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object |
793 If the functions in fns modify the tokens, they can return them as the return | 812 If the functions in fns modify the tokens, they can return them as the return |
794 value from fn, and the modified list of tokens will replace the original. | 813 value from fn, and the modified list of tokens will replace the original. |
795 Otherwise, fn does not need to return any value. | 814 Otherwise, fn does not need to return any value. |
796 | 815 |
797 Note: the default parsing behavior is to expand tabs in the input string | 816 Note: the default parsing behavior is to expand tabs in the input string |
798 before starting the parsing process. See L{I{parseString}<parseString>} for more information | 817 before starting the parsing process. See L{I{parseString}<parseString>} for more information |
799 on parsing strings containing <TAB>s, and suggested methods to maintain a | 818 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a |
800 consistent view of the parsed string, the parse location, and line and column | 819 consistent view of the parsed string, the parse location, and line and column |
801 positions within the parsed string. | 820 positions within the parsed string. |
802 """ | 821 """ |
803 self.parseAction = list(map(_trim_arity, list(fns))) | 822 self.parseAction = list(map(_trim_arity, list(fns))) |
804 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) | 823 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) |
816 C{fn(s,loc,expr,err)} where: | 835 C{fn(s,loc,expr,err)} where: |
817 - s = string being parsed | 836 - s = string being parsed |
818 - loc = location where expression match was attempted and failed | 837 - loc = location where expression match was attempted and failed |
819 - expr = the parse expression that failed | 838 - expr = the parse expression that failed |
820 - err = the exception thrown | 839 - err = the exception thrown |
821 The function returns no value. It may throw C{ParseFatalException} | 840 The function returns no value. It may throw C{L{ParseFatalException}} |
822 if it is desired to stop parsing immediately.""" | 841 if it is desired to stop parsing immediately.""" |
823 self.failAction = fn | 842 self.failAction = fn |
824 return self | 843 return self |
825 | 844 |
826 def _skipIgnorables( self, instring, loc ): | 845 def _skipIgnorables( self, instring, loc ): |
870 try: | 889 try: |
871 try: | 890 try: |
872 loc,tokens = self.parseImpl( instring, preloc, doActions ) | 891 loc,tokens = self.parseImpl( instring, preloc, doActions ) |
873 except IndexError: | 892 except IndexError: |
874 raise ParseException( instring, len(instring), self.errmsg, self ) | 893 raise ParseException( instring, len(instring), self.errmsg, self ) |
875 except ParseBaseException: | 894 except ParseBaseException as err: |
876 #~ print ("Exception raised:", err) | 895 #~ print ("Exception raised:", err) |
877 err = None | |
878 if self.debugActions[2]: | 896 if self.debugActions[2]: |
879 err = sys.exc_info()[1] | |
880 self.debugActions[2]( instring, tokensStart, self, err ) | 897 self.debugActions[2]( instring, tokensStart, self, err ) |
881 if self.failAction: | 898 if self.failAction: |
882 if err is None: | |
883 err = sys.exc_info()[1] | |
884 self.failAction( instring, tokensStart, self, err ) | 899 self.failAction( instring, tokensStart, self, err ) |
885 raise | 900 raise |
886 else: | 901 else: |
887 if callPreParse and self.callPreparse: | 902 if callPreParse and self.callPreparse: |
888 preloc = self.preParse( instring, loc ) | 903 preloc = self.preParse( instring, loc ) |
908 if tokens is not None: | 923 if tokens is not None: |
909 retTokens = ParseResults( tokens, | 924 retTokens = ParseResults( tokens, |
910 self.resultsName, | 925 self.resultsName, |
911 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), | 926 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), |
912 modal=self.modalResults ) | 927 modal=self.modalResults ) |
913 except ParseBaseException: | 928 except ParseBaseException as err: |
914 #~ print "Exception raised in user parse action:", err | 929 #~ print "Exception raised in user parse action:", err |
915 if (self.debugActions[2] ): | 930 if (self.debugActions[2] ): |
916 err = sys.exc_info()[1] | |
917 self.debugActions[2]( instring, tokensStart, self, err ) | 931 self.debugActions[2]( instring, tokensStart, self, err ) |
918 raise | 932 raise |
919 else: | 933 else: |
920 for fn in self.parseAction: | 934 for fn in self.parseAction: |
921 tokens = fn( instring, tokensStart, retTokens ) | 935 tokens = fn( instring, tokensStart, retTokens ) |
950 else: | 964 else: |
951 try: | 965 try: |
952 value = self._parseNoCache( instring, loc, doActions, callPreParse ) | 966 value = self._parseNoCache( instring, loc, doActions, callPreParse ) |
953 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) | 967 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) |
954 return value | 968 return value |
955 except ParseBaseException: | 969 except ParseBaseException as pe: |
956 pe = sys.exc_info()[1] | 970 pe.__traceback__ = None |
957 ParserElement._exprArgCache[ lookup ] = pe | 971 ParserElement._exprArgCache[ lookup ] = pe |
958 raise | 972 raise |
959 | 973 |
960 _parse = _parseNoCache | 974 _parse = _parseNoCache |
961 | 975 |
992 This is the main interface to the client code, once the complete | 1006 This is the main interface to the client code, once the complete |
993 expression has been built. | 1007 expression has been built. |
994 | 1008 |
995 If you want the grammar to require that the entire input string be | 1009 If you want the grammar to require that the entire input string be |
996 successfully parsed, then set C{parseAll} to True (equivalent to ending | 1010 successfully parsed, then set C{parseAll} to True (equivalent to ending |
997 the grammar with C{StringEnd()}). | 1011 the grammar with C{L{StringEnd()}}). |
998 | 1012 |
999 Note: C{parseString} implicitly calls C{expandtabs()} on the input string, | 1013 Note: C{parseString} implicitly calls C{expandtabs()} on the input string, |
1000 in order to report proper column numbers in parse actions. | 1014 in order to report proper column numbers in parse actions. |
1001 If the input string contains tabs and | 1015 If the input string contains tabs and |
1002 the grammar uses parse actions that use the C{loc} argument to index into the | 1016 the grammar uses parse actions that use the C{loc} argument to index into the |
1021 loc, tokens = self._parse( instring, 0 ) | 1035 loc, tokens = self._parse( instring, 0 ) |
1022 if parseAll: | 1036 if parseAll: |
1023 loc = self.preParse( instring, loc ) | 1037 loc = self.preParse( instring, loc ) |
1024 se = Empty() + StringEnd() | 1038 se = Empty() + StringEnd() |
1025 se._parse( instring, loc ) | 1039 se._parse( instring, loc ) |
1026 except ParseBaseException: | 1040 except ParseBaseException as exc: |
1027 if ParserElement.verbose_stacktrace: | 1041 if ParserElement.verbose_stacktrace: |
1028 raise | 1042 raise |
1029 else: | 1043 else: |
1030 # catch and re-raise exception from here, clears out pyparsing internal stack trace | 1044 # catch and re-raise exception from here, clears out pyparsing internal stack trace |
1031 exc = sys.exc_info()[1] | |
1032 raise exc | 1045 raise exc |
1033 else: | 1046 else: |
1034 return tokens | 1047 return tokens |
1035 | 1048 |
1036 def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): | 1049 def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): |
1074 loc += 1 | 1087 loc += 1 |
1075 else: | 1088 else: |
1076 loc = nextLoc | 1089 loc = nextLoc |
1077 else: | 1090 else: |
1078 loc = preloc+1 | 1091 loc = preloc+1 |
1079 except ParseBaseException: | 1092 except ParseBaseException as exc: |
1080 if ParserElement.verbose_stacktrace: | 1093 if ParserElement.verbose_stacktrace: |
1081 raise | 1094 raise |
1082 else: | 1095 else: |
1083 # catch and re-raise exception from here, clears out pyparsing internal stack trace | 1096 # catch and re-raise exception from here, clears out pyparsing internal stack trace |
1084 exc = sys.exc_info()[1] | |
1085 raise exc | 1097 raise exc |
1086 | 1098 |
1087 def transformString( self, instring ): | 1099 def transformString( self, instring ): |
1088 """Extension to C{scanString}, to modify matching text with modified tokens that may | 1100 """Extension to C{L{scanString}}, to modify matching text with modified tokens that may |
1089 be returned from a parse action. To use C{transformString}, define a grammar and | 1101 be returned from a parse action. To use C{transformString}, define a grammar and |
1090 attach a parse action to it that modifies the returned token list. | 1102 attach a parse action to it that modifies the returned token list. |
1091 Invoking C{transformString()} on a target string will then scan for matches, | 1103 Invoking C{transformString()} on a target string will then scan for matches, |
1092 and replace the matched text patterns according to the logic in the parse | 1104 and replace the matched text patterns according to the logic in the parse |
1093 action. C{transformString()} returns the resulting transformed string.""" | 1105 action. C{transformString()} returns the resulting transformed string.""" |
1108 out.append(t) | 1120 out.append(t) |
1109 lastE = e | 1121 lastE = e |
1110 out.append(instring[lastE:]) | 1122 out.append(instring[lastE:]) |
1111 out = [o for o in out if o] | 1123 out = [o for o in out if o] |
1112 return "".join(map(_ustr,_flatten(out))) | 1124 return "".join(map(_ustr,_flatten(out))) |
1113 except ParseBaseException: | 1125 except ParseBaseException as exc: |
1114 if ParserElement.verbose_stacktrace: | 1126 if ParserElement.verbose_stacktrace: |
1115 raise | 1127 raise |
1116 else: | 1128 else: |
1117 # catch and re-raise exception from here, clears out pyparsing internal stack trace | 1129 # catch and re-raise exception from here, clears out pyparsing internal stack trace |
1118 exc = sys.exc_info()[1] | |
1119 raise exc | 1130 raise exc |
1120 | 1131 |
1121 def searchString( self, instring, maxMatches=_MAX_INT ): | 1132 def searchString( self, instring, maxMatches=_MAX_INT ): |
1122 """Another extension to C{scanString}, simplifying the access to the tokens found | 1133 """Another extension to C{L{scanString}}, simplifying the access to the tokens found |
1123 to match the given parse expression. May be called with optional | 1134 to match the given parse expression. May be called with optional |
1124 C{maxMatches} argument, to clip searching after 'n' matches are found. | 1135 C{maxMatches} argument, to clip searching after 'n' matches are found. |
1125 """ | 1136 """ |
1126 try: | 1137 try: |
1127 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) | 1138 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) |
1128 except ParseBaseException: | 1139 except ParseBaseException as exc: |
1129 if ParserElement.verbose_stacktrace: | 1140 if ParserElement.verbose_stacktrace: |
1130 raise | 1141 raise |
1131 else: | 1142 else: |
1132 # catch and re-raise exception from here, clears out pyparsing internal stack trace | 1143 # catch and re-raise exception from here, clears out pyparsing internal stack trace |
1133 exc = sys.exc_info()[1] | |
1134 raise exc | 1144 raise exc |
1135 | 1145 |
1136 def __add__(self, other ): | 1146 def __add__(self, other ): |
1137 """Implementation of + operator - returns And""" | 1147 """Implementation of + operator - returns C{L{And}}""" |
1138 if isinstance( other, basestring ): | 1148 if isinstance( other, basestring ): |
1139 other = Literal( other ) | 1149 other = ParserElement.literalStringClass( other ) |
1140 if not isinstance( other, ParserElement ): | 1150 if not isinstance( other, ParserElement ): |
1141 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1151 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1142 SyntaxWarning, stacklevel=2) | 1152 SyntaxWarning, stacklevel=2) |
1143 return None | 1153 return None |
1144 return And( [ self, other ] ) | 1154 return And( [ self, other ] ) |
1145 | 1155 |
1146 def __radd__(self, other ): | 1156 def __radd__(self, other ): |
1147 """Implementation of + operator when left operand is not a C{ParserElement}""" | 1157 """Implementation of + operator when left operand is not a C{L{ParserElement}}""" |
1148 if isinstance( other, basestring ): | 1158 if isinstance( other, basestring ): |
1149 other = Literal( other ) | 1159 other = ParserElement.literalStringClass( other ) |
1150 if not isinstance( other, ParserElement ): | 1160 if not isinstance( other, ParserElement ): |
1151 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1161 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1152 SyntaxWarning, stacklevel=2) | 1162 SyntaxWarning, stacklevel=2) |
1153 return None | 1163 return None |
1154 return other + self | 1164 return other + self |
1155 | 1165 |
1156 def __sub__(self, other): | 1166 def __sub__(self, other): |
1157 """Implementation of - operator, returns C{And} with error stop""" | 1167 """Implementation of - operator, returns C{L{And}} with error stop""" |
1158 if isinstance( other, basestring ): | 1168 if isinstance( other, basestring ): |
1159 other = Literal( other ) | 1169 other = ParserElement.literalStringClass( other ) |
1160 if not isinstance( other, ParserElement ): | 1170 if not isinstance( other, ParserElement ): |
1161 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1171 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1162 SyntaxWarning, stacklevel=2) | 1172 SyntaxWarning, stacklevel=2) |
1163 return None | 1173 return None |
1164 return And( [ self, And._ErrorStop(), other ] ) | 1174 return And( [ self, And._ErrorStop(), other ] ) |
1165 | 1175 |
1166 def __rsub__(self, other ): | 1176 def __rsub__(self, other ): |
1167 """Implementation of - operator when left operand is not a C{ParserElement}""" | 1177 """Implementation of - operator when left operand is not a C{L{ParserElement}}""" |
1168 if isinstance( other, basestring ): | 1178 if isinstance( other, basestring ): |
1169 other = Literal( other ) | 1179 other = ParserElement.literalStringClass( other ) |
1170 if not isinstance( other, ParserElement ): | 1180 if not isinstance( other, ParserElement ): |
1171 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1181 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1172 SyntaxWarning, stacklevel=2) | 1182 SyntaxWarning, stacklevel=2) |
1173 return None | 1183 return None |
1174 return other - self | 1184 return other - self |
1177 """Implementation of * operator, allows use of C{expr * 3} in place of | 1187 """Implementation of * operator, allows use of C{expr * 3} in place of |
1178 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer | 1188 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer |
1179 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples | 1189 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples |
1180 may also include C{None} as in: | 1190 may also include C{None} as in: |
1181 - C{expr*(n,None)} or C{expr*(n,)} is equivalent | 1191 - C{expr*(n,None)} or C{expr*(n,)} is equivalent |
1182 to C{expr*n + ZeroOrMore(expr)} | 1192 to C{expr*n + L{ZeroOrMore}(expr)} |
1183 (read as "at least n instances of C{expr}") | 1193 (read as "at least n instances of C{expr}") |
1184 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} | 1194 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} |
1185 (read as "0 to n instances of C{expr}") | 1195 (read as "0 to n instances of C{expr}") |
1186 - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} | 1196 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} |
1187 - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} | 1197 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} |
1188 | 1198 |
1189 Note that C{expr*(None,n)} does not raise an exception if | 1199 Note that C{expr*(None,n)} does not raise an exception if |
1190 more than n exprs exist in the input stream; that is, | 1200 more than n exprs exist in the input stream; that is, |
1191 C{expr*(None,n)} does not enforce a maximum number of expr | 1201 C{expr*(None,n)} does not enforce a maximum number of expr |
1192 occurrences. If this behavior is desired, then write | 1202 occurrences. If this behavior is desired, then write |
1243 | 1253 |
1244 def __rmul__(self, other): | 1254 def __rmul__(self, other): |
1245 return self.__mul__(other) | 1255 return self.__mul__(other) |
1246 | 1256 |
1247 def __or__(self, other ): | 1257 def __or__(self, other ): |
1248 """Implementation of | operator - returns C{MatchFirst}""" | 1258 """Implementation of | operator - returns C{L{MatchFirst}}""" |
1249 if isinstance( other, basestring ): | 1259 if isinstance( other, basestring ): |
1250 other = Literal( other ) | 1260 other = ParserElement.literalStringClass( other ) |
1251 if not isinstance( other, ParserElement ): | 1261 if not isinstance( other, ParserElement ): |
1252 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1262 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1253 SyntaxWarning, stacklevel=2) | 1263 SyntaxWarning, stacklevel=2) |
1254 return None | 1264 return None |
1255 return MatchFirst( [ self, other ] ) | 1265 return MatchFirst( [ self, other ] ) |
1256 | 1266 |
1257 def __ror__(self, other ): | 1267 def __ror__(self, other ): |
1258 """Implementation of | operator when left operand is not a C{ParserElement}""" | 1268 """Implementation of | operator when left operand is not a C{L{ParserElement}}""" |
1259 if isinstance( other, basestring ): | 1269 if isinstance( other, basestring ): |
1260 other = Literal( other ) | 1270 other = ParserElement.literalStringClass( other ) |
1261 if not isinstance( other, ParserElement ): | 1271 if not isinstance( other, ParserElement ): |
1262 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1272 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1263 SyntaxWarning, stacklevel=2) | 1273 SyntaxWarning, stacklevel=2) |
1264 return None | 1274 return None |
1265 return other | self | 1275 return other | self |
1266 | 1276 |
1267 def __xor__(self, other ): | 1277 def __xor__(self, other ): |
1268 """Implementation of ^ operator - returns C{Or}""" | 1278 """Implementation of ^ operator - returns C{L{Or}}""" |
1269 if isinstance( other, basestring ): | 1279 if isinstance( other, basestring ): |
1270 other = Literal( other ) | 1280 other = ParserElement.literalStringClass( other ) |
1271 if not isinstance( other, ParserElement ): | 1281 if not isinstance( other, ParserElement ): |
1272 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1282 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1273 SyntaxWarning, stacklevel=2) | 1283 SyntaxWarning, stacklevel=2) |
1274 return None | 1284 return None |
1275 return Or( [ self, other ] ) | 1285 return Or( [ self, other ] ) |
1276 | 1286 |
1277 def __rxor__(self, other ): | 1287 def __rxor__(self, other ): |
1278 """Implementation of ^ operator when left operand is not a C{ParserElement}""" | 1288 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}""" |
1279 if isinstance( other, basestring ): | 1289 if isinstance( other, basestring ): |
1280 other = Literal( other ) | 1290 other = ParserElement.literalStringClass( other ) |
1281 if not isinstance( other, ParserElement ): | 1291 if not isinstance( other, ParserElement ): |
1282 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1292 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1283 SyntaxWarning, stacklevel=2) | 1293 SyntaxWarning, stacklevel=2) |
1284 return None | 1294 return None |
1285 return other ^ self | 1295 return other ^ self |
1286 | 1296 |
1287 def __and__(self, other ): | 1297 def __and__(self, other ): |
1288 """Implementation of & operator - returns C{Each}""" | 1298 """Implementation of & operator - returns C{L{Each}}""" |
1289 if isinstance( other, basestring ): | 1299 if isinstance( other, basestring ): |
1290 other = Literal( other ) | 1300 other = ParserElement.literalStringClass( other ) |
1291 if not isinstance( other, ParserElement ): | 1301 if not isinstance( other, ParserElement ): |
1292 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1302 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1293 SyntaxWarning, stacklevel=2) | 1303 SyntaxWarning, stacklevel=2) |
1294 return None | 1304 return None |
1295 return Each( [ self, other ] ) | 1305 return Each( [ self, other ] ) |
1296 | 1306 |
1297 def __rand__(self, other ): | 1307 def __rand__(self, other ): |
1298 """Implementation of & operator when left operand is not a C{ParserElement}""" | 1308 """Implementation of & operator when left operand is not a C{L{ParserElement}}""" |
1299 if isinstance( other, basestring ): | 1309 if isinstance( other, basestring ): |
1300 other = Literal( other ) | 1310 other = ParserElement.literalStringClass( other ) |
1301 if not isinstance( other, ParserElement ): | 1311 if not isinstance( other, ParserElement ): |
1302 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), | 1312 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), |
1303 SyntaxWarning, stacklevel=2) | 1313 SyntaxWarning, stacklevel=2) |
1304 return None | 1314 return None |
1305 return other & self | 1315 return other & self |
1306 | 1316 |
1307 def __invert__( self ): | 1317 def __invert__( self ): |
1308 """Implementation of ~ operator - returns C{NotAny}""" | 1318 """Implementation of ~ operator - returns C{L{NotAny}}""" |
1309 return NotAny( self ) | 1319 return NotAny( self ) |
1310 | 1320 |
1311 def __call__(self, name): | 1321 def __call__(self, name): |
1312 """Shortcut for C{setResultsName}, with C{listAllMatches=default}:: | 1322 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: |
1313 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") | 1323 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") |
1314 could be written as:: | 1324 could be written as:: |
1315 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") | 1325 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") |
1316 | 1326 |
1317 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be | 1327 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be |
1401 the entire file is opened, read, and closed before parsing. | 1411 the entire file is opened, read, and closed before parsing. |
1402 """ | 1412 """ |
1403 try: | 1413 try: |
1404 file_contents = file_or_filename.read() | 1414 file_contents = file_or_filename.read() |
1405 except AttributeError: | 1415 except AttributeError: |
1406 f = open(file_or_filename, "rb") | 1416 f = open(file_or_filename, "r") |
1407 file_contents = f.read() | 1417 file_contents = f.read() |
1408 f.close() | 1418 f.close() |
1409 try: | 1419 try: |
1410 return self.parseString(file_contents, parseAll) | 1420 return self.parseString(file_contents, parseAll) |
1411 except ParseBaseException: | 1421 except ParseBaseException as exc: |
1412 # catch and re-raise exception from here, clears out pyparsing internal stack trace | 1422 if ParserElement.verbose_stacktrace: |
1413 exc = sys.exc_info()[1] | 1423 raise |
1414 raise exc | 1424 else: |
1415 | 1425 # catch and re-raise exception from here, clears out pyparsing internal stack trace |
1416 def getException(self): | 1426 raise exc |
1417 return ParseException("",0,self.errmsg,self) | |
1418 | |
1419 def __getattr__(self,aname): | |
1420 if aname == "myException": | |
1421 self.myException = ret = self.getException(); | |
1422 return ret; | |
1423 else: | |
1424 raise AttributeError("no such attribute " + aname) | |
1425 | 1427 |
1426 def __eq__(self,other): | 1428 def __eq__(self,other): |
1427 if isinstance(other, ParserElement): | 1429 if isinstance(other, ParserElement): |
1428 return self is other or self.__dict__ == other.__dict__ | 1430 return self is other or self.__dict__ == other.__dict__ |
1429 elif isinstance(other, basestring): | 1431 elif isinstance(other, basestring): |
1476 self.mayReturnEmpty = True | 1478 self.mayReturnEmpty = True |
1477 self.mayIndexError = False | 1479 self.mayIndexError = False |
1478 self.errmsg = "Unmatchable token" | 1480 self.errmsg = "Unmatchable token" |
1479 | 1481 |
1480 def parseImpl( self, instring, loc, doActions=True ): | 1482 def parseImpl( self, instring, loc, doActions=True ): |
1481 exc = self.myException | 1483 raise ParseException(instring, loc, self.errmsg, self) |
1482 exc.loc = loc | |
1483 exc.pstr = instring | |
1484 raise exc | |
1485 | 1484 |
1486 | 1485 |
1487 class Literal(Token): | 1486 class Literal(Token): |
1488 """Token to exactly match a specified string.""" | 1487 """Token to exactly match a specified string.""" |
1489 def __init__( self, matchString ): | 1488 def __init__( self, matchString ): |
1507 #~ @profile | 1506 #~ @profile |
1508 def parseImpl( self, instring, loc, doActions=True ): | 1507 def parseImpl( self, instring, loc, doActions=True ): |
1509 if (instring[loc] == self.firstMatchChar and | 1508 if (instring[loc] == self.firstMatchChar and |
1510 (self.matchLen==1 or instring.startswith(self.match,loc)) ): | 1509 (self.matchLen==1 or instring.startswith(self.match,loc)) ): |
1511 return loc+self.matchLen, self.match | 1510 return loc+self.matchLen, self.match |
1512 #~ raise ParseException( instring, loc, self.errmsg ) | 1511 raise ParseException(instring, loc, self.errmsg, self) |
1513 exc = self.myException | |
1514 exc.loc = loc | |
1515 exc.pstr = instring | |
1516 raise exc | |
1517 _L = Literal | 1512 _L = Literal |
1513 ParserElement.literalStringClass = Literal | |
1518 | 1514 |
1519 class Keyword(Token): | 1515 class Keyword(Token): |
1520 """Token to exactly match a specified string as a keyword, that is, it must be | 1516 """Token to exactly match a specified string as a keyword, that is, it must be |
1521 immediately followed by a non-keyword character. Compare with C{Literal}:: | 1517 immediately followed by a non-keyword character. Compare with C{L{Literal}}:: |
1522 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. | 1518 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. |
1523 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} | 1519 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} |
1524 Accepts two optional constructor arguments in addition to the keyword string: | 1520 Accepts two optional constructor arguments in addition to the keyword string: |
1525 C{identChars} is a string of characters that would be valid identifier characters, | 1521 C{identChars} is a string of characters that would be valid identifier characters, |
1526 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive | 1522 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive |
1557 if (instring[loc] == self.firstMatchChar and | 1553 if (instring[loc] == self.firstMatchChar and |
1558 (self.matchLen==1 or instring.startswith(self.match,loc)) and | 1554 (self.matchLen==1 or instring.startswith(self.match,loc)) and |
1559 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and | 1555 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and |
1560 (loc == 0 or instring[loc-1] not in self.identChars) ): | 1556 (loc == 0 or instring[loc-1] not in self.identChars) ): |
1561 return loc+self.matchLen, self.match | 1557 return loc+self.matchLen, self.match |
1562 #~ raise ParseException( instring, loc, self.errmsg ) | 1558 raise ParseException(instring, loc, self.errmsg, self) |
1563 exc = self.myException | |
1564 exc.loc = loc | |
1565 exc.pstr = instring | |
1566 raise exc | |
1567 | 1559 |
1568 def copy(self): | 1560 def copy(self): |
1569 c = super(Keyword,self).copy() | 1561 c = super(Keyword,self).copy() |
1570 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS | 1562 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS |
1571 return c | 1563 return c |
1589 self.errmsg = "Expected " + self.name | 1581 self.errmsg = "Expected " + self.name |
1590 | 1582 |
1591 def parseImpl( self, instring, loc, doActions=True ): | 1583 def parseImpl( self, instring, loc, doActions=True ): |
1592 if instring[ loc:loc+self.matchLen ].upper() == self.match: | 1584 if instring[ loc:loc+self.matchLen ].upper() == self.match: |
1593 return loc+self.matchLen, self.returnString | 1585 return loc+self.matchLen, self.returnString |
1594 #~ raise ParseException( instring, loc, self.errmsg ) | 1586 raise ParseException(instring, loc, self.errmsg, self) |
1595 exc = self.myException | |
1596 exc.loc = loc | |
1597 exc.pstr = instring | |
1598 raise exc | |
1599 | 1587 |
1600 class CaselessKeyword(Keyword): | 1588 class CaselessKeyword(Keyword): |
1601 def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): | 1589 def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): |
1602 super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) | 1590 super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) |
1603 | 1591 |
1604 def parseImpl( self, instring, loc, doActions=True ): | 1592 def parseImpl( self, instring, loc, doActions=True ): |
1605 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and | 1593 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and |
1606 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): | 1594 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): |
1607 return loc+self.matchLen, self.match | 1595 return loc+self.matchLen, self.match |
1608 #~ raise ParseException( instring, loc, self.errmsg ) | 1596 raise ParseException(instring, loc, self.errmsg, self) |
1609 exc = self.myException | |
1610 exc.loc = loc | |
1611 exc.pstr = instring | |
1612 raise exc | |
1613 | 1597 |
1614 class Word(Token): | 1598 class Word(Token): |
1615 """Token for matching words composed of allowed character sets. | 1599 """Token for matching words composed of allowed character sets. |
1616 Defined with string containing all allowed initial characters, | 1600 Defined with string containing all allowed initial characters, |
1617 an optional string containing allowed body characters (if omitted, | 1601 an optional string containing allowed body characters (if omitted, |
1624 except for one or two characters, for instance. | 1608 except for one or two characters, for instance. |
1625 """ | 1609 """ |
1626 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): | 1610 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): |
1627 super(Word,self).__init__() | 1611 super(Word,self).__init__() |
1628 if excludeChars: | 1612 if excludeChars: |
1629 initChars = ''.join([c for c in initChars if c not in excludeChars]) | 1613 initChars = ''.join(c for c in initChars if c not in excludeChars) |
1630 if bodyChars: | 1614 if bodyChars: |
1631 bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) | 1615 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) |
1632 self.initCharsOrig = initChars | 1616 self.initCharsOrig = initChars |
1633 self.initChars = set(initChars) | 1617 self.initChars = set(initChars) |
1634 if bodyChars : | 1618 if bodyChars : |
1635 self.bodyCharsOrig = bodyChars | 1619 self.bodyCharsOrig = bodyChars |
1636 self.bodyChars = set(bodyChars) | 1620 self.bodyChars = set(bodyChars) |
1679 | 1663 |
1680 def parseImpl( self, instring, loc, doActions=True ): | 1664 def parseImpl( self, instring, loc, doActions=True ): |
1681 if self.re: | 1665 if self.re: |
1682 result = self.re.match(instring,loc) | 1666 result = self.re.match(instring,loc) |
1683 if not result: | 1667 if not result: |
1684 exc = self.myException | 1668 raise ParseException(instring, loc, self.errmsg, self) |
1685 exc.loc = loc | |
1686 exc.pstr = instring | |
1687 raise exc | |
1688 | 1669 |
1689 loc = result.end() | 1670 loc = result.end() |
1690 return loc, result.group() | 1671 return loc, result.group() |
1691 | 1672 |
1692 if not(instring[ loc ] in self.initChars): | 1673 if not(instring[ loc ] in self.initChars): |
1693 #~ raise ParseException( instring, loc, self.errmsg ) | 1674 raise ParseException(instring, loc, self.errmsg, self) |
1694 exc = self.myException | 1675 |
1695 exc.loc = loc | |
1696 exc.pstr = instring | |
1697 raise exc | |
1698 start = loc | 1676 start = loc |
1699 loc += 1 | 1677 loc += 1 |
1700 instrlen = len(instring) | 1678 instrlen = len(instring) |
1701 bodychars = self.bodyChars | 1679 bodychars = self.bodyChars |
1702 maxloc = start + self.maxLen | 1680 maxloc = start + self.maxLen |
1712 if self.asKeyword: | 1690 if self.asKeyword: |
1713 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): | 1691 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): |
1714 throwException = True | 1692 throwException = True |
1715 | 1693 |
1716 if throwException: | 1694 if throwException: |
1717 #~ raise ParseException( instring, loc, self.errmsg ) | 1695 raise ParseException(instring, loc, self.errmsg, self) |
1718 exc = self.myException | |
1719 exc.loc = loc | |
1720 exc.pstr = instring | |
1721 raise exc | |
1722 | 1696 |
1723 return loc, instring[start:loc] | 1697 return loc, instring[start:loc] |
1724 | 1698 |
1725 def __str__( self ): | 1699 def __str__( self ): |
1726 try: | 1700 try: |
1785 self.mayReturnEmpty = True | 1759 self.mayReturnEmpty = True |
1786 | 1760 |
1787 def parseImpl( self, instring, loc, doActions=True ): | 1761 def parseImpl( self, instring, loc, doActions=True ): |
1788 result = self.re.match(instring,loc) | 1762 result = self.re.match(instring,loc) |
1789 if not result: | 1763 if not result: |
1790 exc = self.myException | 1764 raise ParseException(instring, loc, self.errmsg, self) |
1791 exc.loc = loc | |
1792 exc.pstr = instring | |
1793 raise exc | |
1794 | 1765 |
1795 loc = result.end() | 1766 loc = result.end() |
1796 d = result.groupdict() | 1767 d = result.groupdict() |
1797 ret = ParseResults(result.group()) | 1768 ret = ParseResults(result.group()) |
1798 if d: | 1769 if d: |
1819 """ | 1790 """ |
1820 Defined with the following parameters: | 1791 Defined with the following parameters: |
1821 - quoteChar - string of one or more characters defining the quote delimiting string | 1792 - quoteChar - string of one or more characters defining the quote delimiting string |
1822 - escChar - character to escape quotes, typically backslash (default=None) | 1793 - escChar - character to escape quotes, typically backslash (default=None) |
1823 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) | 1794 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) |
1824 - multiline - boolean indicating whether quotes can span multiple lines (default=False) | 1795 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) |
1825 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) | 1796 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) |
1826 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) | 1797 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) |
1827 """ | 1798 """ |
1828 super(QuotedString,self).__init__() | 1799 super(QuotedString,self).__init__() |
1829 | 1800 |
1830 # remove white space from quote chars - won't work anyway | 1801 # remove white space from quote chars - won't work anyway |
1831 quoteChar = quoteChar.strip() | 1802 quoteChar = quoteChar.strip() |
1862 ( re.escape(self.quoteChar), | 1833 ( re.escape(self.quoteChar), |
1863 _escapeRegexRangeChars(self.endQuoteChar[0]), | 1834 _escapeRegexRangeChars(self.endQuoteChar[0]), |
1864 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) | 1835 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) |
1865 if len(self.endQuoteChar) > 1: | 1836 if len(self.endQuoteChar) > 1: |
1866 self.pattern += ( | 1837 self.pattern += ( |
1867 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), | 1838 '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), |
1868 _escapeRegexRangeChars(self.endQuoteChar[i])) | 1839 _escapeRegexRangeChars(self.endQuoteChar[i])) |
1869 for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' | 1840 for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' |
1870 ) | 1841 ) |
1871 if escQuote: | 1842 if escQuote: |
1872 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) | 1843 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) |
1873 if escChar: | 1844 if escChar: |
1874 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) | 1845 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) |
1890 self.mayReturnEmpty = True | 1861 self.mayReturnEmpty = True |
1891 | 1862 |
1892 def parseImpl( self, instring, loc, doActions=True ): | 1863 def parseImpl( self, instring, loc, doActions=True ): |
1893 result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None | 1864 result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None |
1894 if not result: | 1865 if not result: |
1895 exc = self.myException | 1866 raise ParseException(instring, loc, self.errmsg, self) |
1896 exc.loc = loc | |
1897 exc.pstr = instring | |
1898 raise exc | |
1899 | 1867 |
1900 loc = result.end() | 1868 loc = result.end() |
1901 ret = result.group() | 1869 ret = result.group() |
1902 | 1870 |
1903 if self.unquoteResults: | 1871 if self.unquoteResults: |
1959 self.mayReturnEmpty = ( self.minLen == 0 ) | 1927 self.mayReturnEmpty = ( self.minLen == 0 ) |
1960 self.mayIndexError = False | 1928 self.mayIndexError = False |
1961 | 1929 |
1962 def parseImpl( self, instring, loc, doActions=True ): | 1930 def parseImpl( self, instring, loc, doActions=True ): |
1963 if instring[loc] in self.notChars: | 1931 if instring[loc] in self.notChars: |
1964 #~ raise ParseException( instring, loc, self.errmsg ) | 1932 raise ParseException(instring, loc, self.errmsg, self) |
1965 exc = self.myException | |
1966 exc.loc = loc | |
1967 exc.pstr = instring | |
1968 raise exc | |
1969 | 1933 |
1970 start = loc | 1934 start = loc |
1971 loc += 1 | 1935 loc += 1 |
1972 notchars = self.notChars | 1936 notchars = self.notChars |
1973 maxlen = min( start+self.maxLen, len(instring) ) | 1937 maxlen = min( start+self.maxLen, len(instring) ) |
1974 while loc < maxlen and \ | 1938 while loc < maxlen and \ |
1975 (instring[loc] not in notchars): | 1939 (instring[loc] not in notchars): |
1976 loc += 1 | 1940 loc += 1 |
1977 | 1941 |
1978 if loc - start < self.minLen: | 1942 if loc - start < self.minLen: |
1979 #~ raise ParseException( instring, loc, self.errmsg ) | 1943 raise ParseException(instring, loc, self.errmsg, self) |
1980 exc = self.myException | |
1981 exc.loc = loc | |
1982 exc.pstr = instring | |
1983 raise exc | |
1984 | 1944 |
1985 return loc, instring[start:loc] | 1945 return loc, instring[start:loc] |
1986 | 1946 |
1987 def __str__( self ): | 1947 def __str__( self ): |
1988 try: | 1948 try: |
2001 class White(Token): | 1961 class White(Token): |
2002 """Special matching class for matching whitespace. Normally, whitespace is ignored | 1962 """Special matching class for matching whitespace. Normally, whitespace is ignored |
2003 by pyparsing grammars. This class is included when some whitespace structures | 1963 by pyparsing grammars. This class is included when some whitespace structures |
2004 are significant. Define with a string containing the whitespace characters to be | 1964 are significant. Define with a string containing the whitespace characters to be |
2005 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, | 1965 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, |
2006 as defined for the C{Word} class.""" | 1966 as defined for the C{L{Word}} class.""" |
2007 whiteStrs = { | 1967 whiteStrs = { |
2008 " " : "<SPC>", | 1968 " " : "<SPC>", |
2009 "\t": "<TAB>", | 1969 "\t": "<TAB>", |
2010 "\n": "<LF>", | 1970 "\n": "<LF>", |
2011 "\r": "<CR>", | 1971 "\r": "<CR>", |
2012 "\f": "<FF>", | 1972 "\f": "<FF>", |
2013 } | 1973 } |
2014 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): | 1974 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): |
2015 super(White,self).__init__() | 1975 super(White,self).__init__() |
2016 self.matchWhite = ws | 1976 self.matchWhite = ws |
2017 self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) | 1977 self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) ) |
2018 #~ self.leaveWhitespace() | 1978 #~ self.leaveWhitespace() |
2019 self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) | 1979 self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) |
2020 self.mayReturnEmpty = True | 1980 self.mayReturnEmpty = True |
2021 self.errmsg = "Expected " + self.name | 1981 self.errmsg = "Expected " + self.name |
2022 | 1982 |
2023 self.minLen = min | 1983 self.minLen = min |
2024 | 1984 |
2031 self.maxLen = exact | 1991 self.maxLen = exact |
2032 self.minLen = exact | 1992 self.minLen = exact |
2033 | 1993 |
2034 def parseImpl( self, instring, loc, doActions=True ): | 1994 def parseImpl( self, instring, loc, doActions=True ): |
2035 if not(instring[ loc ] in self.matchWhite): | 1995 if not(instring[ loc ] in self.matchWhite): |
2036 #~ raise ParseException( instring, loc, self.errmsg ) | 1996 raise ParseException(instring, loc, self.errmsg, self) |
2037 exc = self.myException | |
2038 exc.loc = loc | |
2039 exc.pstr = instring | |
2040 raise exc | |
2041 start = loc | 1997 start = loc |
2042 loc += 1 | 1998 loc += 1 |
2043 maxloc = start + self.maxLen | 1999 maxloc = start + self.maxLen |
2044 maxloc = min( maxloc, len(instring) ) | 2000 maxloc = min( maxloc, len(instring) ) |
2045 while loc < maxloc and instring[loc] in self.matchWhite: | 2001 while loc < maxloc and instring[loc] in self.matchWhite: |
2046 loc += 1 | 2002 loc += 1 |
2047 | 2003 |
2048 if loc - start < self.minLen: | 2004 if loc - start < self.minLen: |
2049 #~ raise ParseException( instring, loc, self.errmsg ) | 2005 raise ParseException(instring, loc, self.errmsg, self) |
2050 exc = self.myException | |
2051 exc.loc = loc | |
2052 exc.pstr = instring | |
2053 raise exc | |
2054 | 2006 |
2055 return loc, instring[start:loc] | 2007 return loc, instring[start:loc] |
2056 | 2008 |
2057 | 2009 |
2058 class _PositionToken(Token): | 2010 class _PositionToken(Token): |
2100 | 2052 |
2101 def parseImpl( self, instring, loc, doActions=True ): | 2053 def parseImpl( self, instring, loc, doActions=True ): |
2102 if not( loc==0 or | 2054 if not( loc==0 or |
2103 (loc == self.preParse( instring, 0 )) or | 2055 (loc == self.preParse( instring, 0 )) or |
2104 (instring[loc-1] == "\n") ): #col(loc, instring) != 1: | 2056 (instring[loc-1] == "\n") ): #col(loc, instring) != 1: |
2105 #~ raise ParseException( instring, loc, "Expected start of line" ) | 2057 raise ParseException(instring, loc, self.errmsg, self) |
2106 exc = self.myException | |
2107 exc.loc = loc | |
2108 exc.pstr = instring | |
2109 raise exc | |
2110 return loc, [] | 2058 return loc, [] |
2111 | 2059 |
2112 class LineEnd(_PositionToken): | 2060 class LineEnd(_PositionToken): |
2113 """Matches if current position is at the end of a line within the parse string""" | 2061 """Matches if current position is at the end of a line within the parse string""" |
2114 def __init__( self ): | 2062 def __init__( self ): |
2119 def parseImpl( self, instring, loc, doActions=True ): | 2067 def parseImpl( self, instring, loc, doActions=True ): |
2120 if loc<len(instring): | 2068 if loc<len(instring): |
2121 if instring[loc] == "\n": | 2069 if instring[loc] == "\n": |
2122 return loc+1, "\n" | 2070 return loc+1, "\n" |
2123 else: | 2071 else: |
2124 #~ raise ParseException( instring, loc, "Expected end of line" ) | 2072 raise ParseException(instring, loc, self.errmsg, self) |
2125 exc = self.myException | |
2126 exc.loc = loc | |
2127 exc.pstr = instring | |
2128 raise exc | |
2129 elif loc == len(instring): | 2073 elif loc == len(instring): |
2130 return loc+1, [] | 2074 return loc+1, [] |
2131 else: | 2075 else: |
2132 exc = self.myException | 2076 raise ParseException(instring, loc, self.errmsg, self) |
2133 exc.loc = loc | |
2134 exc.pstr = instring | |
2135 raise exc | |
2136 | 2077 |
2137 class StringStart(_PositionToken): | 2078 class StringStart(_PositionToken): |
2138 """Matches if current position is at the beginning of the parse string""" | 2079 """Matches if current position is at the beginning of the parse string""" |
2139 def __init__( self ): | 2080 def __init__( self ): |
2140 super(StringStart,self).__init__() | 2081 super(StringStart,self).__init__() |
2142 | 2083 |
2143 def parseImpl( self, instring, loc, doActions=True ): | 2084 def parseImpl( self, instring, loc, doActions=True ): |
2144 if loc != 0: | 2085 if loc != 0: |
2145 # see if entire string up to here is just whitespace and ignoreables | 2086 # see if entire string up to here is just whitespace and ignoreables |
2146 if loc != self.preParse( instring, 0 ): | 2087 if loc != self.preParse( instring, 0 ): |
2147 #~ raise ParseException( instring, loc, "Expected start of text" ) | 2088 raise ParseException(instring, loc, self.errmsg, self) |
2148 exc = self.myException | |
2149 exc.loc = loc | |
2150 exc.pstr = instring | |
2151 raise exc | |
2152 return loc, [] | 2089 return loc, [] |
2153 | 2090 |
2154 class StringEnd(_PositionToken): | 2091 class StringEnd(_PositionToken): |
2155 """Matches if current position is at the end of the parse string""" | 2092 """Matches if current position is at the end of the parse string""" |
2156 def __init__( self ): | 2093 def __init__( self ): |
2157 super(StringEnd,self).__init__() | 2094 super(StringEnd,self).__init__() |
2158 self.errmsg = "Expected end of text" | 2095 self.errmsg = "Expected end of text" |
2159 | 2096 |
2160 def parseImpl( self, instring, loc, doActions=True ): | 2097 def parseImpl( self, instring, loc, doActions=True ): |
2161 if loc < len(instring): | 2098 if loc < len(instring): |
2162 #~ raise ParseException( instring, loc, "Expected end of text" ) | 2099 raise ParseException(instring, loc, self.errmsg, self) |
2163 exc = self.myException | |
2164 exc.loc = loc | |
2165 exc.pstr = instring | |
2166 raise exc | |
2167 elif loc == len(instring): | 2100 elif loc == len(instring): |
2168 return loc+1, [] | 2101 return loc+1, [] |
2169 elif loc > len(instring): | 2102 elif loc > len(instring): |
2170 return loc, [] | 2103 return loc, [] |
2171 else: | 2104 else: |
2172 exc = self.myException | 2105 raise ParseException(instring, loc, self.errmsg, self) |
2173 exc.loc = loc | |
2174 exc.pstr = instring | |
2175 raise exc | |
2176 | 2106 |
2177 class WordStart(_PositionToken): | 2107 class WordStart(_PositionToken): |
2178 """Matches if the current position is at the beginning of a Word, and | 2108 """Matches if the current position is at the beginning of a Word, and |
2179 is not preceded by any character in a given set of C{wordChars} | 2109 is not preceded by any character in a given set of C{wordChars} |
2180 (default=C{printables}). To emulate the C{\b} behavior of regular expressions, | 2110 (default=C{printables}). To emulate the C{\b} behavior of regular expressions, |
2188 | 2118 |
2189 def parseImpl(self, instring, loc, doActions=True ): | 2119 def parseImpl(self, instring, loc, doActions=True ): |
2190 if loc != 0: | 2120 if loc != 0: |
2191 if (instring[loc-1] in self.wordChars or | 2121 if (instring[loc-1] in self.wordChars or |
2192 instring[loc] not in self.wordChars): | 2122 instring[loc] not in self.wordChars): |
2193 exc = self.myException | 2123 raise ParseException(instring, loc, self.errmsg, self) |
2194 exc.loc = loc | |
2195 exc.pstr = instring | |
2196 raise exc | |
2197 return loc, [] | 2124 return loc, [] |
2198 | 2125 |
2199 class WordEnd(_PositionToken): | 2126 class WordEnd(_PositionToken): |
2200 """Matches if the current position is at the end of a Word, and | 2127 """Matches if the current position is at the end of a Word, and |
2201 is not followed by any character in a given set of C{wordChars} | 2128 is not followed by any character in a given set of C{wordChars} |
2212 def parseImpl(self, instring, loc, doActions=True ): | 2139 def parseImpl(self, instring, loc, doActions=True ): |
2213 instrlen = len(instring) | 2140 instrlen = len(instring) |
2214 if instrlen>0 and loc<instrlen: | 2141 if instrlen>0 and loc<instrlen: |
2215 if (instring[loc] in self.wordChars or | 2142 if (instring[loc] in self.wordChars or |
2216 instring[loc-1] not in self.wordChars): | 2143 instring[loc-1] not in self.wordChars): |
2217 #~ raise ParseException( instring, loc, "Expected end of word" ) | 2144 raise ParseException(instring, loc, self.errmsg, self) |
2218 exc = self.myException | |
2219 exc.loc = loc | |
2220 exc.pstr = instring | |
2221 raise exc | |
2222 return loc, [] | 2145 return loc, [] |
2223 | 2146 |
2224 | 2147 |
2225 class ParseExpression(ParserElement): | 2148 class ParseExpression(ParserElement): |
2226 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" | 2149 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" |
2329 May be constructed using the C{'+'} operator. | 2252 May be constructed using the C{'+'} operator. |
2330 """ | 2253 """ |
2331 | 2254 |
2332 class _ErrorStop(Empty): | 2255 class _ErrorStop(Empty): |
2333 def __init__(self, *args, **kwargs): | 2256 def __init__(self, *args, **kwargs): |
2334 super(Empty,self).__init__(*args, **kwargs) | 2257 super(And._ErrorStop,self).__init__(*args, **kwargs) |
2258 self.name = '-' | |
2335 self.leaveWhitespace() | 2259 self.leaveWhitespace() |
2336 | 2260 |
2337 def __init__( self, exprs, savelist = True ): | 2261 def __init__( self, exprs, savelist = True ): |
2338 super(And,self).__init__(exprs, savelist) | 2262 super(And,self).__init__(exprs, savelist) |
2339 self.mayReturnEmpty = True | 2263 self.mayReturnEmpty = True |
2357 if errorStop: | 2281 if errorStop: |
2358 try: | 2282 try: |
2359 loc, exprtokens = e._parse( instring, loc, doActions ) | 2283 loc, exprtokens = e._parse( instring, loc, doActions ) |
2360 except ParseSyntaxException: | 2284 except ParseSyntaxException: |
2361 raise | 2285 raise |
2362 except ParseBaseException: | 2286 except ParseBaseException as pe: |
2363 pe = sys.exc_info()[1] | 2287 pe.__traceback__ = None |
2364 raise ParseSyntaxException(pe) | 2288 raise ParseSyntaxException(pe) |
2365 except IndexError: | 2289 except IndexError: |
2366 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) | 2290 raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) |
2367 else: | 2291 else: |
2368 loc, exprtokens = e._parse( instring, loc, doActions ) | 2292 loc, exprtokens = e._parse( instring, loc, doActions ) |
2385 def __str__( self ): | 2309 def __str__( self ): |
2386 if hasattr(self,"name"): | 2310 if hasattr(self,"name"): |
2387 return self.name | 2311 return self.name |
2388 | 2312 |
2389 if self.strRepr is None: | 2313 if self.strRepr is None: |
2390 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" | 2314 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}" |
2391 | 2315 |
2392 return self.strRepr | 2316 return self.strRepr |
2393 | 2317 |
2394 | 2318 |
2395 class Or(ParseExpression): | 2319 class Or(ParseExpression): |
2410 maxMatchLoc = -1 | 2334 maxMatchLoc = -1 |
2411 maxException = None | 2335 maxException = None |
2412 for e in self.exprs: | 2336 for e in self.exprs: |
2413 try: | 2337 try: |
2414 loc2 = e.tryParse( instring, loc ) | 2338 loc2 = e.tryParse( instring, loc ) |
2415 except ParseException: | 2339 except ParseException as err: |
2416 err = sys.exc_info()[1] | 2340 err.__traceback__ = None |
2417 if err.loc > maxExcLoc: | 2341 if err.loc > maxExcLoc: |
2418 maxException = err | 2342 maxException = err |
2419 maxExcLoc = err.loc | 2343 maxExcLoc = err.loc |
2420 except IndexError: | 2344 except IndexError: |
2421 if len(instring) > maxExcLoc: | 2345 if len(instring) > maxExcLoc: |
2434 | 2358 |
2435 return maxMatchExp._parse( instring, loc, doActions ) | 2359 return maxMatchExp._parse( instring, loc, doActions ) |
2436 | 2360 |
2437 def __ixor__(self, other ): | 2361 def __ixor__(self, other ): |
2438 if isinstance( other, basestring ): | 2362 if isinstance( other, basestring ): |
2439 other = Literal( other ) | 2363 other = ParserElement.literalStringClass( other ) |
2440 return self.append( other ) #Or( [ self, other ] ) | 2364 return self.append( other ) #Or( [ self, other ] ) |
2441 | 2365 |
2442 def __str__( self ): | 2366 def __str__( self ): |
2443 if hasattr(self,"name"): | 2367 if hasattr(self,"name"): |
2444 return self.name | 2368 return self.name |
2445 | 2369 |
2446 if self.strRepr is None: | 2370 if self.strRepr is None: |
2447 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" | 2371 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" |
2448 | 2372 |
2449 return self.strRepr | 2373 return self.strRepr |
2450 | 2374 |
2451 def checkRecursion( self, parseElementList ): | 2375 def checkRecursion( self, parseElementList ): |
2452 subRecCheckList = parseElementList[:] + [ self ] | 2376 subRecCheckList = parseElementList[:] + [ self ] |
2475 maxException = None | 2399 maxException = None |
2476 for e in self.exprs: | 2400 for e in self.exprs: |
2477 try: | 2401 try: |
2478 ret = e._parse( instring, loc, doActions ) | 2402 ret = e._parse( instring, loc, doActions ) |
2479 return ret | 2403 return ret |
2480 except ParseException, err: | 2404 except ParseException as err: |
2481 if err.loc > maxExcLoc: | 2405 if err.loc > maxExcLoc: |
2482 maxException = err | 2406 maxException = err |
2483 maxExcLoc = err.loc | 2407 maxExcLoc = err.loc |
2484 except IndexError: | 2408 except IndexError: |
2485 if len(instring) > maxExcLoc: | 2409 if len(instring) > maxExcLoc: |
2493 else: | 2417 else: |
2494 raise ParseException(instring, loc, "no defined alternatives to match", self) | 2418 raise ParseException(instring, loc, "no defined alternatives to match", self) |
2495 | 2419 |
2496 def __ior__(self, other ): | 2420 def __ior__(self, other ): |
2497 if isinstance( other, basestring ): | 2421 if isinstance( other, basestring ): |
2498 other = Literal( other ) | 2422 other = ParserElement.literalStringClass( other ) |
2499 return self.append( other ) #MatchFirst( [ self, other ] ) | 2423 return self.append( other ) #MatchFirst( [ self, other ] ) |
2500 | 2424 |
2501 def __str__( self ): | 2425 def __str__( self ): |
2502 if hasattr(self,"name"): | 2426 if hasattr(self,"name"): |
2503 return self.name | 2427 return self.name |
2504 | 2428 |
2505 if self.strRepr is None: | 2429 if self.strRepr is None: |
2506 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" | 2430 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" |
2507 | 2431 |
2508 return self.strRepr | 2432 return self.strRepr |
2509 | 2433 |
2510 def checkRecursion( self, parseElementList ): | 2434 def checkRecursion( self, parseElementList ): |
2511 subRecCheckList = parseElementList[:] + [ self ] | 2435 subRecCheckList = parseElementList[:] + [ self ] |
2560 tmpOpt.remove(e) | 2484 tmpOpt.remove(e) |
2561 if len(failed) == len(tmpExprs): | 2485 if len(failed) == len(tmpExprs): |
2562 keepMatching = False | 2486 keepMatching = False |
2563 | 2487 |
2564 if tmpReqd: | 2488 if tmpReqd: |
2565 missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) | 2489 missing = ", ".join(_ustr(e) for e in tmpReqd) |
2566 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) | 2490 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) |
2567 | 2491 |
2568 # add any unmatched Optionals, in case they have default values defined | 2492 # add any unmatched Optionals, in case they have default values defined |
2569 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] | 2493 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] |
2570 | 2494 |
2589 def __str__( self ): | 2513 def __str__( self ): |
2590 if hasattr(self,"name"): | 2514 if hasattr(self,"name"): |
2591 return self.name | 2515 return self.name |
2592 | 2516 |
2593 if self.strRepr is None: | 2517 if self.strRepr is None: |
2594 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" | 2518 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" |
2595 | 2519 |
2596 return self.strRepr | 2520 return self.strRepr |
2597 | 2521 |
2598 def checkRecursion( self, parseElementList ): | 2522 def checkRecursion( self, parseElementList ): |
2599 subRecCheckList = parseElementList[:] + [ self ] | 2523 subRecCheckList = parseElementList[:] + [ self ] |
2704 try: | 2628 try: |
2705 self.expr.tryParse( instring, loc ) | 2629 self.expr.tryParse( instring, loc ) |
2706 except (ParseException,IndexError): | 2630 except (ParseException,IndexError): |
2707 pass | 2631 pass |
2708 else: | 2632 else: |
2709 #~ raise ParseException(instring, loc, self.errmsg ) | 2633 raise ParseException(instring, loc, self.errmsg, self) |
2710 exc = self.myException | |
2711 exc.loc = loc | |
2712 exc.pstr = instring | |
2713 raise exc | |
2714 return loc, [] | 2634 return loc, [] |
2715 | 2635 |
2716 def __str__( self ): | 2636 def __str__( self ): |
2717 if hasattr(self,"name"): | 2637 if hasattr(self,"name"): |
2718 return self.name | 2638 return self.name |
2876 failParse = False | 2796 failParse = False |
2877 if self.ignoreExpr is not None: | 2797 if self.ignoreExpr is not None: |
2878 while 1: | 2798 while 1: |
2879 try: | 2799 try: |
2880 loc = self.ignoreExpr.tryParse(instring,loc) | 2800 loc = self.ignoreExpr.tryParse(instring,loc) |
2881 # print "found ignoreExpr, advance to", loc | 2801 # print("found ignoreExpr, advance to", loc) |
2882 except ParseBaseException: | 2802 except ParseBaseException: |
2883 break | 2803 break |
2884 expr._parse( instring, loc, doActions=False, callPreParse=False ) | 2804 expr._parse( instring, loc, doActions=False, callPreParse=False ) |
2885 skipText = instring[startLoc:loc] | 2805 skipText = instring[startLoc:loc] |
2886 if self.includeMatch: | 2806 if self.includeMatch: |
2896 except (ParseException,IndexError): | 2816 except (ParseException,IndexError): |
2897 if failParse: | 2817 if failParse: |
2898 raise | 2818 raise |
2899 else: | 2819 else: |
2900 loc += 1 | 2820 loc += 1 |
2901 exc = self.myException | 2821 raise ParseException(instring, loc, self.errmsg, self) |
2902 exc.loc = loc | |
2903 exc.pstr = instring | |
2904 raise exc | |
2905 | 2822 |
2906 class Forward(ParseElementEnhance): | 2823 class Forward(ParseElementEnhance): |
2907 """Forward declaration of an expression to be defined later - | 2824 """Forward declaration of an expression to be defined later - |
2908 used for recursive grammars, such as algebraic infix notation. | 2825 used for recursive grammars, such as algebraic infix notation. |
2909 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. | 2826 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. |
2914 will actually be evaluated as:: | 2831 will actually be evaluated as:: |
2915 (fwdExpr << a) | b | c | 2832 (fwdExpr << a) | b | c |
2916 thereby leaving b and c out as parseable alternatives. It is recommended that you | 2833 thereby leaving b and c out as parseable alternatives. It is recommended that you |
2917 explicitly group the values inserted into the C{Forward}:: | 2834 explicitly group the values inserted into the C{Forward}:: |
2918 fwdExpr << (a | b | c) | 2835 fwdExpr << (a | b | c) |
2836 Converting to use the '<<=' operator instead will avoid this problem. | |
2919 """ | 2837 """ |
2920 def __init__( self, other=None ): | 2838 def __init__( self, other=None ): |
2921 super(Forward,self).__init__( other, savelist=False ) | 2839 super(Forward,self).__init__( other, savelist=False ) |
2922 | 2840 |
2923 def __lshift__( self, other ): | 2841 def __ilshift__( self, other ): |
2924 if isinstance( other, basestring ): | 2842 if isinstance( other, basestring ): |
2925 other = Literal(other) | 2843 other = ParserElement.literalStringClass(other) |
2926 self.expr = other | 2844 self.expr = other |
2927 self.mayReturnEmpty = other.mayReturnEmpty | 2845 self.mayReturnEmpty = other.mayReturnEmpty |
2928 self.strRepr = None | 2846 self.strRepr = None |
2929 self.mayIndexError = self.expr.mayIndexError | 2847 self.mayIndexError = self.expr.mayIndexError |
2930 self.mayReturnEmpty = self.expr.mayReturnEmpty | 2848 self.mayReturnEmpty = self.expr.mayReturnEmpty |
2931 self.setWhitespaceChars( self.expr.whiteChars ) | 2849 self.setWhitespaceChars( self.expr.whiteChars ) |
2932 self.skipWhitespace = self.expr.skipWhitespace | 2850 self.skipWhitespace = self.expr.skipWhitespace |
2933 self.saveAsList = self.expr.saveAsList | 2851 self.saveAsList = self.expr.saveAsList |
2934 self.ignoreExprs.extend(self.expr.ignoreExprs) | 2852 self.ignoreExprs.extend(self.expr.ignoreExprs) |
2853 return self | |
2854 | |
2855 def __lshift__(self, other): | |
2856 warnings.warn("Operator '<<' is deprecated, use '<<=' instead", | |
2857 DeprecationWarning,stacklevel=2) | |
2858 self <<= other | |
2935 return None | 2859 return None |
2936 | 2860 |
2937 def leaveWhitespace( self ): | 2861 def leaveWhitespace( self ): |
2938 self.skipWhitespace = False | 2862 self.skipWhitespace = False |
2939 return self | 2863 return self |
2940 | 2864 |
2941 def streamline( self ): | 2865 def streamline( self ): |
2991 super(Upcase,self).__init__(*args) | 2915 super(Upcase,self).__init__(*args) |
2992 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", | 2916 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", |
2993 DeprecationWarning,stacklevel=2) | 2917 DeprecationWarning,stacklevel=2) |
2994 | 2918 |
2995 def postParse( self, instring, loc, tokenlist ): | 2919 def postParse( self, instring, loc, tokenlist ): |
2996 return list(map( string.upper, tokenlist )) | 2920 return list(map( str.upper, tokenlist )) |
2997 | 2921 |
2998 | 2922 |
2999 class Combine(TokenConverter): | 2923 class Combine(TokenConverter): |
3000 """Converter to concatenate all matching tokens to a single string. | 2924 """Converter to concatenate all matching tokens to a single string. |
3001 By default, the matching patterns must also be contiguous in the input string; | 2925 By default, the matching patterns must also be contiguous in the input string; |
3027 return [ retToks ] | 2951 return [ retToks ] |
3028 else: | 2952 else: |
3029 return retToks | 2953 return retToks |
3030 | 2954 |
3031 class Group(TokenConverter): | 2955 class Group(TokenConverter): |
3032 """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" | 2956 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.""" |
3033 def __init__( self, expr ): | 2957 def __init__( self, expr ): |
3034 super(Group,self).__init__( expr ) | 2958 super(Group,self).__init__( expr ) |
3035 self.saveAsList = True | 2959 self.saveAsList = True |
3036 | 2960 |
3037 def postParse( self, instring, loc, tokenlist ): | 2961 def postParse( self, instring, loc, tokenlist ): |
3103 if len(paArgs)>3: | 3027 if len(paArgs)>3: |
3104 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc | 3028 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc |
3105 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) | 3029 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) |
3106 try: | 3030 try: |
3107 ret = f(*paArgs) | 3031 ret = f(*paArgs) |
3108 except Exception: | 3032 except Exception as exc: |
3109 exc = sys.exc_info()[1] | |
3110 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) | 3033 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) |
3111 raise | 3034 raise |
3112 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) ) | 3035 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) ) |
3113 return ret | 3036 return ret |
3114 try: | 3037 try: |
3122 # | 3045 # |
3123 def delimitedList( expr, delim=",", combine=False ): | 3046 def delimitedList( expr, delim=",", combine=False ): |
3124 """Helper to define a delimited list of expressions - the delimiter defaults to ','. | 3047 """Helper to define a delimited list of expressions - the delimiter defaults to ','. |
3125 By default, the list elements and delimiters can have intervening whitespace, and | 3048 By default, the list elements and delimiters can have intervening whitespace, and |
3126 comments, but this can be overridden by passing C{combine=True} in the constructor. | 3049 comments, but this can be overridden by passing C{combine=True} in the constructor. |
3127 If C{combine} is set to True, the matching tokens are returned as a single token | 3050 If C{combine} is set to C{True}, the matching tokens are returned as a single token |
3128 string, with the delimiters included; otherwise, the matching tokens are returned | 3051 string, with the delimiters included; otherwise, the matching tokens are returned |
3129 as a list of tokens, with the delimiters suppressed. | 3052 as a list of tokens, with the delimiters suppressed. |
3130 """ | 3053 """ |
3131 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." | 3054 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." |
3132 if combine: | 3055 if combine: |
3224 return _ustr(s) | 3147 return _ustr(s) |
3225 | 3148 |
3226 def oneOf( strs, caseless=False, useRegex=True ): | 3149 def oneOf( strs, caseless=False, useRegex=True ): |
3227 """Helper to quickly define a set of alternative Literals, and makes sure to do | 3150 """Helper to quickly define a set of alternative Literals, and makes sure to do |
3228 longest-first testing when there is a conflict, regardless of the input order, | 3151 longest-first testing when there is a conflict, regardless of the input order, |
3229 but returns a C{MatchFirst} for best performance. | 3152 but returns a C{L{MatchFirst}} for best performance. |
3230 | 3153 |
3231 Parameters: | 3154 Parameters: |
3232 - strs - a string of space-delimited literals, or a list of string literals | 3155 - strs - a string of space-delimited literals, or a list of string literals |
3233 - caseless - (default=False) - treat all literals as caseless | 3156 - caseless - (default=False) - treat all literals as caseless |
3234 - useRegex - (default=True) - as an optimization, will generate a Regex | 3157 - useRegex - (default=True) - as an optimization, will generate a Regex |
3269 | 3192 |
3270 if not caseless and useRegex: | 3193 if not caseless and useRegex: |
3271 #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) | 3194 #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) |
3272 try: | 3195 try: |
3273 if len(symbols)==len("".join(symbols)): | 3196 if len(symbols)==len("".join(symbols)): |
3274 return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) | 3197 return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ) |
3275 else: | 3198 else: |
3276 return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) | 3199 return Regex( "|".join(re.escape(sym) for sym in symbols) ) |
3277 except: | 3200 except: |
3278 warnings.warn("Exception creating Regex for oneOf, building MatchFirst", | 3201 warnings.warn("Exception creating Regex for oneOf, building MatchFirst", |
3279 SyntaxWarning, stacklevel=2) | 3202 SyntaxWarning, stacklevel=2) |
3280 | 3203 |
3281 | 3204 |
3282 # last resort, just use MatchFirst | 3205 # last resort, just use MatchFirst |
3283 return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) | 3206 return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) |
3284 | 3207 |
3285 def dictOf( key, value ): | 3208 def dictOf( key, value ): |
3286 """Helper to easily and clearly define a dictionary by specifying the respective patterns | 3209 """Helper to easily and clearly define a dictionary by specifying the respective patterns |
3287 for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens | 3210 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens |
3288 in the proper order. The key pattern can include delimiting markers or punctuation, | 3211 in the proper order. The key pattern can include delimiting markers or punctuation, |
3289 as long as they are suppressed, thereby leaving the significant key text. The value | 3212 as long as they are suppressed, thereby leaving the significant key text. The value |
3290 pattern can include named results, so that the C{Dict} results can include named token | 3213 pattern can include named results, so that the C{Dict} results can include named token |
3291 fields. | 3214 fields. |
3292 """ | 3215 """ |
3299 input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not | 3222 input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not |
3300 require the inspect module to chase up the call stack. By default, returns a | 3223 require the inspect module to chase up the call stack. By default, returns a |
3301 string containing the original parsed text. | 3224 string containing the original parsed text. |
3302 | 3225 |
3303 If the optional C{asString} argument is passed as C{False}, then the return value is a | 3226 If the optional C{asString} argument is passed as C{False}, then the return value is a |
3304 C{ParseResults} containing any results names that were originally matched, and a | 3227 C{L{ParseResults}} containing any results names that were originally matched, and a |
3305 single token containing the original matched text from the input string. So if | 3228 single token containing the original matched text from the input string. So if |
3306 the expression passed to C{L{originalTextFor}} contains expressions with defined | 3229 the expression passed to C{L{originalTextFor}} contains expressions with defined |
3307 results names, you must set C{asString} to C{False} if you want to preserve those | 3230 results names, you must set C{asString} to C{False} if you want to preserve those |
3308 results name values.""" | 3231 results name values.""" |
3309 locMarker = Empty().setParseAction(lambda s,loc,t: loc) | 3232 locMarker = Empty().setParseAction(lambda s,loc,t: loc) |
3332 lineEnd = LineEnd().setName("lineEnd") | 3255 lineEnd = LineEnd().setName("lineEnd") |
3333 stringStart = StringStart().setName("stringStart") | 3256 stringStart = StringStart().setName("stringStart") |
3334 stringEnd = StringEnd().setName("stringEnd") | 3257 stringEnd = StringEnd().setName("stringEnd") |
3335 | 3258 |
3336 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) | 3259 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) |
3337 _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) | 3260 _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16))) |
3338 _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16))) | |
3339 _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) | 3261 _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) |
3340 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) | 3262 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) |
3341 _charRange = Group(_singleChar + Suppress("-") + _singleChar) | 3263 _charRange = Group(_singleChar + Suppress("-") + _singleChar) |
3342 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" | 3264 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" |
3343 | 3265 |
3344 _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) | 3266 _expanded = lambda p: (isinstance(p,ParseResults) and ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) or p) |
3345 | 3267 |
3346 def srange(s): | 3268 def srange(s): |
3347 r"""Helper to easily define string ranges for use in Word construction. Borrows | 3269 r"""Helper to easily define string ranges for use in Word construction. Borrows |
3348 syntax from regexp '[]' string range definitions:: | 3270 syntax from regexp '[]' string range definitions:: |
3349 srange("[0-9]") -> "0123456789" | 3271 srange("[0-9]") -> "0123456789" |
3359 an escaped octal character with a leading '\0' (\041, which is a '!' character) | 3281 an escaped octal character with a leading '\0' (\041, which is a '!' character) |
3360 a range of any of the above, separated by a dash ('a-z', etc.) | 3282 a range of any of the above, separated by a dash ('a-z', etc.) |
3361 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) | 3283 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) |
3362 """ | 3284 """ |
3363 try: | 3285 try: |
3364 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) | 3286 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) |
3365 except: | 3287 except: |
3366 return "" | 3288 return "" |
3367 | 3289 |
3368 def matchOnlyAtCol(n): | 3290 def matchOnlyAtCol(n): |
3369 """Helper method for defining parse actions that require matching at a specific | 3291 """Helper method for defining parse actions that require matching at a specific |
3374 raise ParseException(strg,locn,"matched token not at column %d" % n) | 3296 raise ParseException(strg,locn,"matched token not at column %d" % n) |
3375 return verifyCol | 3297 return verifyCol |
3376 | 3298 |
3377 def replaceWith(replStr): | 3299 def replaceWith(replStr): |
3378 """Helper method for common parse actions that simply return a literal value. Especially | 3300 """Helper method for common parse actions that simply return a literal value. Especially |
3379 useful when used with C{transformString()}. | 3301 useful when used with C{L{transformString<ParserElement.transformString>}()}. |
3380 """ | 3302 """ |
3381 def _replFunc(*args): | 3303 def _replFunc(*args): |
3382 return [replStr] | 3304 return [replStr] |
3383 return _replFunc | 3305 return _replFunc |
3384 | 3306 |
3396 def downcaseTokens(s,l,t): | 3318 def downcaseTokens(s,l,t): |
3397 """Helper parse action to convert tokens to lower case.""" | 3319 """Helper parse action to convert tokens to lower case.""" |
3398 return [ tt.lower() for tt in map(_ustr,t) ] | 3320 return [ tt.lower() for tt in map(_ustr,t) ] |
3399 | 3321 |
3400 def keepOriginalText(s,startLoc,t): | 3322 def keepOriginalText(s,startLoc,t): |
3401 """DEPRECATED - use new helper method C{originalTextFor}. | 3323 """DEPRECATED - use new helper method C{L{originalTextFor}}. |
3402 Helper parse action to preserve original parsed text, | 3324 Helper parse action to preserve original parsed text, |
3403 overriding any nested parse actions.""" | 3325 overriding any nested parse actions.""" |
3404 try: | 3326 try: |
3405 endloc = getTokensEndLoc() | 3327 endloc = getTokensEndLoc() |
3406 except ParseException: | 3328 except ParseException: |
3438 tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) | 3360 tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) |
3439 openTag = Suppress("<") + tagStr("tag") + \ | 3361 openTag = Suppress("<") + tagStr("tag") + \ |
3440 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ | 3362 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ |
3441 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") | 3363 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") |
3442 else: | 3364 else: |
3443 printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) | 3365 printablesLessRAbrack = "".join(c for c in printables if c not in ">") |
3444 tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) | 3366 tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) |
3445 openTag = Suppress("<") + tagStr("tag") + \ | 3367 openTag = Suppress("<") + tagStr("tag") + \ |
3446 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ | 3368 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ |
3447 Optional( Suppress("=") + tagAttrValue ) ))) + \ | 3369 Optional( Suppress("=") + tagAttrValue ) ))) + \ |
3448 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") | 3370 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") |
3462 """Helper to construct opening and closing tag expressions for XML, given a tag name""" | 3384 """Helper to construct opening and closing tag expressions for XML, given a tag name""" |
3463 return _makeTags( tagStr, True ) | 3385 return _makeTags( tagStr, True ) |
3464 | 3386 |
3465 def withAttribute(*args,**attrDict): | 3387 def withAttribute(*args,**attrDict): |
3466 """Helper to create a validating parse action to be used with start tags created | 3388 """Helper to create a validating parse action to be used with start tags created |
3467 with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag | 3389 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag |
3468 with a required attribute value, to avoid false matches on common tags such as | 3390 with a required attribute value, to avoid false matches on common tags such as |
3469 C{<TD>} or C{<DIV>}. | 3391 C{<TD>} or C{<DIV>}. |
3470 | 3392 |
3471 Call C{withAttribute} with a series of attribute names and values. Specify the list | 3393 Call C{withAttribute} with a series of attribute names and values. Specify the list |
3472 of filter attributes names and values as: | 3394 of filter attributes names and values as: |
3497 | 3419 |
3498 opAssoc = _Constants() | 3420 opAssoc = _Constants() |
3499 opAssoc.LEFT = object() | 3421 opAssoc.LEFT = object() |
3500 opAssoc.RIGHT = object() | 3422 opAssoc.RIGHT = object() |
3501 | 3423 |
3502 def operatorPrecedence( baseExpr, opList ): | 3424 def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): |
3503 """Helper method for constructing grammars of expressions made up of | 3425 """Helper method for constructing grammars of expressions made up of |
3504 operators working in a precedence hierarchy. Operators may be unary or | 3426 operators working in a precedence hierarchy. Operators may be unary or |
3505 binary, left- or right-associative. Parse actions can also be attached | 3427 binary, left- or right-associative. Parse actions can also be attached |
3506 to operator expressions. | 3428 to operator expressions. |
3507 | 3429 |
3516 two operators separating the 3 terms | 3438 two operators separating the 3 terms |
3517 - numTerms is the number of terms for this operator (must | 3439 - numTerms is the number of terms for this operator (must |
3518 be 1, 2, or 3) | 3440 be 1, 2, or 3) |
3519 - rightLeftAssoc is the indicator whether the operator is | 3441 - rightLeftAssoc is the indicator whether the operator is |
3520 right or left associative, using the pyparsing-defined | 3442 right or left associative, using the pyparsing-defined |
3521 constants opAssoc.RIGHT and opAssoc.LEFT. | 3443 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. |
3522 - parseAction is the parse action to be associated with | 3444 - parseAction is the parse action to be associated with |
3523 expressions matching this operator expression (the | 3445 expressions matching this operator expression (the |
3524 parse action tuple member may be omitted) | 3446 parse action tuple member may be omitted) |
3447 - lpar - expression for matching left-parentheses (default=Suppress('(')) | |
3448 - rpar - expression for matching right-parentheses (default=Suppress(')')) | |
3525 """ | 3449 """ |
3526 ret = Forward() | 3450 ret = Forward() |
3527 lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) | 3451 lastExpr = baseExpr | ( lpar + ret + rpar ) |
3528 for i,operDef in enumerate(opList): | 3452 for i,operDef in enumerate(opList): |
3529 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] | 3453 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] |
3530 if arity == 3: | 3454 if arity == 3: |
3531 if opExpr is None or len(opExpr) != 2: | 3455 if opExpr is None or len(opExpr) != 2: |
3532 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") | 3456 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") |
3567 matchExpr.setParseAction( pa ) | 3491 matchExpr.setParseAction( pa ) |
3568 thisExpr << ( matchExpr | lastExpr ) | 3492 thisExpr << ( matchExpr | lastExpr ) |
3569 lastExpr = thisExpr | 3493 lastExpr = thisExpr |
3570 ret << lastExpr | 3494 ret << lastExpr |
3571 return ret | 3495 return ret |
3496 operatorPrecedence = infixNotation | |
3572 | 3497 |
3573 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") | 3498 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") |
3574 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") | 3499 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") |
3575 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") | 3500 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") |
3576 unicodeString = Combine(_L('u') + quotedString.copy()) | 3501 unicodeString = Combine(_L('u') + quotedString.copy()) |
3695 dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") | 3620 dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") |
3696 cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment") | 3621 cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment") |
3697 | 3622 |
3698 javaStyleComment = cppStyleComment | 3623 javaStyleComment = cppStyleComment |
3699 pythonStyleComment = Regex(r"#.*").setName("Python style comment") | 3624 pythonStyleComment = Regex(r"#.*").setName("Python style comment") |
3700 _noncomma = "".join( [ c for c in printables if c != "," ] ) | 3625 _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + |
3701 _commasepitem = Combine(OneOrMore(Word(_noncomma) + | |
3702 Optional( Word(" \t") + | 3626 Optional( Word(" \t") + |
3703 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") | 3627 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") |
3704 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") | 3628 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") |
3705 | 3629 |
3706 | 3630 |
3713 print (teststring + "->" + str(tokenlist)) | 3637 print (teststring + "->" + str(tokenlist)) |
3714 print ("tokens = " + str(tokens)) | 3638 print ("tokens = " + str(tokens)) |
3715 print ("tokens.columns = " + str(tokens.columns)) | 3639 print ("tokens.columns = " + str(tokens.columns)) |
3716 print ("tokens.tables = " + str(tokens.tables)) | 3640 print ("tokens.tables = " + str(tokens.tables)) |
3717 print (tokens.asXML("SQL",True)) | 3641 print (tokens.asXML("SQL",True)) |
3718 except ParseBaseException: | 3642 except ParseBaseException as err: |
3719 err = sys.exc_info()[1] | |
3720 print (teststring + "->") | 3643 print (teststring + "->") |
3721 print (err.line) | 3644 print (err.line) |
3722 print (" "*(err.column-1) + "^") | 3645 print (" "*(err.column-1) + "^") |
3723 print (err) | 3646 print (err) |
3724 print() | 3647 print() |