Mercurial > dottes
comparison abc2xml/pyparsing.py @ 484:4fab69a1027d build-default-207
Add MusicXML conversion to tune pages.
Might help someone.
author | Jim Hague <jim.hague@acm.org> |
---|---|
date | Tue, 17 Jun 2014 09:11:38 +0100 (2014-06-17) |
parents | |
children | b1dbb76f4eb9 |
comparison
equal
deleted
inserted
replaced
483:681274f40615 | 484:4fab69a1027d |
---|---|
1 # module pyparsing.py | |
2 # | |
3 # Copyright (c) 2003-2011 Paul T. McGuire | |
4 # | |
5 # Permission is hereby granted, free of charge, to any person obtaining | |
6 # a copy of this software and associated documentation files (the | |
7 # "Software"), to deal in the Software without restriction, including | |
8 # without limitation the rights to use, copy, modify, merge, publish, | |
9 # distribute, sublicense, and/or sell copies of the Software, and to | |
10 # permit persons to whom the Software is furnished to do so, subject to | |
11 # the following conditions: | |
12 # | |
13 # The above copyright notice and this permission notice shall be | |
14 # included in all copies or substantial portions of the Software. | |
15 # | |
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
23 # | |
24 #from __future__ import generators | |
25 | |
26 __doc__ = \ | |
27 """ | |
28 pyparsing module - Classes and methods to define and execute parsing grammars | |
29 | |
30 The pyparsing module is an alternative approach to creating and executing simple grammars, | |
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you | |
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module | |
33 provides a library of classes that you use to construct the grammar directly in Python. | |
34 | |
35 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: | |
36 | |
37 from pyparsing import Word, alphas | |
38 | |
39 # define grammar of a greeting | |
40 greet = Word( alphas ) + "," + Word( alphas ) + "!" | |
41 | |
42 hello = "Hello, World!" | |
43 print hello, "->", greet.parseString( hello ) | |
44 | |
45 The program outputs the following:: | |
46 | |
47 Hello, World! -> ['Hello', ',', 'World', '!'] | |
48 | |
49 The Python representation of the grammar is quite readable, owing to the self-explanatory | |
50 class names, and the use of '+', '|' and '^' operators. | |
51 | |
52 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an | |
53 object with named attributes. | |
54 | |
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers: | |
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) | |
57 - quoted strings | |
58 - embedded comments | |
59 """ | |
60 | |
61 __version__ = "1.5.6" | |
62 __versionTime__ = "26 June 2011 10:53" | |
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" | |
64 | |
65 import string | |
66 from weakref import ref as wkref | |
67 import copy | |
68 import sys | |
69 import warnings | |
70 import re | |
71 import sre_constants | |
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) | |
73 | |
74 __all__ = [ | |
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', | |
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', | |
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', | |
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', | |
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', | |
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', | |
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', | |
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', | |
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', | |
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', | |
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', | |
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', | |
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', | |
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', | |
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', | |
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', | |
91 'indentedBlock', 'originalTextFor', | |
92 ] | |
93 | |
94 """ | |
95 Detect if we are running version 3.X and make appropriate changes | |
96 Robert A. Clark | |
97 """ | |
# True when running under Python 3.x; selects the compatibility aliases below.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    # Python 3 renamed the __builtin__ module to builtins; keep the old
    # module-level name so the lookup loop below works on both versions.
    import builtins as __builtin__

    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    import __builtin__

    _MAX_INT = sys.maxint
    range = xrange
    # stand-in for the set builtin: builds a dict keyed by the members of s
    set = lambda s : dict( [(c,0) for c in s] )
    alphas = string.lowercase + string.uppercase

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        Returns obj unchanged if it is already a unicode object.  Otherwise
        tries str(obj); if that fails with a UnicodeEncodeError, returns
        unicode(obj) instead.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # state that "The return value must be a string object". However, does a
            # unicode object (being a subclass of basestring) count as a "string
            # object"?
            # If so, then return a unicode object:
            return unicode(obj)
            # Else encode it... but how? There are many choices... :)
            # Replace unprintables with escape codes?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
            # Replace unprintables with question marks?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
            # ...

# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
for fname in "sum len enumerate sorted reversed list tuple set any all".split():
    try:
        singleArgBuiltins.append(getattr(__builtin__,fname))
    except AttributeError:
        # this builtin does not exist on the running interpreter; skip it
        continue
148 | |
def _xml_escape(data):
    """Return data with the characters &, >, <, " and ' replaced by XML entities."""
    # the ampersand pair must stay first, otherwise the '&' introduced by
    # the later entities would itself be escaped
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for raw, entity in replacements:
        escaped = escaped.replace(raw, entity)
    return escaped
158 | |
class _Constants(object):
    """Empty class; defines no attributes or methods of its own."""
161 | |
162 nums = string.digits | |
163 hexnums = nums + "ABCDEFabcdef" | |
164 alphanums = alphas + nums | |
165 _bslash = chr(92) | |
166 printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) | |
167 | |
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the failure details.

        When msg is omitted, the first argument is taken as the message
        and the stored input string is set to "".
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col, column - returns the column number of the exception text
            - line - returns the line containing the exception text
        Any other missing attribute raises AttributeError.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )
    def __repr__( self ):
        return _ustr(self)
    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        # List the real method name (markInputline, lower-case 'l') and the
        # "column" alias that __getattr__ also answers to.
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
215 | |
class ParseException(ParseBaseException):
    """Exception thrown when parse expressions don't match class.

    Supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text
    """
224 | |
class ParseFatalException(ParseBaseException):
    """User-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately."""
229 | |
class ParseSyntaxException(ParseFatalException):
    """Just like C{ParseFatalException}, but thrown internally when an
       C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # re-package the details of the exception passed in as pe
        base = super(ParseSyntaxException, self)
        base.__init__(pe.pstr, pe.loc, pe.msg, pe.parserElement)
237 | |
238 #~ class ReparseException(ParseBaseException): | |
239 #~ """Experimental class - parse actions can raise this exception to cause | |
240 #~ pyparsing to reparse the input string: | |
241 #~ - with a modified input string, and/or | |
242 #~ - with a modified start location | |
243 #~ Set the values of the ReparseException in the constructor, and raise the | |
244 #~ exception in a parse action to cause pyparsing to use the new string/location. | |
245 #~ Setting the values as None causes no change to be made. | |
246 #~ """ | |
247 #~ def __init_( self, newstring, restartLoc ): | |
248 #~ self.newParseText = newstring | |
249 #~ self.reparseLoc = restartLoc | |
250 | |
class RecursiveGrammarException(Exception):
    """Exception thrown by C{validate()} if the grammar could be improperly recursive."""
    def __init__( self, parseElementList ):
        # keep the list of elements handed in, for display by __str__
        self.parseElementTrace = parseElementList

    def __str__( self ):
        template = "RecursiveGrammarException: %s"
        return template % self.parseElementTrace
258 | |
class _ParseResultsWithOffset(object):
    """Pair of values held as a 2-tuple; the second member can be replaced via setOffset."""
    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self, i):
        # keep the first member, swap in the new second member
        first = self.tup[0]
        self.tup = (first, i)
268 | |
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; the
        # __doinit flag then keeps __init__ from resetting its contents.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Look name up as a results name; unknown names yield "" rather
           than raising AttributeError."""
        if name in self.__tokdict:
            if name not in self.__accumNames:
                return self.__tokdict[name][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[name] ])
        return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            # Explicit branch instead of the "a<0 and offset or a+offset"
            # idiom, which fell through to a negative result when offset was 0.
            def addoffset(a):
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        """Support C{sum()} over ParseResults, whose implicit start value is 0."""
        if isinstance(other,int) and other == 0:
            return self.copy()
        # Let Python raise TypeError for unsupported left operands instead
        # of silently evaluating the expression to None.
        return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens;
           nested ParseResults become nested lists, other tokens are kept as they are."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list(...) so this also works where values()/keys() are views
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() may be a dict view, which cannot be concatenated to a list
        return dir(super(ParseResults,self)) + list(self.keys())
620 | |
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location sitting on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
632 | |
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line per newline found before loc, plus the line loc itself is on
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
644 | |
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # rfind gives -1 when there is no earlier newline, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        return strg[start:]
    return strg[start:end]
654 | |
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, with its location as (line,col)."""
    pieces = [ "Match ", _ustr(expr), " at loc ", _ustr(loc),
               "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) ]
    print ("".join(pieces))
657 | |
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the matched expression followed by its tokens as a list."""
    pieces = [ "Matched ", _ustr(expr), " -> ", str(toks.asList()) ]
    print ("".join(pieces))
660 | |
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
663 | |
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
667 | |
668 'decorator to trim function calls to match the arity of the target' | |
def _trim_arity(func, maxargs=2):
    """Decorator to trim function calls to match the arity of the target.

    The returned wrapper first calls func with all positional arguments.
    Each time the call raises TypeError it drops one more leading argument
    and retries; once more than maxargs leading arguments have been dropped
    the TypeError is re-raised.  The number of dropped arguments is
    remembered between calls.

    A single implementation serves Python 2 and 3: the counter lives in a
    one-element list so the closure can update it without 'nonlocal'.
    Rebinding a plain closure variable inside wrapper would raise
    UnboundLocalError on every call.

    NOTE: a TypeError raised inside func itself is indistinguishable from an
    arity mismatch and also triggers a retry with fewer arguments.
    """
    limit = [0]
    def wrapper(*args):
        while 1:
            try:
                return func(*args[limit[0]:])
            except TypeError:
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise
    return wrapper
696 | |
697 class ParserElement(object): | |
698 """Abstract base level parser element class.""" | |
699 DEFAULT_WHITE_CHARS = " \n\t\r" | |
700 verbose_stacktrace = False | |
701 | |
702 def setDefaultWhitespaceChars( chars ): | |
703 """Overrides the default whitespace chars | |
704 """ | |
705 ParserElement.DEFAULT_WHITE_CHARS = chars | |
706 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) | |
707 | |
708 def __init__( self, savelist=False ): | |
709 self.parseAction = list() | |
710 self.failAction = None | |
711 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall | |
712 self.strRepr = None | |
713 self.resultsName = None | |
714 self.saveAsList = savelist | |
715 self.skipWhitespace = True | |
716 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS | |
717 self.copyDefaultWhiteChars = True | |
718 self.mayReturnEmpty = False # used when checking for left-recursion | |
719 self.keepTabs = False | |
720 self.ignoreExprs = list() | |
721 self.debug = False | |
722 self.streamlined = False | |
723 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index | |
724 self.errmsg = "" | |
725 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) | |
726 self.debugActions = ( None, None, None ) #custom debug actions | |
727 self.re = None | |
728 self.callPreparse = True # used to avoid redundant calls to preParse | |
729 self.callDuringTry = False | |
730 | |
731 def copy( self ): | |
732 """Make a copy of this C{ParserElement}. Useful for defining different parse actions | |
733 for the same parsing pattern, using copies of the original parse element.""" | |
734 cpy = copy.copy( self ) | |
735 cpy.parseAction = self.parseAction[:] | |
736 cpy.ignoreExprs = self.ignoreExprs[:] | |
737 if self.copyDefaultWhiteChars: | |
738 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS | |
739 return cpy | |
740 | |
741 def setName( self, name ): | |
742 """Define name for this expression, for use in debugging.""" | |
743 self.name = name | |
744 self.errmsg = "Expected " + self.name | |
745 if hasattr(self,"exception"): | |
746 self.exception.msg = self.errmsg | |
747 return self | |
748 | |
749 def setResultsName( self, name, listAllMatches=False ): | |
750 """Define name for referencing matching tokens as a nested attribute | |
751 of the returned parse results. | |
752 NOTE: this returns a *copy* of the original C{ParserElement} object; | |
753 this is so that the client can define a basic element, such as an | |
754 integer, and reference it in multiple places with different names. | |
755 | |
756 You can also set results names using the abbreviated syntax, | |
757 C{expr("name")} in place of C{expr.setResultsName("name")} - | |
758 see L{I{__call__}<__call__>}. | |
759 """ | |
760 newself = self.copy() | |
761 if name.endswith("*"): | |
762 name = name[:-1] | |
763 listAllMatches=True | |
764 newself.resultsName = name | |
765 newself.modalResults = not listAllMatches | |
766 return newself | |
767 | |
768 def setBreak(self,breakFlag = True): | |
769 """Method to invoke the Python pdb debugger when this element is | |
770 about to be parsed. Set C{breakFlag} to True to enable, False to | |
771 disable. | |
772 """ | |
773 if breakFlag: | |
774 _parseMethod = self._parse | |
775 def breaker(instring, loc, doActions=True, callPreParse=True): | |
776 import pdb | |
777 pdb.set_trace() | |
778 return _parseMethod( instring, loc, doActions, callPreParse ) | |
779 breaker._originalParseMethod = _parseMethod | |
780 self._parse = breaker | |
781 else: | |
782 if hasattr(self._parse,"_originalParseMethod"): | |
783 self._parse = self._parse._originalParseMethod | |
784 return self | |
785 | |
def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Replaces any parse actions already set on this element.
       Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
       C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    # wrap each action so it can be called with the full argument list
    self.parseAction = [ _trim_arity(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
806 | |
def addParseAction(self, *fns, **kwargs):
    """Append C{fns} to the existing parse actions.  See L{I{setParseAction}<setParseAction>}.

    C{callDuringTry} stays set once it has been enabled by any call.
    """
    self.parseAction += [_trim_arity(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
812 | |
def setFailAction(self, fn):
    """Register the callback to run when parsing fails at this expression.

    The fail action C{fn} is called as C{fn(s,loc,expr,err)} where:
     - s    = string being parsed
     - loc  = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    Its return value is not used.  It may raise C{ParseFatalException} to
    stop parsing immediately.  Returns C{self}.
    """
    self.failAction = fn
    return self
825 | |
def _skipIgnorables(self, instring, loc):
    """Advance C{loc} past every match of the expressions in C{self.ignoreExprs}.

    The whole list is retried until one full pass matches nothing, so
    interleaved ignorables are all consumed.  Returns the new location.
    """
    while True:
        advanced = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this ignorable as possible
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    advanced = True
            except ParseException:
                # this ignorable no longer matches here; move on to the next one
                pass
        if not advanced:
            return loc
838 | |
def preParse(self, instring, loc):
    """Return the location at which matching should actually start.

    Ignorable expressions are skipped first (when any are registered), then
    leading characters found in C{self.whiteChars} are skipped if
    C{self.skipWhitespace} is set.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
850 | |
def parseImpl(self, instring, loc, doActions=True):
    """Default matcher: consume nothing and produce no tokens.

    Returns the unchanged C{loc} paired with a new empty list.
    """
    no_tokens = []
    return loc, no_tokens
853 | |
def postParse(self, instring, loc, tokenlist):
    """Default post-processing hook: hand back C{tokenlist} untouched."""
    return tokenlist
856 | |
857 #~ @profile | |
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core (unmemoized) parse step for this element.

    Runs C{preParse} (when enabled), C{parseImpl}, C{postParse} and then the
    parse actions, wrapping the result in a C{ParseResults}.

    Returns a C{(loc, ParseResults)} tuple.  An C{IndexError} raised by
    C{parseImpl} is converted to a C{ParseException} located at the end of
    the input, except in the fast path noted below.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # Slow path: debug and/or fail callbacks must observe the attempt.
        # debugActions is the (start, success, exception) triple.
        if (self.debugActions[0] ):
            # the start action sees the location *before* preParse skipping
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # notify the exception debug action and the fail action, then
            # let the original exception continue to propagate
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # Fast path: no callbacks to notify.
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # NOTE(review): the bounds test uses loc (pre-preParse), not preloc.
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # no IndexError guard here; an IndexError would propagate as-is
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        # Each action receives the current results; a non-None return value
        # replaces them (re-wrapped in a fresh ParseResults).
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                # report exceptions raised by parse actions to the debug hook
                if (self.debugActions[2] ):
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug exception hook
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
934 | |
def tryParse(self, instring, loc):
    """Attempt a match at C{loc} with C{doActions=False} and return the end location.

    A C{ParseFatalException} is converted into an ordinary C{ParseException}
    at C{loc}; other parse exceptions propagate unchanged.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
940 | |
# This method is called repeatedly during backtracking with the same arguments,
# so results (and failures) are memoized instead of re-parsing the expression.
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing wrapper around C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache}, keyed on the element
    and all call arguments.  Successful results are cached and returned as
    copies; parse exceptions are cached as well and re-raised on a hit.
    """
    cache = ParserElement._exprArgCache
    key = (self, instring, loc, callPreParse, doActions)
    if key in cache:
        cached = cache[key]
        if isinstance(cached, Exception):
            raise cached
        return (cached[0], cached[1].copy())

    try:
        result = self._parseNoCache(instring, loc, doActions, callPreParse)
        # store a copy so later changes to the returned results don't leak into the cache
        cache[key] = (result[0], result[1].copy())
        return result
    except ParseBaseException:
        cache[key] = sys.exc_info()[1]
        raise
959 | |
# default parse entry point is the uncached implementation
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Empty the shared C{_exprArgCache} memoization dictionary in place."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
967 | |
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, which memoizes parse attempts.

    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results and
    parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.
    """
    if ParserElement._packratEnabled:
        # already switched over; nothing to do
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
989 | |
def parseString(self, instring, parseAll=False):
    """Parse C{instring} from its start with this expression and return the tokens.

    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{StringEnd()}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        endLoc, results = self._parse(instring, 0)
        if parseAll:
            # skip trailing ignorables/whitespace, then insist on end of input
            endLoc = self.preParse(instring, endLoc)
            endCheck = Empty() + StringEnd()
            endCheck._parse(instring, endLoc)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
    return results
1035 | |
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Scan the input string for expression matches.  This is a generator:
    each match yields a C{(tokens, start, end)} tuple holding the matching
    tokens, the start location, and the end location.  May be called with
    optional C{maxMatches} argument, to clip scanning after 'n' matches are
    found.  If C{overlap} is specified, then overlapping matches will be
    reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once; they are called on every iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # preParse is done here, so the parse call is told not to repeat it
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case l) is a separate local
                        # from 'nextLoc'; it holds the preParse result for loc.
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc; step forward
                    # so the loop cannot stall
                    loc = preloc+1
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
1086 | |
def transformString(self, instring):
    """Extension to C{scanString} that rewrites matched text.

    To use C{transformString}, define a grammar and attach a parse action to
    it that modifies the returned token list.  Calling C{transformString()}
    on a target string scans for matches and replaces each matched span with
    the tokens produced for it; text between matches is copied through
    unchanged.  Returns the resulting transformed string.
    """
    pieces = []
    consumed = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            pieces.append(instring[consumed:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            consumed = end
        pieces.append(instring[consumed:])
        nonEmpty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(nonEmpty)))
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1120 | |
def searchString(self, instring, maxMatches=_MAX_INT):
    """Another extension to C{scanString}, returning only the matched tokens.

    The token groups of all matches are collected into a single
    C{ParseResults}.  May be called with optional C{maxMatches} argument,
    to clip searching after 'n' matches are found.
    """
    try:
        found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1135 | |
def __add__(self, other):
    """Implementation of + operator - returns C{And}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1145 | |
def __radd__(self, other):
    """Implementation of + operator when left operand is not a C{ParserElement}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1155 | |
def __sub__(self, other):
    """Implementation of - operator, returns C{And} with error stop.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1165 | |
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a C{ParserElement}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1175 | |
def __mul__(self, other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + ZeroOrMore(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
     - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} when C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for negative counts, a maximum below the
    minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so that (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the count of optional repeats beyond the minimum
            optElements -= minElements
        else:
            # apply the % formatting; passing the types as extra exception
            # arguments left the placeholders unformatted in the message
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # builds Optional(self + Optional(self + ...)) nested n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
1243 | |
def __rmul__(self, other):
    """Reflected C{*}: delegates to C{__mul__} with the same operand."""
    product = self.__mul__(other)
    return product
1246 | |
def __or__(self, other):
    """Implementation of | operator - returns C{MatchFirst}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1256 | |
def __ror__(self, other):
    """Implementation of | operator when left operand is not a C{ParserElement}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1266 | |
def __xor__(self, other):
    """Implementation of ^ operator - returns C{Or}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1276 | |
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a C{ParserElement}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1286 | |
def __and__(self, other):
    """Implementation of & operator - returns C{Each}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1296 | |
def __rand__(self, other):
    """Implementation of & operator when left operand is not a C{ParserElement}.

    A string operand is promoted to a C{Literal}.  Any other non-parser
    operand triggers a C{SyntaxWarning} and the result is C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1306 | |
def __invert__(self):
    """Implementation of ~ operator - wraps this element in a C{NotAny}."""
    negated = NotAny(self)
    return negated
1310 | |
def __call__(self, name):
    """Shortcut for C{setResultsName}, with C{listAllMatches=default}::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

    The C{name} argument is forwarded unchanged, so a trailing C{'*'} is
    interpreted by C{setResultsName}.
    """
    named = self.setResultsName(name)
    return named
1321 | |
def suppress(self):
    """Return this element wrapped in C{Suppress}; useful to keep punctuation
    from cluttering up returned output.
    """
    wrapped = Suppress(self)
    return wrapped
1327 | |
def leaveWhitespace(self):
    """Turn off whitespace skipping for this element and return C{self}.

    This is normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.
    """
    setattr(self, "skipWhitespace", False)
    return self
1335 | |
def setWhitespaceChars(self, chars):
    """Use C{chars} as this element's whitespace characters.

    Re-enables whitespace skipping and clears C{copyDefaultWhiteChars}.
    Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1343 | |
def parseWithTabs(self):
    """Keep C{<TAB>} characters in the input instead of expanding them to spaces.

    Must be called before C{parseString} when the input grammar contains
    elements that match C{<TAB>} characters.  Returns C{self}.
    """
    setattr(self, "keepTabs", True)
    return self
1350 | |
def ignore(self, other):
    """Register an expression to be ignored (e.g., comments) while matching.

    May be called repeatedly to define multiple ignorable patterns.  A copy
    of C{other} is stored, wrapped in C{Suppress} unless it already is one.
    A C{Suppress} that is already present in the list is not added again.
    Returns C{self}.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other.copy())
    return self
1362 | |
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Install the debug callbacks and enable debugging.

    Any action passed as a false value is replaced by the module's default
    action for that slot.  The actions are stored as a
    C{(start, success, exception)} tuple.  Returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1370 | |
def setDebug(self, flag=True):
    """Enable display of debugging messages while doing pattern matching.

    A true C{flag} installs the default debug actions; a false one only
    clears C{self.debug}.  Returns C{self}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
1379 | |
def __str__(self):
    """Return the element's C{name} attribute."""
    label = self.name
    return label
1382 | |
def __repr__(self):
    """Return the same text as the string form, converted via C{_ustr}."""
    text = _ustr(self)
    return text
1385 | |
def streamline(self):
    """Mark this element as streamlined and reset C{strRepr}.  Returns C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
1390 | |
def checkRecursion(self, parseElementList):
    """Recursion check hook; this implementation does nothing."""
    return None
1393 | |
def validate(self, validateTrace=None):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for signature compatibility and is not used
    by this implementation.  Its default is C{None} rather than a shared
    mutable list.
    """
    self.checkRecursion([])
1397 | |
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

    If C{file_or_filename} has a C{read()} method it is used directly.
    Otherwise it is treated as a filename: the file is opened in text mode,
    read completely, and closed before parsing.  C{parseAll} is forwarded to
    C{parseString}, whose result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # Not file-like, so treat the argument as a path.  Text mode ("r")
        # yields str; binary mode would hand bytes to the parser on Python 3.
        f = open(file_or_filename, "r")
        try:
            file_contents = f.read()
        finally:
            # always release the handle, even if read() fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1415 | |
def getException(self):
    """Build a C{ParseException} carrying this element's C{errmsg}, with an
    empty string and location 0.
    """
    template = ParseException("", 0, self.errmsg, self)
    return template
1418 | |
1419 def __getattr__(self,aname): | |
1420 if aname == "myException": | |
1421 self.myException = ret = self.getException(); | |
1422 return ret; | |
1423 else: | |
1424 raise AttributeError("no such attribute " + aname) | |
1425 | |
def __eq__(self, other):
    """Compare with another element or test a string against this grammar.

    Two parser elements are equal when they are the same object or their
    instance dictionaries are equal.  A string is "equal" when it parses
    completely (C{parseAll=True}).  Anything else is delegated to the
    superclass comparison.
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
1437 | |
def __ne__(self, other):
    """Negation of C{==}."""
    equal = (self == other)
    return not equal
1440 | |
def __hash__(self):
    """Hash by object identity (the hash of C{id(self)})."""
    identity = id(self)
    return hash(identity)
1443 | |
def __req__(self, other):
    """Return the result of C{self == other}."""
    outcome = (self == other)
    return outcome
1446 | |
def __rne__(self, other):
    """Return the negation of C{self == other}."""
    equal = (self == other)
    return not equal
1449 | |
1450 | |
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name via the base class and rebuild C{errmsg} from it."""
        result = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return result
1460 | |
1461 | |
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1469 | |
1470 | |
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise the element's exception, updated to this location."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1485 | |
1486 | |
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal can never fail, so it becomes an Empty token
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*.
    # If this is a single character match string and the first character
    # matches, short-circuit as quickly as possible, and avoid calling startswith.
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at C{loc}; return the end location and the literal."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1518 | |
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{Literal}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case, including the identifier chars
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc}, requiring that it is neither preceded
        nor followed by a character from C{identChars}.

        Returns the end location and the keyword string; raises the element's
        exception when there is no match.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy that keeps this keyword's own identifier characters.

        Previously the copy's C{identChars} was overwritten with the class
        default string, which discarded any custom C{identChars} given to the
        constructor.
        """
        c = super(Keyword, self).copy()
        c.identChars = self.identChars
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1578 | |
class CaselessLiteral(Literal):
    """Literal that matches its text regardless of letter case.

    The token returned is always the string given at construction time,
    never the (possibly differently cased) text found in the input.
    """
    def __init__( self, matchString ):
        # The upper-cased form goes to the base class; parseImpl compares
        # upper-cased input against self.match.
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Remember the string exactly as the caller wrote it.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        stop = loc + self.matchLen
        if instring[loc:stop].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return stop, self.returnString
1599 | |
class CaselessKeyword(Keyword):
    """Keyword that is matched without regard to letter case.

    The token returned is the match string as given to the constructor.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, ignoring case.

        The matched text must not be adjacent to an identifier character
        on either side.  The leading check mirrors the caseless branch of
        Keyword.parseImpl, so text such as "elif" does not yield "if".
        Raises self.myException on failure.
        """
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1613 | |
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.  An optional
       C{excludeChars} parameter can list characters to be removed from
       C{initChars} and C{bodyChars}; useful to define a word of all printables
       except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join([c for c in initChars if c not in excludeChars])
            if bodyChars:
                bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no space in the character
        # sets, the word can be matched by a single compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A lone initial character can be emitted as an escaped
                # literal.  The length test must be on initCharsOrig: with
                # several initial characters the literal form would demand
                # the whole string instead of any one of its characters.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character scan in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc and return (new location, matched text).

        Uses the precompiled regex when one is available, otherwise scans
        character by character.  Raises self.myException on failure.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, the word must not simply continue past the limit
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except Exception:
            # the base implementation failed; build a description instead
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1746 | |
1747 | |
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression, which
        is then used directly.  Any other type raises C{ValueError}; an
        invalid pattern string re-raises the C{re} error after a warning.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Record the source text of the expression; str() of a compiled
            # pattern is only its object repr, not the pattern.
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expression at loc.

        Returns (end of match, ParseResults); each named group of the
        match is also stored in the results under its group name.
        Raises self.myException when the expression does not match.
        """
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            # the base implementation failed; build a description instead
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1813 | |
1814 | |
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Body of the pattern: any character that cannot start the closing
        # quote (and, for single-line strings, is not a line break) ...
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        # ... or a proper prefix of a multi-character closing quote that is
        # not followed by the next character of that quote ...
        if len(self.endQuoteChar) > 1:
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        # ... or the escaped-quote sequence, or the escape char plus any char.
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # Characters that may follow escChar when unquoting.  They go
            # inside a regex character class, so every class metacharacter
            # (including ']' and '\') must be escaped; sorting keeps the
            # generated pattern independent of set ordering.
            charset = _escapeRegexRangeChars(
                ''.join(sorted(set(self.quoteChar[0]+self.endQuoteChar[0]))))
            self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.

        Returns (end of match, text).  When unquoteResults is set, the
        delimiters are removed and escaped quote characters / escaped
        quote sequences are replaced by their plain form.
        Raises self.myException when no quoted string starts at loc.
        """
        # cheap first-character test before running the regex
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # the base implementation failed; build a description instead
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1929 | |
1930 | |
class CharsNotIn(Token):
    """Token matching a run of characters that are all *outside* a given set.
       Construct with a string of the disallowed characters and optional
       C{min}, C{max} and/or C{exact} lengths.  C{min} defaults to 1 (values
       below 1 are rejected); C{max} and C{exact} default to 0, meaning no
       maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # exact wins over min/max; max == 0 means "unbounded"
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        elif max > 0:
            self.minLen = min
            self.maxLen = max
        else:
            self.minLen = min
            self.maxLen = _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc until a disallowed one, or the
        maximum length, is reached.  Returns (new location, matched text);
        raises self.myException if the first character is disallowed or
        the run is shorter than the minimum length."""
        excluded = self.notChars
        failed = instring[loc] in excluded
        if not failed:
            begin = loc
            limit = min( begin+self.maxLen, len(instring) )
            loc += 1
            while loc < limit and instring[loc] not in excluded:
                loc += 1
            failed = loc - begin < self.minLen

        if failed:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            # show at most the first four disallowed characters
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
2000 | |
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{Word} class."""
    # display names used to build the expression's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) )
        # Characters without an entry in whiteStrs are shown by their repr
        # instead of aborting construction with a KeyError.
        self.name = ("".join([White.whiteStrs.get(c, repr(c)) for c in self.matchWhite]))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a run of the configured whitespace characters at loc.

        Returns (new location, matched text); raises self.myException if
        the first character does not match or the run is shorter than the
        minimum length.
        """
        if not(instring[ loc ] in self.matchWhite):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min( maxloc, len(instring) )
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
2056 | |
2057 | |
class _PositionToken(Token):
    """Base class for the position-testing tokens defined below it."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # named after the concrete class
        self.name = self.__class__.__name__
2064 | |
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful for scraping tabular reports."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        # already at the requested column: nothing to skip
        if col(loc,instring) == self.col:
            return loc
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        # step over whitespace until the column is reached
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        # return the text between the current position and the target column
        target = loc + self.col - current
        return target, instring[ loc: target ]
2087 | |
class LineStart(_PositionToken):
    """Succeeds when the current position is at the start of a line of the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant here, so they must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        skipped = super(LineStart,self).preParse(instring,loc)
        # step over a single newline found after the skipped whitespace
        if instring[skipped] == "\n":
            return loc + 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2111 | |
class LineEnd(_PositionToken):
    """Succeeds when the current position is at the end of a line of the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newlines are significant here, so they must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        total = len(instring)
        # a newline character is consumed and returned as the token
        if loc < total and instring[loc] == "\n":
            return loc+1, "\n"
        # the end of the input also counts as an end of line
        if loc == total:
            return loc+1, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2136 | |
class StringStart(_PositionToken):
    """Succeeds when the current position is at the start of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        # Position 0 always qualifies; otherwise everything before loc must be
        # skippable by preParse (whitespace and ignorable expressions).
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2153 | |
class StringEnd(_PositionToken):
    """Succeeds when the current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        total = len(instring)
        if loc < total:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        if loc == total:
            # exactly at the end: report a location one past it
            return loc+1, []
        # already past the end: leave the location unchanged
        return loc, []
2176 | |
class WordStart(_PositionToken):
    """Succeeds when the current position is at the start of a word: the
       character at the position is in C{wordChars} and the character before
       it is not (default C{wordChars} is C{printables}).  To emulate the
       C{\\b} behavior of regular expressions, use C{WordStart(alphanums)}.
       Position 0 of the parse string always matches.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        wordSet = self.wordChars
        if loc != 0 and ( instring[loc-1] in wordSet or
                          instring[loc] not in wordSet ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2198 | |
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}).  To emulate the C{\\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) when loc is a word end, else raise self.myException.

        Inside the string, the character at loc must not be a word
        character and the character before it must be one.  A position
        at or beyond the end of the string always matches.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 has no preceding character, so it cannot be a word
            # end; without this test instring[loc-1] would wrap around to
            # the last character of the input.
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2223 | |
2224 | |
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions as the list self.exprs.

        exprs may be a list (used as-is, not copied), a string (wrapped in
        a Literal), any other iterable, or a single expression.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add another contained expression and return self."""
        self.exprs.append( other )
        # cached description is now stale
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the caller's original expressions are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on every contained
        expression.  A Suppress that is already registered is not added
        again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # the base implementation failed; build a description instead
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """Validate every contained expression, then check this expression
        for recursion.  validateTrace is the list of enclosing expressions;
        it is copied, never modified."""
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy whose contained expressions are copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2325 | |
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element: a failure in any expression after it is raised
        as ParseSyntaxException (see And.parseImpl)."""
        def __init__(self, *args, **kwargs):
            # Name this class in super() so that Empty.__init__ runs too;
            # super(Empty, self) would skip it.
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            # display name used when the enclosing expression is printed
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # the sequence can be empty only if every element can be empty
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Take whitespace handling from the first element.  Use the
        # normalized self.exprs: the raw argument may be a string, a
        # generator or an empty sequence.
        if self.exprs:
            self.setWhitespaceChars( self.exprs[0].whiteChars )
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order and return
        (new location, accumulated results)."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # past an error stop, failures become ParseSyntaxException
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # stop after the first element that cannot return empty
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2393 | |
2394 | |
class Or(ParseExpression):
    """Succeeds when at least one contained C{ParseExpression} matches.
       When several match, the one consuming the longest string is used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # the alternation can be empty if any alternative can be
        canBeEmpty = False
        for alt in self.exprs:
            if alt.mayReturnEmpty:
                canBeEmpty = True
                break
        self.mayReturnEmpty = canBeEmpty

    def parseImpl( self, instring, loc, doActions=True ):
        furthestErrLoc = -1
        furthestErr = None
        bestEnd = -1
        bestExpr = None
        # Trial-parse every alternative, remembering the longest match and
        # the failure that got furthest into the input.
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException:
                failure = sys.exc_info()[1]
                if failure.loc > furthestErrLoc:
                    furthestErr = failure
                    furthestErrLoc = failure.loc
            except IndexError:
                # ran off the end of the input
                inputLen = len(instring)
                if inputLen > furthestErrLoc:
                    furthestErr = ParseException(instring,inputLen,alt.errmsg,self)
                    furthestErrLoc = inputLen
            else:
                if endLoc > bestEnd:
                    bestEnd = endLoc
                    bestExpr = alt

        if bestEnd >= 0:
            # re-parse with the winning alternative to get the real results
            return bestExpr._parse( instring, loc, doActions )

        if furthestErr is not None:
            raise furthestErr
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            described = [ _ustr(alt) for alt in self.exprs ]
            self.strRepr = "{" + " ^ ".join( described ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2455 | |
2456 | |
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if exprs:
            # The alternation may match nothing as soon as any one branch may.
            self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
        else:
            self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        If every alternative fails, the failure that got furthest into the
        input is re-raised; with no alternatives at all a generic
        C{ParseException} is raised.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseException:
                # sys.exc_info() is used instead of the Python 2-only
                # "except X, err" form, matching the style used by Or.
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # Running off the end of the input counts as failing at the end.
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Implement C{self |= other}; strings are wrapped in a C{Literal}."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return the name if set, else a cached C{{a | b | ...}} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
2514 | |
2515 | |
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        # Empty only if every member may be empty.
        self.mayReturnEmpty = all(member.mayReturnEmpty for member in self.exprs)
        self.skipWhitespace = True
        # The required/optional groupings are built lazily on first parse.
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the member expressions in whatever order the input has them.

        A first phase probes with C{tryParse} to discover a matching order;
        a second phase re-parses in that order with C{_parse} and merges
        the results.  Raises C{ParseException} if a required member never
        matched.
        """
        if self.initExprGroups:
            unwrappedOptionals = [m.expr for m in self.exprs if isinstance(m, Optional)]
            emptyable = [m for m in self.exprs
                         if m.mayReturnEmpty and m not in unwrappedOptionals]
            self.optionals = unwrappedOptionals + emptyable
            self.multioptionals = [m.expr for m in self.exprs if isinstance(m, ZeroOrMore)]
            self.multirequired = [m.expr for m in self.exprs if isinstance(m, OneOrMore)]
            self.required = [m for m in self.exprs
                             if not isinstance(m, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # Keep sweeping over the candidates until a full sweep matches nothing.
        while True:
            candidates = (pendingRequired + pendingOptional
                          + self.multioptionals + self.multirequired)
            failed = []
            for cand in candidates:
                try:
                    probeLoc = cand.tryParse(instring, probeLoc)
                except ParseException:
                    failed.append(cand)
                else:
                    matchOrder.append(cand)
                    if cand in pendingRequired:
                        pendingRequired.remove(cand)
                    elif cand in pendingOptional:
                        pendingOptional.remove(cand)
            if len(failed) == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join([_ustr(m) for m in pendingRequired])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [m for m in self.exprs
                       if isinstance(m, Optional) and m.expr in pendingOptional]

        parsedPieces = []
        for matched in matchOrder:
            loc, piece = matched._parse(instring, loc, doActions)
            parsedPieces.append(piece)

        merged = ParseResults([])
        for piece in parsedPieces:
            # Names present in both: combine old and new values, and apply
            # them after the += below has run.
            repeated = {}
            for key in piece.keys():
                if key in merged.keys():
                    combined = ParseResults(merged[key])
                    combined += ParseResults(piece[key])
                    repeated[key] = combined
            merged += ParseResults(piece)
            for key, value in repeated.items():
                merged[key] = value
        return loc, merged

    def __str__(self):
        """Return the name if set, else a cached C{{a & b & ...}} description."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                parts = [_ustr(m) for m in self.exprs]
                self.strRepr = "{" + " & ".join(parts) + "}"
            return self.strRepr
        return self.name

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every member expression."""
        trail = parseElementList[:] + [self]
        for member in self.exprs:
            member.checkRecursion(trail)
2602 | |
2603 | |
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr} (which may
    be C{None}), and by default delegates parsing to it.
    """
    def __init__(self, expr, savelist=False):
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # Mirror the wrapped expression's parsing characteristics.
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression.

        Raises C{ParseException} if no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        # Report against the real input so the exception carries usable context.
        raise ParseException(instring, loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Turn off whitespace skipping here and in a copy of the wrapped expression.

        Returns C{self}.
        """
        self.skipWhitespace = False
        # Guard before copying: with no wrapped expression there is nothing
        # to copy (the original called .copy() on None here).
        if self.expr is not None:
            # The change is applied to a copy, not to the original object.
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register C{other} as ignorable here and on the wrapped expression.

        A C{Suppress} that is already registered is not added again.
        Returns C{self}.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            # Pass along what the base class actually stored.
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression; returns C{self}."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise C{RecursiveGrammarException} if this element is already on the trail."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    # The mutable default is kept for interface compatibility; it is only
    # ever copied, never modified.
    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, then check this element for recursion."""
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the base-class string if available, else C{ClassName:(expr)}.

        Returns C{None} if the base class fails and there is no wrapped
        expression and no cached representation.
        """
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # Fall back to the generated representation below.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
2674 | |
2675 | |
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list."""
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # Probe only: any exception from tryParse propagates, and on success
        # the original location is returned unchanged.
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
2688 | |
2689 | |
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens only if the wrapped expression fails here."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # The unwanted expression is absent: that is a match.
            return loc, []

        # The unwanted expression matched; fill in and raise self.myException.
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        """Return the name if set, else a cached C{~{expr}} description."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
2724 | |
2725 | |
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Collect as many repetitions as will parse; failure just ends the run."""
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, moreTokens = self.expr._parse(instring, nextLoc, doActions)
                # Keep results that carry either tokens or named values.
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # No (further) repetition; return what has been gathered so far.
            pass

        return loc, tokens

    def __str__(self):
        """Return the name if set, else a cached C{[expr]...} description."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the result as list-valued."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2763 | |
2764 | |
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl(self, instring, loc, doActions=True):
        """Parse one mandatory occurrence, then as many more as will parse."""
        # must be at least one; a failure here propagates to the caller
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, moreTokens = self.expr._parse(instring, nextLoc, doActions)
                # Keep results that carry either tokens or named values.
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # No further repetition; return what has been gathered so far.
            pass

        return loc, tokens

    def __str__(self):
        """Return the name if set, else a cached C{{expr}...} description."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the result as list-valued."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2798 | |
class _NullToken(object):
    """Placeholder object that is always false and prints as an empty string."""
    def __nonzero__(self):
        return False
    # Python 3 spells the truth-value hook __bool__.
    __bool__ = __nonzero__

    def __str__(self):
        return ""

# Module-level sentinel instance, used as the default for Optional's
# "default" argument.
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, falling back to the default on failure."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                # No default was supplied: contribute nothing.
                tokens = []
            elif self.expr.resultsName:
                # Expose the default under the wrapped expression's results name too.
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return the name if set, else a cached C{[expr]} description."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
2839 | |
2840 | |
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # A string failOn is promoted to a Literal; anything else (including
        # None) is stored unchanged.
        self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time until the target expression matches.

        Returns the skipped text (plus the target's tokens when
        C{includeMatch} is set).  Raises if C{failOn} matches first or if
        the end of the input is passed without finding the target.
        """
        startLoc = loc
        inputLen = len(instring)
        target = self.expr
        hitFailOn = False
        while loc <= inputLen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # Flag the failure so the handler below re-raises it
                        # instead of stepping forward.
                        hitFailOn = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    hitFailOn = False

                if self.ignoreExpr is not None:
                    # Consume as many consecutive ignorable matches as possible.
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break

                # Probe for the target without running parse actions.
                target._parse(instring, loc, doActions=False, callPreParse=False)
                skippedText = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [skippedText]

                loc, matchTokens = target._parse(instring, loc, doActions, callPreParse=False)
                if not matchTokens:
                    return loc, [skippedText]
                combined = ParseResults(skippedText)
                combined += matchTokens
                return loc, [combined]
            except (ParseException, IndexError):
                if hitFailOn:
                    raise
                loc += 1

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2905 | |
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install C{other} as the wrapped expression and adopt its settings.

        Strings are wrapped in a C{Literal}.  Returns C{None}.
        """
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        # Mirror the new expression's parsing characteristics.
        target = self.expr
        self.mayIndexError = target.mayIndexError
        self.mayReturnEmpty = target.mayReturnEmpty
        self.setWhitespaceChars(target.whiteChars)
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Turn off whitespace skipping on this element only; returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the wrapped expression once; returns C{self}."""
        if self.streamlined:
            return self
        # Set the flag before descending into the wrapped expression.
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression unless this element is already on the trace."""
        if self not in validateTrace:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        """Return the name if set, else C{ClassName: <wrapped expression>}.

        While the wrapped expression is being rendered this instance's
        class is temporarily switched to C{_ForwardNoRecurse}, so a nested
        reference back to it renders as C{"..."}.
        """
        if hasattr(self, "name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                inner = "None"
            else:
                inner = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + inner

    def copy(self):
        """Copy via the base class if defined, else return a new C{Forward} wrapping this one."""
        if self.expr is None:
            # Still undefined: the new Forward wraps this instance.
            deferred = Forward()
            deferred << self
            return deferred
        return super(Forward, self).copy()
2977 | |
class _ForwardNoRecurse(Forward):
    """Stand-in class whose string form is always an ellipsis."""
    def __str__(self):
        ellipsis = "..."
        return ellipsis
2981 | |
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded; saveAsList is set to False.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
2987 | |
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: constructing an instance emits a C{DeprecationWarning}.
    """
    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list holding the upper-cased form of every token."""
        # Call each token's own upper() method: string.upper() did exactly
        # that but no longer exists in Python 3.
        return [tok.upper() for tok in tokenlist]
2997 | |
2998 | |
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.callPreparse = True
        self.adjacent = adjacent
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignorable expression; returns C{self}.

        In adjacent mode the call goes straight to C{ParserElement.ignore},
        bypassing the intermediate classes' overrides.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the token list with one string built from all the tokens."""
        combined = tokenlist.copy()
        # Empty the copied token list, then add the single joined string.
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
3030 | |
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # Nest the whole token list as a single element.
        grouped = [tokenlist]
        return grouped
3039 | |
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a keyed entry to C{tokenlist} for every non-empty sub-group."""
        for index, entry in enumerate(tokenlist):
            entryLen = len(entry)
            if entryLen == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # Integer keys are converted to stripped strings.
                key = _ustr(entry[0]).strip()

            if entryLen == 1:
                # Key only: the value is an empty string.
                value = ""
            elif entryLen == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                rest = entry.copy()
                del rest[0]
                keepAsGroup = (len(rest) != 1
                               or (isinstance(rest, ParseResults) and rest.keys()))
                if keepAsGroup:
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value, index)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
3072 | |
3073 | |
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse(self, instring, loc, tokenlist):
        # Discard whatever was matched.
        return []

    def suppress(self):
        # Already suppressing: hand back this same object.
        return self
3081 | |
3082 | |
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self, s, l, t):
        """Run the wrapped action the first time; raise C{ParseException} afterwards.

        The C{called} flag is set only after the wrapped action returns, so
        a call that raises does not use up the single allowed invocation.
        """
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False
3096 | |
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the action and a "leaving" line afterwards, showing either the
    return value or the exception (which is then re-raised).
    """
    # Take the display name from the caller's function before it is wrapped,
    # so the trace names the user's action even if _trim_arity returns a
    # differently named wrapper.  __name__ is used because func_name exists
    # only in Python 2; callables without __name__ fall back to their class
    # name.
    funcName = getattr(f, "__name__", f.__class__.__name__)
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = funcName
        # The last three arguments are always (s, l, t); a leading extra
        # argument is taken to be the bound instance of a method-style action.
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret
    try:
        z.__name__ = funcName
    except (AttributeError, TypeError):
        # Best effort only: the wrapper still works without the name.
        pass
    return z
3119 | |
3120 # | |
3121 # global helpers | |
3122 # | |
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    listName = exprText + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if not combine:
        grammar = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        grammar = Combine(expr + ZeroOrMore(delim + expr))
    return grammar.setName(listName)
3136 | |
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # Re-define the array expression on the fly from the parsed count.
        count = t[0]
        if count:
            body = Group(And([expr] * count))
        else:
            body = Group(empty)
        arrayExpr << body
        return []

    if intExpr is not None:
        # Copy so the caller's expression does not receive the parse action.
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr)
3156 | |
def _flatten(L):
    """Return a new flat list with every nested C{list} in C{L} expanded.

    Only C{list} instances are expanded; other containers such as tuples
    are kept as single items.
    """
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
3165 | |
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # Rebuild the repeater from whatever the first expression just matched.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            flatTokens = _flatten(t.asList())
            rep << And([Literal(tok) for tok in flatTokens])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3191 | |
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # The repeater parses with a copy of the original expression.
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        expectedTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # Reject the repeat unless it produced the same flattened tokens.
            actualTokens = _flatten(t.asList())
            if actualTokens != expectedTokens:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3217 | |
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regular-expression character class.

    Backslash, caret, hyphen and closing bracket are prefixed with
    C{_bslash}; literal newline and tab characters become the two-character
    sequences C{\\n} and C{\\t}.  The result is passed through C{_ustr}.
    """
    #~ escape these chars: ^-]
    # The backslash comes first so that the backslashes added for the other
    # characters are not escaped again.
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    for raw, escaped in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, escaped)
    return _ustr(s)
3225 | |
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{MatchFirst} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       An argument of any other type triggers a C{SyntaxWarning} and is then
       treated as an empty list of symbols.
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    if isinstance(strs, (list, tuple)):
        symbols = list(strs[:])
    elif isinstance(strs, basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                      SyntaxWarning, stacklevel=2)
        # Continue with no symbols instead of failing below with a
        # NameError on an unbound variable.
        symbols = []

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1:]):
            if isequal(other, cur):
                del symbols[i + j + 1]
                break
            elif masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            # Nothing changed for position i; move on to the next symbol.
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # Every symbol is a single character: use a character class.
                return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols]))
            else:
                return Regex("|".join([re.escape(sym) for sym in symbols]))
        except Exception:
            # Deliberate best effort: fall back to MatchFirst, but no longer
            # swallow KeyboardInterrupt/SystemExit as a bare except did.
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst([parseElementClass(sym) for sym in symbols])
3284 | |
def dictOf( key, value ):
    """Helper to build a dictionary-style expression from a key pattern and a value pattern.
       Each key/value pair is wrapped in a C{Group}, the groups are repeated with
       C{ZeroOrMore}, and the whole is wrapped in a C{Dict}, so callers do not have to
       nest those classes in the right order themselves.  The key pattern may include
       delimiters or punctuation as long as they are suppressed, leaving only the
       significant key text.  The value pattern may define results names, which then
       appear as named fields within the C{Dict} results.
    """
    keyValuePair = Group( key + value )
    repeatedPairs = ZeroOrMore( keyValuePair )
    return Dict( repeatedPairs )
3294 | |
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text matched by an expression.
       Useful for turning the parsed fields of an HTML start tag back into the raw tag
       text, or for recovering separate tokens together with their intervening
       whitespace exactly as they appeared in the input.  Simpler than the parse action
       C{L{keepOriginalText}}, and does not need the inspect module to walk the call stack.

       With C{asString=True} (the default) the result is a plain string holding the
       matched text.  With C{asString=False} the result is a C{ParseResults} whose single
       token is the matched text and which keeps any results names defined inside
       C{expr} - so pass C{False} when those named values must be preserved.
    """
    # Empty expressions whose parse action reports the current parse location.
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # the end marker is told not to pre-parse before recording its location
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace all tokens with the raw text, then drop the helper names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    wrapped.setParseAction(extractText)
    return wrapped
3323 | |
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions: wraps C{expr}
       in a C{TokenConverter} whose parse action returns only the first token."""
    firstToken = lambda t: t[0]
    converter = TokenConverter(expr)
    return converter.setParseAction(firstToken)
3328 | |
# Ready-made, named instances of the positional expression classes, so grammars
# can use them directly instead of constructing their own.
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")
3335 | |
# Grammar used by srange() to parse regex-style '[...]' character set expressions.

# A backslash followed by one punctuation character (or a space) stands for that character.
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Characters that may appear unescaped: any printable except backslash and ']'.
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# Escaped hex character, written '\x##' or (older form) '\0x##'.  Only the digits
# after the 'x' marker are converted: int() rejects a string such as "x21", so
# stripping just the backslash made the '\x##' form fail with ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split("x",1)[1],16)))
# Escaped octal character, written '\0##'.
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# 'a-z' style range; grouped so it can be told apart from a single character.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3343 | |
def _expanded(p):
    """Expand one parsed element of a '[...]' body into its characters.

       A grouped range (a C{ParseResults} holding the first and last character) becomes
       the string of every character from first to last inclusive; anything else is
       returned unchanged.  An explicit branch is used instead of the 'and/or' idiom,
       which handed back the C{ParseResults} itself whenever the expansion was the
       empty string (a reversed range such as 'z-a').
    """
    if isinstance(p,ParseResults):
        return ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
    return p
3345 | |
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Deliberately best-effort: any parse or expansion failure yields "".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return ""
3367 | |
def matchOnlyAtCol(n):
    """Helper that builds a parse action accepting a match only at column C{n}.
       The returned action raises C{ParseException} when the column of the match
       location differs from C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3376 | |
def replaceWith(replStr):
    """Helper that builds a parse action which ignores its arguments and always
       returns C{[replStr]}.  Especially useful when used with C{transformString()}.
    """
    return lambda *args: [replStr]
3384 | |
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the quotation
       marks) from the first parsed token.
       To use, add this parse action to a quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3391 | |
def upcaseTokens(s,l,t):
    """Helper parse action that returns every token, converted to a string
       with C{_ustr}, in upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3395 | |
def downcaseTokens(s,l,t):
    """Helper parse action that returns every token, converted to a string
       with C{_ustr}, in lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3399 | |
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{originalTextFor}.
       Helper parse action that replaces the parsed tokens with the original input
       text they were parsed from, overriding any nested parse actions."""
    # getTokensEndLoc() inspects the call stack, so it must be called directly
    # from this function rather than through an extra wrapper.
    # NOTE(review): getTokensEndLoc raises ParseFatalException on misuse - confirm
    # whether this ParseException handler can actually intercept it.
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:stopLoc]
    del t[:]
    t += ParseResults(originalText)
    return t
3411 | |
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.  Raises C{ParseFatalException} when no
       C{_parseNoCache} frame is found among the callers."""
    import inspect
    fstack = inspect.stack()
    try:
        # Skip this function and its immediate caller, then walk outward (through
        # any intervening argument normalizers) to the first frame executing
        # "_parseNoCache"; its local variable "loc" is the end location.
        for record in fstack[2:]:
            if record[3] == "_parseNoCache":
                return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the captured frame records
        del fstack
3427 | |
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag
       name (a string) or an expression matching the tag name.  C{xml} selects XML
       rules (case-sensitive keyword, double-quoted attribute values, every attribute
       has a value) over HTML rules (caseless keyword, attribute names lower-cased,
       values optional and possibly unquoted).  Returns C{(openTag, closeTag)}."""
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" is True when the tag ends with '/', as in <br/>, otherwise False
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attribute)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" plus the tag name in title case,
    # with ':' separators removed
    titledName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+titledName).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+titledName).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3456 | |
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} with XML rules off."""
    return _makeTags( tagStr, xml=False )
3460 | |
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} with XML rules on."""
    return _makeTags( tagStr, xml=True )
3464 | |
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{makeXMLTags} or C{makeHTMLTags}.  Use C{withAttribute} to qualify a starting
       tag with required attribute values, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Specify the required attribute names and values as one of:
        - keyword arguments, as in C{(align="right")}
        - an explicit dict with the C{**} operator, when an attribute name is also a
          Python reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - name-value tuples passed positionally, as in
          C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
       Positional tuples take precedence: when any are given, keyword arguments are
       not consulted.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required attribute is
       missing from the tokens or has a different value.
    """
    required = [ (name,value) for name,value in (args or attrDict.items()) ]
    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            valueMatters = attrValue != withAttribute.ANY_VALUE
            if valueMatters and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique sentinel meaning "attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3497 | |
# Associativity markers for operatorPrecedence(): two distinct sentinel objects
# held as attributes of a _Constants namespace.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
3501 | |
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the nested
          expression grammar; a parenthesized sub-expression is accepted wherever
          a base operand is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises C{ValueError} for an unknown associativity, for a numTerms other than
       1, 2 or 3, or when numTerms is 3 and opExpr is not a pair of expressions.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        # Left-associative levels repeat over the previous level; right-associative
        # levels recurse into this same level.
        if rightLeftAssoc == opAssoc.LEFT:
            leftAssoc = True
            operand = lastExpr
        elif rightLeftAssoc == opAssoc.RIGHT:
            leftAssoc = False
            operand = thisExpr
        else:
            raise ValueError("operator must indicate right or left associativity")

        if arity == 1:
            if leftAssoc:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
        elif arity == 2:
            if opExpr is not None:
                matchExpr = FollowedBy(lastExpr + opExpr + operand) + Group( lastExpr + OneOrMore( opExpr + operand ) )
            else:
                # no operator expression: the terms are simply adjacent
                matchExpr = FollowedBy(lastExpr + operand) + Group( lastExpr + OneOrMore( operand ) )
        elif arity == 3:
            matchExpr = FollowedBy(lastExpr + opExpr1 + operand + opExpr2 + operand) + \
                        Group( lastExpr + opExpr1 + operand + opExpr2 + operand )
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if pa:
            matchExpr.setParseAction( pa )
        # each level matches its own operator form, or falls back to the previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3572 | |
# Quoted string expressions.  Inside the quotes each pattern accepts: any character
# other than the quote, newline, carriage return or backslash; a doubled quote;
# a \x hex escape; or a backslash followed by any character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string immediately preceded by 'u'
unicodeString = Combine(_L('u') + quotedString.copy())
3577 | |
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values; in that case C{opener} and C{closer} must both
       be strings.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} when C{opener} equals C{closer}, or when no content
       expression is given and the delimiters are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer)==1:
            # single-character delimiters can be excluded directly by CharsNotIn
            if ignoreExpr is not None:
                oneChar = ~ignoreExpr + CharsNotIn(opener+closer+whiteChars,exact=1)
                content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)
            else:
                content = empty.copy() + CharsNotIn(opener+closer+whiteChars).setParseAction(stripToken)
        else:
            # multi-character delimiters need negative lookahead before each character
            if ignoreExpr is not None:
                oneChar = ( ~ignoreExpr + ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1) )
            else:
                oneChar = ( ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1) )
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3629 | |
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified: a backslash followed by a line
       end is added to its ignore expressions.
    """
    def checkPeerIndent(s,l,t):
        # a statement must start at the column on top of the indent stack
        if l >= len(s): return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol == expectedCol:
            return
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current level
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # leaving a block: the column must drop back to an enclosing level
        if l >= len(s): return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3681 | |
# Character sets in the upper half of the 8-bit range, built with srange():
# 0xc0-0xff except 0xd7 and 0xf7 ...
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# ... and 0xa1-0xbf plus the two code points excluded above.
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Open/close expressions matching any HTML tag name.
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# '&name;' for a handful of common entities; the name is available as "entity".
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# Parse action: the character for a matched entity name, or None if unknown.
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)
3689 | |
# Comment expressions are common and easy to get wrong, so ready-made ones are
# provided here.
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
    ).setName("C++ style comment")

# Java comments use the same expression object as C++ comments.
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# One item of a comma-separated list: runs of non-comma printables, with embedded
# spaces/tabs allowed only when not followed by a comma or the end of the line.
_noncomma = "".join( [ ch for ch in printables if ch != "," ] )
_itemWord = Word(_noncomma)
_innerWhitespace = Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
_commasepitem = Combine(OneOrMore(_itemWord + _innerWhitespace)).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
    ).setName("commaSeparatedList")
3705 | |
3706 | |
if __name__ == "__main__":
    # Self-test: parse a handful of simple SQL SELECT statements and print the
    # results, or the error location for statements that do not parse.

    def test( teststring ):
        """Parse C{teststring} with C{simpleSQL} and print the tokens and named
           results; on a parse error print the offending line with a caret under
           the error column."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() keeps this working on both Python 2 and Python 3
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # Blank separator line.  A bare print() is an empty tuple to the Python 2
        # print statement and would output "()" instead of an empty line.
        print ("")

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken + \
                  ( '*' | columnNameList ).setResultsName( "columns" ) + \
                  fromToken + \
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )