blob: ed34209c0074e31d553e0954ff969e0583c3cef7 [file] [log] [blame]
Andreas Wundsam53256162013-05-02 14:05:53 -07001# module pyparsing.py
2#
3# Copyright (c) 2003-2011 Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24#from __future__ import generators
25
26__doc__ = \
27"""
28pyparsing module - Classes and methods to define and execute parsing grammars
29
30The pyparsing module is an alternative approach to creating and executing simple grammars,
31vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33provides a library of classes that you use to construct the grammar directly in Python.
34
35Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49The Python representation of the grammar is quite readable, owing to the self-explanatory
50class names, and the use of '+', '|' and '^' operators.
51
52The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
53object with named attributes.
54
55The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59"""
60
# Module metadata: release number, release timestamp, and author contact.
__version__ = "1.5.6"
__versionTime__ = "26 June 2011 10:53"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65import string
66from weakref import ref as wkref
67import copy
68import sys
69import warnings
70import re
71import sre_constants
72#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73
# Names exported by "from pyparsing import *": capitalized entries are classes,
# the lower-case entries are helper functions and predefined constants/expressions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
]
93
94"""
95Detect if we are running version 3.X and make appropriate changes
96Robert A. Clark
97"""
# Interpreter-version shims: after this block the names _MAX_INT, _ustr and
# alphas exist on every supported interpreter; basestring/unichr are aliased
# on Python 3, and range/set are rebound on Python 2.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    _MAX_INT = sys.maxint
    range = xrange
    # module-level stand-in for set(): a dict keyed by the members of s,
    # which supports the "in" membership test
    set = lambda s : dict( [(c,0) for c in s] )
    alphas = string.lowercase + string.uppercase

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

           - a unicode object is returned unchanged
           - otherwise str(obj) is returned if it succeeds
           - if str(obj) raises UnicodeEncodeError, unicode(obj) is returned
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # state that "The return value must be a string object". However, does a
            # unicode object (being a subclass of basestring) count as a "string
            # object"?
            # If so, then return a unicode object:
            return unicode(obj)
            # Else encode it... but how? There are many choices... :)
            # Replace unprintables with escape codes?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
            # Replace unprintables with question marks?
            #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
            # ...
139
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
try:
    import __builtin__
except ImportError:
    # Python 3 renamed the module to "builtins"; keep the old local name so
    # the lookup loop below is the same on both major versions
    import builtins as __builtin__
for fname in "sum len enumerate sorted reversed list tuple set any all".split():
    try:
        singleArgBuiltins.append(getattr(__builtin__,fname))
    except AttributeError:
        # older interpreters lack some of these builtins; just leave them out
        continue
148
def _xml_escape(data):
    """Return C{data} with the characters &, >, <, " and ' replaced by XML entity references."""

    # the ampersand entry must stay first, otherwise the '&' introduced by the
    # other replacements would itself be escaped a second time
    replacements = [
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    ]
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
158
class _Constants(object):
    """Bare class with no members of its own; attributes can be attached to its instances."""
161
# Character-set constants used to build grammar expressions.
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)   # the backslash character
# every printable character except the whitespace ones
printables = "".join( [ ch for ch in string.printable if not ch in string.whitespace ] )
167
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the failure details.
           - pstr = the string being parsed; when C{msg} is omitted, C{pstr} is
             taken as the message instead and the stored parse string is empty
           - loc = location in C{pstr} where the failure occurred
           - msg = description of the failure
           - elem = the parser element that raised the exception
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
              (C{column} is accepted as an alias)
            - line - returns the line containing the exception text
           Any other name raises C{AttributeError}.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )
    def __repr__( self ):
        return _ustr(self)
    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        # the computed attributes served by __getattr__ are listed explicitly,
        # since they are not in the instance dictionary; the method name must
        # match the real spelling "markInputline"
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
215
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    # adds nothing to ParseBaseException; exists so that this kind of failure
    # can be caught by its own type
224
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
    # adds nothing to ParseBaseException; the distinct type is what matters
229
class ParseSyntaxException(ParseFatalException):
    """just like C{ParseFatalException}, but thrown internally when an
       C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # carry over the details of the exception C{pe} that is being promoted
        parsed_text, location = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(
                                    parsed_text, location, message, element)
237
238#~ class ReparseException(ParseBaseException):
239 #~ """Experimental class - parse actions can raise this exception to cause
240 #~ pyparsing to reparse the input string:
241 #~ - with a modified input string, and/or
242 #~ - with a modified start location
243 #~ Set the values of the ReparseException in the constructor, and raise the
244 #~ exception in a parse action to cause pyparsing to use the new string/location.
245 #~ Setting the values as None causes no change to be made.
246 #~ """
247 #~ def __init_( self, newstring, restartLoc ):
248 #~ self.newParseText = newstring
249 #~ self.reparseLoc = restartLoc
250
class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # keep the chain of parse elements that was handed in
        self.parseElementTrace = parseElementList

    def __str__( self ):
        trace_text = "%s" % self.parseElementTrace
        return "RecursiveGrammarException: " + trace_text
258
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset; the pair is stored as the tuple C{self.tup}."""
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        # index 0 is the value, index 1 the offset
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        # tuples are immutable, so build a new pair around the same value
        value = self.tup[0]
        self.tup = (value,i)
268
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults hands back that same object; its
        # __doinit flag is already False, so __init__ will not reset it
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Build the results from a token or list of tokens.
           - toklist = a list of tokens, a single token, or another ParseResults
           - name = optional results name under which the tokens are also stored
           - asList = when true, the named entry holds a nested ParseResults
           - modal = when false, C{name} accumulates all matches instead of
             reporting only the last one
           - isinstance = local alias of the builtin, bound as a default argument
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices address the token list, anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # modal name: report only the most recent match
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so that a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # only reached for names not found by normal attribute lookup:
        # a results name yields its value(s), any other name an empty string
        if name in self.__tokdict:
            if name not in self.__accumNames:
                return self.__tokdict[name][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[name] ])
        else:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            # shift the positions of the other object's named results past the
            # tokens already held here (and/or form kept as in the original:
            # a negative position maps to offset, or stays as-is when offset is 0)
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        # 0 + results is supported so that sum() works on a list of ParseResults
        if isinstance(other,int) and other == 0:
            return self.copy()
        # any other left operand is unsupported: let Python raise TypeError
        # instead of silently evaluating the expression to None
        return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                # note: sep is not passed down to nested results
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens; nested
           C{ParseResults} become nested lists, other tokens are left as they are."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object C{sub} is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list(...) because dict views cannot be indexed on Python 3
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the parent is stored as the object itself (or None), since the
        # weak reference cannot be pickled
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # list(...) because a dict keys view cannot be appended to a list on Python 3
        return dir(super(ParseResults,self)) + list(self.keys())
620
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location that sits on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise: distance from the closest newline before loc (rfind gives -1
    # on the first line, which makes the first character column 1)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
632
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line per newline found before loc, plus the line loc itself is on
    newlines_before = strg.count("\n",0,loc)
    return 1 + newlines_before
644
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # rfind yields -1 when there is no earlier newline, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # loc is on the last line: take everything up to the end of the string
        return strg[start:]
    return strg[start:end]
654
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be tried, with its location as (line,column)."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)
657
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched together with the tokens it produced."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + tokens_text)
660
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised while trying an expression."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
663
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    # accepts any positional arguments and ignores them
    return None
667
def _trim_arity(func, maxargs=2):
    """decorator to trim function calls to match the arity of the target

       The returned wrapper calls C{func} with all of its arguments first; each
       time that raises C{TypeError} it drops one more leading argument and
       retries, dropping at most C{maxargs}+1 of them.  The number of dropped
       arguments is remembered between calls.  Once a call has succeeded the
       arity is considered settled, and a later C{TypeError} raised by C{func}
       is passed on to the caller instead of triggering another retry.

       A single implementation serves every Python version: the state shared
       with the closure lives in one-element lists, so no C{nonlocal} rebinding
       is needed.
    """
    limit = [0]             # how many leading arguments to drop
    foundArity = [False]    # set after the first successful call
    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                if limit[0] <= maxargs and not foundArity[0]:
                    limit[0] += 1
                    continue
                raise
    return wrapper
696
697class ParserElement(object):
698 """Abstract base level parser element class."""
699 DEFAULT_WHITE_CHARS = " \n\t\r"
700 verbose_stacktrace = False
701
702 def setDefaultWhitespaceChars( chars ):
703 """Overrides the default whitespace chars
704 """
705 ParserElement.DEFAULT_WHITE_CHARS = chars
706 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
707
708 def __init__( self, savelist=False ):
709 self.parseAction = list()
710 self.failAction = None
711 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
712 self.strRepr = None
713 self.resultsName = None
714 self.saveAsList = savelist
715 self.skipWhitespace = True
716 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
717 self.copyDefaultWhiteChars = True
718 self.mayReturnEmpty = False # used when checking for left-recursion
719 self.keepTabs = False
720 self.ignoreExprs = list()
721 self.debug = False
722 self.streamlined = False
723 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
724 self.errmsg = ""
725 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
726 self.debugActions = ( None, None, None ) #custom debug actions
727 self.re = None
728 self.callPreparse = True # used to avoid redundant calls to preParse
729 self.callDuringTry = False
730
731 def copy( self ):
732 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
733 for the same parsing pattern, using copies of the original parse element."""
734 cpy = copy.copy( self )
735 cpy.parseAction = self.parseAction[:]
736 cpy.ignoreExprs = self.ignoreExprs[:]
737 if self.copyDefaultWhiteChars:
738 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
739 return cpy
740
741 def setName( self, name ):
742 """Define name for this expression, for use in debugging."""
743 self.name = name
744 self.errmsg = "Expected " + self.name
745 if hasattr(self,"exception"):
746 self.exception.msg = self.errmsg
747 return self
748
749 def setResultsName( self, name, listAllMatches=False ):
750 """Define name for referencing matching tokens as a nested attribute
751 of the returned parse results.
752 NOTE: this returns a *copy* of the original C{ParserElement} object;
753 this is so that the client can define a basic element, such as an
754 integer, and reference it in multiple places with different names.
755
756 You can also set results names using the abbreviated syntax,
757 C{expr("name")} in place of C{expr.setResultsName("name")} -
758 see L{I{__call__}<__call__>}.
759 """
760 newself = self.copy()
761 if name.endswith("*"):
762 name = name[:-1]
763 listAllMatches=True
764 newself.resultsName = name
765 newself.modalResults = not listAllMatches
766 return newself
767
768 def setBreak(self,breakFlag = True):
769 """Method to invoke the Python pdb debugger when this element is
770 about to be parsed. Set C{breakFlag} to True to enable, False to
771 disable.
772 """
773 if breakFlag:
774 _parseMethod = self._parse
775 def breaker(instring, loc, doActions=True, callPreParse=True):
776 import pdb
777 pdb.set_trace()
778 return _parseMethod( instring, loc, doActions, callPreParse )
779 breaker._originalParseMethod = _parseMethod
780 self._parse = breaker
781 else:
782 if hasattr(self._parse,"_originalParseMethod"):
783 self._parse = self._parse._originalParseMethod
784 return self
785
786 def setParseAction( self, *fns, **kwargs ):
787 """Define action to perform when successfully matching parse element definition.
788 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
789 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
790 - s = the original string being parsed (see note below)
791 - loc = the location of the matching substring
792 - toks = a list of the matched tokens, packaged as a ParseResults object
793 If the functions in fns modify the tokens, they can return them as the return
794 value from fn, and the modified list of tokens will replace the original.
795 Otherwise, fn does not need to return any value.
796
797 Note: the default parsing behavior is to expand tabs in the input string
798 before starting the parsing process. See L{I{parseString}<parseString>} for more information
799 on parsing strings containing <TAB>s, and suggested methods to maintain a
800 consistent view of the parsed string, the parse location, and line and column
801 positions within the parsed string.
802 """
803 self.parseAction = list(map(_trim_arity, list(fns)))
804 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
805 return self
806
807 def addParseAction( self, *fns, **kwargs ):
808 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
809 self.parseAction += list(map(_trim_arity, list(fns)))
810 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
811 return self
812
813 def setFailAction( self, fn ):
814 """Define action to perform if parsing fails at this expression.
815 Fail acton fn is a callable function that takes the arguments
816 C{fn(s,loc,expr,err)} where:
817 - s = string being parsed
818 - loc = location where expression match was attempted and failed
819 - expr = the parse expression that failed
820 - err = the exception thrown
821 The function returns no value. It may throw C{ParseFatalException}
822 if it is desired to stop parsing immediately."""
823 self.failAction = fn
824 return self
825
826 def _skipIgnorables( self, instring, loc ):
827 exprsFound = True
828 while exprsFound:
829 exprsFound = False
830 for e in self.ignoreExprs:
831 try:
832 while 1:
833 loc,dummy = e._parse( instring, loc )
834 exprsFound = True
835 except ParseException:
836 pass
837 return loc
838
839 def preParse( self, instring, loc ):
840 if self.ignoreExprs:
841 loc = self._skipIgnorables( instring, loc )
842
843 if self.skipWhitespace:
844 wt = self.whiteChars
845 instrlen = len(instring)
846 while loc < instrlen and instring[loc] in wt:
847 loc += 1
848
849 return loc
850
851 def parseImpl( self, instring, loc, doActions=True ):
852 return loc, []
853
854 def postParse( self, instring, loc, tokenlist ):
855 return tokenlist
856
857 #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Parse C{instring} at C{loc} with this element and return C{(newloc, tokens)}.

           Steps: optional C{preParse}, C{parseImpl}, C{postParse}, wrapping of the
           tokens in a C{ParseResults}, then the parse actions.
            - doActions = when false, parse actions run only if C{callDuringTry} is set
            - callPreParse = when false (or when C{self.callPreparse} is false)
              the C{preParse} step is skipped
           An C{IndexError} raised by C{parseImpl} is converted into a
           C{ParseException} located at the end of the input.
        """
        debugging = ( self.debug ) #and doActions )

        # slow path: only taken when debug or fail callbacks may have to be invoked
        if debugging or self.failAction:
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                # debugActions[0]: called before the match is attempted
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException:
                #~ print ("Exception raised:", err)
                # the exception is fetched through sys.exc_info() and handed to
                # debugActions[2] and/or failAction before being re-raised
                err = None
                if self.debugActions[2]:
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    if err is None:
                        err = sys.exc_info()[1]
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            # fast path: no callbacks to notify
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            # the IndexError guard is only set up when the element may raise one
            # (mayIndexError) or when loc is already at/after the end of the input
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        # a parse action that returns a value replaces the tokens
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        err = sys.exc_info()[1]
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                # same loop as above, without the exception reporting
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                # debugActions[1]: called after a successful match
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
934
def tryParse(self, instring, loc):
    """Attempt a match at C{loc} with parse actions disabled.

    Returns only the end location of the match.  A C{ParseFatalException}
    is downgraded to a plain C{ParseException} at C{loc}.
    """
    try:
        endLoc = self._parse(instring, loc, doActions=False)[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
    return endLoc
940
941 # this method gets repeatedly called during backtracking with the same arguments -
942 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for C{_parseNoCache}.

    Results and parse exceptions are stored in C{ParserElement._exprArgCache}
    keyed by (element, string, location, callPreParse, doActions).  A cached
    exception is re-raised; a cached result is returned with a copy of its
    tokens so callers cannot alter the cached entry.
    """
    cache = ParserElement._exprArgCache
    key = (self, instring, loc, callPreParse, doActions)

    if key not in cache:
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException:
            # remember the failure so the next identical attempt fails fast
            cache[key] = sys.exc_info()[1]
            raise
        cache[key] = (result[0], result[1].copy())
        return result

    cached = cache[key]
    if isinstance(cached, Exception):
        raise cached
    return (cached[0], cached[1].copy())
959
# default parse entry point is the uncached implementation
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Empty the shared C{ParserElement._exprArgCache} dictionary in place."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
967
_packratEnabled = False
def enablePackrat():
    """Switch on "packrat" (memoized) parsing for all parser elements.

    Once enabled, C{ParserElement._parse} is rebound to the caching
    implementation, so repeated attempts at the same location reuse the
    stored result or stored exception instead of re-running the match.

    Packrat parsing is off by default because it may break programs whose
    parse actions have side effects.  Call C{ParserElement.enablePackrat()}
    right after importing pyparsing; if your program uses C{psyco}, call it
    before C{psyco.full()}, otherwise Python will crash.

    Calling this method more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
989
def parseString(self, instring, parseAll=False):
    """Parse C{instring} from its beginning and return the matched tokens.

    This is the main entry point for client code once the grammar is built.

    Set C{parseAll} to True to require that the whole input be consumed
    (equivalent to ending the grammar with C{StringEnd()}).

    Note: unless C{parseWithTabs} was called on this element, the input is
    passed through C{expandtabs()} first so that reported columns are
    correct.  If the input contains tabs and parse actions index into the
    string using C{loc}, keep a consistent view of the text by either:
     - calling C{parseWithTabs} on the grammar before C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - writing the parse action with the full C{(s,loc,toks)} signature and
       indexing into its C{s} argument
     - explicitly expanding the tabs in the input before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignoreExpr in self.ignoreExprs:
        ignoreExpr.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            loc = self.preParse(instring, loc)
            endOfInput = Empty() + StringEnd()
            endOfInput._parse(instring, loc)
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
        raise
    return tokens
1035
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """Generator yielding C{(tokens, start, end)} for each match in C{instring}.

    C{maxMatches} stops the scan after that many matches have been yielded.
    With C{overlap} set, overlapping matches are reported as well.

    Start and end locations are relative to the string actually scanned;
    see L{I{parseString}<parseString>} for how embedded tabs are handled.
    """
    if not self.streamlined:
        self.streamline()
    for ignoreExpr in self.ignoreExprs:
        ignoreExpr.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    totalLen = len(instring)
    loc = 0
    skipLeading = self.preParse
    attemptMatch = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while loc <= totalLen and found < maxMatches:
            try:
                preloc = skipLeading(instring, loc)
                nextLoc, tokens = attemptMatch(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here: retry one character further on
                loc = preloc + 1
                continue

            if nextLoc <= loc:
                # match did not advance past loc; step forward instead
                loc = preloc + 1
                continue

            found += 1
            yield tokens, preloc, nextLoc
            if not overlap:
                loc = nextLoc
            elif skipLeading(instring, loc) > loc:
                loc = nextLoc
            else:
                loc += 1
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
        raise
1086
def transformString(self, instring):
    """Return C{instring} with every match replaced by its resulting tokens.

    Built on C{scanString}: attach a parse action that rewrites the token
    list, then call C{transformString()} to get the input text back with
    each matched region replaced by the (flattened, stringified) tokens.
    Note that this sets C{keepTabs} on the element.
    """
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # unmatched text between the previous match and this one
            pieces.append(instring[prevEnd:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        nonEmpty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(nonEmpty)))
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
        raise
1120
def searchString(self, instring, maxMatches=_MAX_INT):
    """Collect the tokens of every C{scanString} match into one C{ParseResults}.

    C{maxMatches} limits how many matches are gathered.
    """
    try:
        matchedTokens = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(matchedTokens)
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # re-raise from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
        raise
1135
def __add__(self, other):
    """C{self + other}: build an C{And}; a string operand becomes a C{Literal}.

    Any other operand type triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1145
def __radd__(self, other):
    """C{other + self} when C{other} is not a C{ParserElement}.

    A string operand becomes a C{Literal}; any other type triggers a
    C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1155
def __sub__(self, other):
    """C{self - other}: build an C{And} with an error stop between the operands.

    A string operand becomes a C{Literal}; any other type triggers a
    C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1165
def __rsub__(self, other):
    """C{other - self} when C{other} is not a C{ParserElement}.

    A string operand becomes a C{Literal}; any other type triggers a
    C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1175
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
       C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
       tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
       may also include C{None} as in:
        - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + ZeroOrMore(expr)}
          (read as "at least n instances of C{expr}")
        - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
        - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
        - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}

       Note that C{expr*(None,n)} does not raise an exception if
       more than n exprs exist in the input stream; that is,
       C{expr*(None,n)} does not enforce a maximum number of expr
       occurrences.  If this behavior is desired, then write
       C{expr*(None,n) + ~expr}

       Raises C{TypeError} for a multiplier that is neither an int nor a
       tuple of ints/C{None}, and C{ValueError} for a negative count, a
       maximum below the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None and ignore anything past two items
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            # open-ended upper bound
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # optElements becomes the number of optional repetitions
            optElements -= minElements
        else:
            # interpolate the types into the message (previously they were
            # passed as extra exception args and never formatted)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repetition is only tried if the
            # previous one matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1243
def __rmul__(self, other):
    """C{other * self}: delegates to C{__mul__} with the same multiplier."""
    product = self.__mul__(other)
    return product
1246
def __or__(self, other):
    """C{self | other}: build a C{MatchFirst}; a string operand becomes a C{Literal}.

    Any other operand type triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1256
def __ror__(self, other):
    """C{other | self} when C{other} is not a C{ParserElement}.

    A string operand becomes a C{Literal}; any other type triggers a
    C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1266
def __xor__(self, other):
    """C{self ^ other}: build an C{Or}; a string operand becomes a C{Literal}.

    Any other operand type triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1276
def __rxor__(self, other):
    """C{other ^ self} when C{other} is not a C{ParserElement}.

    A string operand becomes a C{Literal}; any other type triggers a
    C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1286
def __and__(self, other):
    """C{self & other}: build an C{Each}; a string operand becomes a C{Literal}.

    Any other operand type triggers a C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1296
def __rand__(self, other):
    """C{other & self} when C{other} is not a C{ParserElement}.

    A string operand becomes a C{Literal}; any other type triggers a
    C{SyntaxWarning} and yields C{None}.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1306
def __invert__(self):
    """C{~self}: wrap this element in a C{NotAny}."""
    negated = NotAny(self)
    return negated
1310
def __call__(self, name):
    """Call shorthand for C{setResultsName(name)}, so that::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       can be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

       A trailing C{'*'} on C{name} requests C{listAllMatches=True}.
    """
    named = self.setResultsName(name)
    return named
1321
def suppress(self):
    """Return this element wrapped in C{Suppress}, so that its matched text
       (punctuation, for example) is left out of the returned output.
    """
    silenced = Suppress(self)
    return silenced
1327
def leaveWhitespace(self):
    """Turn off whitespace skipping before this element's pattern is matched.

    Mostly used internally by pyparsing, but also useful in
    whitespace-sensitive grammars.  Returns C{self} to allow chaining.
    """
    setattr(self, "skipWhitespace", False)
    return self
1335
def setWhitespaceChars(self, chars):
    """Use C{chars} as this element's whitespace characters.

    Also re-enables whitespace skipping and clears the
    C{copyDefaultWhiteChars} flag.  Returns C{self} to allow chaining.
    """
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    self.skipWhitespace = True
    return self
1343
def parseWithTabs(self):
    """Keep C{<TAB>} characters in the input instead of expanding them to spaces.

    Must be called before C{parseString} when the grammar contains elements
    that match C{<TAB>} characters.  Returns C{self} to allow chaining.
    """
    setattr(self, "keepTabs", True)
    return self
1350
def ignore(self, other):
    """Register C{other} as an expression to skip (e.g. comments) while matching.

    May be called repeatedly to register several ignorable patterns.  A
    C{Suppress} is stored as a copy unless it is already registered; any
    other expression is copied and wrapped in C{Suppress}.  Returns C{self}.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other.copy())
    return self
1362
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Install debug callbacks and switch debugging on.

    Any action passed as a false value is replaced by the matching module
    default.  Returns C{self} to allow chaining.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1370
def setDebug(self, flag=True):
    """Turn display of debugging messages during matching on or off.

    A true C{flag} installs the default debug actions; a false C{flag}
    only clears C{self.debug}.  Returns C{self} to allow chaining.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
1379
def __str__(self):
    """Return this element's C{name} attribute."""
    label = self.name
    return label
1382
def __repr__(self):
    """Return the same text as the string conversion done by C{_ustr}."""
    text = _ustr(self)
    return text
1385
def streamline(self):
    """Mark this element as streamlined and drop its cached string form.

    Returns C{self} to allow chaining.
    """
    self.strRepr = None
    self.streamlined = True
    return self
1390
def checkRecursion(self, parseElementList):
    """Recursion-check hook; this base implementation does nothing."""
    return None
1393
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for interface compatibility and is not
    used by this base implementation; it defaults to C{None} rather than a
    shared mutable list.
    """
    self.checkRecursion( [] )
1397
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       C{parseAll} is passed through to C{parseString}, whose result is
       returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object: treat the argument as a path
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close the handle even when the read itself fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1415
def getException(self):
    """Build a C{ParseException} carrying this element's C{errmsg}, with an
       empty input string and location 0.
    """
    exc = ParseException("", 0, self.errmsg, self)
    return exc
1418
1419 def __getattr__(self,aname):
1420 if aname == "myException":
1421 self.myException = ret = self.getException();
1422 return ret;
1423 else:
1424 raise AttributeError("no such attribute " + aname)
1425
def __eq__(self, other):
    """Equality test.

    Against another C{ParserElement}: true for the same object or for equal
    instance dictionaries.  Against a string: true when this element parses
    the whole string.  Anything else is compared via the C{super} proxy.
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if not isinstance(other, basestring):
        return super(ParserElement, self) == other
    try:
        self.parseString(_ustr(other), parseAll=True)
    except ParseBaseException:
        return False
    return True
1437
def __ne__(self, other):
    """Inequality: the negation of C{self == other}."""
    areEqual = (self == other)
    return not areEqual
1440
def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return hash(identity)
1443
def __req__(self, other):
    """Reflected equality: the result of C{self == other}."""
    areEqual = (self == other)
    return areEqual
1446
def __rne__(self, other):
    """Reflected inequality: the negation of C{self == other}."""
    areEqual = (self == other)
    return not areEqual
1449
1450
class Token(ParserElement):
    """Abstract C{ParserElement} subclass that atomic matching patterns derive from."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name via the base class, then refresh C{errmsg} to
           "Expected <name>".  Returns whatever the base C{setName} returned.
        """
        renamed = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1460
1461
class Empty(Token):
    """Token that always matches, consuming nothing."""

    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1469
1470
class NoMatch(Token):
    """Token that never matches."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise the element's reusable exception, updated with
           the current location and input string.
        """
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
1485
1486
class Literal(Token):
    """Token that matches one specific string exactly."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen:
            self.firstMatchChar = matchString[0]
        else:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # an empty literal is turned into an Empty token in place
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc + matchLen, match)} when the literal starts at
           C{loc}; otherwise raise the element's reusable exception.
        """
        matched = (instring[loc] == self.firstMatchChar and
                   (self.matchLen == 1 or instring.startswith(self.match, loc)))
        if not matched:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc + self.matchLen, self.match
_L = Literal
1518
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{Literal}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to C{Keyword.DEFAULT_KEYWORD_CHARS} (initially all alphanumerics
       + "_" and "$") as it stands when the keyword is created; C{caseless} allows
       case-insensitive matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        # Resolve the default at call time so that setDefaultKeywordChars()
        # affects keywords created after it is called.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, requiring that neither the preceding
           nor the following character is an identifier character.
           Returns C{(loc + matchLen, match)} or raises the element's
           reusable exception.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Copy via the base class, then reset the copy's C{identChars} to
           the current class default.
        """
        c = super(Keyword,self).copy()
        # NOTE(review): this discards any custom identChars given to the
        # constructor; kept as-is since callers may rely on it - confirm intent.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1578
class CaselessLiteral(Literal):
    """Token that matches a given string regardless of letter case.

    Note: the returned token is always the string as given to the
    constructor, NOT the text as it appeared in the input.
    """

    def __init__(self, matchString):
        upperCased = matchString.upper()
        super(CaselessLiteral, self).__init__(upperCased)
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice at C{loc} with the stored
           upper-cased literal; return C{(loc + matchLen, returnString)} or
           raise the element's reusable exception.
        """
        endLoc = loc + self.matchLen
        if instring[loc:endLoc].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return endLoc, self.returnString
1599
class CaselessKeyword(Keyword):
    """Caseless version of C{Keyword}.

    C{identChars} defaults to C{Keyword.DEFAULT_KEYWORD_CHARS} as it stands
    when the keyword is created.
    """
    def __init__( self, matchString, identChars=None ):
        # Resolve the default at call time so that
        # Keyword.setDefaultKeywordChars() is honoured.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc} ignoring case, requiring that the
           following character is not an identifier character.
           Returns C{(loc + matchLen, match)} or raises the element's
           reusable exception.
        """
        # NOTE(review): unlike Keyword.parseImpl's caseless branch, the
        # character before loc is not checked here - confirm this is intended.
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1613
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.  An optional
       C{excludeChars} parameter can list characters to be removed from
       the C{initChars} and C{bodyChars} strings; useful to define a word of
       all printables except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join([c for c in initChars if c not in excludeChars])
            if bodyChars:
                bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no space in the character
        # sets, the word can be matched by a single compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character can be emitted as an escaped
                # literal.  (The test must be on initChars: escaping a
                # multi-character initChars would demand that exact sequence.)
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc}; return C{(endLoc, matchedText)} or raise
           the element's reusable exception.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past it is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # keyword mode: no body character may touch either end of the word
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the element's name if it can be obtained from the base
           class, else a short C{W:(...)} summary of the character sets.
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1746
1747
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       A precompiled pattern object is accepted as well.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

           Raises C{re.error} for an invalid pattern string, and C{ValueError}
           when C{pattern} is neither a string nor a compiled pattern."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the pattern source text; str() of a compiled pattern is the
            # object's repr, not the regular expression
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regex at C{loc}.  Returns C{(endLoc, results)} where the
           results hold the whole match plus one named entry per named
           group; raises the element's reusable exception on failure.
        """
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the element's name if it can be obtained from the base
           class, else C{Re:(<pattern repr>)}.
        """
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1813
1814
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
        Defined with the following parameters:
         - quoteChar - string of one or more characters defining the quote delimiting string
         - escChar - character to escape quotes, typically backslash (default=None)
         - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
         - multiline - boolean indicating whether quotes can span multiple lines (default=False)
         - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
         - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

        Raises C{SyntaxError} (after a C{SyntaxWarning}) if either quote string
        is empty once surrounding whitespace has been stripped.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - won't work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Characters that an ordinary (unescaped) body character may NOT be:
        # the first end-quote character, the escape character, and - unless
        # multiline - CR/LF.
        if escChar is not None:
            escapedEsc = _escapeRegexRangeChars(escChar)
        else:
            escapedEsc = ''
        excluded = _escapeRegexRangeChars(self.endQuoteChar[0])
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
        else:
            self.flags = 0
            excluded += r'\n\r'
        excluded += escapedEsc
        self.pattern = r'%s(?:[^%s]' % (re.escape(self.quoteChar), excluded)

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the following character breaks the end quote
            partials = ["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                     _escapeRegexRangeChars(self.endQuoteChar[i]))
                        for i in range(len(self.endQuoteChar) - 1, 0, -1)]
            self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # Character class of the quote characters that may follow escChar.
            # Built in a fixed order and with every character escaped, so that
            # quotes such as '[' / ']' or '\\' produce a valid, deterministic
            # class (a plain set() join left ']' and '\\' unescaped and in
            # arbitrary order).
            opener = self.quoteChar[0]
            closer = self.endQuoteChar[0]
            if opener == closer:
                quoteChars = opener
            else:
                quoteChars = opener + closer
            charset = ''.join([re.escape(c) for c in quoteChars])
            self.escCharReplacePattern = re.escape(self.escChar) + ("([%s])" % charset)
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the exception class raised by re.compile()
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string starting at C{loc}.

        Returns the end location and the matched text.  When
        C{unquoteResults} is set, the delimiters are stripped, escaped quote
        characters are unescaped and C{escQuote} sequences are replaced by the
        end quote string.  Raises the cached C{self.myException} on failure.
        """
        result = None
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        """Return the base-class string if available, else a cached description."""
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            # narrowed from a bare except; fall through to the generated form
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1929
1930
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        """Store the excluded characters and length limits.

        Raises C{ValueError} if C{min} is less than 1.  A positive C{exact}
        overrides both C{min} and C{max}.
        """
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        # max <= 0 means "no upper bound"
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from C{loc} until an excluded character, the
        maximum length, or the end of C{instring} is reached.

        Returns the end location and the consumed text; raises the cached
        C{self.myException} if the first character is excluded or fewer than
        C{minLen} characters were consumed.
        """
        excluded = self.notChars
        start = loc
        failed = instring[loc] in excluded

        if not failed:
            loc += 1
            limit = min(start + self.maxLen, len(instring))
            while loc < limit and instring[loc] not in excluded:
                loc += 1
            failed = (loc - start) < self.minLen

        if failed:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__(self):
        """Return the base-class string if available, else a cached C{!W:(...)} form."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # narrowed from a bare except; fall through to the generated form
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2000
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{Word} class."""
    # printable tags used to build the element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure which whitespace characters are matched and the length limits."""
        super(White, self).__init__()
        self.matchWhite = ws
        # characters we are asked to match must no longer be skipped
        stillSkipped = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars("".join(stillSkipped))
        tags = [White.whiteStrs[ch] for ch in self.matchWhite]
        self.name = "".join(tags)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting at C{loc}.

        Returns the end location and the matched text; raises the cached
        C{self.myException} if the first character is not matchable or the
        run is shorter than C{minLen}.
        """
        wanted = self.matchWhite
        start = loc
        ok = instring[loc] in wanted

        if ok:
            loc += 1
            stop = min(start + self.maxLen, len(instring))
            while loc < stop and instring[loc] in wanted:
                loc += 1
            ok = not (loc - start < self.minLen)

        if not ok:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
2056
2057
class _PositionToken(Token):
    """Common base for the position-testing tokens defined after it."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        # the element is named after its concrete class
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2064
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column is reached."""
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        # advance over whitespace only, stopping as soon as the column matches
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between C{loc} and the target column, and the location of that column.

        Raises C{ParseException} if C{loc} is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
2087
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__(self):
        super(LineStart, self).__init__()
        # newline must stay significant, so drop it from the skipped whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Step over a single newline found after the base-class pre-parse.

        Note that the returned location is derived from the incoming C{loc},
        not from the pre-parsed location.
        """
        preloc = super(LineStart, self).preParse(instring, loc)
        # Bounds check: at end of input there is no character to inspect;
        # indexing unconditionally raised a raw IndexError here.
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens) at location 0, at the first location reached
        by pre-parsing from 0, or directly after a newline; otherwise raise the
        cached C{self.myException}."""
        atLineStart = (loc == 0 or
                       loc == self.preParse(instring, 0) or
                       instring[loc - 1] == "\n")
        if not atLineStart:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2111
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__(self):
        super(LineEnd, self).__init__()
        # newline must stay significant, so drop it from the skipped whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a newline at C{loc}, or succeed with no tokens exactly at end of input.

        In both cases the returned location is C{loc+1}.  Any other position
        raises the cached C{self.myException}.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2136
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens) at location 0, or at the location that
        pre-parsing from 0 arrives at; otherwise raise the cached
        C{self.myException}."""
        # a non-zero loc is still acceptable if everything before it is
        # skipped by preParse (whitespace and ignorables)
        if loc != 0 and loc != self.preParse(instring, 0):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2153
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens) at or beyond the end of C{instring}.

        Exactly at the end the returned location is C{loc+1}; beyond the end
        it is C{loc} unchanged.  Before the end, the cached
        C{self.myException} is raised.
        """
        end = len(instring)
        if loc < end:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == end:
            return loc + 1, []
        # loc > end: the three comparisons are exhaustive, so the former
        # trailing ``else: raise`` branch could never run and was removed.
        return loc, []
2176
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}).  To emulate the C{\\b} behavior of regular expressions,
    use C{WordStart(alphanums)}.  C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.errmsg = "Not at the start of a word"
        # set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens) at location 0, or where the previous character
        is not a word character and the current one is; otherwise raise the
        cached C{self.myException}."""
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        if instring[loc - 1] not in wordChars and instring[loc] in wordChars:
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2198
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}).  To emulate the C{\\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}.  C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        # set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens) at or past the end of C{instring}, or where
        the previous character is a word character and the current one is not;
        otherwise raise the cached C{self.myException}."""
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            # At loc == 0 there is no preceding character, so no word can end
            # here.  Checked explicitly because instring[loc-1] would otherwise
            # wrap around to the LAST character of the string.
            if (instring[loc] in self.wordChars or
                    loc == 0 or
                    instring[loc - 1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2223
2224
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__(self, exprs, savelist = False):
        """Store C{exprs} as the list C{self.exprs}.

        A list is stored as-is (not copied), a string becomes a single
        C{Literal}, any other iterable is converted with C{list()}, and a
        non-iterable object is wrapped in a one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            self.exprs = exprs
        elif isinstance(exprs, basestring):
            self.exprs = [Literal(exprs)]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable: treat as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def __getitem__(self, i):
        """Return the C{i}'th contained expression."""
        return self.exprs[i]

    def append(self, other):
        """Add C{other} to the contained expressions and reset the cached string form."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original sub-expressions are left untouched
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register C{other} as ignorable here and on every contained expression.

        A C{Suppress} that is already present in C{self.ignoreExprs} is not
        registered (or propagated) again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            # propagate whatever the base class just appended
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the base-class string if available, else a cached C{ClassName:(exprs)} form."""
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            # narrowed from a bare except; fall through to the generated form
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline all contained expressions and flatten simple nesting."""
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression, self).setResultsName(name, listAllMatches)
        return ret

    def validate(self, validateTrace=[]):
        """Validate every contained expression, then check this expression for recursion."""
        # the shared default list is never mutated: a new list is built here
        tmp = validateTrace[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion([])

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies too."""
        ret = super(ParseExpression, self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2325
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once seen by C{And.parseImpl}, failures of later
        elements are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup *after* Empty,
            # so Empty.__init__ itself is skipped - kept as-is; confirm intent.
            super(Empty, self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        """Build the sequence and take whitespace handling from its first element."""
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Use the normalised self.exprs rather than the raw argument: the
        # argument may be a string or a one-shot iterable, for which
        # exprs[0] fails or yields something that is not a parser element.
        first = self.exprs[0]
        self.setWhitespaceChars(first.whiteChars)
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each contained expression in order, accumulating their tokens.

        After an C{_ErrorStop} marker, a C{ParseBaseException} or
        C{IndexError} from a later element is re-raised as
        C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that carry tokens or named results
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append C{other} (strings become C{Literal}s) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other) #And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        """Check contained expressions for recursion, up to and including the
        first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        """Return the assigned name if any, else a cached C{{a b c}} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr
2393
2394
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.
    """
    def __init__(self, exprs, savelist = False):
        """Build the alternation; it may match empty if any alternative may."""
        super(Or, self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for alternative in self.exprs:
            if alternative.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and parse with the one that reaches furthest.

        If none matches, the exception that got furthest into the input is
        raised; with no alternatives at all a C{ParseException} is raised.
        """
        furthestFailLoc = -1
        furthestFail = None
        bestLoc = -1
        for e in self.exprs:
            try:
                endLoc = e.tryParse(instring, loc)
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFail = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end counts as failing at end of input
                if len(instring) > furthestFailLoc:
                    furthestFail = ParseException(instring, len(instring), e.errmsg, self)
                    furthestFailLoc = len(instring)
            else:
                # strictly greater: on ties the earlier alternative wins
                if endLoc > bestLoc:
                    bestLoc = endLoc
                    bestExpr = e

        if bestLoc < 0:
            if furthestFail is not None:
                raise furthestFail
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        return bestExpr._parse(instring, loc, doActions)

    def __ixor__(self, other):
        """Append C{other} (strings become C{Literal}s) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other) #Or( [ self, other ] )

    def __str__(self):
        """Return the assigned name if any, else a cached C{{a ^ b}} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
2455
2456
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.
    """
    def __init__(self, exprs, savelist = False):
        """Build the alternation; it may match empty if any alternative may,
        or if C{exprs} is empty."""
        super(MatchFirst, self).__init__(exprs, savelist)
        if exprs:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at C{loc}.

        If none matches, the exception that got furthest into the input is
        raised; with no alternatives at all a C{ParseException} is raised.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseException:
                # sys.exc_info() instead of ``except X, err`` (Python-2-only
                # syntax), matching the idiom used by Or and And
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # running off the end counts as failing at end of input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Append C{other} (strings become C{Literal}s) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other) #MatchFirst( [ self, other ] )

    def __str__(self):
        """Return the assigned name if any, else a cached C{{a | b}} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
2514
2515
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.
    """
    def __init__(self, exprs, savelist = True):
        """Build the unordered group; it may match empty only if every element may."""
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for element in self.exprs:
            if not element.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # the expression groups are computed lazily on the first parse
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expressions in whatever order they occur.

        A trial pass (C{tryParse}) determines a matching order; the input is
        then parsed for real in that order and the results are merged.
        Raises C{ParseException} if any required element never matched.
        """
        if self.initExprGroups:
            # classify the elements once; the groups are cached on the instance
            unwrappedOptionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
            emptyMatchers = [e for e in self.exprs if e.mayReturnEmpty and e not in unwrappedOptionals]
            self.optionals = unwrappedOptionals + emptyMatchers
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        trialLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a sweep matches nothing
        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    trialLoc = e.tryParse(instring, trialLoc)
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(e)
                    if e in pendingRequired:
                        pendingRequired.remove(e)
                    elif e in pendingOptional:
                        pendingOptional.remove(e)
            if failures == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join([_ustr(e) for e in pendingRequired])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pendingOptional]

        # second pass: parse for real, in the order discovered above
        parsed = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            parsed.append(results)

        merged = ParseResults([])
        for r in parsed:
            # names already present in the merged results get their old and
            # new values concatenated instead of being overwritten
            repeated = {}
            for key in r.keys():
                if key in merged.keys():
                    combined = ParseResults(merged[key])
                    combined += ParseResults(r[key])
                    repeated[key] = combined
            merged += ParseResults(r)
            for key, value in repeated.items():
                merged[key] = value
        return loc, merged

    def __str__(self):
        """Return the assigned name if any, else a cached C{{a & b}} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " & ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every contained expression for recursion."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
2602
2603
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__(self, expr, savelist=False):
        """Wrap C{expr} (a string becomes a C{Literal}; C{None} is allowed) and
        inherit its parsing flags, whitespace settings and ignore expressions."""
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a private copy of the wrapped expression."""
        self.skipWhitespace = False
        # Guard before copying: the wrapped expression may be None, and the
        # copy used to be taken ahead of this check (AttributeError on None).
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register C{other} as ignorable here and on the wrapped expression.

        A C{Suppress} that is already present in C{self.ignoreExprs} is not
        registered (or propagated) again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            # propagate whatever the base class just appended
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise C{RecursiveGrammarException} if this element is already on the
        given trail; otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, then check this element for recursion."""
        # the shared default list is never mutated: a new list is built here
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the base-class string if available, else a cached C{ClassName:(expr)} form."""
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # narrowed from a bare except; fall through to the generated form
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
2674
2675
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list."""
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Trial-parse the wrapped expression at C{loc}; on success return
        C{loc} unchanged with no tokens (a failure propagates)."""
        lookahead = self.expr
        lookahead.tryParse(instring, loc)
        return loc, []
2688
2689
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (with no tokens, C{loc} unchanged) only if the wrapped
        expression fails at C{loc}; otherwise raise the cached C{self.myException}."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent: this is the success path
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__(self):
        """Return the assigned name if any, else a cached C{~{expr}} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2724
2725
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression repeatedly until it fails.

        Returns the location after the last successful repetition and the
        accumulated tokens (an empty list if not even one repetition matched).
        """
        tokens = []
        try:
            # first repetition: the caller has already pre-parsed
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = bool(self.ignoreExprs)
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                # keep results that carry tokens or named results
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # the failing repetition simply ends the loop
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name if any, else a cached C{[expr]...} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and force list-style results on the returned element."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2763
2764
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression at least once, then as many more
        times as possible.  A failure of the first match propagates to the
        caller; later failures just end the repetition."""
        # mandatory first match - deliberately outside the try block
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables(instring, loc)
                loc, moreTokens = self.expr._parse(instring, nextLoc, doActions)
                # accumulate when there are list tokens or named results
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always
        saves its results as a list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2798
2799class _NullToken(object):
2800 def __bool__(self):
2801 return False
2802 __nonzero__ = __bool__
2803 def __str__(self):
2804 return ""
2805
2806_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return value can also be specified, to be returned when the
       optional expression is not found.
    """

    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression; on failure return the default value
        (if one was given) or an empty token list, without advancing."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                # no default configured
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name too
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
2839
2840
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If C{failOn} matches before the target
       expression is found, the skip is abandoned with a C{ParseException}.
    """

    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string given for failOn is promoted to a Literal
        if isinstance(failOn, basestring):
            failOn = Literal(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time from C{loc} until the target
        expression matches, then return the skipped text (and the match
        itself when C{include} was requested)."""
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        # set just before raising for a failOn match, so that the handler
        # below re-raises instead of stepping forward
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False

                if self.ignoreExpr is not None:
                    # jump over every consecutive ignorable expression
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break

                # probe for the target without running parse actions
                target._parse(instring, loc, doActions=False, callPreParse=False)
                skipText = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [skipText]

                # caller wants the match too: parse it for real
                loc, mat = target._parse(instring, loc, doActions, callPreParse=False)
                if not mat:
                    return loc, [skipText]
                skipRes = ParseResults(skipText)
                skipRes += mat
                return loc, [skipRes]
            except (ParseException, IndexError):
                if failParse:
                    raise
                loc += 1

        # ran off the end of the input without finding the target
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2905
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
    """

    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Define the deferred expression and adopt its parsing settings.
        A string is converted to a C{Literal}.  Returns C{None}."""
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None    # discard any cached string representation
        contained = self.expr
        self.mayIndexError = contained.mayIndexError
        self.setWhitespaceChars(contained.whiteChars)
        self.skipWhitespace = contained.skipWhitespace
        self.saveAsList = contained.saveAsList
        self.ignoreExprs.extend(contained.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        # the flag is set before descending into the contained expression
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        # validateTrace is only read here; a new list is built for the
        # nested call
        if self not in validateTrace:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        # Temporarily switch this instance to _ForwardNoRecurse, whose
        # __str__ returns "...", while the contained expression is rendered.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy this element; an undefined C{Forward} is copied as a new
        C{Forward} that refers back to this one."""
        if self.expr is None:
            duplicate = Forward()
            duplicate << self
            return duplicate
        return super(Forward, self).copy()
2977
class _ForwardNoRecurse(Forward):
    """Variant of C{Forward} whose string form is always C{"..."}."""

    def __str__(self):
        return "..."
2981
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        # savelist is accepted but not passed on to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
2987
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated - use the C{upcaseTokens} parse action instead; constructing
    an instance emits a C{DeprecationWarning}."""

    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list holding the upper-cased form of every token."""
        # Call the string method directly: the module-level string.upper()
        # function does not exist on Python 3.
        return [tok.upper() for tok in tokenlist]
2997
2998
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        # an adjacent Combine uses the ParserElement implementation directly;
        # otherwise the inherited implementation is used
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super(Combine, self).ignore(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single concatenated string."""
        # start from a copy of the results with its list items removed
        retToks = tokenlist.copy()
        del retToks[:]
        combined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([combined], modal=self.modalResults)

        if self.resultsName and len(retToks.keys()) > 0:
            return [retToks]
        return retToks
3030
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # wrap the whole result so that it becomes a single nested item
        grouped = [tokenlist]
        return grouped
3039
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """

    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to C{tokenlist} for every non-empty item, keyed
        by the item's first token."""
        for index, entry in enumerate(tokenlist):
            entryLen = len(entry)
            if entryLen == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their stripped string form
                key = _ustr(entry[0]).strip()

            if entryLen == 1:
                # key only - the value is an empty string
                value = ""
            elif entryLen == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, index)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
3072
3073
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        # whatever was matched, contribute no tokens
        return []

    def suppress(self):
        # already suppressing, so return this element itself
        return self
3081
3082
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.
    Further calls raise C{ParseException} until C{reset()} is called."""

    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # marked as called only after the wrapped action returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called one more time."""
        self.called = False
3096
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the parse action, and a "leaving" line afterwards showing either
    the returned value or the exception raised (which is re-raised)."""
    f = _trim_arity(f)

    def z(*paArgs):
        # __name__ exists on both Python 2 and 3 (func_name is Python 2 only)
        thisFunc = getattr(f, "__name__", f.__class__.__name__)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret

    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3119
3120#
3121# global helpers
3122#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."
    if not combine:
        # delimiters are matched but dropped from the results
        return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName)
    # everything, delimiters included, is merged into one token
    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
3136
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       C{intExpr} may supply the expression for the count; by default a
       C{Word(nums)} converted to C{int} is used.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array part as exactly that
        # many repetitions of expr (or an empty group for a zero count)
        n = t[0]
        if n:
            arrayExpr << Group(And([expr] * n))
        else:
            arrayExpr << Group(empty)
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return intExpr + arrayExpr
3156
3157def _flatten(L):
3158 ret = []
3159 for i in L:
3160 if isinstance(i,list):
3161 ret.extend(_flatten(i))
3162 else:
3163 ret.append(i)
3164 return ret
3165
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater each time expr matches
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them as a sequence of literals
            flatTokens = _flatten(t.asList())
            rep << And([Literal(tok) for tok in flatTokens])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3191
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of expr
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # reject the repeat unless it produced the same tokens
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3217
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regular expression character class:
    backslash, '^', '-' and ']' get a leading backslash, and newline / tab
    characters are spelled out as two-character escapes."""
    # the backslash comes first so that later escapes are not doubled
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    for rawChar, escaped in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(rawChar, escaped)
    return _ustr(s)
3225
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{MatchFirst} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Any other type for C{strs} emits a C{SyntaxWarning} and yields a
       C{MatchFirst} with no alternatives.
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    if isinstance(strs, (list, tuple)):
        symbols = list(strs[:])
    elif isinstance(strs, basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                      SyntaxWarning, stacklevel=2)
        # Nothing usable was given: return an expression with no
        # alternatives rather than failing below on an undefined name.
        return MatchFirst([])

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1:]):
            if isequal(other, cur):
                del symbols[i + j + 1]
                break
            elif masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                break
        else:
            # no change was needed for position i - move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols]))
            else:
                return Regex("|".join([re.escape(sym) for sym in symbols]))
        except Exception:
            # fall back to MatchFirst, but let KeyboardInterrupt/SystemExit through
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst([parseElementClass(sym) for sym in symbols])
3284
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    # one group per key/value pair, repeated any number of times
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
3294
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{ParseResults} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # empty markers whose parse action records the current location
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    def extractAsString(s, l, t):
        return s[t._original_start:t._original_end]

    def extractInPlace(s, l, t):
        # swap the list items for the original text, then drop the two
        # bookkeeping names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        matchExpr.setParseAction(extractAsString)
    else:
        matchExpr.setParseAction(extractInPlace)
    return matchExpr
3323
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.  The result of C{expr} is replaced by
       its first token."""
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t: t[0])
3328
# ready-made, named instances of the positional / empty expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
3335
# Grammar for the regexp-like "[...]" character set notation accepted by srange().

# backslash followed by a punctuation character -> the punctuation character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1])
_printables_less_backslash = "".join([c for c in printables if c not in r"\]"])
# \x## (or the older \0x## form) -> the character with that hex code.
# Only the digits after the 'x' are converted: int("x21", 16) is not a valid
# literal, so slicing off just the backslash breaks the plain \x## form.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(
    lambda s, l, t: unichr(int(t[0].lower().split("x", 1)[1], 16)))
# \0## -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash, exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]"

# expand a parsed 'a-z' style range (a ParseResults pair) into all of its
# characters; anything else is returned as-is
_expanded = lambda p: (isinstance(p, ParseResults) and ''.join([unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1)]) or p)
3345
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if C{s} cannot be parsed or expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: bad input gives "", but KeyboardInterrupt / SystemExit
        # are no longer swallowed
        return ""
3367
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises
       C{ParseException} when the match is not at column C{n}.
    """
    def verifyCol(strg, locn, toks):
        actualCol = col(locn, strg)
        if actualCol != n:
            raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol
3376
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{transformString()}.
    """
    def _replFunc(*args):
        # the arguments are ignored; always answer with the fixed value
        replacement = [replStr]
        return replacement
    return _replFunc
3384
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    # strip the first and last character of the first token
    quoted = t[0]
    return quoted[1:-1]
3391
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    return [_ustr(tok).upper() for tok in t]
3395
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    return [_ustr(tok).lower() for tok in t]
3399
def keepOriginalText(s, startLoc, t):
    """DEPRECATED - use new helper method C{originalTextFor}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    # getTokensEndLoc() inspects the call stack, so it is called directly here
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    # replace the contents of the token list in place
    del t[:]
    t += ParseResults(originalText)
    return t
3411
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.  Raises C{ParseFatalException} when no
       C{_parseNoCache} frame is found on the call stack."""
    import inspect
    frames = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for
        # the correct calling routine, skipping this frame and its caller
        for record in frames[2:]:
            if record[3] != "_parseNoCache":
                continue
            # the parser's local variable "loc" holds the end location
            return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the references to the frame records
        del frames
3427
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a tag name string or an expression with a C{name};
    C{xml} selects XML rules (case-sensitive tag, double-quoted attribute
    values) over HTML rules.  Returns C{(openTag, closeTag)}."""
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: attribute names are lower-cased, values are optional and may be unquoted
        printablesLessRAbrack = "".join([c for c in printables if c not in ">"])
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        attribute = Group(tagAttrName.setParseAction(downcaseTokens) +
                          Optional(Suppress("=") + tagAttrValue))

    # "empty" is True when the tag ends in "/>", False otherwise
    emptyFlag = Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attribute)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" plus the tag name in TitleCase, colons removed
    nameSuffix = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end" + nameSuffix).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3456
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    openTag, closeTag = _makeTags(tagStr, False)
    return openTag, closeTag
3460
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    openTag, closeTag = _makeTags(tagStr, True)
    return openTag, closeTag
3464
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.
    """
    # positional name-value tuples take priority over keyword arguments
    pairs = args or attrDict.items()
    attrs = [(k, v) for k, v in pairs]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                         (attrName, tokens[attrName], attrValue))
    return pa

# marker value meaning "the attribute must exist, with any value"
withAttribute.ANY_VALUE = object()
3497
# associativity markers used in operatorPrecedence() operator definitions;
# each is a distinct object instance
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3501
def operatorPrecedence(baseExpr, opList):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises C{ValueError} for an invalid numTerms, associativity or
       ternary operator definition.
    """
    ret = Forward()
    # the innermost operand: the base expression or a parenthesized full expression
    lastExpr = baseExpr | (Suppress('(') + ret + Suppress(')'))
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        # For every case, "lookahead" is the shortest form that must be
        # present and "operation" is the expression that is actually grouped.
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                lookahead = lastExpr + opExpr
                operation = lastExpr + OneOrMore(opExpr)
            elif arity == 2:
                if opExpr is not None:
                    lookahead = lastExpr + opExpr + lastExpr
                    operation = lastExpr + OneOrMore(opExpr + lastExpr)
                else:
                    # no operator expression: operands simply follow each other
                    lookahead = lastExpr + lastExpr
                    operation = lastExpr + OneOrMore(lastExpr)
            elif arity == 3:
                lookahead = lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                operation = lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + thisExpr
                operation = opExpr + thisExpr
            elif arity == 2:
                if opExpr is not None:
                    lookahead = lastExpr + opExpr + thisExpr
                    operation = lastExpr + OneOrMore(opExpr + thisExpr)
                else:
                    lookahead = lastExpr + thisExpr
                    operation = lastExpr + OneOrMore(thisExpr)
            elif arity == 3:
                lookahead = lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                operation = lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")

        matchExpr = FollowedBy(lookahead) + Group(operation)
        if pa:
            matchExpr.setParseAction(pa)
        # this level matches its own operator form, or falls back to the previous level
        thisExpr << (matchExpr | lastExpr)
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3572
# Ready-made matchers for quoted string literals.  Between the enclosing quote
# characters each pattern accepts any mix of:
#   - a character other than the quote itself, newline, carriage return or backslash
#   - a doubled quote character ("" or '')
#   - "\x" followed by one or more hex digits
#   - a backslash followed by one further character (whatever "." matches)
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# the two patterns above joined as alternatives in a single regular expression
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a literal 'u' followed by a copy of quotedString, wrapped in Combine
# (_L is defined elsewhere in this file - presumably an alias for Literal; confirm)
unicodeString = Combine(_L('u') + quotedString.copy())
3577
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression that matches nested lists bracketed by an opening
       and a closing delimiter (C{"("} and C{")"} unless told otherwise).

       Parameters:
        - opener - string that opens a nested list (default="("); a pyparsing
          expression is accepted too, provided C{content} is given
        - closer - string that closes a nested list (default=")"); a pyparsing
          expression is accepted too, provided C{content} is given
        - content - expression for the items inside the lists (default=None)
        - ignoreExpr - expression whose matches may contain the delimiters
          without affecting nesting (default=a copy of L{quotedString})

       When C{content} is omitted, one is synthesized from the delimiters so
       that everything between them is captured as whitespace-delimited
       values.  This is only possible when both delimiters are strings; a
       C{ValueError} is raised otherwise, and also when C{opener} equals
       C{closer}.

       C{ignoreExpr} is meant for things such as quoted strings or comments.
       Combine several expressions with C{L{Or}} or C{L{MatchFirst}}, or pass
       C{None} if nothing is to be ignored.  Note that the default value is
       created once, when this function is defined, and is therefore the same
       object on every call that relies on it.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from plain strings
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t: t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = (len(opener) == 1 and len(closer) == 1)

        if singleCharDelims and ignoreExpr is None:
            # simplest case: one run of characters that are neither delimiter
            # nor whitespace; the parse action sits on the CharsNotIn itself
            content = empty.copy() + CharsNotIn(opener + closer + whiteChars).setParseAction(stripToken)
        else:
            # otherwise consume one character at a time, each one guarded by
            # negative lookaheads for whatever must not begin at that position
            terms = []
            if ignoreExpr is not None:
                terms.append(~ignoreExpr)
            if singleCharDelims:
                terms.append(CharsNotIn(opener + closer + whiteChars, exact=1))
            else:
                terms.extend([~Literal(opener),
                              ~Literal(closer),
                              CharsNotIn(whiteChars, exact=1)])
            # chain left to right, exactly as "a + b + c" would
            oneChar = terms[0]
            for term in terms[1:]:
                oneChar = oneChar + term
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    # the expression refers to itself so that lists may nest to any depth
    ret << Group( Suppress(opener) + ZeroOrMore(item) + Suppress(closer) )
    return ret
3629
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Build an expression for a block of statements delimited by their
       indentation, in the manner of block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression for the statement that repeats
          inside the block
        - indentStack - list supplied by the caller and used here as a stack
          of indentation columns (every indentedBlock expression within one
          grammar should be given the same list)
        - indent - whether the block has to be indented further than the
          current level; pass False for a block of left-most statements
          (default=True)

       The block must hold at least one C{blockStatementExpr}.  As a side
       effect C{blockStatementExpr} itself is modified: it is told to ignore
       C{_bslash + LineEnd()}.
    """
    def checkPeerIndent(instring, loc, toks):
        # nothing to compare against once the end of the input is reached
        if loc >= len(instring):
            return
        column = col(loc, instring)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(instring, loc, "illegal nesting")
        if column != expected:
            raise ParseException(instring, loc, "not a peer entry")

    def checkSubIndent(instring, loc, toks):
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        # deeper than the enclosing level: this column becomes the current one
        indentStack.append(column)

    def checkUnindent(instring, loc, toks):
        if loc >= len(instring):
            return
        column = col(loc, instring)
        # must lie left of the current level and no further right than the
        # level that encloses it
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(instring, loc, "not an unindent")
        indentStack.pop()

    lineBreaks = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    indentMarker = Empty() + Empty().setParseAction(checkSubIndent)
    peerMarker = Empty().setParseAction(checkPeerIndent)
    unindentMarker = Empty().setParseAction(checkUnindent)

    # one or more statements, each verified to start at the current column
    statements = OneOrMore(peerMarker + Group(blockStatementExpr) + Optional(lineBreaks))
    if indent:
        block = Group(Optional(lineBreaks) + indentMarker + statements + unindentMarker)
    else:
        block = Group(Optional(lineBreaks) + statements)

    blockStatementExpr.ignore(_bslash + LineEnd())
    return block
3681
# character sets expanded by srange from hex ranges 0xc0-0xd6, 0xd8-0xf6, 0xf8-0xff
# and 0xa1-0xbf, 0xd7, 0xf7 respectively (presumably the Latin-1 accented letters and
# the remaining Latin-1 punctuation/symbols - confirm against the intended encoding)
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions, as returned by makeHTMLTags, for any tag name
# described by Word(alphas, alphanums+"_:")
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&" + one of the listed entity names (stored under the results name "entity") + ";"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# entity name -> replacement character, pairing the names with '><& "' in order
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# yields the replacement character for t.entity, or None when the name is not in the map;
# the "and ... or" form is safe here only because every mapped value is a non-empty string
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# "/*" up to the first following "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!--" up to the first following "-->", newlines included
htmlComment = Regex(r"<!--[\s\S]*?-->")
# whatever ".*" matches from the current position, with leading whitespace kept
restOfLine = Regex(r".*").leaveWhitespace()
# "//" followed by any run of characters, where backslash-newline pairs are also consumed
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a "/* ... */" comment or a "//" comment, in a single regular expression
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment
# "#" followed by whatever ".*" matches
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every character of printables except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: words of non-comma characters, optionally separated by runs of
# spaces/tabs that are not directly followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list of quoted strings or plain items; an omitted item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3705
3706
# Self-test / demonstration, executed only when this module is run as a script:
# defines a small "select ... from ..." grammar and parses a handful of sample
# statements with it, some valid and some deliberately malformed.
if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome.

        On success the token list, the tokens, the "columns" and "tables"
        named results and an XML rendering are printed.  On a
        ParseBaseException the offending line, a caret under the error
        column and the exception itself are printed instead.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # exception fetched via sys.exc_info() rather than bound in the except clause
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            # err.column is used as 1-based here: pad with column-1 spaces before the caret
            print (" "*(err.column-1) + "^")
            print (err)
        # separator between test cases
        # NOTE(review): assumed to run after both the success and the failure path - confirm
        print()

    # keywords are matched without regard to case
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    # identifiers built from alphas, then alphanums plus "_" and "$"
    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into a single token and passed through upcaseTokens
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    # select ( "*" | column list ) from table list
    simpleSQL = ( selectToken + \
                  ( '*' | columnNameList ).setResultsName( "columns" ) + \
                  fromToken + \
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    # the next four inputs do not fit the grammar (misspelled keyword, missing or invalid parts)
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )