scanner.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. """
  2. Iterator based sre token scanner
  3. """
  4. import re
  5. from re import VERBOSE, MULTILINE, DOTALL
  6. import sre_parse
  7. import sre_compile
  8. import sre_constants
  9. from sre_constants import BRANCH, SUBPATTERN
  10. __all__ = ['Scanner', 'pattern']
  11. FLAGS = (VERBOSE | MULTILINE | DOTALL)
  12. class Scanner(object):
  13. def __init__(self, lexicon, flags=FLAGS):
  14. self.actions = [None]
  15. # Combine phrases into a compound pattern
  16. s = sre_parse.Pattern()
  17. s.flags = flags
  18. p = []
  19. for idx, token in enumerate(lexicon):
  20. phrase = token.pattern
  21. try:
  22. subpattern = sre_parse.SubPattern(s,
  23. [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
  24. except sre_constants.error:
  25. raise
  26. p.append(subpattern)
  27. self.actions.append(token)
  28. s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
  29. p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
  30. self.scanner = sre_compile.compile(p)
  31. def iterscan(self, string, idx=0, context=None):
  32. """
  33. Yield match, end_idx for each match
  34. """
  35. match = self.scanner.scanner(string, idx).match
  36. actions = self.actions
  37. lastend = idx
  38. end = len(string)
  39. while True:
  40. m = match()
  41. if m is None:
  42. break
  43. matchbegin, matchend = m.span()
  44. if lastend == matchend:
  45. break
  46. action = actions[m.lastindex]
  47. if action is not None:
  48. rval, next_pos = action(m, context)
  49. if next_pos is not None and next_pos != matchend:
  50. # "fast forward" the scanner
  51. matchend = next_pos
  52. match = self.scanner.scanner(string, matchend).match
  53. yield rval, matchend
  54. lastend = matchend
  55. def pattern(pattern, flags=FLAGS):
  56. def decorator(fn):
  57. fn.pattern = pattern
  58. fn.regex = re.compile(pattern, flags)
  59. return fn
  60. return decorator