"""Iterator based sre token scanner"""
import re
from re import VERBOSE, MULTILINE, DOTALL

__all__ = ['Scanner', 'pattern']

FLAGS = (VERBOSE | MULTILINE | DOTALL)


class Scanner(object):
    """Dispatch regex matches over a string to a list of token callables.

    Each item of *lexicon* must expose a ``pattern`` attribute (a regex
    source string, e.g. as attached by the ``pattern`` decorator below) and
    be callable as ``token(match, context)``, returning a 2-tuple
    ``(value, next_pos)``.

    The phrases are joined into one alternation, tried in lexicon order, and
    compiled once with *flags*.

    Limitations: because every phrase lives inside one combined regex,
    capturing groups inside a phrase are renumbered (they follow the group
    wrapping that phrase), so numeric backreferences inside a phrase and
    group names repeated across phrases are not supported.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """Compile *lexicon* into a single scanner pattern.

        Raises ``re.error`` if any phrase, or the combined pattern, is not
        a valid regular expression.
        """
        # self.actions is indexed by ``match.lastindex``.  Slot 0 is unused
        # (group 0 is the whole match); the slot of each wrapping group
        # holds its token; slots of groups nested inside a phrase hold None.
        self.actions = [None]
        alternatives = []
        verbose = bool(flags & VERBOSE)
        for token in lexicon:
            phrase = token.pattern
            # Compiling the phrase on its own validates it and tells us how
            # many group numbers it will occupy inside the combined regex.
            inner_groups = re.compile(phrase, flags).groups
            if verbose:
                # A phrase may end in a "# comment"; the newline terminates
                # it so the closing parenthesis is not commented out.
                alternatives.append('(' + phrase + '\n)')
            else:
                alternatives.append('(' + phrase + ')')
            self.actions.append(token)
            self.actions.extend([None] * inner_groups)
        self.scanner = re.compile('|'.join(alternatives), flags)

    def iterscan(self, string, idx=0, context=None):
        """Yield ``(value, end_idx)`` for each token matched in *string*.

        Scanning starts at *idx*.  For every match, the token whose phrase
        matched is called as ``token(match, context)`` and must return
        ``(value, next_pos)``.  If ``next_pos`` is not None and differs from
        the end of the match, scanning resumes at ``next_pos`` instead.

        Iteration stops at the first position where no phrase matches, or
        when a match ends exactly where the previous one ended.
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # No progress since the previous match: stop rather than
                # loop forever.
                break
            # The wrapping group of an alternative closes after any groups
            # nested in it, so lastindex identifies the matched token.
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend


def pattern(pattern, flags=FLAGS):
    """Return a decorator that tags a callable with a regex.

    The decorated function is returned unchanged apart from two new
    attributes: ``pattern`` (the regex source string) and ``regex`` (that
    source compiled with *flags*).
    """
    def decorator(fn):
        fn.pattern = pattern
        fn.regex = re.compile(pattern, flags)
        return fn
    return decorator