From 30608ea412461df27a52c15168520122a476a889 Mon Sep 17 00:00:00 2001
From: guido
Date: Fri, 9 Feb 2007 16:13:30 +0100
Subject: [PATCH] [svn r38262] Fixed support for tokenizing multi-line strings
 that use \ to span lines.

--HG--
branch : trunk
---
 py/apigen/source/color.py              | 7 +++++++
 py/apigen/source/testing/test_color.py | 9 +++++++++
 2 files changed, 16 insertions(+)

diff --git a/py/apigen/source/color.py b/py/apigen/source/color.py
index ecf3141f3..3309c3de4 100644
--- a/py/apigen/source/color.py
+++ b/py/apigen/source/color.py
@@ -12,6 +12,7 @@ class PythonSchema(object):
                'return', 'try', 'while', 'with', 'yield']
     alt_keyword = ['as', 'assert', 'class', 'def', 'del', 'exec', 'from',
                    'global', 'import', 'lambda', 'pass', 'print']
+    linejoin = r'\\'
 
 class Token(object):
     data = None
@@ -59,6 +60,12 @@ class Tokenizer(object):
             self._re_strings_full.append(
                 re.compile(r'%s[^\\%s]+(\\.[^\\%s]*)*%s' % (d, d, d, d)))
             self._re_strings_empty.append(re.compile('%s%s' % (d, d)))
+        if schema.linejoin:
+            j = schema.linejoin
+            for d in schema.string + schema.multiline_string:
+                self._re_strings_multiline.append(
+                    (re.compile('%s.*%s' % (d, j)),
+                     re.compile('.*?%s' % (d,))))
         for d in schema.multiline_string:
             self._re_strings_multiline.append((re.compile('%s.*' % (d,), re.S),
                                                re.compile('.*?%s' % (d,))))
diff --git a/py/apigen/source/testing/test_color.py b/py/apigen/source/testing/test_color.py
index 08d0ade11..a1c972ce2 100644
--- a/py/apigen/source/testing/test_color.py
+++ b/py/apigen/source/testing/test_color.py
@@ -75,6 +75,15 @@ class TestTokenizer(object):
         res = list(t.tokenize('bar'))
         assert res == [Token('bar', type='word')]
 
+    def test_string_multiline_slash(self):
+        t = Tokenizer(PythonSchema)
+        res = list(t.tokenize("'foo\\"))
+        assert res == [Token("'foo\\", type='string')]
+        res = list(t.tokenize("bar'"))
+        assert res == [Token("bar'", type='string')]
+        res = list(t.tokenize("bar"))
+        assert res == [Token('bar', type='word')]
+
     def test_string_following_printable(self):
         assert self.tokens('."foo"') == [Token('.', type='unknown'),
                                          Token('"foo"', type='string')]
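
Note (editorial, not part of the patch): for every string delimiter, single-
and triple-quoted alike, the change registers the same (start, end) regex
pair mechanism that _re_strings_multiline already uses for triple-quoted
strings. Below is a minimal sketch of how one such pair behaves across
line-by-line tokenize() calls, assuming one call per source line as in the
new test; the variable names are illustrative, not taken from color.py.

    import re

    d = "'"       # one PythonSchema.string delimiter
    j = r'\\'     # the new PythonSchema.linejoin pattern

    # "start" matches a string opened on this line and continued with a
    # trailing backslash; "end" lazily consumes up to the closing quote.
    start = re.compile('%s.*%s' % (d, j))
    end = re.compile('.*?%s' % d)

    m = start.match("'foo\\")
    assert m.group(0) == "'foo\\"    # opener, left pending for the next line

    m = end.match("bar'")
    assert m.group(0) == "bar'"      # continuation up to the closing quote

Since the start pattern greedily runs through the last backslash on the
line, a string opened and continued with a trailing backslash is handed to
the same pending-string handling that triple-quoted strings already use.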