[svn r38262] Fixed support for tokenizing multi-line strings that use \ to span lines.
--HG-- branch : trunk
This commit is contained in:
parent
aa4896fa04
commit
30608ea412
|
@ -12,6 +12,7 @@ class PythonSchema(object):
|
||||||
'return', 'try', 'while', 'with', 'yield']
|
'return', 'try', 'while', 'with', 'yield']
|
||||||
alt_keyword = ['as', 'assert', 'class', 'def', 'del', 'exec', 'from',
|
alt_keyword = ['as', 'assert', 'class', 'def', 'del', 'exec', 'from',
|
||||||
'global', 'import', 'lambda', 'pass', 'print']
|
'global', 'import', 'lambda', 'pass', 'print']
|
||||||
|
linejoin = r'\\'
|
||||||
|
|
||||||
class Token(object):
|
class Token(object):
|
||||||
data = None
|
data = None
|
||||||
|
@ -59,6 +60,12 @@ class Tokenizer(object):
|
||||||
self._re_strings_full.append(
|
self._re_strings_full.append(
|
||||||
re.compile(r'%s[^\\%s]+(\\.[^\\%s]*)*%s' % (d, d, d, d)))
|
re.compile(r'%s[^\\%s]+(\\.[^\\%s]*)*%s' % (d, d, d, d)))
|
||||||
self._re_strings_empty.append(re.compile('%s%s' % (d, d)))
|
self._re_strings_empty.append(re.compile('%s%s' % (d, d)))
|
||||||
|
if schema.linejoin:
|
||||||
|
j = schema.linejoin
|
||||||
|
for d in schema.string + schema.multiline_string:
|
||||||
|
self._re_strings_multiline.append(
|
||||||
|
(re.compile('%s.*%s' % (d, j)),
|
||||||
|
re.compile('.*?%s' % (d,))))
|
||||||
for d in schema.multiline_string:
|
for d in schema.multiline_string:
|
||||||
self._re_strings_multiline.append((re.compile('%s.*' % (d,), re.S),
|
self._re_strings_multiline.append((re.compile('%s.*' % (d,), re.S),
|
||||||
re.compile('.*?%s' % (d,))))
|
re.compile('.*?%s' % (d,))))
|
||||||
|
@ -105,6 +112,7 @@ class Tokenizer(object):
|
||||||
def _check_multiline_strings(self, data):
|
def _check_multiline_strings(self, data):
|
||||||
token = None
|
token = None
|
||||||
for start, end in self._re_strings_multiline:
|
for start, end in self._re_strings_multiline:
|
||||||
|
print dir(start), end
|
||||||
m = start.match(data)
|
m = start.match(data)
|
||||||
if m:
|
if m:
|
||||||
s = m.group(0)
|
s = m.group(0)
|
||||||
|
|
|
@ -75,6 +75,15 @@ class TestTokenizer(object):
|
||||||
res = list(t.tokenize('bar'))
|
res = list(t.tokenize('bar'))
|
||||||
assert res == [Token('bar', type='word')]
|
assert res == [Token('bar', type='word')]
|
||||||
|
|
||||||
|
def test_string_multiline_slash(self):
|
||||||
|
t = Tokenizer(PythonSchema)
|
||||||
|
res = list(t.tokenize("'foo\\"))
|
||||||
|
assert res == [Token("'foo\\", type='string')]
|
||||||
|
res = list(t.tokenize("bar'"))
|
||||||
|
assert res == [Token("bar'", type='string')]
|
||||||
|
res = list(t.tokenize("bar"))
|
||||||
|
assert res == [Token('bar', type='word')]
|
||||||
|
|
||||||
def test_string_following_printable(self):
|
def test_string_following_printable(self):
|
||||||
assert self.tokens('."foo"') == [Token('.', type='unknown'),
|
assert self.tokens('."foo"') == [Token('.', type='unknown'),
|
||||||
Token('"foo"', type='string')]
|
Token('"foo"', type='string')]
|
||||||
|
|
Loading…
Reference in New Issue