Fixed #4164, #4171 -- Reworked some of the template lexer logic to ensure we
don't get caught out by a couple of corner cases.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@5104 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Malcolm Tredinnick 2007-04-27 12:16:22 +00:00
parent 6fe5235cea
commit 54f11ee63c
2 changed files with 39 additions and 18 deletions
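
Why the new approach is sound: tag_re is built with a single capturing group, and re.split() on a pattern with a capturing group keeps the matched text in the result, so the pieces strictly alternate between literal text and tag matches (empty strings appear where two pieces are adjacent). The old code discarded that positional information and re-classified every piece by its prefix, which is what tripped over near-miss tags. A minimal sketch of the invariant, using a simplified stand-in for Django's tag_re (illustrative only, not the commit's code):

import re

# Simplified stand-in for the tag_re Django builds from its delimiter
# constants: one capturing group over the three tag forms.
tag_re = re.compile(r'({%.*?%}|{{.*?}}|{#.*?#})')

for source in ("hi {{ name }}!", "a {{ moo %} b", "{{ moo #}"):
    # Odd-numbered slots are always regex matches; even-numbered slots
    # are always literal text, however tag-like they look.
    print("%r -> %r" % (source, tag_re.split(source)))

# 'hi {{ name }}!' -> ['hi ', '{{ name }}', '!']
# 'a {{ moo %} b'  -> ['a {{ moo %} b']   (no match: one literal piece)
# '{{ moo #}'      -> ['{{ moo #}']       (starts like a tag, still literal)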

django/template/__init__.py

@@ -193,18 +193,27 @@ class Lexer(object):
     def tokenize(self):
         "Return a list of tokens from a given template_string"
-        # remove all empty strings, because the regex has a tendency to add them
-        bits = filter(None, tag_re.split(self.template_string))
-        return map(self.create_token, bits)
+        in_tag = False
+        result = []
+        for bit in tag_re.split(self.template_string):
+            if bit:
+                result.append(self.create_token(bit, in_tag))
+            in_tag = not in_tag
+        return result
 
-    def create_token(self,token_string):
-        "Convert the given token string into a new Token object and return it"
-        if token_string.startswith(VARIABLE_TAG_START):
-            token = Token(TOKEN_VAR, token_string[len(VARIABLE_TAG_START):-len(VARIABLE_TAG_END)].strip())
-        elif token_string.startswith(BLOCK_TAG_START):
-            token = Token(TOKEN_BLOCK, token_string[len(BLOCK_TAG_START):-len(BLOCK_TAG_END)].strip())
-        elif token_string.startswith(COMMENT_TAG_START):
-            token = Token(TOKEN_COMMENT, '')
+    def create_token(self, token_string, in_tag=False):
+        """
+        Convert the given token string into a new Token object and return it.
+        If in_tag is True, we are processing something that matched a tag,
+        otherwise it should be treated as a literal string.
+        """
+        if in_tag:
+            if token_string.startswith(VARIABLE_TAG_START):
+                token = Token(TOKEN_VAR, token_string[len(VARIABLE_TAG_START):-len(VARIABLE_TAG_END)].strip())
+            elif token_string.startswith(BLOCK_TAG_START):
+                token = Token(TOKEN_BLOCK, token_string[len(BLOCK_TAG_START):-len(BLOCK_TAG_END)].strip())
+            elif token_string.startswith(COMMENT_TAG_START):
+                token = Token(TOKEN_COMMENT, '')
         else:
             token = Token(TOKEN_TEXT, token_string)
         return token
@@ -215,21 +224,21 @@ class DebugLexer(Lexer):
     def tokenize(self):
         "Return a list of tokens from a given template_string"
-        token_tups, upto = [], 0
+        result, upto = [], 0
         for match in tag_re.finditer(self.template_string):
             start, end = match.span()
             if start > upto:
-                token_tups.append( (self.template_string[upto:start], (upto, start)) )
+                result.append(self.create_token(self.template_string[upto:start], (upto, start), False))
                 upto = start
-            token_tups.append( (self.template_string[start:end], (start,end)) )
+            result.append(self.create_token(self.template_string[start:end], (start, end), True))
             upto = end
         last_bit = self.template_string[upto:]
         if last_bit:
-            token_tups.append( (last_bit, (upto, upto + len(last_bit))) )
-        return [self.create_token(tok, (self.origin, loc)) for tok, loc in token_tups]
+            result.append(self.create_token(last_bit, (upto, upto + len(last_bit)), False))
+        return result
 
-    def create_token(self, token_string, source):
-        token = super(DebugLexer, self).create_token(token_string)
+    def create_token(self, token_string, source, in_tag):
+        token = super(DebugLexer, self).create_token(token_string, in_tag)
         token.source = source
         return token

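To see the behavioral difference the Lexer change makes, here is a self-contained before/after reconstruction (illustrative only; the token tuples and regex are simplified stand-ins for the Token class and tag_re above):

import re

tag_re = re.compile(r'({%.*?%}|{{.*?}}|{#.*?#})')

def tokenize_old(source):
    # Old behaviour: drop empties, then guess each piece's type from its prefix.
    tokens = []
    for bit in filter(None, tag_re.split(source)):
        if bit.startswith('{{'):
            tokens.append(('VAR', bit[2:-2].strip()))
        elif bit.startswith('{%'):
            tokens.append(('BLOCK', bit[2:-2].strip()))
        elif bit.startswith('{#'):
            tokens.append(('COMMENT', ''))
        else:
            tokens.append(('TEXT', bit))
    return tokens

def tokenize_new(source):
    # New behaviour: only pieces sitting in a "tag" slot of the alternation
    # are ever classified as tags; everything else is literal text.
    in_tag, tokens = False, []
    for bit in tag_re.split(source):
        if bit:
            if in_tag and bit.startswith('{{'):
                tokens.append(('VAR', bit[2:-2].strip()))
            elif in_tag and bit.startswith('{%'):
                tokens.append(('BLOCK', bit[2:-2].strip()))
            elif in_tag:
                tokens.append(('COMMENT', ''))
            else:
                tokens.append(('TEXT', bit))
        in_tag = not in_tag
    return tokens

source = "{{ moo #}"
print("old: %r" % tokenize_old(source))  # old: [('VAR', 'moo')]  -- mis-parsed
print("new: %r" % tokenize_new(source))  # new: [('TEXT', '{{ moo #}')]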
tests/regressiontests/templates/tests.py

@@ -127,6 +127,18 @@ class Templates(unittest.TestCase):
             # Fail silently when accessing a non-simple method
             'basic-syntax20': ("{{ var.method2 }}", {"var": SomeClass()}, ("","INVALID")),
 
+            # Don't get confused when parsing something that is almost, but not
+            # quite, a template tag.
+            'basic-syntax21': ("a {{ moo %} b", {}, "a {{ moo %} b"),
+            'basic-syntax22': ("{{ moo #}", {}, "{{ moo #}"),
+
+            # Will try to treat "moo #} {{ cow" as the variable. Not ideal, but
+            # costly to work around, so this triggers an error.
+            'basic-syntax23': ("{{ moo #} {{ cow }}", {"cow": "cow"}, template.TemplateSyntaxError),
+
+            # Embedded newlines make it not-a-tag.
+            'basic-syntax24': ("{{ moo\n }}", {}, "{{ moo\n }}"),
+
             # List-index syntax allows a template to access a certain item of a subscriptable object.
             'list-index01': ("{{ var.1 }}", {"var": ["first item", "second item"]}, "second item"),
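
A note on the basic-syntax23 case above: it is the non-greedy variable pattern, not the lexer rewrite itself, that produces the error. A quick illustration with the same simplified regex as in the earlier sketches:

import re

tag_re = re.compile(r'({%.*?%}|{{.*?}}|{#.*?#})')

# The variable pattern matches from the first "{{" to the first "}}", so the
# whole string is consumed as a single tag piece here:
print(tag_re.split("{{ moo #} {{ cow }}"))
# -> ['', '{{ moo #} {{ cow }}', '']
# "moo #} {{ cow" is not a valid variable name, hence the
# TemplateSyntaxError the test expects.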