Fixed #32919 -- Optimized lexing & parsing of templates.

This optimizes:
- Lexer.create_token() by avoiding startswith() calls,
- Parser.parse() by re-using the token type enum's value,
- Parser.extend_nodelist() by removing an unnecessary isinstance() check,
- some Node subclasses by removing the implicit "nodelist" from
  "child_nodelists",
- Variable.__init__() by avoiding startswith(), endswith(), and find() calls.
This commit is contained in:
Keryn Knight 2021-07-06 14:53:03 +01:00
parent 34d5336b9d
commit edf184dd06
5 changed files with 30 additions and 11 deletions

View File

@ -370,7 +370,8 @@ class Lexer:
If in_tag is True, we are processing something that matched a tag, If in_tag is True, we are processing something that matched a tag,
otherwise it should be treated as a literal string. otherwise it should be treated as a literal string.
""" """
if in_tag and token_string.startswith(BLOCK_TAG_START): token_start = token_string[0:2]
if in_tag and token_start == BLOCK_TAG_START:
# The [2:-2] ranges below strip off *_TAG_START and *_TAG_END. # The [2:-2] ranges below strip off *_TAG_START and *_TAG_END.
# We could do len(BLOCK_TAG_START) to be more "correct", but we've # We could do len(BLOCK_TAG_START) to be more "correct", but we've
# hard-coded the 2s here for performance. And it's not like # hard-coded the 2s here for performance. And it's not like
@ -379,13 +380,13 @@ class Lexer:
if self.verbatim and block_content == self.verbatim: if self.verbatim and block_content == self.verbatim:
self.verbatim = False self.verbatim = False
if in_tag and not self.verbatim: if in_tag and not self.verbatim:
if token_string.startswith(VARIABLE_TAG_START): if token_start == VARIABLE_TAG_START:
return Token(TokenType.VAR, token_string[2:-2].strip(), position, lineno) return Token(TokenType.VAR, token_string[2:-2].strip(), position, lineno)
elif token_string.startswith(BLOCK_TAG_START): elif token_start == BLOCK_TAG_START:
if block_content[:9] in ('verbatim', 'verbatim '): if block_content[:9] in ('verbatim', 'verbatim '):
self.verbatim = 'end%s' % block_content self.verbatim = 'end%s' % block_content
return Token(TokenType.BLOCK, block_content, position, lineno) return Token(TokenType.BLOCK, block_content, position, lineno)
elif token_string.startswith(COMMENT_TAG_START): elif token_start == COMMENT_TAG_START:
content = '' content = ''
if token_string.find(TRANSLATOR_COMMENT_MARK): if token_string.find(TRANSLATOR_COMMENT_MARK):
content = token_string[2:-2].strip() content = token_string[2:-2].strip()
@ -457,9 +458,10 @@ class Parser:
while self.tokens: while self.tokens:
token = self.next_token() token = self.next_token()
# Use the raw values here for TokenType.* for a tiny performance boost. # Use the raw values here for TokenType.* for a tiny performance boost.
if token.token_type.value == 0: # TokenType.TEXT token_type = token.token_type.value
if token_type == 0: # TokenType.TEXT
self.extend_nodelist(nodelist, TextNode(token.contents), token) self.extend_nodelist(nodelist, TextNode(token.contents), token)
elif token.token_type.value == 1: # TokenType.VAR elif token_type == 1: # TokenType.VAR
if not token.contents: if not token.contents:
raise self.error(token, 'Empty variable tag on line %d' % token.lineno) raise self.error(token, 'Empty variable tag on line %d' % token.lineno)
try: try:
@ -468,7 +470,7 @@ class Parser:
raise self.error(token, e) raise self.error(token, e)
var_node = VariableNode(filter_expression) var_node = VariableNode(filter_expression)
self.extend_nodelist(nodelist, var_node, token) self.extend_nodelist(nodelist, var_node, token)
elif token.token_type.value == 2: # TokenType.BLOCK elif token_type == 2: # TokenType.BLOCK
try: try:
command = token.contents.split()[0] command = token.contents.split()[0]
except IndexError: except IndexError:
@ -515,7 +517,7 @@ class Parser:
raise self.error( raise self.error(
token, '%r must be the first tag in the template.' % node, token, '%r must be the first tag in the template.' % node,
) )
if isinstance(nodelist, NodeList) and not isinstance(node, TextNode): if not isinstance(node, TextNode):
nodelist.contains_nontext = True nodelist.contains_nontext = True
# Set origin and token here since we can't modify the node __init__() # Set origin and token here since we can't modify the node __init__()
# method. # method.
@ -787,13 +789,13 @@ class Variable:
if '.' in var or 'e' in var.lower(): if '.' in var or 'e' in var.lower():
self.literal = float(var) self.literal = float(var)
# "2." is invalid # "2." is invalid
if var.endswith('.'): if var[-1] == '.':
raise ValueError raise ValueError
else: else:
self.literal = int(var) self.literal = int(var)
except ValueError: except ValueError:
# A ValueError means that the variable isn't a number. # A ValueError means that the variable isn't a number.
if var.startswith('_(') and var.endswith(')'): if var[0:2] == '_(' and var[-1] == ')':
# The result of the lookup should be translated at rendering # The result of the lookup should be translated at rendering
# time. # time.
self.translate = True self.translate = True
@ -805,7 +807,7 @@ class Variable:
except ValueError: except ValueError:
# Otherwise we'll set self.lookups so that resolve() knows we're # Otherwise we'll set self.lookups so that resolve() knows we're
# dealing with a bonafide variable # dealing with a bonafide variable
if var.find(VARIABLE_ATTRIBUTE_SEPARATOR + '_') > -1 or var[0] == '_': if VARIABLE_ATTRIBUTE_SEPARATOR + '_' in var or var[0] == '_':
raise TemplateSyntaxError("Variables and attributes may " raise TemplateSyntaxError("Variables and attributes may "
"not begin with underscores: '%s'" % "not begin with underscores: '%s'" %
var) var)
@ -976,6 +978,8 @@ class NodeList(list):
class TextNode(Node): class TextNode(Node):
child_nodelists = ()
def __init__(self, s): def __init__(self, s):
self.s = s self.s = s
@ -1012,6 +1016,8 @@ def render_value_in_context(value, context):
class VariableNode(Node): class VariableNode(Node):
child_nodelists = ()
def __init__(self, filter_expression): def __init__(self, filter_expression):
self.filter_expression = filter_expression self.filter_expression = filter_expression

View File

@ -44,11 +44,15 @@ class AutoEscapeControlNode(Node):
class CommentNode(Node): class CommentNode(Node):
child_nodelists = ()
def render(self, context): def render(self, context):
return '' return ''
class CsrfTokenNode(Node): class CsrfTokenNode(Node):
child_nodelists = ()
def render(self, context): def render(self, context):
csrf_token = context.get('csrf_token') csrf_token = context.get('csrf_token')
if csrf_token: if csrf_token:
@ -342,6 +346,8 @@ class RegroupNode(Node):
class LoadNode(Node): class LoadNode(Node):
child_nodelists = ()
def render(self, context): def render(self, context):
return '' return ''
@ -400,6 +406,8 @@ class TemplateTagNode(Node):
class URLNode(Node): class URLNode(Node):
child_nodelists = ()
def __init__(self, view_name, args, kwargs, asvar): def __init__(self, view_name, args, kwargs, asvar):
self.view_name = view_name self.view_name = view_name
self.args = args self.args = args

View File

@ -182,6 +182,7 @@ class TagHelperNode(Node):
class SimpleNode(TagHelperNode): class SimpleNode(TagHelperNode):
child_nodelists = ()
def __init__(self, func, takes_context, args, kwargs, target_var): def __init__(self, func, takes_context, args, kwargs, target_var):
super().__init__(func, takes_context, args, kwargs) super().__init__(func, takes_context, args, kwargs)

View File

@ -68,6 +68,8 @@ class GetCurrentLanguageBidiNode(Node):
class TranslateNode(Node): class TranslateNode(Node):
child_nodelists = ()
def __init__(self, filter_expression, noop, asvar=None, def __init__(self, filter_expression, noop, asvar=None,
message_context=None): message_context=None):
self.noop = noop self.noop = noop

View File

@ -91,6 +91,8 @@ def get_media_prefix(parser, token):
class StaticNode(template.Node): class StaticNode(template.Node):
child_nodelists = ()
def __init__(self, varname=None, path=None): def __init__(self, varname=None, path=None):
if path is None: if path is None:
raise template.TemplateSyntaxError( raise template.TemplateSyntaxError(