Merge pull request #7211 from bluetech/expr-jit

mark: compile -k/-m expression once, reuse for all matches
2020-05-12 13:21:41 +03:00 · 2020-05-12 13:21:41 +03:00 · 07c8e0cc7f
parent 952762207a c714f05ad7
commit 07c8e0cc7f
5 changed files with 172 additions and 132 deletions
--- a/src/_pytest/mark/__init__.py
+++ b/src/_pytest/mark/__init__.py
@@ -1,9 +1,12 @@
 """ generic mechanism for marking and selecting python functions. """
 import warnings
+from typing import AbstractSet
 from typing import Optional

-from .legacy import matchkeyword
-from .legacy import matchmark
+import attr
+
+from .expression import Expression
+from .expression import ParseError
 from .structures import EMPTY_PARAMETERSET_OPTION
 from .structures import get_empty_parameterset_mark
 from .structures import Mark
@@ -11,6 +14,7 @@ from .structures import MARK_GEN
 from .structures import MarkDecorator
 from .structures import MarkGenerator
 from .structures import ParameterSet
+from _pytest.compat import TYPE_CHECKING
 from _pytest.config import Config
 from _pytest.config import hookimpl
 from _pytest.config import UsageError
@@ -18,6 +22,9 @@ from _pytest.deprecated import MINUS_K_COLON
 from _pytest.deprecated import MINUS_K_DASH
 from _pytest.store import StoreKey

+if TYPE_CHECKING:
+    from _pytest.nodes import Item
+
 __all__ = ["Mark", "MarkDecorator", "MarkGenerator", "get_empty_parameterset_mark"]


@@ -104,6 +111,57 @@ def pytest_cmdline_main(config):
        return 0


+@attr.s(slots=True)
+class KeywordMatcher:
+    """A matcher for keywords.
+
+    Given a list of names, matches any substring of one of these names. The
+    string inclusion check is case-insensitive.
+
+    Will match on the name of colitem, including the names of its parents.
+    Only matches names of items which are either a :class:`Class` or a
+    :class:`Function`.
+
+    Additionally, matches on names in the 'extra_keyword_matches' set of
+    any item, as well as names directly assigned to test functions.
+    """
+
+    _names = attr.ib(type=AbstractSet[str])
+
+    @classmethod
+    def from_item(cls, item: "Item") -> "KeywordMatcher":
+        mapped_names = set()
+
+        # Add the names of the current item and any parent items
+        import pytest
+
+        for item in item.listchain():
+            if not isinstance(item, pytest.Instance):
+                mapped_names.add(item.name)
+
+        # Add the names added as extra keywords to current or parent items
+        mapped_names.update(item.listextrakeywords())
+
+        # Add the names attached to the current function through direct assignment
+        function_obj = getattr(item, "function", None)
+        if function_obj:
+            mapped_names.update(function_obj.__dict__)
+
+        # add the markers to the keywords as we no longer handle them correctly
+        mapped_names.update(mark.name for mark in item.iter_markers())
+
+        return cls(mapped_names)
+
+    def __call__(self, subname: str) -> bool:
+        subname = subname.lower()
+        names = (name.lower() for name in self._names)
+
+        for name in names:
+            if subname in name:
+                return True
+        return False
+
+
 def deselect_by_keyword(items, config):
    keywordexpr = config.option.keyword.lstrip()
    if not keywordexpr:
@@ -120,10 +178,17 @@ def deselect_by_keyword(items, config):
        selectuntil = True
        keywordexpr = keywordexpr[:-1]

+    try:
+        expression = Expression.compile(keywordexpr)
+    except ParseError as e:
+        raise UsageError(
+            "Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
+        ) from None
+
    remaining = []
    deselected = []
    for colitem in items:
-        if keywordexpr and not matchkeyword(colitem, keywordexpr):
+        if keywordexpr and not expression.evaluate(KeywordMatcher.from_item(colitem)):
            deselected.append(colitem)
        else:
            if selectuntil:
@@ -135,15 +200,40 @@ def deselect_by_keyword(items, config):
        items[:] = remaining


+@attr.s(slots=True)
+class MarkMatcher:
+    """A matcher for markers which are present.
+
+    Tries to match on any marker names, attached to the given colitem.
+    """
+
+    own_mark_names = attr.ib()
+
+    @classmethod
+    def from_item(cls, item) -> "MarkMatcher":
+        mark_names = {mark.name for mark in item.iter_markers()}
+        return cls(mark_names)
+
+    def __call__(self, name: str) -> bool:
+        return name in self.own_mark_names
+
+
 def deselect_by_mark(items, config):
    matchexpr = config.option.markexpr
    if not matchexpr:
        return

+    try:
+        expression = Expression.compile(matchexpr)
+    except ParseError as e:
+        raise UsageError(
+            "Wrong expression passed to '-m': {}: {}".format(matchexpr, e)
+        ) from None
+
    remaining = []
    deselected = []
    for item in items:
-        if matchmark(item, matchexpr):
+        if expression.evaluate(MarkMatcher.from_item(item)):
            remaining.append(item)
        else:
            deselected.append(item)
--- a/src/_pytest/mark/expression.py
+++ b/src/_pytest/mark/expression.py
@@ -15,10 +15,13 @@ The semantics are:
 - ident evaluates to True or False according to a provided matcher function.
 - or/and/not evaluate according to the usual boolean semantics.
 """
+import ast
 import enum
 import re
+import types
 from typing import Callable
 from typing import Iterator
+from typing import Mapping
 from typing import Optional
 from typing import Sequence

@@ -31,7 +34,7 @@ if TYPE_CHECKING:


 __all__ = [
-    "evaluate",
+    "Expression",
    "ParseError",
 ]

@@ -124,50 +127,92 @@ class Scanner:
        )


-def expression(s: Scanner, matcher: Callable[[str], bool]) -> bool:
+def expression(s: Scanner) -> ast.Expression:
    if s.accept(TokenType.EOF):
-        return False
-    ret = expr(s, matcher)
-    s.accept(TokenType.EOF, reject=True)
-    return ret
+        ret = ast.NameConstant(False)  # type: ast.expr
+    else:
+        ret = expr(s)
+        s.accept(TokenType.EOF, reject=True)
+    return ast.fix_missing_locations(ast.Expression(ret))


-def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
-    ret = and_expr(s, matcher)
+def expr(s: Scanner) -> ast.expr:
+    ret = and_expr(s)
    while s.accept(TokenType.OR):
-        rhs = and_expr(s, matcher)
-        ret = ret or rhs
+        rhs = and_expr(s)
+        ret = ast.BoolOp(ast.Or(), [ret, rhs])
    return ret


-def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
-    ret = not_expr(s, matcher)
+def and_expr(s: Scanner) -> ast.expr:
+    ret = not_expr(s)
    while s.accept(TokenType.AND):
-        rhs = not_expr(s, matcher)
-        ret = ret and rhs
+        rhs = not_expr(s)
+        ret = ast.BoolOp(ast.And(), [ret, rhs])
    return ret


-def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
+def not_expr(s: Scanner) -> ast.expr:
    if s.accept(TokenType.NOT):
-        return not not_expr(s, matcher)
+        return ast.UnaryOp(ast.Not(), not_expr(s))
    if s.accept(TokenType.LPAREN):
-        ret = expr(s, matcher)
+        ret = expr(s)
        s.accept(TokenType.RPAREN, reject=True)
        return ret
    ident = s.accept(TokenType.IDENT)
    if ident:
-        return matcher(ident.value)
+        return ast.Name(ident.value, ast.Load())
    s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))


-def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
-    """Evaluate a match expression as used by -k and -m.
+class MatcherAdapter(Mapping[str, bool]):
+    """Adapts a matcher function to a locals mapping as required by eval()."""

-    :param input: The input expression - one line.
-    :param matcher: Given an identifier, should return whether it matches or not.
-                    Should be prepared to handle arbitrary strings as input.
+    def __init__(self, matcher: Callable[[str], bool]) -> None:
+        self.matcher = matcher

-    Returns whether the entire expression matches or not.
+    def __getitem__(self, key: str) -> bool:
+        return self.matcher(key)
+
+    def __iter__(self) -> Iterator[str]:
+        raise NotImplementedError()
+
+    def __len__(self) -> int:
+        raise NotImplementedError()
+
+
+class Expression:
+    """A compiled match expression as used by -k and -m.
+
+    The expression can be evaluated against different matchers.
    """
-    return expression(Scanner(input), matcher)
+
+    __slots__ = ("code",)
+
+    def __init__(self, code: types.CodeType) -> None:
+        self.code = code
+
+    @classmethod
+    def compile(self, input: str) -> "Expression":
+        """Compile a match expression.
+
+        :param input: The input expression - one line.
+        """
+        astexpr = expression(Scanner(input))
+        code = compile(
+            astexpr, filename="<pytest match expression>", mode="eval",
+        )  # type: types.CodeType
+        return Expression(code)
+
+    def evaluate(self, matcher: Callable[[str], bool]) -> bool:
+        """Evaluate the match expression.
+
+        :param matcher: Given an identifier, should return whether it matches or not.
+                        Should be prepared to handle arbitrary strings as input.
+
+        Returns whether the expression matches or not.
+        """
+        ret = eval(
+            self.code, {"__builtins__": {}}, MatcherAdapter(matcher)
+        )  # type: bool
+        return ret
--- a/src/_pytest/mark/legacy.py
+++ b/src/_pytest/mark/legacy.py
@@ -1,101 +0,0 @@
-"""
-this is a place where we put datastructures used by legacy apis
-we hope to remove
-"""
-from typing import Set
-
-import attr
-
-from _pytest.compat import TYPE_CHECKING
-from _pytest.config import UsageError
-from _pytest.mark.expression import evaluate
-from _pytest.mark.expression import ParseError
-
-if TYPE_CHECKING:
-    from _pytest.nodes import Item
-
-
-@attr.s
-class MarkMatcher:
-    """A matcher for markers which are present."""
-
-    own_mark_names = attr.ib()
-
-    @classmethod
-    def from_item(cls, item) -> "MarkMatcher":
-        mark_names = {mark.name for mark in item.iter_markers()}
-        return cls(mark_names)
-
-    def __call__(self, name: str) -> bool:
-        return name in self.own_mark_names
-
-
-@attr.s
-class KeywordMatcher:
-    """A matcher for keywords.
-
-    Given a list of names, matches any substring of one of these names. The
-    string inclusion check is case-insensitive.
-    """
-
-    _names = attr.ib(type=Set[str])
-
-    @classmethod
-    def from_item(cls, item: "Item") -> "KeywordMatcher":
-        mapped_names = set()
-
-        # Add the names of the current item and any parent items
-        import pytest
-
-        for item in item.listchain():
-            if not isinstance(item, pytest.Instance):
-                mapped_names.add(item.name)
-
-        # Add the names added as extra keywords to current or parent items
-        mapped_names.update(item.listextrakeywords())
-
-        # Add the names attached to the current function through direct assignment
-        function_obj = getattr(item, "function", None)
-        if function_obj:
-            mapped_names.update(function_obj.__dict__)
-
-        # add the markers to the keywords as we no longer handle them correctly
-        mapped_names.update(mark.name for mark in item.iter_markers())
-
-        return cls(mapped_names)
-
-    def __call__(self, subname: str) -> bool:
-        subname = subname.lower()
-        names = (name.lower() for name in self._names)
-
-        for name in names:
-            if subname in name:
-                return True
-        return False
-
-
-def matchmark(colitem, markexpr: str) -> bool:
-    """Tries to match on any marker names, attached to the given colitem."""
-    try:
-        return evaluate(markexpr, MarkMatcher.from_item(colitem))
-    except ParseError as e:
-        raise UsageError(
-            "Wrong expression passed to '-m': {}: {}".format(markexpr, e)
-        ) from None
-
-
-def matchkeyword(colitem, keywordexpr: str) -> bool:
-    """Tries to match given keyword expression to given collector item.
-
-    Will match on the name of colitem, including the names of its parents.
-    Only matches names of items which are either a :class:`Class` or a
-    :class:`Function`.
-    Additionally, matches on names in the 'extra_keyword_matches' set of
-    any item, as well as names directly assigned to test functions.
-    """
-    try:
-        return evaluate(keywordexpr, KeywordMatcher.from_item(colitem))
-    except ParseError as e:
-        raise UsageError(
-            "Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
-        ) from None
--- a/testing/test_mark_expression.py
+++ b/testing/test_mark_expression.py
@@ -1,8 +1,14 @@
+from typing import Callable
+
 import pytest
-from _pytest.mark.expression import evaluate
+from _pytest.mark.expression import Expression
 from _pytest.mark.expression import ParseError


+def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
+    return Expression.compile(input).evaluate(matcher)
+
+
 def test_empty_is_false() -> None:
    assert not evaluate("", lambda ident: False)
    assert not evaluate("", lambda ident: True)
--- a/testing/test_pytester.py
+++ b/testing/test_pytester.py
@@ -443,7 +443,7 @@ def test_testdir_subprocess_via_runpytest_arg(testdir) -> None:


 def test_unicode_args(testdir) -> None:
-    result = testdir.runpytest("-k", "💩")
+    result = testdir.runpytest("-k", "אבג")
    assert result.ret == ExitCode.NO_TESTS_COLLECTED