From c714f05ad707fae11804e34cf38bf5ba0fbf0b88 Mon Sep 17 00:00:00 2001
From: Ran Benita <ran@unusedvar.com>
Date: Mon, 11 May 2020 11:50:41 +0300
Subject: [PATCH] mark: reuse compiled expression for all items in -k/-m

The previous commit made this possible, so utilize it.

Since legacy.py becomes pretty bare, I inlined it into __init__.py. I'm
not sure it's really "legacy" anyway!

Using a simple 50000 items benchmark with `--collect-only -k nomatch`:

Before (two commits ago):

   ======================== 50000 deselected in 10.31s =====================
         19129345 function calls (18275596 primitive calls) in 10.634 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    2.270    2.270 __init__.py:149(pytest_collection_modifyitems)
        1    0.036    0.036    2.270    2.270 __init__.py:104(deselect_by_keyword)
    50000    0.055    0.000    2.226    0.000 legacy.py:87(matchkeyword)

After:

   ======================== 50000 deselected in 9.37s =========================
         18029363 function calls (17175972 primitive calls) in 9.701 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    1.394    1.394 __init__.py:239(pytest_collection_modifyitems)
        1    0.057    0.057    1.393    1.393 __init__.py:162(deselect_by_keyword)

The matching itself can be optimized more but that's a different story.
---
 src/_pytest/mark/__init__.py |  98 +++++++++++++++++++++++++++++++--
 src/_pytest/mark/legacy.py   | 103 -----------------------------------
 testing/test_pytester.py     |   2 +-
 3 files changed, 95 insertions(+), 108 deletions(-)
 delete mode 100644 src/_pytest/mark/legacy.py

diff --git a/src/_pytest/mark/__init__.py b/src/_pytest/mark/__init__.py
index f7556b0b7..134ed1876 100644
--- a/src/_pytest/mark/__init__.py
+++ b/src/_pytest/mark/__init__.py
@@ -1,9 +1,12 @@
 """ generic mechanism for marking and selecting python functions. """
 import warnings
+from typing import AbstractSet
 from typing import Optional
 
-from .legacy import matchkeyword
-from .legacy import matchmark
+import attr
+
+from .expression import Expression
+from .expression import ParseError
 from .structures import EMPTY_PARAMETERSET_OPTION
 from .structures import get_empty_parameterset_mark
 from .structures import Mark
@@ -11,6 +14,7 @@ from .structures import MARK_GEN
 from .structures import MarkDecorator
 from .structures import MarkGenerator
 from .structures import ParameterSet
+from _pytest.compat import TYPE_CHECKING
 from _pytest.config import Config
 from _pytest.config import hookimpl
 from _pytest.config import UsageError
@@ -18,6 +22,9 @@ from _pytest.deprecated import MINUS_K_COLON
 from _pytest.deprecated import MINUS_K_DASH
 from _pytest.store import StoreKey
 
+if TYPE_CHECKING:
+    from _pytest.nodes import Item
+
 __all__ = ["Mark", "MarkDecorator", "MarkGenerator", "get_empty_parameterset_mark"]
 
 
@@ -104,6 +111,57 @@ def pytest_cmdline_main(config):
         return 0
 
 
+@attr.s(slots=True)
+class KeywordMatcher:
+    """A matcher for keywords.
+
+    Given a list of names, matches any substring of one of these names. The
+    string inclusion check is case-insensitive.
+
+    Will match on the name of the item, including the names of its parents.
+    Only matches names of items which are either a :class:`Class` or a
+    :class:`Function`.
+
+    Additionally, matches on names in the 'extra_keyword_matches' set of
+    any item, as well as names directly assigned to test functions.
+    """
+
+    _names = attr.ib(type=AbstractSet[str])
+
+    @classmethod
+    def from_item(cls, item: "Item") -> "KeywordMatcher":
+        mapped_names = set()
+
+        # Add the names of the current item and any parent items
+        import pytest
+
+        for node in item.listchain():
+            if not isinstance(node, pytest.Instance):
+                mapped_names.add(node.name)
+
+        # Add the names added as extra keywords to current or parent items
+        mapped_names.update(item.listextrakeywords())
+
+        # Add the names attached to the current function through direct assignment
+        function_obj = getattr(item, "function", None)
+        if function_obj:
+            mapped_names.update(function_obj.__dict__)
+
+        # add the markers to the keywords as we no longer handle them correctly
+        mapped_names.update(mark.name for mark in item.iter_markers())
+
+        return cls(mapped_names)
+
+    def __call__(self, subname: str) -> bool:
+        subname = subname.lower()
+        names = (name.lower() for name in self._names)
+
+        for name in names:
+            if subname in name:
+                return True
+        return False
+
+
 def deselect_by_keyword(items, config):
     keywordexpr = config.option.keyword.lstrip()
     if not keywordexpr:
@@ -120,10 +178,17 @@ def deselect_by_keyword(items, config):
         selectuntil = True
         keywordexpr = keywordexpr[:-1]
 
+    try:
+        expression = Expression.compile(keywordexpr)
+    except ParseError as e:
+        raise UsageError(
+            "Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
+        ) from None
+
     remaining = []
     deselected = []
     for colitem in items:
-        if keywordexpr and not matchkeyword(colitem, keywordexpr):
+        if keywordexpr and not expression.evaluate(KeywordMatcher.from_item(colitem)):
             deselected.append(colitem)
         else:
             if selectuntil:
@@ -135,15 +200,40 @@ def deselect_by_keyword(items, config):
         items[:] = remaining
 
 
+@attr.s(slots=True)
+class MarkMatcher:
+    """A matcher for markers which are present.
+
+    Matches a name only if it equals a marker name from item.iter_markers().
+    """
+
+    own_mark_names = attr.ib()
+
+    @classmethod
+    def from_item(cls, item) -> "MarkMatcher":
+        mark_names = {mark.name for mark in item.iter_markers()}
+        return cls(mark_names)
+
+    def __call__(self, name: str) -> bool:
+        return name in self.own_mark_names
+
+
 def deselect_by_mark(items, config):
     matchexpr = config.option.markexpr
     if not matchexpr:
         return
 
+    try:
+        expression = Expression.compile(matchexpr)
+    except ParseError as e:
+        raise UsageError(
+            "Wrong expression passed to '-m': {}: {}".format(matchexpr, e)
+        ) from None
+
     remaining = []
     deselected = []
     for item in items:
-        if matchmark(item, matchexpr):
+        if expression.evaluate(MarkMatcher.from_item(item)):
             remaining.append(item)
         else:
             deselected.append(item)
diff --git a/src/_pytest/mark/legacy.py b/src/_pytest/mark/legacy.py
deleted file mode 100644
index ed707fcc7..000000000
--- a/src/_pytest/mark/legacy.py
+++ /dev/null
@@ -1,103 +0,0 @@
-"""
-this is a place where we put datastructures used by legacy apis
-we hope to remove
-"""
-from typing import Set
-
-import attr
-
-from _pytest.compat import TYPE_CHECKING
-from _pytest.config import UsageError
-from _pytest.mark.expression import Expression
-from _pytest.mark.expression import ParseError
-
-if TYPE_CHECKING:
-    from _pytest.nodes import Item
-
-
-@attr.s
-class MarkMatcher:
-    """A matcher for markers which are present."""
-
-    own_mark_names = attr.ib()
-
-    @classmethod
-    def from_item(cls, item) -> "MarkMatcher":
-        mark_names = {mark.name for mark in item.iter_markers()}
-        return cls(mark_names)
-
-    def __call__(self, name: str) -> bool:
-        return name in self.own_mark_names
-
-
-@attr.s
-class KeywordMatcher:
-    """A matcher for keywords.
-
-    Given a list of names, matches any substring of one of these names. The
-    string inclusion check is case-insensitive.
-    """
-
-    _names = attr.ib(type=Set[str])
-
-    @classmethod
-    def from_item(cls, item: "Item") -> "KeywordMatcher":
-        mapped_names = set()
-
-        # Add the names of the current item and any parent items
-        import pytest
-
-        for item in item.listchain():
-            if not isinstance(item, pytest.Instance):
-                mapped_names.add(item.name)
-
-        # Add the names added as extra keywords to current or parent items
-        mapped_names.update(item.listextrakeywords())
-
-        # Add the names attached to the current function through direct assignment
-        function_obj = getattr(item, "function", None)
-        if function_obj:
-            mapped_names.update(function_obj.__dict__)
-
-        # add the markers to the keywords as we no longer handle them correctly
-        mapped_names.update(mark.name for mark in item.iter_markers())
-
-        return cls(mapped_names)
-
-    def __call__(self, subname: str) -> bool:
-        subname = subname.lower()
-        names = (name.lower() for name in self._names)
-
-        for name in names:
-            if subname in name:
-                return True
-        return False
-
-
-def matchmark(colitem, markexpr: str) -> bool:
-    """Tries to match on any marker names, attached to the given colitem."""
-    try:
-        expression = Expression.compile(markexpr)
-    except ParseError as e:
-        raise UsageError(
-            "Wrong expression passed to '-m': {}: {}".format(markexpr, e)
-        ) from None
-    return expression.evaluate(MarkMatcher.from_item(colitem))
-
-
-def matchkeyword(colitem, keywordexpr: str) -> bool:
-    """Tries to match given keyword expression to given collector item.
-
-    Will match on the name of colitem, including the names of its parents.
-    Only matches names of items which are either a :class:`Class` or a
-    :class:`Function`.
-    Additionally, matches on names in the 'extra_keyword_matches' set of
-    any item, as well as names directly assigned to test functions.
-    """
-    try:
-        expression = Expression.compile(keywordexpr)
-    except ParseError as e:
-        raise UsageError(
-            "Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
-        ) from None
-    return expression.evaluate(KeywordMatcher.from_item(colitem))
diff --git a/testing/test_pytester.py b/testing/test_pytester.py
index fa0cfce97..1d3321455 100644
--- a/testing/test_pytester.py
+++ b/testing/test_pytester.py
@@ -443,7 +443,7 @@ def test_testdir_subprocess_via_runpytest_arg(testdir) -> None:
 
 
 def test_unicode_args(testdir) -> None:
-    result = testdir.runpytest("-k", "💩")
+    result = testdir.runpytest("-k", "אבג")
     assert result.ret == ExitCode.NO_TESTS_COLLECTED