mark: reuse compiled expression for all items in -k/-m

The previous commit made this possible, so utilize it.

Since legacy.py becomes pretty bare, I inlined it into __init__.py. I'm
not sure it's really "legacy" anyway!

Using a simple 50000-item benchmark with `--collect-only -k nomatch`:

Before (two commits ago):

   ======================== 50000 deselected in 10.31s =====================
         19129345 function calls (18275596 primitive calls) in 10.634 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    2.270    2.270 __init__.py:149(pytest_collection_modifyitems)
        1    0.036    0.036    2.270    2.270 __init__.py:104(deselect_by_keyword)
    50000    0.055    0.000    2.226    0.000 legacy.py:87(matchkeyword)

After:

   ======================== 50000 deselected in 9.37s =========================
         18029363 function calls (17175972 primitive calls) in 9.701 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    1.394    1.394 __init__.py:239(pytest_collection_modifyitems)
        1    0.057    0.057    1.393    1.393 __init__.py:162(deselect_by_keyword)

The matching itself can be optimized further, but that's a different story.
This commit is contained in:
Ran Benita 2020-05-11 11:50:41 +03:00
parent 622c4ce02e
commit c714f05ad7
3 changed files with 95 additions and 108 deletions

View File

@ -1,9 +1,12 @@
""" generic mechanism for marking and selecting python functions. """ """ generic mechanism for marking and selecting python functions. """
import warnings import warnings
from typing import AbstractSet
from typing import Optional from typing import Optional
from .legacy import matchkeyword import attr
from .legacy import matchmark
from .expression import Expression
from .expression import ParseError
from .structures import EMPTY_PARAMETERSET_OPTION from .structures import EMPTY_PARAMETERSET_OPTION
from .structures import get_empty_parameterset_mark from .structures import get_empty_parameterset_mark
from .structures import Mark from .structures import Mark
@ -11,6 +14,7 @@ from .structures import MARK_GEN
from .structures import MarkDecorator from .structures import MarkDecorator
from .structures import MarkGenerator from .structures import MarkGenerator
from .structures import ParameterSet from .structures import ParameterSet
from _pytest.compat import TYPE_CHECKING
from _pytest.config import Config from _pytest.config import Config
from _pytest.config import hookimpl from _pytest.config import hookimpl
from _pytest.config import UsageError from _pytest.config import UsageError
@ -18,6 +22,9 @@ from _pytest.deprecated import MINUS_K_COLON
from _pytest.deprecated import MINUS_K_DASH from _pytest.deprecated import MINUS_K_DASH
from _pytest.store import StoreKey from _pytest.store import StoreKey
if TYPE_CHECKING:
from _pytest.nodes import Item
__all__ = ["Mark", "MarkDecorator", "MarkGenerator", "get_empty_parameterset_mark"] __all__ = ["Mark", "MarkDecorator", "MarkGenerator", "get_empty_parameterset_mark"]
@ -104,6 +111,57 @@ def pytest_cmdline_main(config):
return 0 return 0
@attr.s(slots=True)
class KeywordMatcher:
    """A matcher for keywords.

    Holds a set of names and, when called with a sub-name, reports whether
    that sub-name is a substring of any held name. The substring check is
    case-insensitive.

    :meth:`from_item` gathers the names from an item: the names of the nodes
    returned by ``item.listchain()`` (``pytest.Instance`` nodes are skipped),
    the item's extra keywords, the attribute names set directly on its test
    function (if any), and the names of its markers.
    """

    # Names this matcher tests sub-names against.
    _names = attr.ib(type=AbstractSet[str])

    @classmethod
    def from_item(cls, item: "Item") -> "KeywordMatcher":
        """Build a matcher from every name associated with ``item``."""
        # Imported inside the method, as the original code did.
        import pytest

        mapped_names = set()

        # Add the names of the nodes in the item's chain. The loop variable is
        # deliberately distinct from ``item`` so that the calls below
        # unambiguously target the item that was passed in.
        for node in item.listchain():
            if not isinstance(node, pytest.Instance):
                mapped_names.add(node.name)

        # Add the names added as extra keywords to current or parent items
        mapped_names.update(item.listextrakeywords())

        # Add the names attached to the current function through direct assignment
        function_obj = getattr(item, "function", None)
        if function_obj:
            mapped_names.update(function_obj.__dict__)

        # add the markers to the keywords as we no longer handle them correctly
        mapped_names.update(mark.name for mark in item.iter_markers())

        return cls(mapped_names)

    def __call__(self, subname: str) -> bool:
        """Return True if ``subname`` occurs (case-insensitively) in any name."""
        subname = subname.lower()
        return any(subname in name.lower() for name in self._names)
def deselect_by_keyword(items, config): def deselect_by_keyword(items, config):
keywordexpr = config.option.keyword.lstrip() keywordexpr = config.option.keyword.lstrip()
if not keywordexpr: if not keywordexpr:
@ -120,10 +178,17 @@ def deselect_by_keyword(items, config):
selectuntil = True selectuntil = True
keywordexpr = keywordexpr[:-1] keywordexpr = keywordexpr[:-1]
try:
expression = Expression.compile(keywordexpr)
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
) from None
remaining = [] remaining = []
deselected = [] deselected = []
for colitem in items: for colitem in items:
if keywordexpr and not matchkeyword(colitem, keywordexpr): if keywordexpr and not expression.evaluate(KeywordMatcher.from_item(colitem)):
deselected.append(colitem) deselected.append(colitem)
else: else:
if selectuntil: if selectuntil:
@ -135,15 +200,40 @@ def deselect_by_keyword(items, config):
items[:] = remaining items[:] = remaining
@attr.s(slots=True)
class MarkMatcher:
    """A matcher for markers which are present.

    Calling an instance with a marker name reports whether that name is
    among the marker names the instance was built with.
    """

    # Collection of marker names this matcher answers membership queries on.
    own_mark_names = attr.ib()

    @classmethod
    def from_item(cls, item) -> "MarkMatcher":
        """Build a matcher from the names yielded by ``item.iter_markers()``."""
        return cls({marker.name for marker in item.iter_markers()})

    def __call__(self, name: str) -> bool:
        """Return True if ``name`` is one of the stored marker names."""
        is_present = name in self.own_mark_names
        return is_present
def deselect_by_mark(items, config): def deselect_by_mark(items, config):
matchexpr = config.option.markexpr matchexpr = config.option.markexpr
if not matchexpr: if not matchexpr:
return return
try:
expression = Expression.compile(matchexpr)
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-m': {}: {}".format(matchexpr, e)
) from None
remaining = [] remaining = []
deselected = [] deselected = []
for item in items: for item in items:
if matchmark(item, matchexpr): if expression.evaluate(MarkMatcher.from_item(item)):
remaining.append(item) remaining.append(item)
else: else:
deselected.append(item) deselected.append(item)

View File

@ -1,103 +0,0 @@
"""
this is a place where we put datastructures used by legacy apis
we hope to remove
"""
from typing import Set
import attr
from _pytest.compat import TYPE_CHECKING
from _pytest.config import UsageError
from _pytest.mark.expression import Expression
from _pytest.mark.expression import ParseError
if TYPE_CHECKING:
from _pytest.nodes import Item
@attr.s
class MarkMatcher:
    """A matcher for markers which are present."""

    # Collection of marker names this matcher answers membership queries on.
    own_mark_names = attr.ib()

    @classmethod
    def from_item(cls, item) -> "MarkMatcher":
        """Build a matcher from the names yielded by ``item.iter_markers()``."""
        names_of_marks = set()
        for marker in item.iter_markers():
            names_of_marks.add(marker.name)
        return cls(names_of_marks)

    def __call__(self, name: str) -> bool:
        """Return True if ``name`` is one of the stored marker names."""
        return name in self.own_mark_names
@attr.s
class KeywordMatcher:
    """A matcher for keywords.

    Given a set of names, matches any substring of one of these names. The
    string inclusion check is case-insensitive.
    """

    # Names this matcher tests sub-names against.
    _names = attr.ib(type=Set[str])

    @classmethod
    def from_item(cls, item: "Item") -> "KeywordMatcher":
        """Build a matcher from every name associated with ``item``."""
        import pytest

        collected = set()

        # Names of the nodes in the item's chain, skipping Instance nodes.
        # NOTE: the loop rebinds ``item``; every call below therefore acts on
        # the last node produced by listchain().
        for item in item.listchain():
            if isinstance(item, pytest.Instance):
                continue
            collected.add(item.name)

        # Extra keywords registered on the item.
        collected.update(item.listextrakeywords())

        # Attribute names assigned directly on the test function, if there is one.
        func = getattr(item, "function", None)
        if func:
            collected.update(func.__dict__)

        # Marker names are treated as keywords as well.
        collected.update(mark.name for mark in item.iter_markers())

        return cls(collected)

    def __call__(self, subname: str) -> bool:
        """Return True if ``subname`` occurs (case-insensitively) in any name."""
        needle = subname.lower()
        return any(needle in candidate.lower() for candidate in self._names)
def matchmark(colitem, markexpr: str) -> bool:
    """Evaluate the ``-m`` expression ``markexpr`` against ``colitem``'s markers.

    The expression is compiled with ``Expression.compile`` and evaluated with
    a ``MarkMatcher`` built from ``colitem``.

    :raises UsageError: if ``markexpr`` fails to parse; the original
        ``ParseError`` is not chained.
    """
    try:
        compiled = Expression.compile(markexpr)
    except ParseError as exc:
        message = "Wrong expression passed to '-m': {}: {}".format(markexpr, exc)
        raise UsageError(message) from None
    matcher = MarkMatcher.from_item(colitem)
    return compiled.evaluate(matcher)
def matchkeyword(colitem, keywordexpr: str) -> bool:
    """Evaluate the ``-k`` expression ``keywordexpr`` against ``colitem``.

    The expression is compiled with ``Expression.compile`` and evaluated with
    a ``KeywordMatcher`` built from ``colitem``, so the names it can match
    are whatever ``KeywordMatcher.from_item`` collects for that item.

    :raises UsageError: if ``keywordexpr`` fails to parse; the original
        ``ParseError`` is not chained.
    """
    try:
        compiled = Expression.compile(keywordexpr)
    except ParseError as exc:
        message = "Wrong expression passed to '-k': {}: {}".format(keywordexpr, exc)
        raise UsageError(message) from None
    matcher = KeywordMatcher.from_item(colitem)
    return compiled.evaluate(matcher)

View File

@ -443,7 +443,7 @@ def test_testdir_subprocess_via_runpytest_arg(testdir) -> None:
def test_unicode_args(testdir) -> None: def test_unicode_args(testdir) -> None:
result = testdir.runpytest("-k", "💩") result = testdir.runpytest("-k", "אבג")
assert result.ret == ExitCode.NO_TESTS_COLLECTED assert result.ret == ExitCode.NO_TESTS_COLLECTED