Stop using Python's eval() for -m and -k

Previously, the expressions given to the `-m` and `-k` options were
evaluated with `eval`. This causes a few issues:

- Python keywords cannot be used.

- Constants like numbers, None, True, False are not handled correctly.

- Various syntax like numeric operators and `X if Y else Z` is supported
  unintentionally.

- `eval()` is somewhat dangerous for arbitrary input.

- Can fail in many ways so requires `except Exception`.

The format we want to support is quite simple, so change to a custom
parser. This fixes the issues above, and gives us full control of the
format, so can be documented comprehensively and even be extended in the
future if we wish.
This commit is contained in:
Ran Benita 2020-04-25 13:38:53 +03:00
parent be68496440
commit a718ad6363
6 changed files with 405 additions and 75 deletions

View File

@ -0,0 +1,3 @@
Expressions given to the ``-m`` and ``-k`` options are no longer evaluated using Python's ``eval()``.
The format supports ``or``, ``and``, ``not``, parenthesis and general identifiers to match against.
Python constants, keywords or other operators are no longer evaluated differently.

View File

@ -141,14 +141,14 @@ Or select multiple nodes:
Using ``-k expr`` to select tests based on their name
-------------------------------------------------------
.. versionadded: 2.0/2.3.4
.. versionadded:: 2.0/2.3.4
You can use the ``-k`` command line option to specify an expression
which implements a substring match on the test names instead of the
exact match on markers that ``-m`` provides. This makes it easy to
select tests based on their names:
.. versionadded: 5.4
.. versionchanged:: 5.4
The expression matching is now case-insensitive.
@ -198,20 +198,8 @@ Or to select "http" and "quick" tests:
===================== 2 passed, 2 deselected in 0.12s ======================
.. note::
You can use ``and``, ``or``, ``not`` and parentheses.
If you are using expressions such as ``"X and Y"`` then both ``X`` and ``Y``
need to be simple non-keyword names. For example, ``"pass"`` or ``"from"``
will result in SyntaxErrors because ``"-k"`` evaluates the expression using
Python's `eval`_ function.
.. _`eval`: https://docs.python.org/3.6/library/functions.html#eval
However, if the ``"-k"`` argument is a simple string, no such restrictions
apply. Also ``"-k 'not STRING'"`` has no restrictions. You can also
specify numbers like ``"-k 1.3"`` to match tests which are parametrized
with the float ``"1.3"``.
Registering markers
-------------------------------------

View File

@ -0,0 +1,173 @@
r"""
Evaluate match expressions, as used by `-k` and `-m`.
The grammar is:
expression: expr? EOF
expr: and_expr ('or' and_expr)*
and_expr: not_expr ('and' not_expr)*
not_expr: 'not' not_expr | '(' expr ')' | ident
ident: (\w|:|\+|-|\.|\[|\])+
The semantics are:
- Empty expression evaluates to False.
- ident evaluates to True of False according to a provided matcher function.
- or/and/not evaluate according to the usual boolean semantics.
"""
import enum
import re
from typing import Callable
from typing import Iterator
from typing import Optional
from typing import Sequence
import attr
from _pytest.compat import TYPE_CHECKING
if TYPE_CHECKING:
from typing import NoReturn
__all__ = [
"evaluate",
"ParseError",
]
class TokenType(enum.Enum):
LPAREN = "left parenthesis"
RPAREN = "right parenthesis"
OR = "or"
AND = "and"
NOT = "not"
IDENT = "identifier"
EOF = "end of input"
@attr.s(frozen=True, slots=True)
class Token:
type = attr.ib(type=TokenType)
value = attr.ib(type=str)
pos = attr.ib(type=int)
class ParseError(Exception):
"""The expression contains invalid syntax.
:param column: The column in the line where the error occurred (1-based).
:param message: A description of the error.
"""
def __init__(self, column: int, message: str) -> None:
self.column = column
self.message = message
def __str__(self) -> str:
return "at column {}: {}".format(self.column, self.message)
class Scanner:
__slots__ = ("tokens", "current")
def __init__(self, input: str) -> None:
self.tokens = self.lex(input)
self.current = next(self.tokens)
def lex(self, input: str) -> Iterator[Token]:
pos = 0
while pos < len(input):
if input[pos] in (" ", "\t"):
pos += 1
elif input[pos] == "(":
yield Token(TokenType.LPAREN, "(", pos)
pos += 1
elif input[pos] == ")":
yield Token(TokenType.RPAREN, ")", pos)
pos += 1
else:
match = re.match(r"(:?\w|:|\+|-|\.|\[|\])+", input[pos:])
if match:
value = match.group(0)
if value == "or":
yield Token(TokenType.OR, value, pos)
elif value == "and":
yield Token(TokenType.AND, value, pos)
elif value == "not":
yield Token(TokenType.NOT, value, pos)
else:
yield Token(TokenType.IDENT, value, pos)
pos += len(value)
else:
raise ParseError(
pos + 1, 'unexpected character "{}"'.format(input[pos]),
)
yield Token(TokenType.EOF, "", pos)
def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]:
if self.current.type is type:
token = self.current
if token.type is not TokenType.EOF:
self.current = next(self.tokens)
return token
if reject:
self.reject((type,))
return None
def reject(self, expected: Sequence[TokenType]) -> "NoReturn":
raise ParseError(
self.current.pos + 1,
"expected {}; got {}".format(
" OR ".join(type.value for type in expected), self.current.type.value,
),
)
def expression(s: Scanner, matcher: Callable[[str], bool]) -> bool:
if s.accept(TokenType.EOF):
return False
ret = expr(s, matcher)
s.accept(TokenType.EOF, reject=True)
return ret
def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
ret = and_expr(s, matcher)
while s.accept(TokenType.OR):
rhs = and_expr(s, matcher)
ret = ret or rhs
return ret
def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
ret = not_expr(s, matcher)
while s.accept(TokenType.AND):
rhs = not_expr(s, matcher)
ret = ret and rhs
return ret
def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
if s.accept(TokenType.NOT):
return not not_expr(s, matcher)
if s.accept(TokenType.LPAREN):
ret = expr(s, matcher)
s.accept(TokenType.RPAREN, reject=True)
return ret
ident = s.accept(TokenType.IDENT)
if ident:
return matcher(ident.value)
s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))
def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
"""Evaluate a match expression as used by -k and -m.
:param input: The input expression - one line.
:param matcher: Given an identifier, should return whether it matches or not.
Should be prepared to handle arbitrary strings as input.
Returns whether the entire expression matches or not.
"""
return expression(Scanner(input), matcher)

View File

@ -2,44 +2,46 @@
this is a place where we put datastructures used by legacy apis
we hope to remove
"""
import keyword
from typing import Set
import attr
from _pytest.compat import TYPE_CHECKING
from _pytest.config import UsageError
from _pytest.mark.expression import evaluate
from _pytest.mark.expression import ParseError
if TYPE_CHECKING:
from _pytest.nodes import Item # noqa: F401 (used in type string)
@attr.s
class MarkMapping:
"""Provides a local mapping for markers where item access
resolves to True if the marker is present. """
class MarkMatcher:
"""A matcher for markers which are present."""
own_mark_names = attr.ib()
@classmethod
def from_item(cls, item):
def from_item(cls, item) -> "MarkMatcher":
mark_names = {mark.name for mark in item.iter_markers()}
return cls(mark_names)
def __getitem__(self, name):
def __call__(self, name: str) -> bool:
return name in self.own_mark_names
@attr.s
class KeywordMapping:
"""Provides a local mapping for keywords.
Given a list of names, map any substring of one of these names to True.
class KeywordMatcher:
"""A matcher for keywords.
Given a list of names, matches any substring of one of these names. The
string inclusion check is case-insensitive.
"""
_names = attr.ib(type=Set[str])
@classmethod
def from_item(cls, item: "Item") -> "KeywordMapping":
def from_item(cls, item: "Item") -> "KeywordMatcher":
mapped_names = set()
# Add the names of the current item and any parent items
@ -62,12 +64,7 @@ class KeywordMapping:
return cls(mapped_names)
def __getitem__(self, subname: str) -> bool:
"""Return whether subname is included within stored names.
The string inclusion check is case-insensitive.
"""
def __call__(self, subname: str) -> bool:
subname = subname.lower()
names = (name.lower() for name in self._names)
@ -77,18 +74,17 @@ class KeywordMapping:
return False
python_keywords_allowed_list = ["or", "and", "not"]
def matchmark(colitem, markexpr):
def matchmark(colitem, markexpr: str) -> bool:
"""Tries to match on any marker names, attached to the given colitem."""
try:
return eval(markexpr, {}, MarkMapping.from_item(colitem))
except Exception:
raise UsageError("Wrong expression passed to '-m': {}".format(markexpr))
return evaluate(markexpr, MarkMatcher.from_item(colitem))
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-m': {}: {}".format(markexpr, e)
) from None
def matchkeyword(colitem, keywordexpr):
def matchkeyword(colitem, keywordexpr: str) -> bool:
"""Tries to match given keyword expression to given collector item.
Will match on the name of colitem, including the names of its parents.
@ -97,20 +93,9 @@ def matchkeyword(colitem, keywordexpr):
Additionally, matches on names in the 'extra_keyword_matches' set of
any item, as well as names directly assigned to test functions.
"""
mapping = KeywordMapping.from_item(colitem)
if " " not in keywordexpr:
# special case to allow for simple "-k pass" and "-k 1.3"
return mapping[keywordexpr]
elif keywordexpr.startswith("not ") and " " not in keywordexpr[4:]:
return not mapping[keywordexpr[4:]]
for kwd in keywordexpr.split():
if keyword.iskeyword(kwd) and kwd not in python_keywords_allowed_list:
raise UsageError(
"Python keyword '{}' not accepted in expressions passed to '-k'".format(
kwd
)
)
try:
return eval(keywordexpr, {}, mapping)
except Exception:
raise UsageError("Wrong expression passed to '-k': {}".format(keywordexpr))
return evaluate(keywordexpr, KeywordMatcher.from_item(colitem))
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
) from None

View File

@ -200,6 +200,8 @@ def test_strict_prohibits_unregistered_markers(testdir, option_name):
"spec",
[
("xyz", ("test_one",)),
("((( xyz)) )", ("test_one",)),
("not not xyz", ("test_one",)),
("xyz and xyz2", ()),
("xyz2", ("test_two",)),
("xyz or xyz2", ("test_one", "test_two")),
@ -258,9 +260,11 @@ def test_mark_option_custom(spec, testdir):
"spec",
[
("interface", ("test_interface",)),
("not interface", ("test_nointer", "test_pass")),
("not interface", ("test_nointer", "test_pass", "test_1", "test_2")),
("pass", ("test_pass",)),
("not pass", ("test_interface", "test_nointer")),
("not pass", ("test_interface", "test_nointer", "test_1", "test_2")),
("not not not (pass)", ("test_interface", "test_nointer", "test_1", "test_2")),
("1 or 2", ("test_1", "test_2")),
],
)
def test_keyword_option_custom(spec, testdir):
@ -272,6 +276,10 @@ def test_keyword_option_custom(spec, testdir):
pass
def test_pass():
pass
def test_1():
pass
def test_2():
pass
"""
)
opt, passed_result = spec
@ -293,7 +301,7 @@ def test_keyword_option_considers_mark(testdir):
"spec",
[
("None", ("test_func[None]",)),
("1.3", ("test_func[1.3]",)),
("[1.3]", ("test_func[1.3]",)),
("2-3", ("test_func[2-3]",)),
],
)
@ -333,10 +341,23 @@ def test_parametrize_with_module(testdir):
"spec",
[
(
"foo or import",
"ERROR: Python keyword 'import' not accepted in expressions passed to '-k'",
"foo or",
"at column 7: expected not OR left parenthesis OR identifier; got end of input",
),
(
"foo or or",
"at column 8: expected not OR left parenthesis OR identifier; got or",
),
("(foo", "at column 5: expected right parenthesis; got end of input",),
("foo bar", "at column 5: expected end of input; got identifier",),
(
"or or",
"at column 1: expected not OR left parenthesis OR identifier; got or",
),
(
"not or",
"at column 5: expected not OR left parenthesis OR identifier; got or",
),
("foo or", "ERROR: Wrong expression passed to '-k': foo or"),
],
)
def test_keyword_option_wrong_arguments(spec, testdir, capsys):
@ -798,10 +819,12 @@ class TestKeywordSelection:
passed, skipped, failed = reprec.countoutcomes()
assert passed + skipped + failed == 0
def test_no_magic_values(self, testdir):
@pytest.mark.parametrize(
"keyword", ["__", "+", ".."],
)
def test_no_magic_values(self, testdir, keyword: str) -> None:
"""Make sure the tests do not match on magic values,
no double underscored values, like '__dict__',
and no instance values, like '()'.
no double underscored values, like '__dict__' and '+'.
"""
p = testdir.makepyfile(
"""
@ -809,16 +832,12 @@ class TestKeywordSelection:
"""
)
def assert_test_is_not_selected(keyword):
reprec = testdir.inline_run("-k", keyword, p)
passed, skipped, failed = reprec.countoutcomes()
dlist = reprec.getcalls("pytest_deselected")
assert passed + skipped + failed == 0
deselected_tests = dlist[0].items
assert len(deselected_tests) == 1
assert_test_is_not_selected("__")
assert_test_is_not_selected("()")
reprec = testdir.inline_run("-k", keyword, p)
passed, skipped, failed = reprec.countoutcomes()
dlist = reprec.getcalls("pytest_deselected")
assert passed + skipped + failed == 0
deselected_tests = dlist[0].items
assert len(deselected_tests) == 1
class TestMarkDecorator:
@ -1023,7 +1042,7 @@ def test_marker_expr_eval_failure_handling(testdir, expr):
pass
"""
)
expected = "ERROR: Wrong expression passed to '-m': {}".format(expr)
expected = "ERROR: Wrong expression passed to '-m': {}: *".format(expr)
result = testdir.runpytest(foo, "-m", expr)
result.stderr.fnmatch_lines([expected])
assert result.ret == ExitCode.USAGE_ERROR

View File

@ -0,0 +1,162 @@
import pytest
from _pytest.mark.expression import evaluate
from _pytest.mark.expression import ParseError
def test_empty_is_false() -> None:
assert not evaluate("", lambda ident: False)
assert not evaluate("", lambda ident: True)
assert not evaluate(" ", lambda ident: False)
assert not evaluate("\t", lambda ident: False)
@pytest.mark.parametrize(
("expr", "expected"),
(
("true", True),
("true", True),
("false", False),
("not true", False),
("not false", True),
("not not true", True),
("not not false", False),
("true and true", True),
("true and false", False),
("false and true", False),
("true and true and true", True),
("true and true and false", False),
("true and true and not true", False),
("false or false", False),
("false or true", True),
("true or true", True),
("true or true or false", True),
("true and true or false", True),
("not true or true", True),
("(not true) or true", True),
("not (true or true)", False),
("true and true or false and false", True),
("true and (true or false) and false", False),
("true and (true or (not (not false))) and false", False),
),
)
def test_basic(expr: str, expected: bool) -> None:
matcher = {"true": True, "false": False}.__getitem__
assert evaluate(expr, matcher) is expected
@pytest.mark.parametrize(
("expr", "expected"),
(
(" true ", True),
(" ((((((true)))))) ", True),
(" ( ((\t (((true))))) \t \t)", True),
("( true and (((false))))", False),
("not not not not true", True),
("not not not not not true", False),
),
)
def test_syntax_oddeties(expr: str, expected: bool) -> None:
matcher = {"true": True, "false": False}.__getitem__
assert evaluate(expr, matcher) is expected
@pytest.mark.parametrize(
("expr", "column", "message"),
(
("(", 2, "expected not OR left parenthesis OR identifier; got end of input"),
(" (", 3, "expected not OR left parenthesis OR identifier; got end of input",),
(
")",
1,
"expected not OR left parenthesis OR identifier; got right parenthesis",
),
(
") ",
1,
"expected not OR left parenthesis OR identifier; got right parenthesis",
),
("not", 4, "expected not OR left parenthesis OR identifier; got end of input",),
(
"not not",
8,
"expected not OR left parenthesis OR identifier; got end of input",
),
(
"(not)",
5,
"expected not OR left parenthesis OR identifier; got right parenthesis",
),
("and", 1, "expected not OR left parenthesis OR identifier; got and"),
(
"ident and",
10,
"expected not OR left parenthesis OR identifier; got end of input",
),
("ident and or", 11, "expected not OR left parenthesis OR identifier; got or",),
("ident ident", 7, "expected end of input; got identifier"),
),
)
def test_syntax_errors(expr: str, column: int, message: str) -> None:
with pytest.raises(ParseError) as excinfo:
evaluate(expr, lambda ident: True)
assert excinfo.value.column == column
assert excinfo.value.message == message
@pytest.mark.parametrize(
"ident",
(
".",
"...",
":::",
"a:::c",
"a+-b",
"אבגד",
"aaאבגדcc",
"a[bcd]",
"1234",
"1234abcd",
"1234and",
"notandor",
"not_and_or",
"not[and]or",
"1234+5678",
"123.232",
"True",
"False",
"if",
"else",
"while",
),
)
def test_valid_idents(ident: str) -> None:
assert evaluate(ident, {ident: True}.__getitem__)
@pytest.mark.parametrize(
"ident",
(
"/",
"\\",
"^",
"*",
"=",
"&",
"%",
"$",
"#",
"@",
"!",
"~",
"{",
"}",
'"',
"'",
"|",
";",
"",
),
)
def test_invalid_idents(ident: str) -> None:
with pytest.raises(ParseError):
evaluate(ident, lambda ident: True)