From a41820fbf0ca5ba25fc64bc0f976c3b3d4af53ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Hovm=C3=B6ller?= Date: Thu, 25 Oct 2018 15:09:14 +0200 Subject: [PATCH 1/4] collection: performance: use optimized parts function Time: 8.53s => 5.73s --- src/_pytest/main.py | 9 +++++---- src/_pytest/pathlib.py | 5 +++++ src/_pytest/python.py | 9 +++------ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index 1c41f7e6e..dbe1ccf42 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -18,6 +18,7 @@ from _pytest.config import directory_arg from _pytest.config import hookimpl from _pytest.config import UsageError from _pytest.outcomes import exit +from _pytest.pathlib import parts from _pytest.runner import collect_one_node @@ -469,8 +470,8 @@ class Session(nodes.FSCollector): return items def collect(self): - for parts in self._initialparts: - arg = "::".join(map(str, parts)) + for initialpart in self._initialparts: + arg = "::".join(map(str, initialpart)) self.trace("processing argument", arg) self.trace.root.indent += 1 try: @@ -532,12 +533,12 @@ class Session(nodes.FSCollector): fil=filter_, rec=self._recurse, bf=True, sort=True ): pkginit = path.dirpath().join("__init__.py") - if pkginit.exists() and not any(x in pkginit.parts() for x in paths): + if pkginit.exists() and not any(x in parts(pkginit.strpath) for x in paths): for x in root._collectfile(pkginit): yield x paths.append(x.fspath.dirpath()) - if not any(x in path.parts() for x in paths): + if not any(x in parts(path.strpath) for x in paths): for x in root._collectfile(path): if (type(x), x.fspath) in self._node_cache: yield self._node_cache[(type(x), x.fspath)] diff --git a/src/_pytest/pathlib.py b/src/_pytest/pathlib.py index f5c1da8c5..c907b495c 100644 --- a/src/_pytest/pathlib.py +++ b/src/_pytest/pathlib.py @@ -303,3 +303,8 @@ def fnmatch_ex(pattern, path): else: name = six.text_type(path) return fnmatch.fnmatch(name, pattern) + + +def parts(s): + parts = s.split(sep) + return [sep.join(parts[:i+1]) or sep for i in range(len(parts))] diff --git a/src/_pytest/python.py b/src/_pytest/python.py index b866532cc..2eb5f0b87 100644 --- a/src/_pytest/python.py +++ b/src/_pytest/python.py @@ -41,6 +41,7 @@ from _pytest.mark.structures import get_unpacked_marks from _pytest.mark.structures import normalize_mark_list from _pytest.mark.structures import transfer_markers from _pytest.outcomes import fail +from _pytest.pathlib import parts from _pytest.warning_types import PytestWarning from _pytest.warning_types import RemovedInPytest4Warning @@ -562,14 +563,10 @@ class Package(Module): yield Module(init_module, self) pkg_prefixes = set() for path in this_path.visit(rec=self._recurse, bf=True, sort=True): - # We will visit our own __init__.py file, in which case we skip it. - if path.isfile(): - if path.basename == "__init__.py" and path.dirpath() == this_path: - continue - parts = path.parts() + parts_ = parts(path.strpath) if any( - pkg_prefix in parts and pkg_prefix.join("__init__.py") != path + pkg_prefix in parts_ and pkg_prefix.join("__init__.py") != path for pkg_prefix in pkg_prefixes ): continue From 2b50911c9d9e7cc2c3ffa38c70bff398109718ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Hovm=C3=B6ller?= Date: Thu, 25 Oct 2018 15:16:10 +0200 Subject: [PATCH 2/4] Minor refactor for readability Time: 5.73s => 5.88s/5.82s --- src/_pytest/main.py | 11 +++++++---- src/_pytest/python.py | 4 ++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index dbe1ccf42..9a0162f06 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -533,17 +533,20 @@ class Session(nodes.FSCollector): fil=filter_, rec=self._recurse, bf=True, sort=True ): pkginit = path.dirpath().join("__init__.py") - if pkginit.exists() and not any(x in parts(pkginit.strpath) for x in paths): + if pkginit.exists() and not any( + x in parts(pkginit.strpath) for x in paths + ): for x in root._collectfile(pkginit): yield x paths.append(x.fspath.dirpath()) if not any(x in parts(path.strpath) for x in paths): for x in root._collectfile(path): - if (type(x), x.fspath) in self._node_cache: - yield self._node_cache[(type(x), x.fspath)] + key = (type(x), x.fspath) + if key in self._node_cache: + yield self._node_cache[key] else: - self._node_cache[(type(x), x.fspath)] = x + self._node_cache[key] = x yield x else: assert argpath.check(file=1) diff --git a/src/_pytest/python.py b/src/_pytest/python.py index 2eb5f0b87..6b113cacd 100644 --- a/src/_pytest/python.py +++ b/src/_pytest/python.py @@ -563,6 +563,10 @@ class Package(Module): yield Module(init_module, self) pkg_prefixes = set() for path in this_path.visit(rec=self._recurse, bf=True, sort=True): + # We will visit our own __init__.py file, in which case we skip it. + if path.isfile(): + if path.basename == "__init__.py" and path.dirpath() == this_path: + continue parts_ = parts(path.strpath) if any( From 6ffa347c77344a57cbb99ff43d7c27b78a7b9511 Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Thu, 25 Oct 2018 17:19:19 +0200 Subject: [PATCH 3/4] Handle dirs only once Time: 5.73s/5.88s => 5.36s (Before rebase: 4.86s => 4.45s) --- src/_pytest/main.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index 9a0162f06..d67468887 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -529,16 +529,20 @@ class Session(nodes.FSCollector): def filter_(f): return f.check(file=1) + seen_dirs = set() for path in argpath.visit( fil=filter_, rec=self._recurse, bf=True, sort=True ): - pkginit = path.dirpath().join("__init__.py") - if pkginit.exists() and not any( - x in parts(pkginit.strpath) for x in paths - ): - for x in root._collectfile(pkginit): - yield x - paths.append(x.fspath.dirpath()) + dirpath = path.dirpath() + if dirpath not in seen_dirs: + seen_dirs.add(dirpath) + pkginit = dirpath.join("__init__.py") + if pkginit.exists() and not any( + x in parts(pkginit.strpath) for x in paths + ): + for x in root._collectfile(pkginit): + yield x + paths.append(x.fspath.dirpath()) if not any(x in parts(path.strpath) for x in paths): for x in root._collectfile(path): From 023e1c78df64e0cbdc7fa9ff8a912c56a43a033b Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Thu, 25 Oct 2018 17:34:41 +0200 Subject: [PATCH 4/4] paths: use set and isdisjoint Time: 5.36s => 4.85s (before rebase: 4.45s => 3.55s) --- src/_pytest/main.py | 10 ++++------ src/_pytest/pathlib.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index d67468887..de0740744 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -489,7 +489,7 @@ class Session(nodes.FSCollector): names = self._parsearg(arg) argpath = names.pop(0).realpath() - paths = [] + paths = set() root = self # Start with a Session root, and delve to argpath item (dir or file) @@ -537,14 +537,12 @@ class Session(nodes.FSCollector): if dirpath not in seen_dirs: seen_dirs.add(dirpath) pkginit = dirpath.join("__init__.py") - if pkginit.exists() and not any( - x in parts(pkginit.strpath) for x in paths - ): + if pkginit.exists() and parts(pkginit.strpath).isdisjoint(paths): for x in root._collectfile(pkginit): yield x - paths.append(x.fspath.dirpath()) + paths.add(x.fspath.dirpath()) - if not any(x in parts(path.strpath) for x in paths): + if parts(path.strpath).isdisjoint(paths): for x in root._collectfile(path): key = (type(x), x.fspath) if key in self._node_cache: diff --git a/src/_pytest/pathlib.py b/src/_pytest/pathlib.py index c907b495c..430e1ec1d 100644 --- a/src/_pytest/pathlib.py +++ b/src/_pytest/pathlib.py @@ -307,4 +307,4 @@ def fnmatch_ex(pattern, path): def parts(s): parts = s.split(sep) - return [sep.join(parts[:i+1]) or sep for i in range(len(parts))] + return {sep.join(parts[: i + 1]) or sep for i in range(len(parts))}