pathlib: make visit() independent of py.path.local, use os.scandir

`os.scandir()`, introduced in Python 3.5, is much faster than
`os.listdir()`. See https://www.python.org/dev/peps/pep-0471/.

It also yields `DirEntry` objects, which can be used to further reduce
syscalls in some cases.
This commit is contained in:
Ran Benita 2020-07-05 23:11:47 +03:00
parent c15bb5d3de
commit 3633b691d8
4 changed files with 26 additions and 22 deletions

View File

@ -618,11 +618,13 @@ class Session(nodes.FSCollector):
assert not names, "invalid arg {!r}".format((argpath, names))
seen_dirs = set() # type: Set[py.path.local]
for path in visit(argpath, self._recurse):
if not path.check(file=1):
for direntry in visit(str(argpath), self._recurse):
if not direntry.is_file():
continue
path = py.path.local(direntry.path)
dirpath = path.dirpath()
if dirpath not in seen_dirs:
# Collect packages first.
seen_dirs.add(dirpath)

View File

@ -562,17 +562,18 @@ class FSCollector(Collector):
def gethookproxy(self, fspath: py.path.local):
raise NotImplementedError()
def _recurse(self, dirpath: py.path.local) -> bool:
if dirpath.basename == "__pycache__":
def _recurse(self, direntry: "os.DirEntry[str]") -> bool:
if direntry.name == "__pycache__":
return False
ihook = self._gethookproxy(dirpath.dirpath())
if ihook.pytest_ignore_collect(path=dirpath, config=self.config):
path = py.path.local(direntry.path)
ihook = self._gethookproxy(path.dirpath())
if ihook.pytest_ignore_collect(path=path, config=self.config):
return False
for pat in self._norecursepatterns:
if dirpath.check(fnmatch=pat):
if path.check(fnmatch=pat):
return False
ihook = self._gethookproxy(dirpath)
ihook.pytest_collect_directory(path=dirpath, parent=self)
ihook = self._gethookproxy(path)
ihook.pytest_collect_directory(path=path, parent=self)
return True
def isinitpath(self, path: py.path.local) -> bool:

View File

@ -560,14 +560,14 @@ def resolve_package_path(path: Path) -> Optional[Path]:
def visit(
path: py.path.local, recurse: Callable[[py.path.local], bool],
) -> Iterator[py.path.local]:
"""Walk path recursively, in breadth-first order.
path: str, recurse: Callable[["os.DirEntry[str]"], bool]
) -> Iterator["os.DirEntry[str]"]:
"""Walk a directory recursively, in breadth-first order.
Entries at each directory level are sorted.
"""
entries = sorted(path.listdir())
entries = sorted(os.scandir(path), key=lambda entry: entry.name)
yield from entries
for entry in entries:
if entry.check(dir=1) and recurse(entry):
yield from visit(entry, recurse)
if entry.is_dir(follow_symlinks=False) and recurse(entry):
yield from visit(entry.path, recurse)

View File

@ -642,23 +642,24 @@ class Package(Module):
):
yield Module.from_parent(self, fspath=init_module)
pkg_prefixes = set() # type: Set[py.path.local]
for path in visit(this_path, recurse=self._recurse):
for direntry in visit(str(this_path), recurse=self._recurse):
path = py.path.local(direntry.path)
# We will visit our own __init__.py file, in which case we skip it.
is_file = path.isfile()
if is_file:
if path.basename == "__init__.py" and path.dirpath() == this_path:
if direntry.is_file():
if direntry.name == "__init__.py" and path.dirpath() == this_path:
continue
parts_ = parts(path.strpath)
parts_ = parts(direntry.path)
if any(
str(pkg_prefix) in parts_ and pkg_prefix.join("__init__.py") != path
for pkg_prefix in pkg_prefixes
):
continue
if is_file:
if direntry.is_file():
yield from self._collectfile(path)
elif not path.isdir():
elif not direntry.is_dir():
# Broken symlink or invalid/missing file.
continue
elif path.join("__init__.py").check(file=1):