From eb9013d42c97c61e234aadff9651d4cedb83eae3 Mon Sep 17 00:00:00 2001
From: Ran Benita
Date: Thu, 25 Jan 2024 17:33:23 +0200
Subject: [PATCH] pathlib: speed up `make_numbered_dir` given a large tmp root

The function currently uses `find_suffixes`, which iterates the entire
directory searching for files with the given suffix. In some cases though,
like in pytest's own selftests, the directory can get big:

    $ ls /tmp/pytest-of-ran/pytest-0/ | wc -l
    7686

and iterating it many times can get slow.

This doesn't fix the underlying issue (iterating the directory), but it at
least speeds things up a bit by using `os.scandir` instead of
`Path.iterdir`. So `make_numbered_dir` is still slow for pytest's
selftests, but this change saves ~10s for me.
---
 src/_pytest/pathlib.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/_pytest/pathlib.py b/src/_pytest/pathlib.py
index 4cd635ed7..ce5156b06 100644
--- a/src/_pytest/pathlib.py
+++ b/src/_pytest/pathlib.py
@@ -171,23 +171,23 @@ def rm_rf(path: Path) -> None:
     shutil.rmtree(str(path), onerror=onerror)
 
 
-def find_prefixed(root: Path, prefix: str) -> Iterator[Path]:
+def find_prefixed(root: Path, prefix: str) -> Iterator["os.DirEntry[str]"]:
     """Find all elements in root that begin with the prefix, case insensitive."""
     l_prefix = prefix.lower()
-    for x in root.iterdir():
+    for x in os.scandir(root):
         if x.name.lower().startswith(l_prefix):
             yield x
 
 
-def extract_suffixes(iter: Iterable[PurePath], prefix: str) -> Iterator[str]:
+def extract_suffixes(iter: Iterable["os.DirEntry[str]"], prefix: str) -> Iterator[str]:
     """Return the parts of the paths following the prefix.
 
     :param iter: Iterator over path names.
     :param prefix: Expected prefix of the path names.
     """
     p_len = len(prefix)
-    for p in iter:
-        yield p.name[p_len:]
+    for entry in iter:
+        yield entry.name[p_len:]
 
 
 def find_suffixes(root: Path, prefix: str) -> Iterator[str]:
@@ -346,12 +346,12 @@ def cleanup_candidates(root: Path, prefix: str, keep: int) -> Iterator[Path]:
     """List candidates for numbered directories to be removed - follows py.path."""
     max_existing = max(map(parse_num, find_suffixes(root, prefix)), default=-1)
     max_delete = max_existing - keep
-    paths = find_prefixed(root, prefix)
-    paths, paths2 = itertools.tee(paths)
-    numbers = map(parse_num, extract_suffixes(paths2, prefix))
-    for path, number in zip(paths, numbers):
+    entries = find_prefixed(root, prefix)
+    entries, entries2 = itertools.tee(entries)
+    numbers = map(parse_num, extract_suffixes(entries2, prefix))
+    for entry, number in zip(entries, numbers):
         if number <= max_delete:
-            yield path
+            yield Path(entry)
 
 
 def cleanup_dead_symlinks(root: Path):
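
To see what `os.scandir` buys over `Path.iterdir` on a large tmp root, here
is a minimal micro-benchmark sketch (not part of the patch; the entry count
and name prefix below are arbitrary assumptions for illustration):

    import os
    import tempfile
    import timeit
    from pathlib import Path

    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        # Populate the directory with many prefixed entries, loosely
        # mimicking a big pytest tmp root like /tmp/pytest-of-ran/pytest-0/.
        for i in range(5000):
            root.joinpath(f"pytest-{i}").mkdir()

        def with_iterdir() -> int:
            # Path.iterdir constructs a full pathlib.Path object per entry.
            return sum(1 for p in root.iterdir() if p.name.startswith("pytest-"))

        def with_scandir() -> int:
            # os.scandir yields lightweight os.DirEntry objects straight from
            # the OS directory stream, skipping per-entry Path construction.
            with os.scandir(root) as it:
                return sum(1 for e in it if e.name.startswith("pytest-"))

        print("iterdir:", timeit.timeit(with_iterdir, number=20))
        print("scandir:", timeit.timeit(with_scandir, number=20))

Since `find_prefixed` only inspects `entry.name`, the ~10s saving plausibly
comes from skipping that per-entry `Path` allocation across the repeated
scans `make_numbered_dir` performs.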