pathlib: speed up `make_numbered_dir` given a large tmp root

The function currently uses `find_suffixes`, which iterates the entire
directory looking for entries that start with the given prefix. In some
cases though, like in pytest's own selftests, the directory can get big:

    $ ls /tmp/pytest-of-ran/pytest-0/ | wc -l
    7686

and iterating it many times gets slow.
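For context, this is roughly the pattern the pre-change code follows (a
simplified sketch with `find_prefixed`/`extract_suffixes` fused into one
function, not the verbatim pytest source):

    from pathlib import Path

    def find_suffixes_old(root: Path, prefix: str):
        # Pre-change behavior: root.iterdir() builds a full Path object for
        # every entry, even though only the entry's name is ever inspected.
        l_prefix = prefix.lower()
        for path in root.iterdir():
            if path.name.lower().startswith(l_prefix):
                yield path.name[len(prefix):]

    # make_numbered_dir recomputes the current maximum suffix with a full
    # scan like this on every call, so the cost grows with the directory:
    #   max_existing = max(map(parse_num, find_suffixes_old(root, prefix)),
    #                      default=-1)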

This doesn't fix the underlying issue (iterating the entire directory),
but it at least speeds each scan up a bit by using `os.scandir` instead
of `path.iterdir`, which avoids constructing a `Path` object per entry.
So `make_numbered_dir` is still slow for pytest's selftests, but this
shaves off ~10s for me.
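To get a feel for the difference, a throwaway benchmark along these
lines can be used (the directory path and entry count are made up, and
absolute numbers will vary by machine and filesystem):

    import os
    import timeit
    from pathlib import Path

    root = Path("/tmp/scandir-bench")  # hypothetical scratch directory
    root.mkdir(exist_ok=True)
    for i in range(8000):
        (root / f"pytest-{i}").mkdir(exist_ok=True)

    def with_iterdir():
        return [p.name for p in root.iterdir() if p.name.startswith("pytest-")]

    def with_scandir():
        return [e.name for e in os.scandir(root) if e.name.startswith("pytest-")]

    # iterdir constructs a Path per entry; scandir yields lightweight
    # os.DirEntry objects instead, so each pass does less work per entry.
    print("iterdir:", timeit.timeit(with_iterdir, number=100))
    print("scandir:", timeit.timeit(with_scandir, number=100))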
Author: Ran Benita
Date:   2024-01-25 17:33:23 +02:00
Commit: eb9013d42c (parent: ac2cd72e5f)
1 changed file with 10 additions and 10 deletions


@@ -171,23 +171,23 @@ def rm_rf(path: Path) -> None:
     shutil.rmtree(str(path), onerror=onerror)
 
 
-def find_prefixed(root: Path, prefix: str) -> Iterator[Path]:
+def find_prefixed(root: Path, prefix: str) -> Iterator["os.DirEntry[str]"]:
     """Find all elements in root that begin with the prefix, case insensitive."""
     l_prefix = prefix.lower()
-    for x in root.iterdir():
+    for x in os.scandir(root):
         if x.name.lower().startswith(l_prefix):
             yield x
 
 
-def extract_suffixes(iter: Iterable[PurePath], prefix: str) -> Iterator[str]:
+def extract_suffixes(iter: Iterable["os.DirEntry[str]"], prefix: str) -> Iterator[str]:
     """Return the parts of the paths following the prefix.
 
     :param iter: Iterator over path names.
     :param prefix: Expected prefix of the path names.
     """
     p_len = len(prefix)
-    for p in iter:
-        yield p.name[p_len:]
+    for entry in iter:
+        yield entry.name[p_len:]
 
 
 def find_suffixes(root: Path, prefix: str) -> Iterator[str]:
@@ -346,12 +346,12 @@ def cleanup_candidates(root: Path, prefix: str, keep: int) -> Iterator[Path]:
     """List candidates for numbered directories to be removed - follows py.path."""
     max_existing = max(map(parse_num, find_suffixes(root, prefix)), default=-1)
     max_delete = max_existing - keep
-    paths = find_prefixed(root, prefix)
-    paths, paths2 = itertools.tee(paths)
-    numbers = map(parse_num, extract_suffixes(paths2, prefix))
-    for path, number in zip(paths, numbers):
+    entries = find_prefixed(root, prefix)
+    entries, entries2 = itertools.tee(entries)
+    numbers = map(parse_num, extract_suffixes(entries2, prefix))
+    for entry, number in zip(entries, numbers):
         if number <= max_delete:
-            yield path
+            yield Path(entry)
 
 
 def cleanup_dead_symlinks(root: Path):
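Note on the last hunk: `yield Path(entry)` works because `os.DirEntry`
implements the `os.PathLike` protocol (it has `__fspath__` since Python
3.6), so the `Path` construction cost is now only paid for entries that
are actually yielded. A quick check:

    import os
    from pathlib import Path

    for entry in os.scandir("."):
        assert isinstance(entry, os.PathLike)   # DirEntry provides __fspath__
        assert Path(entry) == Path(entry.path)  # Path() accepts any PathLike
        break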