pathlib: speed up `make_numbered_dir` given a large tmp root
The function currently uses `find_suffixes`, which iterates the entire directory searching for files with the given suffix. In some cases, though — like in pytest's own selftests — the directory can get big: `$ ls /tmp/pytest-of-ran/pytest-0/ | wc -l` reports 7686 entries, and iterating it many times can get slow. This doesn't fix the underlying issue (iterating the directory), but it at least speeds things up a bit by using `os.scandir` instead of `path.iterdir`. `make_numbered_dir` is therefore still slow for pytest's selftests, but this change shaves ~10s off for me.
This commit is contained in:
parent
ac2cd72e5f
commit
eb9013d42c
|
@ -171,23 +171,23 @@ def rm_rf(path: Path) -> None:
|
|||
shutil.rmtree(str(path), onerror=onerror)
|
||||
|
||||
|
||||
def find_prefixed(root: Path, prefix: str) -> Iterator["os.DirEntry[str]"]:
    """Find all elements in root that begin with the prefix, case-insensitive.

    :param root: Directory to scan.
    :param prefix: Name prefix to match (compared case-insensitively).
    :returns: Iterator of ``os.DirEntry`` objects — cheaper than ``Path``
        objects because ``os.scandir`` avoids a per-entry ``stat`` and
        ``Path`` construction.
    """
    l_prefix = prefix.lower()
    # Use scandir as a context manager so the underlying directory handle
    # is closed promptly even if this generator is never exhausted.
    with os.scandir(root) as entries:
        for entry in entries:
            if entry.name.lower().startswith(l_prefix):
                yield entry
def extract_suffixes(iter: Iterable["os.DirEntry[str]"], prefix: str) -> Iterator[str]:
    """Return the parts of the paths following the prefix.

    :param iter: Iterator over directory entries (anything exposing ``.name``).
    :param prefix: Expected prefix of the entry names.
    :returns: Iterator of the name portions after *prefix*.
    """
    # NOTE(review): the parameter name `iter` shadows the builtin; kept
    # as-is because renaming it would break keyword-argument callers.
    p_len = len(prefix)
    for entry in iter:
        # Slice rather than removeprefix: callers guarantee the prefix is
        # present, so a plain fixed-length slice is sufficient.
        yield entry.name[p_len:]
def find_suffixes(root: Path, prefix: str) -> Iterator[str]:
|
||||
|
def cleanup_candidates(root: Path, prefix: str, keep: int) -> Iterator[Path]:
    """List candidates for numbered directories to be removed - follows py.path.

    :param root: Directory containing the numbered directories.
    :param prefix: Common name prefix of the numbered directories.
    :param keep: How many of the highest-numbered directories to retain.
    :returns: Iterator of ``Path`` objects whose number is low enough to delete.
    """
    # default=-1 handles an empty directory: max_delete then never matches.
    max_existing = max(map(parse_num, find_suffixes(root, prefix)), default=-1)
    max_delete = max_existing - keep
    entries = find_prefixed(root, prefix)
    # tee: we need each entry twice — once for its parsed number, once to
    # yield it — without re-scanning the (potentially large) directory.
    entries, entries2 = itertools.tee(entries)
    numbers = map(parse_num, extract_suffixes(entries2, prefix))
    for entry, number in zip(entries, numbers):
        if number <= max_delete:
            # DirEntry implements os.PathLike, so Path(entry) is valid.
            yield Path(entry)
|
|
Loading…
Reference in New Issue