pathlib: speed up `make_numbered_dir` given a large tmp root
The function currently uses `find_suffixes`, which iterates over the entire directory looking for entries whose names start with the given prefix. In some cases, though, such as pytest's own selftest, the directory can get big (`$ ls /tmp/pytest-of-ran/pytest-0/ | wc -l` reports 7686), and iterating over it many times can get slow. This commit doesn't fix the underlying issue (iterating the directory), but it at least speeds things up a bit by using `os.scandir` instead of `path.iterdir`. So `make_numbered_dir` is still slow for pytest's selftests, but this change saves ~10s for me.
This commit is contained in:
parent
ac2cd72e5f
commit
eb9013d42c
|
@ -171,23 +171,23 @@ def rm_rf(path: Path) -> None:
|
||||||
shutil.rmtree(str(path), onerror=onerror)
|
shutil.rmtree(str(path), onerror=onerror)
|
||||||
|
|
||||||
|
|
||||||
def find_prefixed(root: Path, prefix: str) -> Iterator[Path]:
|
def find_prefixed(root: Path, prefix: str) -> Iterator["os.DirEntry[str]"]:
|
||||||
"""Find all elements in root that begin with the prefix, case insensitive."""
|
"""Find all elements in root that begin with the prefix, case insensitive."""
|
||||||
l_prefix = prefix.lower()
|
l_prefix = prefix.lower()
|
||||||
for x in root.iterdir():
|
for x in os.scandir(root):
|
||||||
if x.name.lower().startswith(l_prefix):
|
if x.name.lower().startswith(l_prefix):
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
|
||||||
def extract_suffixes(iter: Iterable[PurePath], prefix: str) -> Iterator[str]:
|
def extract_suffixes(iter: Iterable["os.DirEntry[str]"], prefix: str) -> Iterator[str]:
|
||||||
"""Return the parts of the paths following the prefix.
|
"""Return the parts of the paths following the prefix.
|
||||||
|
|
||||||
:param iter: Iterator over path names.
|
:param iter: Iterator over path names.
|
||||||
:param prefix: Expected prefix of the path names.
|
:param prefix: Expected prefix of the path names.
|
||||||
"""
|
"""
|
||||||
p_len = len(prefix)
|
p_len = len(prefix)
|
||||||
for p in iter:
|
for entry in iter:
|
||||||
yield p.name[p_len:]
|
yield entry.name[p_len:]
|
||||||
|
|
||||||
|
|
||||||
def find_suffixes(root: Path, prefix: str) -> Iterator[str]:
|
def find_suffixes(root: Path, prefix: str) -> Iterator[str]:
|
||||||
|
@ -346,12 +346,12 @@ def cleanup_candidates(root: Path, prefix: str, keep: int) -> Iterator[Path]:
|
||||||
"""List candidates for numbered directories to be removed - follows py.path."""
|
"""List candidates for numbered directories to be removed - follows py.path."""
|
||||||
max_existing = max(map(parse_num, find_suffixes(root, prefix)), default=-1)
|
max_existing = max(map(parse_num, find_suffixes(root, prefix)), default=-1)
|
||||||
max_delete = max_existing - keep
|
max_delete = max_existing - keep
|
||||||
paths = find_prefixed(root, prefix)
|
entries = find_prefixed(root, prefix)
|
||||||
paths, paths2 = itertools.tee(paths)
|
entries, entries2 = itertools.tee(entries)
|
||||||
numbers = map(parse_num, extract_suffixes(paths2, prefix))
|
numbers = map(parse_num, extract_suffixes(entries2, prefix))
|
||||||
for path, number in zip(paths, numbers):
|
for entry, number in zip(entries, numbers):
|
||||||
if number <= max_delete:
|
if number <= max_delete:
|
||||||
yield path
|
yield Path(entry)
|
||||||
|
|
||||||
|
|
||||||
def cleanup_dead_symlinks(root: Path):
|
def cleanup_dead_symlinks(root: Path):
|
||||||
|
|
Loading…
Reference in New Issue