Refine how we detect namespace packages (#12169)

Previously we used a hand crafted approach to detect namespace packages, however we should rely on ``importlib`` to detect them for us.

Fix #12112

---------

Co-authored-by: Ran Benita <ran@unusedvar.com>
This commit is contained in:
Bruno Oliveira 2024-04-09 13:21:51 -03:00 committed by GitHub
parent 17fc20af78
commit 99890636bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 297 additions and 48 deletions

View File

@ -0,0 +1 @@
Improve namespace packages detection when :confval:`consider_namespace_packages` is enabled, covering more situations (like editable installs).

View File

@ -1279,8 +1279,7 @@ passed multiple times. The expected format is ``name=value``. For example::
Controls if pytest should attempt to identify `namespace packages <https://packaging.python.org/en/latest/guides/packaging-namespace-packages>`__ Controls if pytest should attempt to identify `namespace packages <https://packaging.python.org/en/latest/guides/packaging-namespace-packages>`__
when collecting Python modules. Default is ``False``. when collecting Python modules. Default is ``False``.
Set to ``True`` if you are testing namespace packages installed into a virtual environment and it is important for Set to ``True`` if the package you are testing is part of a namespace package.
your packages to be imported using their full namespace package name.
Only `native namespace packages <https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#native-namespace-packages>`__ Only `native namespace packages <https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#native-namespace-packages>`__
are supported, with no plans to support `legacy namespace packages <https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#legacy-namespace-packages>`__. are supported, with no plans to support `legacy namespace packages <https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#legacy-namespace-packages>`__.

View File

@ -8,6 +8,7 @@ from errno import ENOENT
from errno import ENOTDIR from errno import ENOTDIR
import fnmatch import fnmatch
from functools import partial from functools import partial
from importlib.machinery import ModuleSpec
import importlib.util import importlib.util
import itertools import itertools
import os import os
@ -628,11 +629,13 @@ def _import_module_using_spec(
# such as our own assertion-rewrite hook. # such as our own assertion-rewrite hook.
for meta_importer in sys.meta_path: for meta_importer in sys.meta_path:
spec = meta_importer.find_spec(module_name, [str(module_location)]) spec = meta_importer.find_spec(module_name, [str(module_location)])
if spec is not None: if spec_matches_module_path(spec, module_path):
break break
else: else:
spec = importlib.util.spec_from_file_location(module_name, str(module_path)) spec = importlib.util.spec_from_file_location(module_name, str(module_path))
if spec is not None:
if spec_matches_module_path(spec, module_path):
assert spec is not None
mod = importlib.util.module_from_spec(spec) mod = importlib.util.module_from_spec(spec)
sys.modules[module_name] = mod sys.modules[module_name] = mod
spec.loader.exec_module(mod) # type: ignore[union-attr] spec.loader.exec_module(mod) # type: ignore[union-attr]
@ -643,6 +646,16 @@ def _import_module_using_spec(
return None return None
def spec_matches_module_path(
module_spec: Optional[ModuleSpec], module_path: Path
) -> bool:
"""Return true if the given ModuleSpec can be used to import the given module path."""
if module_spec is None or module_spec.origin is None:
return False
return Path(module_spec.origin) == module_path
# Implement a special _is_same function on Windows which returns True if the two filenames # Implement a special _is_same function on Windows which returns True if the two filenames
# compare equal, to circumvent os.path.samefile returning False for mounts in UNC (#7678). # compare equal, to circumvent os.path.samefile returning False for mounts in UNC (#7678).
if sys.platform.startswith("win"): if sys.platform.startswith("win"):
@ -762,39 +775,79 @@ def resolve_pkg_root_and_module_name(
Passing the full path to `models.py` will yield Path("src") and "app.core.models". Passing the full path to `models.py` will yield Path("src") and "app.core.models".
If consider_namespace_packages is True, then we additionally check upwards in the hierarchy If consider_namespace_packages is True, then we additionally check upwards in the hierarchy
until we find a directory that is reachable from sys.path, which marks it as a namespace package: for namespace packages:
https://packaging.python.org/en/latest/guides/packaging-namespace-packages https://packaging.python.org/en/latest/guides/packaging-namespace-packages
Raises CouldNotResolvePathError if the given path does not belong to a package (missing any __init__.py files). Raises CouldNotResolvePathError if the given path does not belong to a package (missing any __init__.py files).
""" """
pkg_root: Optional[Path] = None
pkg_path = resolve_package_path(path) pkg_path = resolve_package_path(path)
if pkg_path is not None: if pkg_path is not None:
pkg_root = pkg_path.parent pkg_root = pkg_path.parent
# https://packaging.python.org/en/latest/guides/packaging-namespace-packages/
if consider_namespace_packages: if consider_namespace_packages:
# Go upwards in the hierarchy, if we find a parent path included start = pkg_root if pkg_root is not None else path.parent
# in sys.path, it means the package found by resolve_package_path() for candidate in (start, *start.parents):
# actually belongs to a namespace package. module_name = compute_module_name(candidate, path)
for parent in pkg_root.parents: if module_name and is_importable(module_name, path):
# If any of the parent paths has a __init__.py, it means it is not
# a namespace package (see the docs linked above).
if (parent / "__init__.py").is_file():
break
if str(parent) in sys.path:
# Point the pkg_root to the root of the namespace package. # Point the pkg_root to the root of the namespace package.
pkg_root = parent pkg_root = candidate
break break
names = list(path.with_suffix("").relative_to(pkg_root).parts) if pkg_root is not None:
if names[-1] == "__init__": module_name = compute_module_name(pkg_root, path)
names.pop() if module_name:
module_name = ".".join(names)
return pkg_root, module_name return pkg_root, module_name
raise CouldNotResolvePathError(f"Could not resolve for {path}") raise CouldNotResolvePathError(f"Could not resolve for {path}")
def is_importable(module_name: str, module_path: Path) -> bool:
"""
Return if the given module path could be imported normally by Python, akin to the user
entering the REPL and importing the corresponding module name directly, and corresponds
to the module_path specified.
:param module_name:
Full module name that we want to check if is importable.
For example, "app.models".
:param module_path:
Full path to the python module/package we want to check if is importable.
For example, "/projects/src/app/models.py".
"""
try:
# Note this is different from what we do in ``_import_module_using_spec``, where we explicitly search through
# sys.meta_path to be able to pass the path of the module that we want to import (``meta_importer.find_spec``).
# Using importlib.util.find_spec() is different, it gives the same results as trying to import
# the module normally in the REPL.
spec = importlib.util.find_spec(module_name)
except (ImportError, ValueError, ImportWarning):
return False
else:
return spec_matches_module_path(spec, module_path)
def compute_module_name(root: Path, module_path: Path) -> Optional[str]:
"""Compute a module name based on a path and a root anchor."""
try:
path_without_suffix = module_path.with_suffix("")
except ValueError:
# Empty paths (such as Path.cwd()) might break meta_path hooks (like our own assertion rewriter).
return None
try:
relative = path_without_suffix.relative_to(root)
except ValueError: # pragma: no cover
return None
names = list(relative.parts)
if not names:
return None
if names[-1] == "__init__":
names.pop()
return ".".join(names)
class CouldNotResolvePathError(Exception): class CouldNotResolvePathError(Exception):
"""Custom exception raised by resolve_pkg_root_and_module_name.""" """Custom exception raised by resolve_pkg_root_and_module_name."""

View File

@ -1,5 +1,7 @@
# mypy: allow-untyped-defs # mypy: allow-untyped-defs
import errno import errno
import importlib.abc
import importlib.machinery
import os.path import os.path
from pathlib import Path from pathlib import Path
import pickle import pickle
@ -10,12 +12,15 @@ from types import ModuleType
from typing import Any from typing import Any
from typing import Generator from typing import Generator
from typing import Iterator from typing import Iterator
from typing import Optional
from typing import Sequence
from typing import Tuple from typing import Tuple
import unittest.mock import unittest.mock
from _pytest.monkeypatch import MonkeyPatch from _pytest.monkeypatch import MonkeyPatch
from _pytest.pathlib import bestrelpath from _pytest.pathlib import bestrelpath
from _pytest.pathlib import commonpath from _pytest.pathlib import commonpath
from _pytest.pathlib import compute_module_name
from _pytest.pathlib import CouldNotResolvePathError from _pytest.pathlib import CouldNotResolvePathError
from _pytest.pathlib import ensure_deletable from _pytest.pathlib import ensure_deletable
from _pytest.pathlib import fnmatch_ex from _pytest.pathlib import fnmatch_ex
@ -25,6 +30,7 @@ from _pytest.pathlib import import_path
from _pytest.pathlib import ImportMode from _pytest.pathlib import ImportMode
from _pytest.pathlib import ImportPathMismatchError from _pytest.pathlib import ImportPathMismatchError
from _pytest.pathlib import insert_missing_modules from _pytest.pathlib import insert_missing_modules
from _pytest.pathlib import is_importable
from _pytest.pathlib import maybe_delete_a_numbered_dir from _pytest.pathlib import maybe_delete_a_numbered_dir
from _pytest.pathlib import module_name_from_path from _pytest.pathlib import module_name_from_path
from _pytest.pathlib import resolve_package_path from _pytest.pathlib import resolve_package_path
@ -33,6 +39,7 @@ from _pytest.pathlib import safe_exists
from _pytest.pathlib import symlink_or_skip from _pytest.pathlib import symlink_or_skip
from _pytest.pathlib import visit from _pytest.pathlib import visit
from _pytest.pytester import Pytester from _pytest.pytester import Pytester
from _pytest.pytester import RunResult
from _pytest.tmpdir import TempPathFactory from _pytest.tmpdir import TempPathFactory
import pytest import pytest
@ -717,12 +724,13 @@ class TestImportLibMode:
assert result == "_env_310.tests.test_foo" assert result == "_env_310.tests.test_foo"
def test_resolve_pkg_root_and_module_name( def test_resolve_pkg_root_and_module_name(
self, tmp_path: Path, monkeypatch: MonkeyPatch self, tmp_path: Path, monkeypatch: MonkeyPatch, pytester: Pytester
) -> None: ) -> None:
# Create a directory structure first without __init__.py files. # Create a directory structure first without __init__.py files.
(tmp_path / "src/app/core").mkdir(parents=True) (tmp_path / "src/app/core").mkdir(parents=True)
models_py = tmp_path / "src/app/core/models.py" models_py = tmp_path / "src/app/core/models.py"
models_py.touch() models_py.touch()
with pytest.raises(CouldNotResolvePathError): with pytest.raises(CouldNotResolvePathError):
_ = resolve_pkg_root_and_module_name(models_py) _ = resolve_pkg_root_and_module_name(models_py)
@ -738,6 +746,8 @@ class TestImportLibMode:
# If we add tmp_path to sys.path, src becomes a namespace package. # If we add tmp_path to sys.path, src becomes a namespace package.
monkeypatch.syspath_prepend(tmp_path) monkeypatch.syspath_prepend(tmp_path)
validate_namespace_package(pytester, [tmp_path], ["src.app.core.models"])
assert resolve_pkg_root_and_module_name( assert resolve_pkg_root_and_module_name(
models_py, consider_namespace_packages=True models_py, consider_namespace_packages=True
) == ( ) == (
@ -1119,37 +1129,54 @@ def test_safe_exists(tmp_path: Path) -> None:
class TestNamespacePackages: class TestNamespacePackages:
"""Test import_path support when importing from properly namespace packages.""" """Test import_path support when importing from properly namespace packages."""
@pytest.fixture(autouse=True)
def setup_imports_tracking(self, monkeypatch: MonkeyPatch) -> None:
monkeypatch.setattr(sys, "pytest_namespace_packages_test", [], raising=False)
def setup_directories( def setup_directories(
self, tmp_path: Path, monkeypatch: MonkeyPatch, pytester: Pytester self, tmp_path: Path, monkeypatch: Optional[MonkeyPatch], pytester: Pytester
) -> Tuple[Path, Path]: ) -> Tuple[Path, Path]:
# Use a code to guard against modules being imported more than once.
# This is a safeguard in case future changes break this invariant.
code = dedent(
"""
import sys
imported = getattr(sys, "pytest_namespace_packages_test", [])
assert __name__ not in imported, f"{__name__} already imported"
imported.append(__name__)
sys.pytest_namespace_packages_test = imported
"""
)
# Set up a namespace package "com.company", containing # Set up a namespace package "com.company", containing
# two subpackages, "app" and "calc". # two subpackages, "app" and "calc".
(tmp_path / "src/dist1/com/company/app/core").mkdir(parents=True) (tmp_path / "src/dist1/com/company/app/core").mkdir(parents=True)
(tmp_path / "src/dist1/com/company/app/__init__.py").touch() (tmp_path / "src/dist1/com/company/app/__init__.py").write_text(
(tmp_path / "src/dist1/com/company/app/core/__init__.py").touch() code, encoding="UTF-8"
)
(tmp_path / "src/dist1/com/company/app/core/__init__.py").write_text(
code, encoding="UTF-8"
)
models_py = tmp_path / "src/dist1/com/company/app/core/models.py" models_py = tmp_path / "src/dist1/com/company/app/core/models.py"
models_py.touch() models_py.touch()
(tmp_path / "src/dist2/com/company/calc/algo").mkdir(parents=True) (tmp_path / "src/dist2/com/company/calc/algo").mkdir(parents=True)
(tmp_path / "src/dist2/com/company/calc/__init__.py").touch() (tmp_path / "src/dist2/com/company/calc/__init__.py").write_text(
(tmp_path / "src/dist2/com/company/calc/algo/__init__.py").touch() code, encoding="UTF-8"
algorithms_py = tmp_path / "src/dist2/com/company/calc/algo/algorithms.py"
algorithms_py.touch()
# Validate the namespace package by importing it in a Python subprocess.
r = pytester.runpython_c(
dedent(
f"""
import sys
sys.path.append(r{str(tmp_path / "src/dist1")!r})
sys.path.append(r{str(tmp_path / "src/dist2")!r})
import com.company.app.core.models
import com.company.calc.algo.algorithms
"""
) )
(tmp_path / "src/dist2/com/company/calc/algo/__init__.py").write_text(
code, encoding="UTF-8"
)
algorithms_py = tmp_path / "src/dist2/com/company/calc/algo/algorithms.py"
algorithms_py.write_text(code, encoding="UTF-8")
r = validate_namespace_package(
pytester,
[tmp_path / "src/dist1", tmp_path / "src/dist2"],
["com.company.app.core.models", "com.company.calc.algo.algorithms"],
) )
assert r.ret == 0 assert r.ret == 0
if monkeypatch is not None:
monkeypatch.syspath_prepend(tmp_path / "src/dist1") monkeypatch.syspath_prepend(tmp_path / "src/dist1")
monkeypatch.syspath_prepend(tmp_path / "src/dist2") monkeypatch.syspath_prepend(tmp_path / "src/dist2")
return models_py, algorithms_py return models_py, algorithms_py
@ -1223,11 +1250,76 @@ class TestNamespacePackages:
models_py, algorithms_py = self.setup_directories( models_py, algorithms_py = self.setup_directories(
tmp_path, monkeypatch, pytester tmp_path, monkeypatch, pytester
) )
# Namespace packages must not have an __init__.py at any of its # Namespace packages must not have an __init__.py at its top-level
# directories; if it does, we then fall back to importing just the # directory; if it does, it is no longer a namespace package, and we fall back
# part of the package containing the __init__.py files. # to importing just the part of the package containing the __init__.py files.
(tmp_path / "src/dist1/com/__init__.py").touch() (tmp_path / "src/dist1/com/__init__.py").touch()
# Because of the __init__ file, 'com' is no longer a namespace package:
# 'com.company.app' is importable as a normal module.
# 'com.company.calc' is no longer importable because 'com' is not a namespace package anymore.
r = validate_namespace_package(
pytester,
[tmp_path / "src/dist1", tmp_path / "src/dist2"],
["com.company.app.core.models", "com.company.calc.algo.algorithms"],
)
assert r.ret == 1
r.stderr.fnmatch_lines("*No module named 'com.company.calc*")
pkg_root, module_name = resolve_pkg_root_and_module_name(
models_py, consider_namespace_packages=True
)
assert (pkg_root, module_name) == (
tmp_path / "src/dist1",
"com.company.app.core.models",
)
# dist2/com/company will contain a normal Python package.
pkg_root, module_name = resolve_pkg_root_and_module_name(
algorithms_py, consider_namespace_packages=True
)
assert (pkg_root, module_name) == (
tmp_path / "src/dist2/com/company",
"calc.algo.algorithms",
)
def test_detect_meta_path(
self,
tmp_path: Path,
monkeypatch: MonkeyPatch,
pytester: Pytester,
) -> None:
"""
resolve_pkg_root_and_module_name() considers sys.meta_path when importing namespace packages.
Regression test for #12112.
"""
class CustomImporter(importlib.abc.MetaPathFinder):
"""
Imports the module name "com" as a namespace package.
This ensures our namespace detection considers sys.meta_path, which is important
to support all possible ways a module can be imported (for example editable installs).
"""
def find_spec(
self, name: str, path: Any = None, target: Any = None
) -> Optional[importlib.machinery.ModuleSpec]:
if name == "com":
spec = importlib.machinery.ModuleSpec("com", loader=None)
spec.submodule_search_locations = [str(com_root_2), str(com_root_1)]
return spec
return None
# Setup directories without configuring sys.path.
models_py, algorithms_py = self.setup_directories(
tmp_path, monkeypatch=None, pytester=pytester
)
com_root_1 = tmp_path / "src/dist1/com"
com_root_2 = tmp_path / "src/dist2/com"
# Because the namespace package is not setup correctly, we cannot resolve it as a namespace package.
pkg_root, module_name = resolve_pkg_root_and_module_name( pkg_root, module_name = resolve_pkg_root_and_module_name(
models_py, consider_namespace_packages=True models_py, consider_namespace_packages=True
) )
@ -1235,3 +1327,107 @@ class TestNamespacePackages:
tmp_path / "src/dist1/com/company", tmp_path / "src/dist1/com/company",
"app.core.models", "app.core.models",
) )
# Insert our custom importer, which will recognize the "com" directory as a namespace package.
new_meta_path = [CustomImporter(), *sys.meta_path]
monkeypatch.setattr(sys, "meta_path", new_meta_path)
# Now we should be able to resolve the path as namespace package.
pkg_root, module_name = resolve_pkg_root_and_module_name(
models_py, consider_namespace_packages=True
)
assert (pkg_root, module_name) == (
tmp_path / "src/dist1",
"com.company.app.core.models",
)
@pytest.mark.parametrize("insert", [True, False])
def test_full_ns_packages_without_init_files(
self, pytester: Pytester, tmp_path: Path, monkeypatch: MonkeyPatch, insert: bool
) -> None:
(tmp_path / "src/dist1/ns/b/app/bar/test").mkdir(parents=True)
(tmp_path / "src/dist1/ns/b/app/bar/m.py").touch()
if insert:
# The presence of this __init__.py is not a problem, ns.b.app is still part of the namespace package.
(tmp_path / "src/dist1/ns/b/app/__init__.py").touch()
(tmp_path / "src/dist2/ns/a/core/foo/test").mkdir(parents=True)
(tmp_path / "src/dist2/ns/a/core/foo/m.py").touch()
# Validate the namespace package by importing it in a Python subprocess.
r = validate_namespace_package(
pytester,
[tmp_path / "src/dist1", tmp_path / "src/dist2"],
["ns.b.app.bar.m", "ns.a.core.foo.m"],
)
assert r.ret == 0
monkeypatch.syspath_prepend(tmp_path / "src/dist1")
monkeypatch.syspath_prepend(tmp_path / "src/dist2")
assert resolve_pkg_root_and_module_name(
tmp_path / "src/dist1/ns/b/app/bar/m.py", consider_namespace_packages=True
) == (tmp_path / "src/dist1", "ns.b.app.bar.m")
assert resolve_pkg_root_and_module_name(
tmp_path / "src/dist2/ns/a/core/foo/m.py", consider_namespace_packages=True
) == (tmp_path / "src/dist2", "ns.a.core.foo.m")
def test_is_importable(pytester: Pytester) -> None:
pytester.syspathinsert()
path = pytester.path / "bar/foo.py"
path.parent.mkdir()
path.touch()
assert is_importable("bar.foo", path) is True
# Ensure that the module that can be imported points to the path we expect.
path = pytester.path / "some/other/path/bar/foo.py"
path.mkdir(parents=True, exist_ok=True)
assert is_importable("bar.foo", path) is False
# Paths containing "." cannot be imported.
path = pytester.path / "bar.x/__init__.py"
path.parent.mkdir()
path.touch()
assert is_importable("bar.x", path) is False
# Pass starting with "." denote relative imports and cannot be checked using is_importable.
path = pytester.path / ".bar.x/__init__.py"
path.parent.mkdir()
path.touch()
assert is_importable(".bar.x", path) is False
def test_compute_module_name(tmp_path: Path) -> None:
assert compute_module_name(tmp_path, tmp_path) is None
assert compute_module_name(Path(), Path()) is None
assert compute_module_name(tmp_path, tmp_path / "mod.py") == "mod"
assert compute_module_name(tmp_path, tmp_path / "src/app/bar") == "src.app.bar"
assert compute_module_name(tmp_path, tmp_path / "src/app/bar.py") == "src.app.bar"
assert (
compute_module_name(tmp_path, tmp_path / "src/app/bar/__init__.py")
== "src.app.bar"
)
def validate_namespace_package(
pytester: Pytester, paths: Sequence[Path], modules: Sequence[str]
) -> RunResult:
"""
Validate that a Python namespace package is set up correctly.
In a sub interpreter, add 'paths' to sys.path and attempt to import the given modules.
In this module many tests configure a set of files as a namespace package, this function
is used as sanity check that our files are configured correctly from the point of view of Python.
"""
lines = [
"import sys",
# Configure sys.path.
*[f"sys.path.append(r{str(x)!r})" for x in paths],
# Imports.
*[f"import {x}" for x in modules],
]
return pytester.runpython_c("\n".join(lines))