diff --git a/pyproject.toml b/pyproject.toml index 046a81b4..cb53d2a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ dependencies = [ "click>=8.0.0,<9", "colorama>=0.4.6; sys_platform == 'win32'", + "importlib-metadata>=4.13.0; python_version < '3.11'", "requirements-parser>=0.11.0,<1", "tomli>=2.0.1; python_version < '3.11'", ] @@ -160,6 +161,8 @@ select = [ "PT", # flake8-simplify "SIM", + # flake8-tidy-imports + "TID", # flake8-type-checking "TCH", # flake8-use-pathlib @@ -188,6 +191,9 @@ ignore = [ "E501", ] +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"importlib.metadata".msg = "Import from `deptry.compat.importlib_metadata` instead." + [tool.ruff.lint.flake8-type-checking] strict = true @@ -196,4 +202,5 @@ known-first-party = ["deptry"] required-imports = ["from __future__ import annotations"] [tool.ruff.lint.per-file-ignores] +"compat.py" = ["TID251"] "tests/*" = ["S101", "S603"] diff --git a/python/deptry/cli.py b/python/deptry/cli.py index f36543f7..25c9a6b0 100644 --- a/python/deptry/cli.py +++ b/python/deptry/cli.py @@ -4,12 +4,12 @@ import shutil import sys from collections import defaultdict -from importlib.metadata import version from pathlib import Path from typing import TYPE_CHECKING import click +from deptry.compat import importlib_metadata from deptry.config import read_configuration_from_pyproject_toml from deptry.core import Core @@ -102,7 +102,7 @@ def display_deptry_version(ctx: click.Context, _param: click.Parameter, value: b if not value or ctx.resilient_parsing: return None - click.echo(f"deptry {version('deptry')}") + click.echo(f"deptry {importlib_metadata.version('deptry')}") ctx.exit() diff --git a/python/deptry/compat.py b/python/deptry/compat.py new file mode 100644 index 00000000..a39ed0b1 --- /dev/null +++ b/python/deptry/compat.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +import sys + +# Although `importlib.metadata` is available before Python 3.11, we benefit from using 
`importlib_metadata` package +# on Python < 3.11 because it exposes `packages_distributions` function that we use in the codebase. Python 3.10 also +# has this function, but there are features we need in it that are only available in Python >= 3.11. So by using +# `importlib_metadata`, we benefit from those features for all Python versions we support. +if sys.version_info >= (3, 11): + import importlib.metadata as importlib_metadata +else: + import importlib_metadata # pragma: no cover + + +__all__ = ("importlib_metadata",) diff --git a/python/deptry/dependency.py b/python/deptry/dependency.py index 0939da76..4b8fcfc5 100644 --- a/python/deptry/dependency.py +++ b/python/deptry/dependency.py @@ -1,14 +1,12 @@ from __future__ import annotations import logging -import re -from contextlib import suppress -from importlib import metadata from typing import TYPE_CHECKING +from deptry.distribution import get_packages_from_distribution + if TYPE_CHECKING: from collections.abc import Sequence - from importlib.metadata import Distribution from pathlib import Path @@ -21,7 +19,6 @@ class Dependency: name (str): The name of the dependency. definition_file (Path): The path to the file defining the dependency, e.g. 'pyproject.toml'. and that can be used to create a variant of the package with a set of extra functionalities. - found (bool): Indicates if the dependency has been found in the environment. top_levels (set[str]): The top-level module names associated with the dependency. 
""" @@ -31,16 +28,11 @@ def __init__( definition_file: Path, module_names: Sequence[str] | None = None, ) -> None: - distribution = self.find_distribution(name) - self.name = name self.definition_file = definition_file - self.found = distribution is not None - self.top_levels = self._get_top_levels(name, distribution, module_names) + self.top_levels = self._get_top_levels(name, module_names) - def _get_top_levels( - self, name: str, distribution: Distribution | None, module_names: Sequence[str] | None - ) -> set[str]: + def _get_top_levels(self, name: str, module_names: Sequence[str] | None) -> set[str]: """ Get the top-level module names for a dependency. They are searched for in the following order: 1. If `module_names` is defined, simply use those as the top-level modules. @@ -49,22 +41,16 @@ def _get_top_levels( Args: name: The name of the dependency. - distribution: The metadata distribution of the package. module_names: If this is given, use these as the top-level modules instead of searching for them in the metadata. """ if module_names is not None: return set(module_names) - if distribution is not None: - with suppress(FileNotFoundError): - return self._get_top_level_module_names_from_top_level_txt(distribution) - - with suppress(FileNotFoundError): - return self._get_top_level_module_names_from_record_file(distribution) + if packages := get_packages_from_distribution(name): + return packages - # No metadata or other configuration has been found. As a fallback - # we'll guess the name. + # No metadata or other configuration has been found. As a fallback we'll guess the name. module_name = name.replace("-", "_").lower() logging.warning( "Assuming the corresponding module name of package %r is %r. Install the package or configure a" @@ -79,56 +65,3 @@ def __repr__(self) -> str: def __str__(self) -> str: return f"Dependency '{self.name}' with top-levels: {self.top_levels}." 
- - @staticmethod - def find_distribution(name: str) -> Distribution | None: - try: - return metadata.distribution(name) - except metadata.PackageNotFoundError: - return None - - @staticmethod - def _get_top_level_module_names_from_top_level_txt(distribution: Distribution) -> set[str]: - """ - top-level.txt is a metadata file added by setuptools that looks as follows: - - 610faff656c4cfcbb4a3__mypyc - _black_version - black - blackd - blib2to3 - - This function extracts these names, if a top-level.txt file exists. - """ - metadata_top_levels = distribution.read_text("top_level.txt") - if metadata_top_levels is None: - raise FileNotFoundError("top_level.txt") - - return {x for x in metadata_top_levels.splitlines() if x} - - @staticmethod - def _get_top_level_module_names_from_record_file(distribution: Distribution) -> set[str]: - """ - Get the top-level module names from the RECORD file, whose contents usually look as follows: - - ... - ../../../bin/black,sha256=,247 - __pycache__/_black_version.cpython-311.pyc,, - _black_version.py,sha256=,19 - black/trans.cpython-39-darwin.so,sha256= - black/trans.py,sha256= - blackd/__init__.py,sha256= - blackd/__main__.py,sha256= - ... - - So if no file top-level.txt is provided, we can try and extract top-levels from this file, in - this case _black_version, black, and blackd. 
- """ - metadata_records = distribution.read_text("RECORD") - - if metadata_records is None: - raise FileNotFoundError("RECORD") - - matches = re.finditer(r"^(?!__)([a-zA-Z0-9-_]+)(?:/|\.py,)", metadata_records, re.MULTILINE) - - return {x.group(1) for x in matches} diff --git a/python/deptry/distribution.py b/python/deptry/distribution.py new file mode 100644 index 00000000..979d99c7 --- /dev/null +++ b/python/deptry/distribution.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import re +from collections import defaultdict +from functools import cache, lru_cache + +from deptry.compat import importlib_metadata + + +@cache +def normalize_distribution_name(name: str) -> str: + """ + Apply name normalization on distribution name, per https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization. + """ + return re.sub(r"[-_.]+", "-", name).lower() + + +@lru_cache(maxsize=1) +def get_packages_to_normalized_distributions_mapping() -> dict[str, set[str]]: + """ + Return a mapping of top-level packages to their normalized distributions. + Cache ensures that we only build this mapping once, since it should not change during the invocation of deptry. + """ + return { + package: {normalize_distribution_name(distribution) for distribution in distributions} + for package, distributions in importlib_metadata.packages_distributions().items() + } + + +@lru_cache(maxsize=1) +def get_normalized_distributions_to_packages_mapping() -> dict[str, set[str]]: + """ + Return a mapping of normalized distributions to their top-level packages. + Cache ensures that we only build this mapping once, since it should not change during the invocation of deptry. 
+ """ + distributions_packages: dict[str, set[str]] = defaultdict(set) + + for package, distributions in get_packages_to_normalized_distributions_mapping().items(): + for distribution in distributions: + distributions_packages[distribution].add(package) + + return dict(distributions_packages) + + +def get_distributions_from_package(name: str) -> set[str] | None: + """ + Retrieve the normalized distributions that provide the top-level package, if any. + """ + return get_packages_to_normalized_distributions_mapping().get(name) + + +def get_packages_from_distribution(name: str) -> set[str] | None: + """ + Normalize the distribution name and retrieve the top-level packages it provides, if any. + """ + return get_normalized_distributions_to_packages_mapping().get(normalize_distribution_name(name)) diff --git a/python/deptry/module.py b/python/deptry/module.py index c6b4c7aa..e4221a67 100644 --- a/python/deptry/module.py +++ b/python/deptry/module.py @@ -2,9 +2,10 @@ import logging from dataclasses import dataclass, field -from importlib.metadata import PackageNotFoundError, metadata from typing import TYPE_CHECKING +from deptry.compat import importlib_metadata + if TYPE_CHECKING: from deptry.dependency import Dependency from deptry.imports.location import Location @@ -116,8 +117,8 @@ def _get_package_name_from_metadata(self) -> str | None: Most packages simply have a field called "Name" in their metadata. This method extracts that field. 
""" try: - name: str = metadata(self.name)["Name"] - except PackageNotFoundError: + name: str = importlib_metadata.metadata(self.name)["Name"] + except importlib_metadata.PackageNotFoundError: return None else: return name diff --git a/tests/fixtures/pep_621_project/pyproject.toml b/tests/fixtures/pep_621_project/pyproject.toml index eea51856..882a0992 100644 --- a/tests/fixtures/pep_621_project/pyproject.toml +++ b/tests/fixtures/pep_621_project/pyproject.toml @@ -14,7 +14,8 @@ dependencies = [ [project.optional-dependencies] dev = [ - "black==24.10.0", + # Allows testing that package normalization is correctly applied, as the canonical name is `importlib-metadata`. + "Importlib_Metadata==8.5.0", "mypy==1.13.0", ] test = ["pytest==8.3.3"] diff --git a/tests/fixtures/pep_621_project/src/main.py b/tests/fixtures/pep_621_project/src/main.py index c998f9a4..a456e690 100644 --- a/tests/fixtures/pep_621_project/src/main.py +++ b/tests/fixtures/pep_621_project/src/main.py @@ -2,8 +2,8 @@ from pathlib import Path import asyncio -import black import click +import importlib_metadata import mkdocs import mkdocs_material import packaging diff --git a/tests/functional/cli/test_cli_pep_621.py b/tests/functional/cli/test_cli_pep_621.py index 059fa5dd..755afaea 100644 --- a/tests/functional/cli/test_cli_pep_621.py +++ b/tests/functional/cli/test_cli_pep_621.py @@ -56,9 +56,12 @@ def test_cli_with_pep_621(pip_venv_factory: PipVenvFactory) -> None: "location": {"file": "pyproject.toml", "line": None, "column": None}, }, { - "error": {"code": "DEP004", "message": "'black' imported but declared as a dev dependency"}, - "module": "black", - "location": {"file": str(Path("src/main.py")), "line": 5, "column": 8}, + "error": { + "code": "DEP004", + "message": "'importlib_metadata' imported but declared as a dev dependency", + }, + "module": "importlib_metadata", + "location": {"file": str(Path("src/main.py")), "line": 6, "column": 8}, }, { "error": {"code": "DEP004", "message": "'mkdocs' 
imported but declared as a dev dependency"}, diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index db7428db..7ed84cbb 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -181,7 +181,7 @@ def test_display_deptry_version(capsys: pytest.CaptureFixture[str]) -> None: ctx = mock.Mock(resilient_parsing=False, spec=click.Context) param = mock.Mock(spec=click.Parameter) - with patch("deptry.cli.version", return_value="1.2.3"): + with patch("deptry.cli.importlib_metadata.version", return_value="1.2.3"): display_deptry_version(ctx, param, True) assert capsys.readouterr().out == "deptry 1.2.3\n" diff --git a/tests/unit/test_dependency.py b/tests/unit/test_dependency.py index a2b41cef..6726fe29 100644 --- a/tests/unit/test_dependency.py +++ b/tests/unit/test_dependency.py @@ -1,6 +1,5 @@ from __future__ import annotations -from importlib.metadata import PackageNotFoundError from pathlib import Path from unittest.mock import patch @@ -21,20 +20,12 @@ def test_create_default_top_level_if_metadata_not_found() -> None: assert dependency.top_levels == {"foo_bar"} -def test_read_top_level_from_top_level_txt() -> None: +def test_get_top_levels_from_distribution() -> None: """ - Read the top-levels.txt file + Get the packages from distribution. """ - class MockDistribution: - def __init__(self) -> None: - pass - - def read_text(self, file_name: str) -> str: - return "foo\nbar" - - with patch("deptry.dependency.metadata.distribution") as mock: - mock.return_value = MockDistribution() + with patch("deptry.dependency.get_packages_from_distribution", return_value={"foo", "bar"}): dependency = Dependency("Foo-bar", Path("pyproject.toml")) assert dependency.name == "Foo-bar" @@ -42,59 +33,25 @@ def read_text(self, file_name: str) -> str: assert dependency.top_levels == {"foo", "bar"} -def test_read_top_level_from_record() -> None: - """ - Verify that if top-level.txt not found, an attempt is made to extract top-level module names from - the metadata RECORD file. 
- """ - - class MockDistribution: - def __init__(self) -> None: - pass - - def read_text(self, file_name: str) -> str | None: - if file_name == "RECORD": - return """\ -../../../bin/black,sha256=,247 -__pycache__/_black_version.cpython-311.pyc,, -_black_version.py,sha256=,19 -black/trans.cpython-39-darwin.so,sha256= -black/trans.py,sha256= -blackd/__init__.py,sha256= -blackd/__main__.py,sha256= - """ - return None - - with patch("deptry.dependency.metadata.distribution") as mock: - mock.return_value = MockDistribution() - dependency = Dependency("Foo-bar", Path("pyproject.toml")) - - assert dependency.name == "Foo-bar" - assert dependency.definition_file == Path("pyproject.toml") - assert dependency.top_levels == {"_black_version", "black", "blackd"} - - -def test_read_top_level_from_predefined() -> None: +def test_get_top_levels_from_predefined() -> None: """ - Verify that if there are predefined top-level module names it takes - precedence over other lookup methods. + Verify that if there are predefined top-level module names they take precedence over other lookup methods. """ - with patch("deptry.dependency.metadata.distribution") as mock: + with patch("deptry.dependency.get_packages_from_distribution") as mock: dependency = Dependency("Foo-bar", Path("pyproject.toml"), module_names=["foo"]) assert dependency.name == "Foo-bar" assert dependency.definition_file == Path("pyproject.toml") assert dependency.top_levels == {"foo"} - mock.return_value.read_text.assert_not_called() + mock.assert_not_called() -def test_not_predefined_and_not_installed() -> None: +def test_get_top_levels_fallback() -> None: """ Use the fallback option of translating the package name. 
""" - with patch("deptry.dependency.metadata.distribution") as mock: - mock.side_effect = PackageNotFoundError + with patch("deptry.dependency.get_packages_from_distribution", return_value=None): dependency = Dependency("Foo-bar", Path("pyproject.toml")) assert dependency.name == "Foo-bar" diff --git a/tests/unit/test_distribution.py b/tests/unit/test_distribution.py new file mode 100644 index 00000000..cbbaba92 --- /dev/null +++ b/tests/unit/test_distribution.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from deptry.distribution import ( + get_distributions_from_package, + get_normalized_distributions_to_packages_mapping, + get_packages_from_distribution, + get_packages_to_normalized_distributions_mapping, + normalize_distribution_name, +) + + +@pytest.mark.parametrize( + "name", + [ + "friendly-bard", + "Friendly-Bard", + "FRIENDLY-BARD", + "friendly.bard", + "friendly_bard", + "friendly--bard", + "FrIeNdLy-._.-bArD", + ], +) +def test_normalize_distribution_name(name: str) -> None: + assert normalize_distribution_name(name) == "friendly-bard" + + +def test_get_packages_to_normalized_distributions_mapping() -> None: + # Clear cache before calling the function, as it is also populated during testing. + get_packages_to_normalized_distributions_mapping.cache_clear() + + with patch( + "deptry.distribution.importlib_metadata.packages_distributions", + return_value={ + "requests": ["requests"], + "charset_normalizer": ["Charset_Normalizer"], + "bs4": ["beautifulsoup4"], + "_distutils_hack": ["setuptools"], + "pkg_resources": ["setuptools"], + "setuptools": ["setuptools"], + }, + ) as mock_packages_distributions: + normalized_packages_distributions = get_packages_to_normalized_distributions_mapping() + + # Call function a second time, to ensure that we only call `packages_distributions` once. 
+ get_packages_to_normalized_distributions_mapping() + + # Clear cache after calling the function to avoid keeping our mocked values, in case other tests depend on it. + get_packages_to_normalized_distributions_mapping.cache_clear() + + assert normalized_packages_distributions == { + "requests": {"requests"}, + "charset_normalizer": {"charset-normalizer"}, + "bs4": {"beautifulsoup4"}, + "_distutils_hack": {"setuptools"}, + "pkg_resources": {"setuptools"}, + "setuptools": {"setuptools"}, + } + mock_packages_distributions.assert_called_once() + + +def test_get_normalized_distributions_to_packages_mapping() -> None: + # Clear cache before calling the function, as it is also populated during testing. + get_normalized_distributions_to_packages_mapping.cache_clear() + + with patch( + "deptry.distribution.get_packages_to_normalized_distributions_mapping", + return_value={ + "requests": {"requests"}, + "charset_normalizer": {"charset-normalizer"}, + "bs4": {"beautifulsoup4"}, + "_distutils_hack": {"setuptools"}, + "pkg_resources": {"setuptools"}, + "setuptools": {"setuptools"}, + }, + ) as mock_packages_distributions: + distributions_packages = get_normalized_distributions_to_packages_mapping() + + # Call function a second time, to ensure that the mocked mapping function is only called once. + get_normalized_distributions_to_packages_mapping() + + # Clear cache after calling the function to avoid keeping our mocked values, in case other tests depend on it. 
+ get_normalized_distributions_to_packages_mapping.cache_clear() + + assert distributions_packages == { + "requests": {"requests"}, + "charset-normalizer": {"charset_normalizer"}, + "beautifulsoup4": {"bs4"}, + "setuptools": {"_distutils_hack", "pkg_resources", "setuptools"}, + } + mock_packages_distributions.assert_called_once() + + +def test_get_distributions_from_package() -> None: + with patch( + "deptry.distribution.get_packages_to_normalized_distributions_mapping", + return_value={ + "bar": {"foo-bar"}, + "foo": {"foo-bar", "foo"}, + }, + ): + assert get_distributions_from_package("foo") == {"foo-bar", "foo"} + + +def test_get_packages_from_distribution() -> None: + with patch( + "deptry.distribution.get_normalized_distributions_to_packages_mapping", return_value={"foo-bar": {"bar", "foo"}} + ): + assert get_packages_from_distribution("foo_Bar") == {"bar", "foo"} diff --git a/uv.lock b/uv.lock index 9eb3eddc..8d2942ef 100644 --- a/uv.lock +++ b/uv.lock @@ -199,7 +199,7 @@ wheels = [ [package.optional-dependencies] toml = [ - { name = "tomli", marker = "python_full_version <= '3.11'" }, + { name = "tomli", marker = "python_full_version <= '3.11' and python_full_version >= '3.9'" }, ] [[package]] @@ -209,8 +209,9 @@ source = { editable = "." 
} dependencies = [ { name = "click" }, { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.11' and python_full_version >= '3.9'" }, { name = "requirements-parser" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11' and python_full_version >= '3.9'" }, ] [package.dev-dependencies] @@ -233,6 +234,7 @@ typing = [ requires-dist = [ { name = "click", specifier = ">=8.0.0,<9" }, { name = "colorama", marker = "sys_platform == 'win32'", specifier = ">=0.4.6" }, + { name = "importlib-metadata", marker = "python_full_version < '3.11'", specifier = ">=4.13.0" }, { name = "requirements-parser", specifier = ">=0.11.0,<1" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.1" }, ] @@ -330,7 +332,7 @@ name = "markdown" version = "3.7" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10' and python_full_version >= '3.9'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/54/28/3af612670f82f4c056911fbbbb42760255801b3068c48de792d354ff4472/markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2", size = 357086 } wheels = [ @@ -422,7 +424,7 @@ dependencies = [ { name = "click" }, { name = "colorama", marker = "platform_system == 'Windows'" }, { name = "ghp-import" }, - { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10' and python_full_version >= '3.9'" }, { name = "jinja2" }, { name = "markdown" }, { name = "markupsafe" }, @@ -444,7 +446,7 @@ name = "mkdocs-get-deps" version = "0.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "importlib-metadata", marker = 
"python_full_version < '3.10'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10' and python_full_version >= '3.9'" }, { name = "mergedeep" }, { name = "platformdirs" }, { name = "pyyaml" }, @@ -491,7 +493,7 @@ version = "1.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mypy-extensions" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11' and python_full_version >= '3.9'" }, { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e8/21/7e9e523537991d145ab8a0a2fd98548d67646dc2aaaf6091c31ad883e7c1/mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e", size = 3152532 } @@ -681,11 +683,11 @@ version = "8.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' and python_full_version >= '3.9'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11' and python_full_version >= '3.9'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/8b/6c/62bbd536103af674e227c41a8f3dcd022d591f6eed5facb5a0f31ee33bbc/pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", size = 1442487 } wheels = [