Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: use package_distributions to get top levels #861

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ classifiers = [
dependencies = [
"click>=8.0.0,<9",
"colorama>=0.4.6; sys_platform == 'win32'",
"importlib-metadata>=4.13.0; python_version < '3.11'",
"tomli>=2.0.1; python_version < '3.11'"
]

Expand Down Expand Up @@ -154,6 +155,8 @@ select = [
"PT",
# flake8-simplify
"SIM",
# flake8-tidy-imports
"TID",
# flake8-type-checking
"TCH",
# flake8-use-pathlib
Expand Down Expand Up @@ -182,6 +185,9 @@ ignore = [
"E501",
]

[tool.ruff.lint.flake8-tidy-imports.banned-api]
"importlib.metadata".msg = "Import from `deptry.compat.importlib_metadata` instead."

[tool.ruff.lint.flake8-type-checking]
strict = true

Expand All @@ -190,4 +196,5 @@ known-first-party = ["deptry"]
required-imports = ["from __future__ import annotations"]

[tool.ruff.lint.per-file-ignores]
"compat.py" = ["TID251"]
"tests/*" = ["S101", "S603"]
4 changes: 2 additions & 2 deletions python/deptry/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
import shutil
import sys
from collections import defaultdict
from importlib.metadata import version
from pathlib import Path
from typing import TYPE_CHECKING

import click

from deptry.compat import importlib_metadata
from deptry.config import read_configuration_from_pyproject_toml
from deptry.core import Core

Expand Down Expand Up @@ -102,7 +102,7 @@ def display_deptry_version(ctx: click.Context, _param: click.Parameter, value: b
if not value or ctx.resilient_parsing:
return None

click.echo(f'deptry {version("deptry")}')
click.echo(f'deptry {importlib_metadata.version("deptry")}')
ctx.exit()


Expand Down
15 changes: 15 additions & 0 deletions python/deptry/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from __future__ import annotations

import sys

# Although `importlib.metadata` is available before Python 3.11, we benefit from using the `importlib_metadata` package
# on Python < 3.11 because it exposes the `packages_distributions` function that we use in the codebase. Python 3.10
# also has this function, but some features we need from it are only available in Python >= 3.11. So by using
# `importlib_metadata`, we benefit from those features on all the Python versions we support.
if sys.version_info >= (3, 11):
import importlib.metadata as importlib_metadata
else:
import importlib_metadata # pragma: no cover


__all__ = ("importlib_metadata",)
81 changes: 7 additions & 74 deletions python/deptry/dependency.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from __future__ import annotations

import logging
import re
from contextlib import suppress
from importlib import metadata
from typing import TYPE_CHECKING

from deptry.distribution import get_packages_from_distribution

if TYPE_CHECKING:
from collections.abc import Sequence
from importlib.metadata import Distribution
from pathlib import Path


Expand All @@ -21,7 +19,6 @@ class Dependency:
name (str): The name of the dependency.
definition_file (Path): The path to the file defining the dependency, e.g. 'pyproject.toml'.
and that can be used to create a variant of the package with a set of extra functionalities.
found (bool): Indicates if the dependency has been found in the environment.
top_levels (set[str]): The top-level module names associated with the dependency.
"""

Expand All @@ -31,16 +28,11 @@ def __init__(
definition_file: Path,
module_names: Sequence[str] | None = None,
) -> None:
distribution = self.find_distribution(name)

self.name = name
self.definition_file = definition_file
self.found = distribution is not None
self.top_levels = self._get_top_levels(name, distribution, module_names)
self.top_levels = self._get_top_levels(name, module_names)

def _get_top_levels(
self, name: str, distribution: Distribution | None, module_names: Sequence[str] | None
) -> set[str]:
def _get_top_levels(self, name: str, module_names: Sequence[str] | None) -> set[str]:
"""
Get the top-level module names for a dependency. They are searched for in the following order:
1. If `module_names` is defined, simply use those as the top-level modules.
Expand All @@ -49,22 +41,16 @@ def _get_top_levels(

Args:
name: The name of the dependency.
distribution: The metadata distribution of the package.
module_names: If this is given, use these as the top-level modules instead of
searching for them in the metadata.
"""
if module_names is not None:
return set(module_names)

if distribution is not None:
with suppress(FileNotFoundError):
return self._get_top_level_module_names_from_top_level_txt(distribution)

with suppress(FileNotFoundError):
return self._get_top_level_module_names_from_record_file(distribution)
if distributions := get_packages_from_distribution(self.name):
return distributions

# No metadata or other configuration has been found. As a fallback
# we'll guess the name.
# No metadata or other configuration has been found. As a fallback we'll guess the name.
module_name = name.replace("-", "_").lower()
logging.warning(
"Assuming the corresponding module name of package %r is %r. Install the package or configure a"
Expand All @@ -79,56 +65,3 @@ def __repr__(self) -> str:

def __str__(self) -> str:
return f"Dependency '{self.name}' with top-levels: {self.top_levels}."

@staticmethod
def find_distribution(name: str) -> Distribution | None:
try:
return metadata.distribution(name)
except metadata.PackageNotFoundError:
return None

@staticmethod
def _get_top_level_module_names_from_top_level_txt(distribution: Distribution) -> set[str]:
"""
top-level.txt is a metadata file added by setuptools that looks as follows:

610faff656c4cfcbb4a3__mypyc
_black_version
black
blackd
blib2to3

This function extracts these names, if a top-level.txt file exists.
"""
metadata_top_levels = distribution.read_text("top_level.txt")
if metadata_top_levels is None:
raise FileNotFoundError("top_level.txt")

return {x for x in metadata_top_levels.splitlines() if x}

@staticmethod
def _get_top_level_module_names_from_record_file(distribution: Distribution) -> set[str]:
"""
Get the top-level module names from the RECORD file, whose contents usually look as follows:

...
../../../bin/black,sha256=<HASH>,247
__pycache__/_black_version.cpython-311.pyc,,
_black_version.py,sha256=<HASH>,19
black/trans.cpython-39-darwin.so,sha256=<HASH>
black/trans.py,sha256=<HASH>
blackd/__init__.py,sha256=<HASH>
blackd/__main__.py,sha256=<HASH>
...

So if no file top-level.txt is provided, we can try and extract top-levels from this file, in
this case _black_version, black, and blackd.
"""
metadata_records = distribution.read_text("RECORD")

if metadata_records is None:
raise FileNotFoundError("RECORD")

matches = re.finditer(r"^(?!__)([a-zA-Z0-9-_]+)(?:/|\.py,)", metadata_records, re.MULTILINE)

return {x.group(1) for x in matches}
56 changes: 56 additions & 0 deletions python/deptry/distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from __future__ import annotations

import re
from collections import defaultdict
from functools import lru_cache

from deptry.compat import importlib_metadata


@lru_cache(maxsize=None)
def normalize_distribution_name(name: str) -> str:
    """
    Apply name normalization on distribution name, per
    https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization.

    Every run of `-`, `_` and `.` characters is collapsed into a single `-`, and the result is lowercased, so that
    for instance `Importlib_Metadata` and `importlib.metadata` both become `importlib-metadata`.

    Results are memoized through `lru_cache`.

    Args:
        name: The distribution name to normalize.

    Returns:
        The normalized distribution name.
    """
    return re.sub(r"[-_.]+", "-", name).lower()


@lru_cache(maxsize=1)
def get_packages_normalized_distributions() -> dict[str, set[str]]:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first glance it is a bit confusing to understand the difference between this function and get_normalized_distributions_packages. Maybe we can make their names slightly more verbose? Or add a small example output in their docstring?

Suggested change
def get_packages_normalized_distributions() -> dict[str, set[str]]:
def get_packages_to_normalized_distributions_mapping() -> dict[str, set[str]]:

"""
Return a mapping of top-level packages to the normalized names of the distributions that provide them, e.g.
`{"yaml": {"pyyaml"}, "pkg_resources": {"setuptools"}}`.
The cache ensures that we only build this mapping once, since it should not change during the invocation of deptry.
"""
return {
package: {normalize_distribution_name(distribution) for distribution in distributions}
for package, distributions in importlib_metadata.packages_distributions().items()
}


@lru_cache(maxsize=1)
def get_normalized_distributions_packages() -> dict[str, set[str]]:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def get_normalized_distributions_packages() -> dict[str, set[str]]:
def get_normalized_distributions_to_packages_mapping() -> dict[str, set[str]]:

"""
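Return a mapping of normalized distribution names to the top-level packages they provide, e.g.
`{"pyyaml": {"_yaml", "yaml"}, "setuptools": {"pkg_resources", "setuptools"}}`.
The cache ensures that we only build this mapping once, since it should not change during the invocation of deptry.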
"""
distributions_packages: dict[str, set[str]] = defaultdict(set)

for package, distributions in get_packages_normalized_distributions().items():
for distribution in distributions:
distributions_packages[distribution].add(package)

return dict(distributions_packages)


def get_distributions_from_package(name: str) -> set[str] | None:
    """
    Retrieve the normalized names of the distributions that provide the given top-level package, if any.

    Args:
        name: The top-level package name. It is looked up as is, no normalization is applied to it.

    Returns:
        The set of normalized distribution names providing the package, or `None` if the package is not present in
        the packages to distributions mapping.
    """
    return get_packages_normalized_distributions().get(name)


def get_packages_from_distribution(name: str) -> set[str] | None:
    """
    Normalize the distribution and retrieve the packages it provides, if any.

    Args:
        name: The distribution name. It does not need to be normalized, as normalization is applied before the lookup.

    Returns:
        The set of top-level packages provided by the distribution, or `None` if the normalized name is not present
        in the distributions to packages mapping.
    """
    normalized_name = normalize_distribution_name(name)
    return get_normalized_distributions_packages().get(normalized_name)
7 changes: 4 additions & 3 deletions python/deptry/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import logging
from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError, metadata
from typing import TYPE_CHECKING

from deptry.compat import importlib_metadata

if TYPE_CHECKING:
from deptry.dependency import Dependency
from deptry.imports.location import Location
Expand Down Expand Up @@ -116,8 +117,8 @@ def _get_package_name_from_metadata(self) -> str | None:
Most packages simply have a field called "Name" in their metadata. This method extracts that field.
"""
try:
name: str = metadata(self.name)["Name"]
except PackageNotFoundError:
name: str = importlib_metadata.metadata(self.name)["Name"]
except importlib_metadata.PackageNotFoundError:
return None
else:
return name
Expand Down
3 changes: 2 additions & 1 deletion tests/data/pep_621_project/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ dependencies = [

[project.optional-dependencies]
dev = [
"black==22.10.0",
# Allows testing that package normalization is correctly applied, as the canonical name is `importlib-metadata`.
"Importlib_Metadata==8.5.0",
"mypy==0.982",
]
test = ["pytest==7.2.0"]
Expand Down
2 changes: 1 addition & 1 deletion tests/data/pep_621_project/src/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from os import chdir, walk
from pathlib import Path

import black
import importlib_metadata
import click
import white as w
from urllib3 import contrib
Expand Down
7 changes: 5 additions & 2 deletions tests/functional/cli/test_cli_pep_621.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,11 @@ def test_cli_with_pep_621(pip_venv_factory: PipVenvFactory) -> None:
"location": {"file": "pyproject.toml", "line": None, "column": None},
},
{
"error": {"code": "DEP004", "message": "'black' imported but declared as a dev dependency"},
"module": "black",
"error": {
"code": "DEP004",
"message": "'importlib_metadata' imported but declared as a dev dependency",
},
"module": "importlib_metadata",
"location": {"file": str(Path("src/main.py")), "line": 4, "column": 8},
},
{
Expand Down
38 changes: 37 additions & 1 deletion tests/unit/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
import re
from typing import TYPE_CHECKING
from unittest import mock
from unittest.mock import patch

import click
import pytest

from deptry.cli import CommaSeparatedMappingParamType, CommaSeparatedTupleParamType
from deptry.cli import CommaSeparatedMappingParamType, CommaSeparatedTupleParamType, display_deptry_version

if TYPE_CHECKING:
from collections.abc import MutableMapping, Sequence
Expand Down Expand Up @@ -174,3 +175,38 @@ def test_comma_separated_mapping_param_type_convert_err(

with pytest.raises(err_type, match=err_msg_matcher):
param_type.convert(value=value, param=param, ctx=ctx)


def test_display_deptry_version(capsys: pytest.CaptureFixture[str]) -> None:
    """When the flag is set and parsing is not resilient, the version is printed and the click context exits."""
    ctx = mock.Mock(resilient_parsing=False, spec=click.Context)
    param = mock.Mock(spec=click.Parameter)

    # Patch the version lookup so that the expected output does not depend on the installed deptry version.
    with patch("deptry.cli.importlib_metadata.version", return_value="1.2.3"):
        display_deptry_version(ctx, param, True)

    assert capsys.readouterr().out == "deptry 1.2.3\n"
    # Printing the version must also stop further processing through the context.
    ctx.exit.assert_called_once_with()


@pytest.mark.parametrize(
    ("resilient_parsing", "value"),
    [
        (False, False),
        (True, False),
        (True, True),
    ],
)
def test_display_deptry_version_none(resilient_parsing: bool, value: bool, capsys: pytest.CaptureFixture[str]) -> None:
    """When the flag is unset or parsing is resilient, nothing is printed and the click context does not exit."""
    ctx = mock.Mock(resilient_parsing=resilient_parsing, spec=click.Context)
    param = mock.Mock(spec=click.Parameter)

    result = display_deptry_version(ctx, param, value)

    assert result is None
    assert capsys.readouterr().out == ""
    # The early return must leave the context untouched so that the CLI keeps running.
    ctx.exit.assert_not_called()
Loading
Loading