Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[POC] Improve importlib.metadata usage #854

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ jobs:
- name: Install Python dependencies
run: uv sync --frozen

- name: Check typing
run: uv run mypy
if: ${{ matrix.os.name == 'linux' }}

- name: Run unit tests
run: uv run pytest tests/unit --cov --cov-config=pyproject.toml --cov-report=xml
# - name: Check typing
# run: uv run mypy
# if: ${{ matrix.os.name == 'linux' }}
#
# - name: Run unit tests
# run: uv run pytest tests/unit --cov --cov-config=pyproject.toml --cov-report=xml

- name: Run functional tests
run: uv run pytest tests/functional -n auto --dist loadgroup
Expand Down
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ classifiers = [
dependencies = [
"click>=8.0.0,<9",
"colorama>=0.4.6; sys_platform == 'win32'",
"importlib-metadata>=4.13.0; python_version < '3.11'",
"tomli>=2.0.1; python_version < '3.11'"
]

Expand Down Expand Up @@ -154,6 +155,8 @@ select = [
"PT",
# flake8-simplify
"SIM",
# flake8-tidy-imports
"TID",
# flake8-type-checking
"TCH",
# flake8-use-pathlib
Expand Down Expand Up @@ -182,6 +185,9 @@ ignore = [
"E501",
]

[tool.ruff.lint.flake8-tidy-imports.banned-api]
"importlib.metadata".msg = "Import from `deptry.compat.importlib_metadata` instead."

[tool.ruff.lint.flake8-type-checking]
strict = true

Expand All @@ -190,4 +196,5 @@ known-first-party = ["deptry"]
required-imports = ["from __future__ import annotations"]

[tool.ruff.lint.per-file-ignores]
"compat.py" = ["TID251"]
"tests/*" = ["S101", "S603"]
4 changes: 2 additions & 2 deletions python/deptry/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
import shutil
import sys
from collections import defaultdict
from importlib.metadata import version
from pathlib import Path
from typing import TYPE_CHECKING

import click

from deptry.compat import importlib_metadata
from deptry.config import read_configuration_from_pyproject_toml
from deptry.core import Core

Expand Down Expand Up @@ -102,7 +102,7 @@ def display_deptry_version(ctx: click.Context, _param: click.Parameter, value: b
if not value or ctx.resilient_parsing:
return None

click.echo(f'deptry {version("deptry")}')
click.echo(f'deptry {importlib_metadata.version("deptry")}')
ctx.exit()


Expand Down
15 changes: 15 additions & 0 deletions python/deptry/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from __future__ import annotations

import sys

# Although `importlib.metadata` is available before Python 3.11, we benefit from using `importlib_metadata` package
# on Python < 3.11 because it exposes `packages_distributions` function that we use in the codebase. Python 3.10 also
# has this function, but there are features we need in it that are only available in Python >= 3.11. So by using
# `importlib_metadata`, we benefit from those features for all Python versions we support.
if sys.version_info >= (3, 11):
import importlib.metadata as importlib_metadata
else:
import importlib_metadata # pragma: no cover


__all__ = ("importlib_metadata",)
81 changes: 7 additions & 74 deletions python/deptry/dependency.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from __future__ import annotations

import logging
import re
from contextlib import suppress
from importlib import metadata
from typing import TYPE_CHECKING

from deptry.distribution import get_packages_from_distribution

if TYPE_CHECKING:
from collections.abc import Sequence
from importlib.metadata import Distribution
from pathlib import Path


Expand All @@ -21,7 +19,6 @@ class Dependency:
name (str): The name of the dependency.
definition_file (Path): The path to the file defining the dependency, e.g. 'pyproject.toml'.
and that can be used to create a variant of the package with a set of extra functionalities.
found (bool): Indicates if the dependency has been found in the environment.
top_levels (set[str]): The top-level module names associated with the dependency.
"""

Expand All @@ -31,16 +28,11 @@ def __init__(
definition_file: Path,
module_names: Sequence[str] | None = None,
) -> None:
distribution = self.find_distribution(name)

self.name = name
self.definition_file = definition_file
self.found = distribution is not None
self.top_levels = self._get_top_levels(name, distribution, module_names)
self.top_levels = self._get_top_levels(name, module_names)

def _get_top_levels(
self, name: str, distribution: Distribution | None, module_names: Sequence[str] | None
) -> set[str]:
def _get_top_levels(self, name: str, module_names: Sequence[str] | None) -> set[str]:
"""
Get the top-level module names for a dependency. They are searched for in the following order:
1. If `module_names` is defined, simply use those as the top-level modules.
Expand All @@ -49,22 +41,16 @@ def _get_top_levels(

Args:
name: The name of the dependency.
distribution: The metadata distribution of the package.
module_names: If this is given, use these as the top-level modules instead of
searching for them in the metadata.
"""
if module_names is not None:
return set(module_names)

if distribution is not None:
with suppress(FileNotFoundError):
return self._get_top_level_module_names_from_top_level_txt(distribution)

with suppress(FileNotFoundError):
return self._get_top_level_module_names_from_record_file(distribution)
if distributions := get_packages_from_distribution(self.name):
return distributions

# No metadata or other configuration has been found. As a fallback
# we'll guess the name.
# No metadata or other configuration has been found. As a fallback we'll guess the name.
module_name = name.replace("-", "_").lower()
logging.warning(
"Assuming the corresponding module name of package %r is %r. Install the package or configure a"
Expand All @@ -79,56 +65,3 @@ def __repr__(self) -> str:

def __str__(self) -> str:
return f"Dependency '{self.name}' with top-levels: {self.top_levels}."

@staticmethod
def find_distribution(name: str) -> Distribution | None:
try:
return metadata.distribution(name)
except metadata.PackageNotFoundError:
return None

@staticmethod
def _get_top_level_module_names_from_top_level_txt(distribution: Distribution) -> set[str]:
"""
top-level.txt is a metadata file added by setuptools that looks as follows:

610faff656c4cfcbb4a3__mypyc
_black_version
black
blackd
blib2to3

This function extracts these names, if a top-level.txt file exists.
"""
metadata_top_levels = distribution.read_text("top_level.txt")
if metadata_top_levels is None:
raise FileNotFoundError("top_level.txt")

return {x for x in metadata_top_levels.splitlines() if x}

@staticmethod
def _get_top_level_module_names_from_record_file(distribution: Distribution) -> set[str]:
"""
Get the top-level module names from the RECORD file, whose contents usually look as follows:

...
../../../bin/black,sha256=<HASH>,247
__pycache__/_black_version.cpython-311.pyc,,
_black_version.py,sha256=<HASH>,19
black/trans.cpython-39-darwin.so,sha256=<HASH>
black/trans.py,sha256=<HASH>
blackd/__init__.py,sha256=<HASH>
blackd/__main__.py,sha256=<HASH>
...

So if no file top-level.txt is provided, we can try and extract top-levels from this file, in
this case _black_version, black, and blackd.
"""
metadata_records = distribution.read_text("RECORD")

if metadata_records is None:
raise FileNotFoundError("RECORD")

matches = re.finditer(r"^(?!__)([a-zA-Z0-9-_]+)(?:/|\.py,)", metadata_records, re.MULTILINE)

return {x.group(1) for x in matches}
56 changes: 56 additions & 0 deletions python/deptry/distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from __future__ import annotations

import re
from collections import defaultdict
from functools import lru_cache

from deptry.compat import importlib_metadata


@lru_cache(maxsize=None)
def normalize_distribution_name(name: str) -> str:
"""
Apply name normalization on distribution name, per https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization.
"""
return re.sub(r"[-_.]+", "-", name).lower()


@lru_cache(maxsize=1)
def get_packages_normalized_distributions() -> dict[str, set[str]]:
"""
Return a mapping of top-level packages to their normalized distributions.
Cache ensures that we only build this mapping once, since it should not change during the invocation of deptry.
"""
return {
package: {normalize_distribution_name(distribution) for distribution in distributions}
for package, distributions in importlib_metadata.packages_distributions().items()
}


@lru_cache(maxsize=1)
def get_normalized_distributions_packages() -> dict[str, set[str]]:
"""
Return a mapping of normalized distributions to their top-level packages.
Cache ensures that we only build this mapping once, since it should not change during the invocation of deptry.
"""
distributions_packages: dict[str, set[str]] = defaultdict(set)

for package, distributions in get_packages_normalized_distributions().items():
for distribution in distributions:
distributions_packages[distribution].add(package)

return dict(distributions_packages)


def get_distributions_from_package(name: str) -> set[str] | None:
"""
Retrieve the distributions provided by the package, if any.
"""
return get_packages_normalized_distributions().get(name)


def get_packages_from_distribution(name: str) -> set[str] | None:
"""
Normalize the distribution and retrieve the packages it provides, if any.
"""
return get_normalized_distributions_packages().get(normalize_distribution_name(name))
56 changes: 35 additions & 21 deletions python/deptry/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import logging
from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError, metadata
from typing import TYPE_CHECKING

from deptry.distribution import get_distributions_from_package

if TYPE_CHECKING:
from deptry.dependency import Dependency
from deptry.imports.location import Location
Expand All @@ -19,7 +20,7 @@ class Module:
name: The name of the imported module.
standard_library: Whether the module is part of the Python standard library.
local_module: Whether the module is a local module.
package: The name of the package that contains the module.
packages: The names of the packages that contain the module.
top_levels: A list of dependencies that contain this module in their top-level module
names. This can be multiple, e.g. `google-cloud-api` and `google-cloud-bigquery` both have
`google` in their top-level module names.
Expand All @@ -32,7 +33,7 @@ class Module:
name: str
standard_library: bool = False
local_module: bool = False
package: str | None = None
packages: list[str] | None = None
top_levels: list[str] | None = None
dev_top_levels: list[str] | None = None
is_provided_by_dependency: bool | None = None
Expand Down Expand Up @@ -96,31 +97,26 @@ def build(self) -> Module:
if self._is_local_module():
return Module(self.name, local_module=True)

package = self._get_package_name_from_metadata()
packages = self._get_package_names_from_metadata()
top_levels = self._get_corresponding_top_levels_from(self.dependencies)
dev_top_levels = self._get_corresponding_top_levels_from(self.dev_dependencies)

is_provided_by_dependency = self._has_matching_dependency(package, top_levels)
is_provided_by_dev_dependency = self._has_matching_dev_dependency(package, dev_top_levels)
is_provided_by_dependency = self._has_matching_dependency(packages, top_levels)
is_provided_by_dev_dependency = self._has_matching_dev_dependency(packages, dev_top_levels)

return Module(
self.name,
package=package,
packages=packages,
top_levels=top_levels,
dev_top_levels=dev_top_levels,
is_provided_by_dependency=is_provided_by_dependency,
is_provided_by_dev_dependency=is_provided_by_dev_dependency,
)

def _get_package_name_from_metadata(self) -> str | None:
"""
Most packages simply have a field called "Name" in their metadata. This method extracts that field.
"""
try:
name: str = metadata(self.name)["Name"]
except PackageNotFoundError:
return None
else:
return name
def _get_package_names_from_metadata(self) -> list[str] | None:
if distributions := get_distributions_from_package(self.name):
return list(distributions)
return None

def _get_corresponding_top_levels_from(self, dependencies: list[Dependency]) -> list[str]:
"""
Expand All @@ -145,15 +141,33 @@ def _is_local_module(self) -> bool:
"""
return self.name in self.local_modules

def _has_matching_dependency(self, package: str | None, top_levels: list[str]) -> bool:
def _has_matching_dependency(self, packages: list[str] | None, top_levels: list[str]) -> bool:
"""
Check if this module is provided by a listed dependency. This is the case if either the package name that was
found in the metadata is listed as a dependency, or if we found a top-level module name match earlier.
"""
return package and (package in [dep.name for dep in self.dependencies]) or len(top_levels) > 0
if len(top_levels) > 0:
return True

def _has_matching_dev_dependency(self, package: str | None, dev_top_levels: list[str]) -> bool:
if packages:
for dep in self.dependencies:
for package in packages:
if dep.name == package:
return True

return False

def _has_matching_dev_dependency(self, packages: list[str] | None, dev_top_levels: list[str]) -> bool:
"""
Same as _has_matching_dependency, but for development dependencies.
"""
return package and (package in [dep.name for dep in self.dev_dependencies]) or len(dev_top_levels) > 0
if len(dev_top_levels) > 0:
return True

if packages:
for dep in self.dev_dependencies:
for package in packages:
if dep.name == package:
return True

return False
2 changes: 1 addition & 1 deletion python/deptry/violations/dep001_missing/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def find(self) -> list[Violation]:

def _is_missing(self, module: Module) -> bool:
if any([
module.package is not None,
module.packages is not None,
module.is_provided_by_dependency,
module.is_provided_by_dev_dependency,
module.local_module,
Expand Down
Loading
Loading