Skip to content

Commit

Permalink
refactor(dependency): use get_packages_from_distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
mkniewallner committed Sep 15, 2024
1 parent 4bcd9d0 commit 1f6d6ec
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 126 deletions.
80 changes: 6 additions & 74 deletions python/deptry/dependency.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
from __future__ import annotations

import logging
import re
from contextlib import suppress
from typing import TYPE_CHECKING

from deptry.compat import importlib_metadata
from deptry.distribution import get_packages_from_distribution

if TYPE_CHECKING:
from collections.abc import Sequence
from importlib.metadata import Distribution
from pathlib import Path


Expand All @@ -22,7 +19,6 @@ class Dependency:
name (str): The name of the dependency.
definition_file (Path): The path to the file defining the dependency, e.g. 'pyproject.toml'.
and that can be used to create a variant of the package with a set of extra functionalities.
found (bool): Indicates if the dependency has been found in the environment.
top_levels (set[str]): The top-level module names associated with the dependency.
"""

Expand All @@ -32,16 +28,11 @@ def __init__(
definition_file: Path,
module_names: Sequence[str] | None = None,
) -> None:
distribution = self.find_distribution(name)

self.name = name
self.definition_file = definition_file
self.found = distribution is not None
self.top_levels = self._get_top_levels(name, distribution, module_names)
self.top_levels = self._get_top_levels(name, module_names)

def _get_top_levels(
self, name: str, distribution: Distribution | None, module_names: Sequence[str] | None
) -> set[str]:
def _get_top_levels(self, name: str, module_names: Sequence[str] | None) -> set[str]:
"""
Get the top-level module names for a dependency. They are searched for in the following order:
1. If `module_names` is defined, simply use those as the top-level modules.
Expand All @@ -50,22 +41,16 @@ def _get_top_levels(
Args:
name: The name of the dependency.
distribution: The metadata distribution of the package.
module_names: If this is given, use these as the top-level modules instead of
searching for them in the metadata.
"""
if module_names is not None:
return set(module_names)

if distribution is not None:
with suppress(FileNotFoundError):
return self._get_top_level_module_names_from_top_level_txt(distribution)

with suppress(FileNotFoundError):
return self._get_top_level_module_names_from_record_file(distribution)
if distributions := get_packages_from_distribution(self.name):
return distributions

# No metadata or other configuration has been found. As a fallback
# we'll guess the name.
# No metadata or other configuration has been found. As a fallback we'll guess the name.
module_name = name.replace("-", "_").lower()
logging.warning(
"Assuming the corresponding module name of package %r is %r. Install the package or configure a"
Expand All @@ -80,56 +65,3 @@ def __repr__(self) -> str:

def __str__(self) -> str:
return f"Dependency '{self.name}' with top-levels: {self.top_levels}."

@staticmethod
def find_distribution(name: str) -> Distribution | None:
    """
    Look up the distribution metadata of a package by name.

    Args:
        name: The name of the package to look up.

    Returns:
        The object returned by `importlib_metadata.distribution(name)`, or None if that lookup raises
        `importlib_metadata.PackageNotFoundError`.
    """
    try:
        located = importlib_metadata.distribution(name)
    except importlib_metadata.PackageNotFoundError:
        # A missing package is an expected outcome here, so it is reported as None, not as an error.
        located = None
    return located

@staticmethod
def _get_top_level_module_names_from_top_level_txt(distribution: Distribution) -> set[str]:
    """
    Extract the top-level module names listed in a distribution's top_level.txt file.

    top_level.txt is a metadata file added by setuptools that looks as follows:

        610faff656c4cfcbb4a3__mypyc
        _black_version
        black
        blackd
        blib2to3

    Args:
        distribution: The distribution whose metadata is read. Only its `read_text` method is used.

    Returns:
        The set of non-blank entries of top_level.txt, with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If `distribution.read_text("top_level.txt")` returns None.
    """
    metadata_top_levels = distribution.read_text("top_level.txt")
    if metadata_top_levels is None:
        raise FileNotFoundError("top_level.txt")

    # Strip every line so that stray whitespace (trailing spaces, whitespace-only lines) never ends up
    # in, or as, a module name.
    return {stripped for line in metadata_top_levels.splitlines() if (stripped := line.strip())}

@staticmethod
def _get_top_level_module_names_from_record_file(distribution: Distribution) -> set[str]:
    """
    Derive the top-level module names of a distribution from its RECORD metadata file.

    The contents of a RECORD file usually look as follows:

        ...
        ../../../bin/black,sha256=<HASH>,247
        __pycache__/_black_version.cpython-311.pyc,,
        _black_version.py,sha256=<HASH>,19
        black/trans.cpython-39-darwin.so,sha256=<HASH>
        black/trans.py,sha256=<HASH>
        blackd/__init__.py,sha256=<HASH>
        blackd/__main__.py,sha256=<HASH>
        ...

    A name is collected when it sits at the very start of a line, does not begin with a double
    underscore, consists only of ASCII letters, digits, "-" and "_", and is directly followed by either
    "/" or ".py,". For the example above this yields _black_version, black and blackd. This makes the
    RECORD file usable as a source of top-levels when no top_level.txt file is provided.

    Args:
        distribution: The distribution whose metadata is read. Only its `read_text` method is used.

    Returns:
        The set of names collected from the RECORD file.

    Raises:
        FileNotFoundError: If `distribution.read_text("RECORD")` returns None.
    """
    record_text = distribution.read_text("RECORD")
    if record_text is None:
        raise FileNotFoundError("RECORD")

    top_level_pattern = re.compile(r"^(?!__)([a-zA-Z0-9-_]+)(?:/|\.py,)", re.MULTILINE)

    # The pattern has exactly one capturing group, so findall yields the captured name of each match.
    return set(top_level_pattern.findall(record_text))
61 changes: 9 additions & 52 deletions tests/unit/test_dependency.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from importlib.metadata import PackageNotFoundError
from pathlib import Path
from unittest.mock import patch

Expand All @@ -21,80 +20,38 @@ def test_create_default_top_level_if_metadata_not_found() -> None:
assert dependency.top_levels == {"foo_bar"}


def test_read_top_level_from_top_level_txt() -> None:
def test_get_top_levels_from_distribution() -> None:
"""
Read the top_level.txt file
Get the packages from distribution.
"""

class MockDistribution:
def __init__(self) -> None:
pass

def read_text(self, file_name: str) -> str:
return "foo\nbar"

with patch("deptry.dependency.metadata.distribution") as mock:
mock.return_value = MockDistribution()
with patch("deptry.dependency.get_packages_from_distribution", return_value={"foo", "bar"}):
dependency = Dependency("Foo-bar", Path("pyproject.toml"))

assert dependency.name == "Foo-bar"
assert dependency.definition_file == Path("pyproject.toml")
assert dependency.top_levels == {"foo", "bar"}


def test_read_top_level_from_record() -> None:
    """
    Verify that when top_level.txt is not available, the top-level module names are extracted from the
    metadata RECORD file instead.
    """
    record_content = """\
../../../bin/black,sha256=<HASH>,247
__pycache__/_black_version.cpython-311.pyc,,
_black_version.py,sha256=<HASH>,19
black/trans.cpython-39-darwin.so,sha256=<HASH>
black/trans.py,sha256=<HASH>
blackd/__init__.py,sha256=<HASH>
blackd/__main__.py,sha256=<HASH>
"""

    class MockDistribution:
        # Only RECORD exists for this fake distribution; every other metadata file is reported as missing.
        def read_text(self, file_name: str) -> str | None:
            return record_content if file_name == "RECORD" else None

    with patch("deptry.dependency.metadata.distribution", return_value=MockDistribution()):
        dependency = Dependency("Foo-bar", Path("pyproject.toml"))

    assert dependency.name == "Foo-bar"
    assert dependency.definition_file == Path("pyproject.toml")
    assert dependency.top_levels == {"_black_version", "black", "blackd"}


def test_read_top_level_from_predefined() -> None:
def test_get_top_levels_from_predefined() -> None:
"""
Verify that if there are predefined top-level module names it takes
precedence over other lookup methods.
Verify that if there are predefined top-level module names, they take precedence over other lookup methods.
"""
with patch("deptry.dependency.metadata.distribution") as mock:
with patch("deptry.dependency.get_packages_from_distribution") as mock:
dependency = Dependency("Foo-bar", Path("pyproject.toml"), module_names=["foo"])

assert dependency.name == "Foo-bar"
assert dependency.definition_file == Path("pyproject.toml")
assert dependency.top_levels == {"foo"}
mock.return_value.read_text.assert_not_called()
mock.assert_not_called()


def test_not_predefined_and_not_installed() -> None:
def test_get_top_levels_fallback() -> None:
"""
Use the fallback option of translating the package name.
"""

with patch("deptry.dependency.metadata.distribution") as mock:
mock.side_effect = PackageNotFoundError
with patch("deptry.dependency.get_packages_from_distribution", return_value=None):
dependency = Dependency("Foo-bar", Path("pyproject.toml"))

assert dependency.name == "Foo-bar"
Expand Down

0 comments on commit 1f6d6ec

Please sign in to comment.