Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use packaging to parse requirements #735

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ classifiers = [
dependencies = [
"click>=8.0.0,<9",
"colorama>=0.4.6; sys_platform == 'win32'",
"packaging>=22.0",
"tomli>=2.0.1; python_version < '3.11'"
]

Expand Down
19 changes: 18 additions & 1 deletion python/deptry/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import re
from contextlib import suppress
from importlib import metadata
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Mapping

from packaging.requirements import InvalidRequirement, Requirement

if TYPE_CHECKING:
from collections.abc import Sequence
Expand Down Expand Up @@ -132,3 +134,18 @@ def _get_top_level_module_names_from_record_file(distribution: Distribution) ->
matches = re.finditer(r"^(?!__)([a-zA-Z0-9-_]+)(?:/|\.py,)", metadata_records, re.MULTILINE)

return {x.group(1) for x in matches}


def parse_pep_508_dependency(
    specification: str, definition_file: Path, package_module_name_map: Mapping[str, Sequence[str]]
) -> Dependency | None:
    """
    Build a Dependency from a single PEP 508 requirement string.

    Args:
        specification: The requirement string, e.g. `foobar[extra]==1.2.3; python_version < "3.9"`.
        definition_file: The file the requirement was read from; stored on the resulting Dependency.
        package_module_name_map: User-provided mapping from a package name to the module names it
            exposes. The entry matching the parsed package name (if any) is passed to the Dependency.

    Returns:
        The parsed Dependency, or None when the specification is blank or is not a valid PEP 508
        requirement.
    """
    # Imported locally so that this function does not rely on the module's import block.
    import logging

    if not specification.strip():
        # Blank input (for instance a line that only contained a comment) can never be a requirement,
        # so skip it without going through the parser or emitting a log entry.
        return None

    try:
        requirement = Requirement(specification)
    except InvalidRequirement as exc:
        # Callers rely on None for unparsable input, but dropping a dependency without any trace makes
        # misconfigurations (e.g. an unquoted marker value) very hard to diagnose.
        logging.debug(
            "Skipping %r from %s as it is not a valid PEP 508 requirement: %s",
            specification,
            definition_file,
            exc,
        )
        return None

    return Dependency(
        name=requirement.name,
        definition_file=definition_file,
        module_names=package_module_name_map.get(requirement.name),
    )
2 changes: 1 addition & 1 deletion python/deptry/dependency_getter/pdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ def _get_pdm_dev_dependencies(self) -> list[Dependency]:
except KeyError:
logging.debug("No section [tool.pdm.dev-dependencies] found in pyproject.toml")

return self._extract_pep_508_dependencies(dev_dependency_strings, self.package_module_name_map)
return self._extract_pep_508_dependencies(dev_dependency_strings)
38 changes: 10 additions & 28 deletions python/deptry/dependency_getter/pep_621.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@

import itertools
import logging
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING

from deptry.dependency import Dependency
from deptry.dependency import parse_pep_508_dependency
from deptry.dependency_getter.base import DependenciesExtract, DependencyGetter
from deptry.utils import load_pyproject_toml

if TYPE_CHECKING:
from collections.abc import Mapping, Sequence
from deptry.dependency import Dependency


@dataclass
Expand Down Expand Up @@ -59,13 +58,13 @@ def get(self) -> DependenciesExtract:
def _get_dependencies(self) -> list[Dependency]:
pyproject_data = load_pyproject_toml(self.config)
dependency_strings: list[str] = pyproject_data["project"]["dependencies"]
return self._extract_pep_508_dependencies(dependency_strings, self.package_module_name_map)
return self._extract_pep_508_dependencies(dependency_strings)

def _get_optional_dependencies(self) -> dict[str, list[Dependency]]:
pyproject_data = load_pyproject_toml(self.config)

return {
group: self._extract_pep_508_dependencies(dependencies, self.package_module_name_map)
group: self._extract_pep_508_dependencies(dependencies)
for group, dependencies in pyproject_data["project"].get("optional-dependencies", {}).items()
}

Expand Down Expand Up @@ -98,31 +97,14 @@ def _split_development_dependencies_from_optional_dependencies(
)
return dev_dependencies, regular_dependencies

def _extract_pep_508_dependencies(
self, dependencies: list[str], package_module_name_map: Mapping[str, Sequence[str]]
) -> list[Dependency]:
def _extract_pep_508_dependencies(self, dependencies: list[str]) -> list[Dependency]:
"""
Given a list of dependency specifications (e.g. "django>2.1; os_name != 'nt'"), convert them to Dependency objects.
"""
extracted_dependencies = []

for spec in dependencies:
# An example of a spec is `"tomli>=1.1.0; python_version < \"3.11\""`
name = self._find_dependency_name_in(spec)
if name:
extracted_dependencies.append(
Dependency(
name,
self.config,
module_names=package_module_name_map.get(name),
)
)
extracted_dependencies: list[Dependency] = []

return extracted_dependencies
for dependency in dependencies:
if extracted_dependency := parse_pep_508_dependency(dependency, self.config, self.package_module_name_map):
extracted_dependencies.append(extracted_dependency)

@staticmethod
def _find_dependency_name_in(spec: str) -> str | None:
match = re.search("[a-zA-Z0-9-_]+", spec)
if match:
return match.group(0)
return None
return extracted_dependencies
65 changes: 10 additions & 55 deletions python/deptry/dependency_getter/requirements_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
import re
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse
from typing import TYPE_CHECKING

from deptry.dependency import Dependency
from deptry.dependency import parse_pep_508_dependency
from deptry.dependency_getter.base import DependenciesExtract, DependencyGetter

if TYPE_CHECKING:
from deptry.dependency import Dependency


@dataclass
class RequirementsTxtDependencyGetter(DependencyGetter):
Expand Down Expand Up @@ -66,63 +69,15 @@ def _extract_dependency_from_line(self, line: str, file_path: Path) -> Dependenc
"""
Extract a dependency from a single line of a requirements.txt file.
"""
line = self._remove_comments_from(line)
line = self._remove_newlines_from(line)
name = self._find_dependency_name_in(line)
if name:
return Dependency(
name=name,
definition_file=file_path,
module_names=self.package_module_name_map.get(name),
)
else:
return None

def _find_dependency_name_in(self, line: str) -> str | None:
"""
Find the dependency name of a dependency specified according to the pip-standards for requirement.txt
"""
if self._line_is_url(line):
return self._extract_name_from_url(line)
else:
match = re.search("^[^-][a-zA-Z0-9-_]+", line)
if match:
return match.group(0)
return None
# Note that `packaging` does not strip comments on purpose (https://github.com/pypa/packaging/issues/807), so we
# need to remove the comments ourselves.
return parse_pep_508_dependency(self._remove_comments_from(line), file_path, self.package_module_name_map)

@staticmethod
def _remove_comments_from(line: str) -> str:
"""
Removes comments from a line. A comment is defined as any text
following a '#' that is either at the start of the line or preceded by a space.
Removes comments from a line. A comment is defined as any text following a '#' that is either at the start of
the line or preceded by a space.
This ensures that fragments like '#egg=' in URLs are not mistakenly removed.
"""
return re.sub(r"(?<!\S)#.*", "", line).strip()

@staticmethod
def _remove_newlines_from(line: str) -> str:
return line.replace("\n", "")

@staticmethod
def _line_is_url(line: str) -> bool:
return urlparse(line).scheme != ""

@staticmethod
def _extract_name_from_url(line: str) -> str | None:
# Try to find egg, for url like git+https://github.com/xxxxx/package@xxxxx#egg=package
match = re.search("egg=([a-zA-Z0-9-_]*)", line)
if match:
return match.group(1)

# for url like git+https://github.com/name/python-module.git@0d6dc38d58
match = re.search(r"\/((?:(?!\/).)*?)\.git", line)
if match:
return match.group(1)

# for url like https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
match = re.search(r"\/((?:(?!\/).)*?)\/archive\/", line)
if match:
return match.group(1)

logging.warning("Could not parse dependency name from url %s", line)
return None
6 changes: 3 additions & 3 deletions tests/unit/dependency_getter/test_pdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_dependency_getter(tmp_path: Path) -> None:
"qux",
"bar>=20.9",
"optional-foo[option]>=0.12.11",
"conditional-bar>=1.1.0; python_version < 3.11",
"conditional-bar>=1.1.0; python_version < '3.11'",
"fox-python", # top level module is called "fox"
]
"""
Expand Down Expand Up @@ -57,12 +57,12 @@ def test_dev_dependency_getter(tmp_path: Path) -> None:
"qux",
"bar>=20.9",
"optional-foo[option]>=0.12.11",
"conditional-bar>=1.1.0; python_version < 3.11",
"conditional-bar>=1.1.0; python_version < '3.11'",
]
[tool.pdm.dev-dependencies]
test = [
"qux",
"bar; python_version < 3.11"
"bar; python_version < '3.11'"
]
tox = [
"foo-bar",
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/dependency_getter/test_pep_621.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_dependency_getter(tmp_path: Path) -> None:
"qux",
"bar>=20.9",
"optional-foo[option]>=0.12.11",
"conditional-bar>=1.1.0; python_version < 3.11",
"conditional-bar>=1.1.0; python_version < '3.11'",
"fox-python", # top level module is called "fox"
]

Expand Down
57 changes: 20 additions & 37 deletions tests/unit/dependency_getter/test_requirements_txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from pathlib import Path

import pytest

from deptry.dependency_getter.requirements_files import RequirementsTxtDependencyGetter
from tests.utils import run_within_dir

Expand Down Expand Up @@ -59,11 +57,15 @@ def test_parse_requirements_files(tmp_path: Path) -> None:


def test_parse_requirements_files_urls(tmp_path: Path) -> None:
fake_requirements_files = """urllib3 @ https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
git+https://github.com/baz/foo-bar.git@asd#egg=foo-bar
git+https://github.com/baz/foo-bar.git@asd
git+https://github.com/abc123/bar-foo@xyz789#egg=bar-fooo"""
fake_requirements_files = """git-archive @ git+https://github.com/foo/bar.git@1.2.3
remote-archive @ https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
another-remote-archive[with-extra] @ https://github.com/foo/bar/archive/refs/tags/v0.1.2.tar.gz
remote-archive-with-egg @ https://github.com/foo/bar/archive/refs/tags/v0.1.2.tar.gz#egg=foo-bar
local-sdist @ file:///local/sdist.tar.gz
local-wheel @ file:///local/wheel.whl
local-directory @ file:///local/directory
another-local-directory@file:///local/directory
"""

with run_within_dir(tmp_path):
with Path("requirements.txt").open("w") as f:
Expand All @@ -72,14 +74,17 @@ def test_parse_requirements_files_urls(tmp_path: Path) -> None:
dependencies_extract = RequirementsTxtDependencyGetter(Path("pyproject.toml")).get()
dependencies = dependencies_extract.dependencies

assert len(dependencies) == 5
assert len(dependencies_extract.dev_dependencies) == 0

assert dependencies[0].name == "urllib3"
assert dependencies[1].name == "urllib3"
assert dependencies[2].name == "foo-bar"
assert dependencies[3].name == "foo-bar"
assert dependencies[4].name == "bar-fooo"
assert [dependency.name for dependency in dependencies] == [
"git-archive",
"remote-archive",
"another-remote-archive",
"remote-archive-with-egg",
"local-sdist",
"local-wheel",
"local-directory",
"another-local-directory",
]
assert dependencies_extract.dev_dependencies == []


def test_single(tmp_path: Path) -> None:
Expand Down Expand Up @@ -179,25 +184,3 @@ def test_dev_multiple_with_arguments(tmp_path: Path) -> None:

assert dev_dependencies[0].name == "click"
assert dev_dependencies[1].name == "bar"


@pytest.mark.parametrize(
("line", "expected"),
[
("foo", False),
("http", False),
("https", False),
("httpx", False),
("git+http", False),
("git+https", False),
("http://", True),
("https://", True),
("git+http://", True),
("git+https://", True),
("file://", True),
("file:///", True),
("httpx://", True),
],
)
def test__line_is_url(line: str, expected: bool) -> None:
assert RequirementsTxtDependencyGetter._line_is_url(line) is expected
51 changes: 50 additions & 1 deletion tests/unit/test_dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@

from importlib.metadata import PackageNotFoundError
from pathlib import Path
from typing import TYPE_CHECKING
from unittest.mock import patch

from deptry.dependency import Dependency
import pytest

from deptry.dependency import Dependency, parse_pep_508_dependency

if TYPE_CHECKING:
from typing import Any


def test_simple_dependency() -> None:
Expand Down Expand Up @@ -99,3 +105,46 @@ def test_not_predefined_and_not_installed() -> None:

assert dependency.name == "Foo-bar"
assert dependency.top_levels == {"foo_bar"}


@pytest.mark.parametrize(
    ("specification", "definition_file", "package_module_name_map", "expected"),
    [
        # Bare name, no module name mapping: the top level module defaults to the package name.
        (
            "foo",
            Path("pyproject.toml"),
            {},
            {
                "name": "foo",
                "definition_file": Path("pyproject.toml"),
                "found": False,
                "top_levels": {"foo"},
            },
        ),
        # Extras, version specifier and marker are ignored for the name; the mapping drives the top levels.
        (
            'foobar[extra]==1.2.3; python_version < "3.9"',
            Path("requirements.txt"),
            {"foobar": ["foo"], "barfoo": ["bar"]},
            {
                "name": "foobar",
                "definition_file": Path("requirements.txt"),
                "found": False,
                "top_levels": {"foo"},
            },
        ),
    ],
)
def test_parse_pep_508_dependency(
    specification: str,
    definition_file: Path,
    package_module_name_map: dict[str, list[str]],
    expected: dict[str, Any],
) -> None:
    """Check that each expected attribute of the parsed dependency has the expected value."""
    parsed = parse_pep_508_dependency(specification, definition_file, package_module_name_map)

    # Compare all attributes at once so that a failure shows every mismatch, not only the first one.
    actual = {attribute: getattr(parsed, attribute) for attribute in expected}

    assert actual == expected


def test_parse_pep_508_dependency_invalid_definition() -> None:
    """A specification using a single `=` is not valid PEP 508, so no dependency is returned."""
    result = parse_pep_508_dependency("an_incorrect_definition=1.2.3", Path("pyproject.toml"), {})

    assert result is None
Loading