From d6c69f510d11901ed3155234ef3b2d759bacb630 Mon Sep 17 00:00:00 2001
From: William Woodruff
Date: Mon, 10 Jun 2024 11:40:46 -0400
Subject: [PATCH] ultranormalized dist filenames

Signed-off-by: William Woodruff
---
Reviewer notes (ignored by `git am`, not part of the commit message):
* Dropped the stray, unused `from re import L` import from _impl.py.
* `verify` now compares the subject name against the already-computed
  `normalized` value instead of normalizing `dist.name` a second time.
* Hunk offsets and the diffstat were adjusted for the one removed line;
  the post-image blob id on the _impl.py `index` line is from the
  original patch.

 pyproject.toml                       |  6 +-
 src/pypi_attestation_models/_impl.py | 68 ++++++++++++++++++++--
 test/test_impl.py                    | 86 ++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d682b52..c62e1bb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ classifiers = [
 ]
 dependencies = [
     "cryptography",
+    "packaging",
     "pydantic",
     "sigstore~=3.0.0",
     "sigstore-protobuf-specs",
@@ -84,8 +85,9 @@ ignore = ["ANN101", "ANN102", "D203", "D213", "COM812", "ISC001"]
 
 [tool.ruff.lint.per-file-ignores]
 "test/**/*.py" = [
-    "D",    # no docstrings in tests
-    "S101", # asserts are expected in tests
+    "D",      # no docstrings in tests
+    "S101",   # asserts are expected in tests
+    "SLF001", # private APIs are expected in tests
 ]
 
 [tool.interrogate]
diff --git a/src/pypi_attestation_models/_impl.py b/src/pypi_attestation_models/_impl.py
index 8ab712f..ca707b6 100644
--- a/src/pypi_attestation_models/_impl.py
+++ b/src/pypi_attestation_models/_impl.py
@@ -6,12 +6,13 @@
 from __future__ import annotations
 
 import base64
 from typing import TYPE_CHECKING, Annotated, Any, Literal, NewType
 
 import sigstore.errors
 from annotated_types import MinLen  # noqa: TCH002
 from cryptography import x509
 from cryptography.hazmat.primitives import serialization
+from packaging.utils import parse_sdist_filename, parse_wheel_filename
 from pydantic import Base64Bytes, BaseModel
 from pydantic_core import ValidationError
 from sigstore._utils import _sha256_streaming
@@ -91,7 +92,14 @@ def sign(cls, signer: Signer, dist: Path) -> Attestation:
 
         stmt = (
             _StatementBuilder()
-            .subjects([_Subject(name=dist.name, digest=_DigestSet(root={"sha256": digest}))])
+            .subjects(
+                [
+                    _Subject(
+                        name=_ultranormalize_dist_filename(dist.name),
+                        digest=_DigestSet(root={"sha256": digest}),
+                    )
+                ]
+            )
             .predicate_type("https://docs.pypi.org/attestations/publish/v1")
             .build()
         )
@@ -127,10 +135,10 @@ def verify(self, verifier: Verifier, policy: VerificationPolicy, dist: Path) ->
             raise VerificationError("too many subjects in statement (must be exactly one)")
         subject = statement.subjects[0]
 
-        # TODO: This is too brittle: we need to check with `parse_{sdist,wheel}_filename`.
-        if subject.name != dist.name:
+        normalized = _ultranormalize_dist_filename(dist.name)
+        if subject.name != normalized:
             raise VerificationError(
-                f"subject does not match distribution name: {subject.name} != {dist.name}"
+                f"subject does not match distribution name: {subject.name} != {normalized}"
             )
 
         digest = subject.digest.root.get("sha256")
@@ -209,3 +217,55 @@ def pypi_to_sigstore(pypi_attestation: Attestation) -> Bundle:
         content=evp,
         log_entry=log_entry,
     )
+
+
+def _ultranormalize_dist_filename(dist: str) -> str:
+    """Return an "ultranormalized" form of the given distribution filename.
+
+    This form is equivalent to the normalized form for sdist and wheel
+    filenames, with the additional stipulation that compressed tag sets,
+    if present, are also sorted alphanumerically.
+
+    Raises `ValueError` on any invalid distribution filename.
+    """
+    # NOTE: .whl and .tar.gz are assumed lowercase, since `packaging`
+    # already rejects non-lowercase variants.
+    if dist.endswith(".whl"):
+        # `parse_wheel_filename` raises a supertype of ValueError on failure.
+        name, ver, build, tags = parse_wheel_filename(dist)
+
+        # The name has been normalized to replace runs of `[.-_]+` with `-`,
+        # which then needs to be replaced with `_` for the wheel.
+        name = name.replace("-", "_")
+
+        # `parse_wheel_filename` normalizes the name and version for us,
+        # so all we need to do is re-compress the tag set in a canonical
+        # order.
+        # NOTE(ww): This is written in a not very efficient manner, since
+        # I wasn't feeling smart.
+        impls, abis, platforms = set(), set(), set()
+        for tag in tags:
+            impls.add(tag.interpreter)
+            abis.add(tag.abi)
+            platforms.add(tag.platform)
+
+        impl_tag = ".".join(sorted(impls))
+        abi_tag = ".".join(sorted(abis))
+        platform_tag = ".".join(sorted(platforms))
+
+        if build:
+            parts = "-".join(
+                [name, str(ver), f"{build[0]}{build[1]}", impl_tag, abi_tag, platform_tag]
+            )
+        else:
+            parts = "-".join([name, str(ver), impl_tag, abi_tag, platform_tag])
+
+        return f"{parts}.whl"
+
+    elif dist.endswith(".tar.gz"):
+        # `parse_sdist_filename` raises a supertype of ValueError on failure.
+        name, ver = parse_sdist_filename(dist)
+        name = name.replace("-", "_")
+        return f"{name}-{ver}.tar.gz"
+    else:
+        raise ValueError(f"unknown distribution format: {dist}")
diff --git a/test/test_impl.py b/test/test_impl.py
index db3e48e..c1d004e 100644
--- a/test/test_impl.py
+++ b/test/test_impl.py
@@ -173,3 +173,89 @@ def test_pypi_to_sigstore_invalid_tlog_entry() -> None:
 
     with pytest.raises(impl.ConversionError, match="invalid transparency log entry"):
         impl.pypi_to_sigstore(attestation)
+
+
+class TestPackaging:
+    """Behavioral backstops for our dependency on `packaging`."""
+
+    def test_exception_types(self) -> None:
+        from packaging.utils import InvalidSdistFilename, InvalidWheelFilename
+
+        assert issubclass(InvalidSdistFilename, ValueError)
+        assert issubclass(InvalidWheelFilename, ValueError)
+
+
+@pytest.mark.parametrize(
+    ("input", "normalized"),
+    [
+        # wheel: fully normalized, no changes
+        ("foo-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
+        # wheel: dist name is not case normalized
+        ("Foo-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
+        ("FOO-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
+        ("FoO-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
+        # wheel: dist name contains alternate separators
+        ("foo.bar-1.0-py3-none-any.whl", "foo_bar-1.0-py3-none-any.whl"),
+        ("foo_bar-1.0-py3-none-any.whl", "foo_bar-1.0-py3-none-any.whl"),
+        # wheel: dist version is not normalized
+        ("foo-1.0beta1-py3-none-any.whl", "foo-1.0b1-py3-none-any.whl"),
+        ("foo-1.0beta.1-py3-none-any.whl", "foo-1.0b1-py3-none-any.whl"),
+        ("foo-01.0beta.1-py3-none-any.whl", "foo-1.0b1-py3-none-any.whl"),
+        # wheel: build tag works as expected
+        ("foo-1.0-1whatever-py3-none-any.whl", "foo-1.0-1whatever-py3-none-any.whl"),
+        # wheel: compressed tag sets are sorted, even when conflicting or nonsense
+        ("foo-1.0-py3.py2-none-any.whl", "foo-1.0-py2.py3-none-any.whl"),
+        ("foo-1.0-py3.py2-none.abi3.cp37-any.whl", "foo-1.0-py2.py3-abi3.cp37.none-any.whl"),
+        (
+            "foo-1.0-py3.py2-none.abi3.cp37-linux_x86_64.any.whl",
+            "foo-1.0-py2.py3-abi3.cp37.none-any.linux_x86_64.whl",
+        ),
+        # sdist: fully normalized, no changes
+        ("foo-1.0.tar.gz", "foo-1.0.tar.gz"),
+        # sdist: dist name is not case normalized
+        ("Foo-1.0.tar.gz", "foo-1.0.tar.gz"),
+        ("FOO-1.0.tar.gz", "foo-1.0.tar.gz"),
+        ("FoO-1.0.tar.gz", "foo-1.0.tar.gz"),
+        # sdist: dist name contains alternate separators, including
+        # `-` despite being forbidden by PEP 625
+        ("foo-bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
+        ("foo-bar-baz-1.0.tar.gz", "foo_bar_baz-1.0.tar.gz"),
+        ("foo--bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
+        ("foo.bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
+        ("foo..bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
+        ("foo.bar.baz-1.0.tar.gz", "foo_bar_baz-1.0.tar.gz"),
+        # sdist: dist version is not normalized
+        ("foo-1.0beta1.tar.gz", "foo-1.0b1.tar.gz"),
+        ("foo-01.0beta1.tar.gz", "foo-1.0b1.tar.gz"),
+    ],
+)
+def test_ultranormalize_dist_filename(input: str, normalized: str) -> None:
+    # normalization works as expected
+    assert impl._ultranormalize_dist_filename(input) == normalized
+
+    # normalization is a fixpoint, and normalized names are valid dist names
+    assert impl._ultranormalize_dist_filename(normalized) == normalized
+
+
+@pytest.mark.parametrize(
+    "input",
+    [
+        # completely invalid
+        "foo",
+        # suffixes must be lowercase
+        "foo-1.0.TAR.GZ",
+        "foo-1.0-py3-none-any.WHL",
+        # wheel: invalid separator in dist name
+        "foo-bar-1.0-py3-none-any.whl",
+        "foo__bar-1.0-py3-none-any.whl",
+        # wheel: invalid version
+        "foo-charmander-py3-none-any.whl",
+        "foo-1charmander-py3-none-any.whl",
+        # sdist: invalid version
+        "foo-charmander.tar.gz",
+        "foo-1charmander.tar.gz",
+    ],
+)
+def test_ultranormalize_dist_filename_invalid(input: str) -> None:
+    with pytest.raises(ValueError):
+        impl._ultranormalize_dist_filename(input)