Skip to content

Commit

Permalink
ultranormalized dist filenames
Browse files Browse the repository at this point in the history
Signed-off-by: William Woodruff <william@trailofbits.com>
  • Loading branch information
woodruffw committed Jun 10, 2024
1 parent da5427b commit d6c69f5
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 6 deletions.
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ classifiers = [
]
dependencies = [
"cryptography",
"packaging",
"pydantic",
"sigstore~=3.0.0",
"sigstore-protobuf-specs",
Expand Down Expand Up @@ -84,8 +85,9 @@ ignore = ["ANN101", "ANN102", "D203", "D213", "COM812", "ISC001"]
[tool.ruff.lint.per-file-ignores]

"test/**/*.py" = [
"D", # no docstrings in tests
"S101", # asserts are expected in tests
"D", # no docstrings in tests
"S101", # asserts are expected in tests
"SLF001", # private APIs are expected in tests
]

[tool.interrogate]
Expand Down
69 changes: 65 additions & 4 deletions src/pypi_attestation_models/_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from __future__ import annotations

import base64
from re import L
from typing import TYPE_CHECKING, Annotated, Any, Literal, NewType

import sigstore.errors
from annotated_types import MinLen # noqa: TCH002
from cryptography import x509
from cryptography.hazmat.primitives import serialization
from packaging.utils import parse_sdist_filename, parse_wheel_filename
from pydantic import Base64Bytes, BaseModel
from pydantic_core import ValidationError
from sigstore._utils import _sha256_streaming
Expand Down Expand Up @@ -91,7 +93,14 @@ def sign(cls, signer: Signer, dist: Path) -> Attestation:

stmt = (
_StatementBuilder()
.subjects([_Subject(name=dist.name, digest=_DigestSet(root={"sha256": digest}))])
.subjects(
[
_Subject(
name=_ultranormalize_dist_filename(dist.name),
digest=_DigestSet(root={"sha256": digest}),
)
]
)
.predicate_type("https://docs.pypi.org/attestations/publish/v1")
.build()
)
Expand Down Expand Up @@ -127,10 +136,10 @@ def verify(self, verifier: Verifier, policy: VerificationPolicy, dist: Path) ->
raise VerificationError("too many subjects in statement (must be exactly one)")

subject = statement.subjects[0]
# TODO: This is too brittle: we need to check with `parse_{sdist,wheel}_filename`.
if subject.name != dist.name:
normalized = _ultranormalize_dist_filename(dist.name)
if subject.name != _ultranormalize_dist_filename(dist.name):
raise VerificationError(
f"subject does not match distribution name: {subject.name} != {dist.name}"
f"subject does not match distribution name: {subject.name} != {normalized}"
)

digest = subject.digest.root.get("sha256")
Expand Down Expand Up @@ -209,3 +218,55 @@ def pypi_to_sigstore(pypi_attestation: Attestation) -> Bundle:
content=evp,
log_entry=log_entry,
)


def _ultranormalize_dist_filename(dist: str) -> str:
"""Return an "ultranormalized" form of the given distribution filename.
This form is equivalent to the normalized form for sdist and wheel
filenames, with the additional stipulation that compressed tag sets,
if present, are also sorted alphanumerically.
Raises `ValueError` on any invalid distribution filename.
"""
# NOTE: .whl and .tar.gz are assumed lowercase, since `packaging`
# already rejects non-lowercase variants.
if dist.endswith(".whl"):
# `parse_wheel_filename` raises a supertype of ValueError on failure.
name, ver, build, tags = parse_wheel_filename(dist)

# The name has been normalized to replace runs of `[.-_]+` with `-`,
# which then needs to be replaced with `_` for the wheel.
name = name.replace("-", "_")

# `parse_wheel_filename` normalizes the name and version for us,
# so all we need to do is re-compress the tag set in a canonical
# order.
# NOTE(ww): This is written in a not very efficient manner, since
# I wasn't feeling smart.
impls, abis, platforms = set(), set(), set()
for tag in tags:
impls.add(tag.interpreter)
abis.add(tag.abi)
platforms.add(tag.platform)

impl_tag = ".".join(sorted(impls))
abi_tag = ".".join(sorted(abis))
platform_tag = ".".join(sorted(platforms))

if build:
parts = "-".join(
[name, str(ver), f"{build[0]}{build[1]}", impl_tag, abi_tag, platform_tag]
)
else:
parts = "-".join([name, str(ver), impl_tag, abi_tag, platform_tag])

return f"{parts}.whl"

elif dist.endswith(".tar.gz"):
# `parse_sdist_filename` raises a supertype of ValueError on failure.
name, ver = parse_sdist_filename(dist)
name = name.replace("-", "_")
return f"{name}-{ver}.tar.gz"
else:
raise ValueError(f"unknown distribution format: {dist}")
86 changes: 86 additions & 0 deletions test/test_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,89 @@ def test_pypi_to_sigstore_invalid_tlog_entry() -> None:

with pytest.raises(impl.ConversionError, match="invalid transparency log entry"):
impl.pypi_to_sigstore(attestation)


class TestPackaging:
    """Behavioral backstops for our dependency on `packaging`."""

    def test_exception_types(self) -> None:
        # Our normalization helper documents `ValueError` as its failure
        # mode, so both parser exceptions must remain `ValueError` subclasses.
        from packaging import utils as packaging_utils

        parser_errors = (
            packaging_utils.InvalidSdistFilename,
            packaging_utils.InvalidWheelFilename,
        )
        for error_type in parser_errors:
            assert issubclass(error_type, ValueError)


@pytest.mark.parametrize(
    ("dist_filename", "normalized"),
    [
        # wheel: fully normalized, no changes
        ("foo-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
        # wheel: dist name is not case normalized
        ("Foo-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
        ("FOO-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
        ("FoO-1.0-py3-none-any.whl", "foo-1.0-py3-none-any.whl"),
        # wheel: dist name contains alternate separators
        ("foo.bar-1.0-py3-none-any.whl", "foo_bar-1.0-py3-none-any.whl"),
        ("foo_bar-1.0-py3-none-any.whl", "foo_bar-1.0-py3-none-any.whl"),
        # wheel: dist version is not normalized
        ("foo-1.0beta1-py3-none-any.whl", "foo-1.0b1-py3-none-any.whl"),
        ("foo-1.0beta.1-py3-none-any.whl", "foo-1.0b1-py3-none-any.whl"),
        ("foo-01.0beta.1-py3-none-any.whl", "foo-1.0b1-py3-none-any.whl"),
        # wheel: build tag works as expected
        ("foo-1.0-1whatever-py3-none-any.whl", "foo-1.0-1whatever-py3-none-any.whl"),
        # wheel: compressed tag sets are sorted, even when conflicting or nonsense
        ("foo-1.0-py3.py2-none-any.whl", "foo-1.0-py2.py3-none-any.whl"),
        ("foo-1.0-py3.py2-none.abi3.cp37-any.whl", "foo-1.0-py2.py3-abi3.cp37.none-any.whl"),
        (
            "foo-1.0-py3.py2-none.abi3.cp37-linux_x86_64.any.whl",
            "foo-1.0-py2.py3-abi3.cp37.none-any.linux_x86_64.whl",
        ),
        # sdist: fully normalized, no changes
        ("foo-1.0.tar.gz", "foo-1.0.tar.gz"),
        # sdist: dist name is not case normalized
        ("Foo-1.0.tar.gz", "foo-1.0.tar.gz"),
        ("FOO-1.0.tar.gz", "foo-1.0.tar.gz"),
        ("FoO-1.0.tar.gz", "foo-1.0.tar.gz"),
        # sdist: dist name contains alternate separators, including
        # `-` despite being forbidden by PEP 625
        ("foo-bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
        ("foo-bar-baz-1.0.tar.gz", "foo_bar_baz-1.0.tar.gz"),
        ("foo--bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
        ("foo.bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
        ("foo..bar-1.0.tar.gz", "foo_bar-1.0.tar.gz"),
        ("foo.bar.baz-1.0.tar.gz", "foo_bar_baz-1.0.tar.gz"),
        # sdist: dist version is not normalized
        ("foo-1.0beta1.tar.gz", "foo-1.0b1.tar.gz"),
        ("foo-01.0beta1.tar.gz", "foo-1.0b1.tar.gz"),
    ],
)
def test_ultranormalize_dist_filename(dist_filename: str, normalized: str) -> None:
    # The parameter is named `dist_filename` rather than `input` so that it
    # does not shadow the builtin of the same name.

    # normalization works as expected
    assert impl._ultranormalize_dist_filename(dist_filename) == normalized

    # normalization is a fixpoint, and normalized names are valid dist names
    assert impl._ultranormalize_dist_filename(normalized) == normalized


@pytest.mark.parametrize(
    "dist_filename",
    [
        # completely invalid
        "foo",
        # suffixes must be lowercase
        "foo-1.0.TAR.GZ",
        "foo-1.0-py3-none-any.WHL",
        # wheel: invalid separator in dist name
        "foo-bar-1.0-py3-none-any.whl",
        "foo__bar-1.0-py3-none-any.whl",
        # wheel: invalid version
        "foo-charmander-py3-none-any.whl",
        "foo-1charmander-py3-none-any.whl",
        # sdist: invalid version
        "foo-charmander.tar.gz",
        "foo-1charmander.tar.gz",
    ],
)
def test_ultranormalize_dist_filename_invalid(dist_filename: str) -> None:
    # The parameter is named `dist_filename` rather than `input` so that it
    # does not shadow the builtin of the same name.
    #
    # Every rejected filename must surface as a `ValueError`, whether it is
    # raised by the helper itself or by the underlying filename parser.
    with pytest.raises(ValueError):
        impl._ultranormalize_dist_filename(dist_filename)

0 comments on commit d6c69f5

Please sign in to comment.