From 57d6d1b3306295638548cd7c8ff6224e129647df Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Thu, 29 Aug 2024 14:38:02 +0200 Subject: [PATCH] Move URL verification logic into its own file --- tests/unit/forklift/test_legacy.py | 135 ---------------- .../packaging/test_metadata_verification.py | 149 ++++++++++++++++++ warehouse/forklift/legacy.py | 52 +----- warehouse/packaging/metadata_verification.py | 67 ++++++++ 4 files changed, 220 insertions(+), 183 deletions(-) create mode 100644 tests/unit/packaging/test_metadata_verification.py create mode 100644 warehouse/packaging/metadata_verification.py diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 20beb0556347..739dd4081efc 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -4880,138 +4880,3 @@ def test_missing_trailing_slash_redirect(pyramid_request): "/legacy/ (with a trailing slash)" ) assert resp.headers["Location"] == "/legacy/" - - -@pytest.mark.parametrize( - ("url", "project_name", "project_normalized_name", "expected"), - [ - ( # PyPI /project/ case - "https://pypi.org/project/myproject", - "myproject", - "myproject", - True, - ), - ( # PyPI /p/ case - "https://pypi.org/p/myproject", - "myproject", - "myproject", - True, - ), - ( # pypi.python.org /project/ case - "https://pypi.python.org/project/myproject", - "myproject", - "myproject", - True, - ), - ( # pypi.python.org /p/ case - "https://pypi.python.org/p/myproject", - "myproject", - "myproject", - True, - ), - ( # python.org/pypi/ case - "https://python.org/pypi/myproject", - "myproject", - "myproject", - True, - ), - ( # Normalized name differs from URL - "https://pypi.org/project/my_project", - "my_project", - "my-project", - True, - ), - ( # Normalized name same as URL - "https://pypi.org/project/my-project", - "my_project", - "my-project", - True, - ), - ( # Trailing slash - "https://pypi.org/project/myproject/", - "myproject", - "myproject", - True, - ), - ( # Domains are case insensitive - "https://PyPI.org/project/myproject", - "myproject", - "myproject", - True, - ), - ( # Paths are case-sensitive - "https://pypi.org/Project/myproject", - "myproject", - "myproject", - False, - ), - ( # Wrong domain - "https://example.com/project/myproject", - "myproject", - "myproject", - False, - ), - ( # Wrong path - "https://pypi.org/something/myproject", - "myproject", - "myproject", - False, - ), - ( # Path has extra components - "https://pypi.org/something/myproject/something", - "myproject", - "myproject", - False, - ), - ( # Wrong package name - "https://pypi.org/project/otherproject", - "myproject", - "myproject", - False, - ), - ( # Similar package name - "https://pypi.org/project/myproject", - "myproject2", - "myproject2", - False, - ), - ( # Similar package name - "https://pypi.org/project/myproject2", - "myproject", - "myproject", - False, - ), - ], -) -def test_verify_url_pypi(url, project_name, project_normalized_name, expected): - assert ( - legacy._verify_url_pypi(url, project_name, project_normalized_name) == expected - ) - - -def test_verify_url(): - # `_verify_url` is just a helper function that calls `_verify_url_pypi` and - # `OIDCPublisher.verify_url`, where the actual verification logic lives. - publisher_verifies = pretend.stub(verify_url=lambda url: True) - publisher_fails = pretend.stub(verify_url=lambda url: False) - - assert legacy._verify_url( - url="https://pypi.org/project/myproject/", - publisher=None, - project_name="myproject", - project_normalized_name="myproject", - ) - - assert legacy._verify_url( - url="https://github.com/org/myproject/issues", - publisher=publisher_verifies, - project_name="myproject", - project_normalized_name="myproject", - ) - - assert not legacy._verify_url( - url="example.com", - publisher=publisher_fails, - project_name="myproject", - project_normalized_name="myproject", - ) diff --git a/tests/unit/packaging/test_metadata_verification.py b/tests/unit/packaging/test_metadata_verification.py new file mode 100644 index 000000000000..27f6f430c70d --- /dev/null +++ b/tests/unit/packaging/test_metadata_verification.py @@ -0,0 +1,149 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pretend +import pytest + +from warehouse.packaging.metadata_verification import _verify_url_pypi, verify_url + + +@pytest.mark.parametrize( + ("url", "project_name", "project_normalized_name", "expected"), + [ + ( # PyPI /project/ case + "https://pypi.org/project/myproject", + "myproject", + "myproject", + True, + ), + ( # PyPI /p/ case + "https://pypi.org/p/myproject", + "myproject", + "myproject", + True, + ), + ( # pypi.python.org /project/ case + "https://pypi.python.org/project/myproject", + "myproject", + "myproject", + True, + ), + ( # pypi.python.org /p/ case + "https://pypi.python.org/p/myproject", + "myproject", + "myproject", + True, + ), + ( # python.org/pypi/ case + "https://python.org/pypi/myproject", + "myproject", + "myproject", + True, + ), + ( # Normalized name differs from URL + "https://pypi.org/project/my_project", + "my_project", + "my-project", + True, + ), + ( # Normalized name same as URL + "https://pypi.org/project/my-project", + "my_project", + "my-project", + True, + ), + ( # Trailing slash + "https://pypi.org/project/myproject/", + "myproject", + "myproject", + True, + ), + ( # Domains are case insensitive + "https://PyPI.org/project/myproject", + "myproject", + "myproject", + True, + ), + ( # Paths are case-sensitive + "https://pypi.org/Project/myproject", + "myproject", + "myproject", + False, + ), + ( # Wrong domain + "https://example.com/project/myproject", + "myproject", + "myproject", + False, + ), + ( # Wrong path + "https://pypi.org/something/myproject", + "myproject", + "myproject", + False, + ), + ( # Path has extra components + "https://pypi.org/something/myproject/something", + "myproject", + "myproject", + False, + ), + ( # Wrong package name + "https://pypi.org/project/otherproject", + "myproject", + "myproject", + False, + ), + ( # Similar package name + "https://pypi.org/project/myproject", + "myproject2", + "myproject2", + False, + ), + ( # Similar package name + "https://pypi.org/project/myproject2", + "myproject", + "myproject", + False, + ), + ], +) +def test_verify_url_pypi(url, project_name, project_normalized_name, expected): + assert _verify_url_pypi(url, project_name, project_normalized_name) == expected + + +def test_verify_url(): + # `verify_url` is just a helper function that calls `_verify_url_pypi` and + # `OIDCPublisher.verify_url`, where the actual verification logic lives. + publisher_verifies = pretend.stub(verify_url=lambda url: True) + publisher_fails = pretend.stub(verify_url=lambda url: False) + + assert verify_url( + url="https://pypi.org/project/myproject/", + publisher=None, + project_name="myproject", + project_normalized_name="myproject", + ) + + assert verify_url( + url="https://github.com/org/myproject/issues", + publisher=publisher_verifies, + project_name="myproject", + project_normalized_name="myproject", + ) + + assert not verify_url( + url="example.com", + publisher=publisher_fails, + project_name="myproject", + project_normalized_name="myproject", + ) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index bc8f0433ee51..d8e9ca7f2c7a 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -25,7 +25,6 @@ import packaging.utils import packaging.version import packaging_legacy.version -import rfc3986 import sentry_sdk import wtforms import wtforms.validators @@ -62,9 +61,9 @@ from warehouse.forklift.forms import UploadForm, _filetype_extension_mapping from warehouse.macaroons.models import Macaroon from warehouse.metrics import IMetricsService -from warehouse.oidc.models import OIDCPublisher from warehouse.oidc.views import is_from_reusable_workflow from warehouse.packaging.interfaces import IFileStorage, IProjectService +from warehouse.packaging.metadata_verification import verify_url from warehouse.packaging.models import ( Dependency, DependencyKind, @@ -459,49 +458,6 @@ def _process_attestations(request, distribution: Distribution): metrics.increment("warehouse.upload.attestations.ok") -_pypi_project_urls = [ - "https://pypi.org/project/", - "https://pypi.org/p/", - "https://pypi.python.org/project/", - "https://pypi.python.org/p/", - "https://python.org/pypi/", -] - - -def _verify_url_pypi(url: str, project_name: str, project_normalized_name: str) -> bool: - candidate_urls = ( - f"{pypi_project_url}{name}{optional_slash}" - for pypi_project_url in _pypi_project_urls - for name in {project_name, project_normalized_name} - for optional_slash in ["/", ""] - ) - - user_uri = rfc3986.api.uri_reference(url).normalize() - return any( - user_uri == rfc3986.api.uri_reference(candidate_url).normalize() - for candidate_url in candidate_urls - ) - - -def _verify_url( - url: str, - publisher: OIDCPublisher | None, - project_name: str, - project_normalized_name: str, -) -> bool: - if _verify_url_pypi( - url=url, - project_name=project_name, - project_normalized_name=project_normalized_name, - ): - return True - - if not publisher: - return False - - return publisher.verify_url(url) - - def _sort_releases(request: Request, project: Project): releases = ( request.db.query(Release) @@ -869,7 +825,7 @@ def file_upload(request): else { name: { "url": url, - "verified": _verify_url( + "verified": verify_url( url=url, publisher=request.oidc_publisher, project_name=project.name, @@ -883,7 +839,7 @@ def file_upload(request): home_page_verified = ( False if home_page is None - else _verify_url( + else verify_url( url=home_page, publisher=request.oidc_publisher, project_name=project.name, @@ -895,7 +851,7 @@ def file_upload(request): download_url_verified = ( False if download_url is None - else _verify_url( + else verify_url( url=download_url, publisher=request.oidc_publisher, project_name=project.name, diff --git a/warehouse/packaging/metadata_verification.py b/warehouse/packaging/metadata_verification.py new file mode 100644 index 000000000000..77572948285b --- /dev/null +++ b/warehouse/packaging/metadata_verification.py @@ -0,0 +1,67 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import rfc3986 + +from warehouse.oidc.models import OIDCPublisher + +_pypi_project_urls = [ + "https://pypi.org/project/", + "https://pypi.org/p/", + "https://pypi.python.org/project/", + "https://pypi.python.org/p/", + "https://python.org/pypi/", +] + + +def _verify_url_pypi(url: str, project_name: str, project_normalized_name: str) -> bool: + """ + Check if a URL matches any of the PyPI URLs for a specific project + """ + candidate_urls = ( + f"{pypi_project_url}{name}{optional_slash}" + for pypi_project_url in _pypi_project_urls + for name in {project_name, project_normalized_name} + for optional_slash in ["/", ""] + ) + + user_uri = rfc3986.api.uri_reference(url).normalize() + return any( + user_uri == rfc3986.api.uri_reference(candidate_url).normalize() + for candidate_url in candidate_urls + ) + + +def verify_url( + url: str, + publisher: OIDCPublisher | None, + project_name: str, + project_normalized_name: str, +) -> bool: + """ + Verify a URL included in a project's metadata + + This function is intended to be used during file uploads, checking the URLs + included in the metadata against PyPI URLs for that project and against the Trusted + Publisher used to authenticate the upload (if any). + """ + if _verify_url_pypi( + url=url, + project_name=project_name, + project_normalized_name=project_normalized_name, + ): + return True + + if not publisher: + return False + + return publisher.verify_url(url)