Skip to content

Commit

Permalink
Move URL verification logic into its own file
Browse files Browse the repository at this point in the history
  • Loading branch information
facutuesca committed Aug 29, 2024
1 parent 1e0357d commit 57d6d1b
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 183 deletions.
135 changes: 0 additions & 135 deletions tests/unit/forklift/test_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4880,138 +4880,3 @@ def test_missing_trailing_slash_redirect(pyramid_request):
"/legacy/ (with a trailing slash)"
)
assert resp.headers["Location"] == "/legacy/"


@pytest.mark.parametrize(
("url", "project_name", "project_normalized_name", "expected"),
[
( # PyPI /project/ case
"https://pypi.org/project/myproject",
"myproject",
"myproject",
True,
),
( # PyPI /p/ case
"https://pypi.org/p/myproject",
"myproject",
"myproject",
True,
),
( # pypi.python.org /project/ case
"https://pypi.python.org/project/myproject",
"myproject",
"myproject",
True,
),
( # pypi.python.org /p/ case
"https://pypi.python.org/p/myproject",
"myproject",
"myproject",
True,
),
( # python.org/pypi/ case
"https://python.org/pypi/myproject",
"myproject",
"myproject",
True,
),
( # Normalized name differs from URL
"https://pypi.org/project/my_project",
"my_project",
"my-project",
True,
),
( # Normalized name same as URL
"https://pypi.org/project/my-project",
"my_project",
"my-project",
True,
),
( # Trailing slash
"https://pypi.org/project/myproject/",
"myproject",
"myproject",
True,
),
( # Domains are case insensitive
"https://PyPI.org/project/myproject",
"myproject",
"myproject",
True,
),
( # Paths are case-sensitive
"https://pypi.org/Project/myproject",
"myproject",
"myproject",
False,
),
( # Wrong domain
"https://example.com/project/myproject",
"myproject",
"myproject",
False,
),
( # Wrong path
"https://pypi.org/something/myproject",
"myproject",
"myproject",
False,
),
( # Path has extra components
"https://pypi.org/something/myproject/something",
"myproject",
"myproject",
False,
),
( # Wrong package name
"https://pypi.org/project/otherproject",
"myproject",
"myproject",
False,
),
( # Similar package name
"https://pypi.org/project/myproject",
"myproject2",
"myproject2",
False,
),
( # Similar package name
"https://pypi.org/project/myproject2",
"myproject",
"myproject",
False,
),
],
)
def test_verify_url_pypi(url, project_name, project_normalized_name, expected):
assert (
legacy._verify_url_pypi(url, project_name, project_normalized_name) == expected
)


def test_verify_url():
# `_verify_url` is just a helper function that calls `_verify_url_pypi` and
# `OIDCPublisher.verify_url`, where the actual verification logic lives.
publisher_verifies = pretend.stub(verify_url=lambda url: True)
publisher_fails = pretend.stub(verify_url=lambda url: False)

assert legacy._verify_url(
url="https://pypi.org/project/myproject/",
publisher=None,
project_name="myproject",
project_normalized_name="myproject",
)

assert legacy._verify_url(
url="https://github.com/org/myproject/issues",
publisher=publisher_verifies,
project_name="myproject",
project_normalized_name="myproject",
)

assert not legacy._verify_url(
url="example.com",
publisher=publisher_fails,
project_name="myproject",
project_normalized_name="myproject",
)
149 changes: 149 additions & 0 deletions tests/unit/packaging/test_metadata_verification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pretend
import pytest

from warehouse.packaging.metadata_verification import _verify_url_pypi, verify_url


@pytest.mark.parametrize(
("url", "project_name", "project_normalized_name", "expected"),
[
( # PyPI /project/ case
"https://pypi.org/project/myproject",
"myproject",
"myproject",
True,
),
( # PyPI /p/ case
"https://pypi.org/p/myproject",
"myproject",
"myproject",
True,
),
( # pypi.python.org /project/ case
"https://pypi.python.org/project/myproject",
"myproject",
"myproject",
True,
),
( # pypi.python.org /p/ case
"https://pypi.python.org/p/myproject",
"myproject",
"myproject",
True,
),
( # python.org/pypi/ case
"https://python.org/pypi/myproject",
"myproject",
"myproject",
True,
),
( # Normalized name differs from URL
"https://pypi.org/project/my_project",
"my_project",
"my-project",
True,
),
( # Normalized name same as URL
"https://pypi.org/project/my-project",
"my_project",
"my-project",
True,
),
( # Trailing slash
"https://pypi.org/project/myproject/",
"myproject",
"myproject",
True,
),
( # Domains are case insensitive
"https://PyPI.org/project/myproject",
"myproject",
"myproject",
True,
),
( # Paths are case-sensitive
"https://pypi.org/Project/myproject",
"myproject",
"myproject",
False,
),
( # Wrong domain
"https://example.com/project/myproject",
"myproject",
"myproject",
False,
),
( # Wrong path
"https://pypi.org/something/myproject",
"myproject",
"myproject",
False,
),
( # Path has extra components
"https://pypi.org/something/myproject/something",
"myproject",
"myproject",
False,
),
( # Wrong package name
"https://pypi.org/project/otherproject",
"myproject",
"myproject",
False,
),
( # Similar package name
"https://pypi.org/project/myproject",
"myproject2",
"myproject2",
False,
),
( # Similar package name
"https://pypi.org/project/myproject2",
"myproject",
"myproject",
False,
),
],
)
def test_verify_url_pypi(url, project_name, project_normalized_name, expected):
assert _verify_url_pypi(url, project_name, project_normalized_name) == expected


def test_verify_url():
# `verify_url` is just a helper function that calls `_verify_url_pypi` and
# `OIDCPublisher.verify_url`, where the actual verification logic lives.
publisher_verifies = pretend.stub(verify_url=lambda url: True)
publisher_fails = pretend.stub(verify_url=lambda url: False)

assert verify_url(
url="https://pypi.org/project/myproject/",
publisher=None,
project_name="myproject",
project_normalized_name="myproject",
)

assert verify_url(
url="https://github.com/org/myproject/issues",
publisher=publisher_verifies,
project_name="myproject",
project_normalized_name="myproject",
)

assert not verify_url(
url="example.com",
publisher=publisher_fails,
project_name="myproject",
project_normalized_name="myproject",
)
52 changes: 4 additions & 48 deletions warehouse/forklift/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import packaging.utils
import packaging.version
import packaging_legacy.version
import rfc3986
import sentry_sdk
import wtforms
import wtforms.validators
Expand Down Expand Up @@ -62,9 +61,9 @@
from warehouse.forklift.forms import UploadForm, _filetype_extension_mapping
from warehouse.macaroons.models import Macaroon
from warehouse.metrics import IMetricsService
from warehouse.oidc.models import OIDCPublisher
from warehouse.oidc.views import is_from_reusable_workflow
from warehouse.packaging.interfaces import IFileStorage, IProjectService
from warehouse.packaging.metadata_verification import verify_url
from warehouse.packaging.models import (
Dependency,
DependencyKind,
Expand Down Expand Up @@ -459,49 +458,6 @@ def _process_attestations(request, distribution: Distribution):
metrics.increment("warehouse.upload.attestations.ok")


_pypi_project_urls = [
"https://pypi.org/project/",
"https://pypi.org/p/",
"https://pypi.python.org/project/",
"https://pypi.python.org/p/",
"https://python.org/pypi/",
]


def _verify_url_pypi(url: str, project_name: str, project_normalized_name: str) -> bool:
candidate_urls = (
f"{pypi_project_url}{name}{optional_slash}"
for pypi_project_url in _pypi_project_urls
for name in {project_name, project_normalized_name}
for optional_slash in ["/", ""]
)

user_uri = rfc3986.api.uri_reference(url).normalize()
return any(
user_uri == rfc3986.api.uri_reference(candidate_url).normalize()
for candidate_url in candidate_urls
)


def _verify_url(
url: str,
publisher: OIDCPublisher | None,
project_name: str,
project_normalized_name: str,
) -> bool:
if _verify_url_pypi(
url=url,
project_name=project_name,
project_normalized_name=project_normalized_name,
):
return True

if not publisher:
return False

return publisher.verify_url(url)


def _sort_releases(request: Request, project: Project):
releases = (
request.db.query(Release)
Expand Down Expand Up @@ -869,7 +825,7 @@ def file_upload(request):
else {
name: {
"url": url,
"verified": _verify_url(
"verified": verify_url(
url=url,
publisher=request.oidc_publisher,
project_name=project.name,
Expand All @@ -883,7 +839,7 @@ def file_upload(request):
home_page_verified = (
False
if home_page is None
else _verify_url(
else verify_url(
url=home_page,
publisher=request.oidc_publisher,
project_name=project.name,
Expand All @@ -895,7 +851,7 @@ def file_upload(request):
download_url_verified = (
False
if download_url is None
else _verify_url(
else verify_url(
url=download_url,
publisher=request.oidc_publisher,
project_name=project.name,
Expand Down
Loading

0 comments on commit 57d6d1b

Please sign in to comment.