Skip to content

Commit

Permalink
Merge pull request #3498 from matyasselmeci/pr/issuer-in-namespaces.S…
Browse files Browse the repository at this point in the history
…OFTWARE-5768

Issuer info in namespaces JSON (SOFTWARE-5768)
  • Loading branch information
matyasselmeci authored Nov 22, 2023
2 parents e79e187 + 6a2bde0 commit 95b3ffb
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 69 deletions.
10 changes: 10 additions & 0 deletions src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,10 @@ The JSON also contains an attribute `namespaces` that is a list of namespaces wi
Note that scopes are usually relative to the namespace path.
- `vault_server`: the Vault server for the `Vault` strategy or null
- `vault_issuer`: the Vault issuer for the `Vault` strategy (or null).
- `scitokens`: information about any `SciTokens` sections in the `Authorizations` list for that namespace (or the empty list if there are none). Each list item has:
    - `issuer`: the value of the `Issuer` field in the scitokens block
    - `base_path`: a list which is the value of the `BasePath` (or `Base Path`) field split on commas
    - `restricted_path`: a list which is the value of the `RestrictedPath` (or `Restricted Path`) field split on commas, or the empty list if unspecified

The final result looks like
```json
Expand Down Expand Up @@ -567,6 +571,7 @@ The final result looks like
"dirlisthost": null,
"path": "/xenon/PROTECTED",
"readhttps": true,
"scitokens": [],
"usetokenonread": false,
"writebackhost": null
},
Expand All @@ -582,6 +587,11 @@ The final result looks like
"dirlisthost": "https://origin-auth2001.chtc.wisc.edu:1095",
"path": "/ospool/PROTECTED",
"readhttps": true,
"scitokens": [
{
"issuer": "https://osg-htc.org/ospool",
"base_path": ["/ospool/PROTECTED", "/s3.amazonaws.com/us-east-1", "/s3.amazonaws.com/us-west-1"],
"restricted_path": []
}
],
"usetokenonread": true,
"writebackhost": "https://origin-auth2001.chtc.wisc.edu:1095"
}
Expand Down
8 changes: 8 additions & 0 deletions src/stashcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,13 @@ def get_credential_generation_dict_for_namespace(ns: Namespace) -> Optional[Dict
return info


def get_scitokens_list_for_namespace(ns: Namespace) -> List[Dict]:
    """Collect the scitokens issuer info for the .namespaces[*].scitokens attribute in the namespaces JSON.

    Each entry of ``ns.authz_list`` is asked for its namespaces scitokens
    block; falsy results (such as None) are left out of the returned list.
    """
    scitokens_blocks = []
    for authz in ns.authz_list:
        block = authz.get_namespaces_scitokens_block()
        if block:
            scitokens_blocks.append(block)
    return scitokens_blocks


def get_namespaces_info(global_data: GlobalData) -> PreJSON:
"""Return data for the /stashcache/namespaces JSON endpoint.
Expand Down Expand Up @@ -564,6 +571,7 @@ def _namespace_dict(ns: Namespace):
"caches": [],
"origins": [],
"credential_generation": get_credential_generation_dict_for_namespace(ns),
"scitokens": get_scitokens_list_for_namespace(ns),
}

for cache_name, cache_resource_obj in cache_resource_objs.items():
Expand Down
68 changes: 9 additions & 59 deletions src/tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import flask
import pytest
from typing import Dict, List
import urllib.parse
from pytest_mock import MockerFixture

Expand All @@ -17,10 +18,6 @@

from app import app, global_data
from webapp.topology import Facility, Site, Resource, ResourceGroup
from webapp.data_federation import CredentialGeneration

HOST_PORT_RE = re.compile(r"[a-zA-Z0-9.-]{3,63}:[0-9]{2,5}")
PROTOCOL_HOST_PORT_RE = re.compile(r"[a-z]+://" + HOST_PORT_RE.pattern)

INVALID_USER = dict(
username="invalid",
Expand Down Expand Up @@ -61,7 +58,9 @@
"/cache/scitokens.conf",
"/api/institutions",
"/cache/grid-mapfile",
"/origin/grid-mapfile"
"/origin/grid-mapfile",
"/osdf/namespaces",
"/stashcache/namespaces",
]


Expand Down Expand Up @@ -189,60 +188,6 @@ def test_stashcache_file(key, endpoint, fqdn, resource_stashcache_files):
else:
app.config["STASHCACHE_LEGACY_AUTH"] = old_legacy_auth

def test_stashcache_namespaces(self, client: flask.Flask):
def validate_cache_schema(cc):
assert HOST_PORT_RE.match(cc["auth_endpoint"])
assert HOST_PORT_RE.match(cc["endpoint"])
assert cc["resource"] and isinstance(cc["resource"], str)

def validate_namespace_schema(ns):
assert isinstance(ns["caches"], list) # we do have a case where it's empty
assert ns["path"].startswith("/") # implies str
assert isinstance(ns["readhttps"], bool)
assert isinstance(ns["usetokenonread"], bool)
assert ns["dirlisthost"] is None or PROTOCOL_HOST_PORT_RE.match(ns["dirlisthost"])
assert ns["writebackhost"] is None or PROTOCOL_HOST_PORT_RE.match(ns["writebackhost"])
credgen = ns["credential_generation"]
if credgen is not None:
assert isinstance(credgen["max_scope_depth"], int) and credgen["max_scope_depth"] > -1
assert credgen["strategy"] in CredentialGeneration.STRATEGIES
assert credgen["issuer"]
parsed_issuer = urllib.parse.urlparse(credgen["issuer"])
assert parsed_issuer.netloc and parsed_issuer.scheme == "https"
if credgen["vault_server"]:
assert isinstance(credgen["vault_server"], str)
if credgen["vault_issuer"]:
assert isinstance(credgen["vault_issuer"], str)
if credgen["base_path"]:
assert isinstance(credgen["base_path"], str)

response = client.get('/stashcache/namespaces')
assert response.status_code == 200
namespaces_json = response.json

assert "caches" in namespaces_json
caches = namespaces_json["caches"]
# Have a reasonable number of caches
assert len(caches) > 20
for cache in caches:
validate_cache_schema(cache)

assert "namespaces" in namespaces_json
namespaces = namespaces_json["namespaces"]
# Have a reasonable number of namespaces
assert len(namespaces) > 15

found_credgen = False
for namespace in namespaces:
if namespace["credential_generation"] is not None:
found_credgen = True
validate_namespace_schema(namespace)
if namespace["caches"]:
for cache in namespace["caches"]:
validate_cache_schema(cache)
assert found_credgen, "At least one namespace with credential_generation"


def test_institution_accept_type(self, client: flask.Flask):
"""Checks both formats output the same content"""

Expand Down Expand Up @@ -337,6 +282,11 @@ def test_cache_grid_mapfile(self, client: flask.Flask):
hashes_not_in_authfile = mapfile_hashes - authfile_hashes
assert not hashes_not_in_authfile, f"Hashes in mapfile but not in authfile: {hashes_not_in_authfile}"

def test_namespaces_json(self, client):
    """The /osdf/namespaces endpoint answers 200 with a body that has a "namespaces" key."""
    resp = client.get('/osdf/namespaces')
    assert resp.status_code == 200
    payload = resp.json
    assert "namespaces" in payload


class TestEndpointContent:
# Pre-build some test cases based on AMNH resources
Expand Down
146 changes: 136 additions & 10 deletions src/tests/test_stashcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re
from pytest_mock import MockerFixture
import time
from typing import List, Dict
import urllib, urllib.parse

# Rewrites the path so the app can be imported like it normally is
import os
Expand All @@ -18,8 +20,12 @@
from app import app, global_data
from webapp import models, topology, vos_data
from webapp.common import load_yaml_file
from webapp.data_federation import CredentialGeneration
import stashcache

HOST_PORT_RE = re.compile(r"[a-zA-Z0-9.-]{3,63}:[0-9]{2,5}")
PROTOCOL_HOST_PORT_RE = re.compile(r"[a-z]+://" + HOST_PORT_RE.pattern)

GRID_MAPPING_REGEX = re.compile(r'^"(/[^"]*CN=[^"]+")\s+([0-9a-f]{8}[.]0)$')
# ^^ the DN starts with a slash and will at least have a CN in it.
EMPTY_LINE_REGEX = re.compile(r'^\s*(#|$)') # Empty or comment-only lines
Expand All @@ -28,7 +34,9 @@
# fake origins in our test data:
TEST_ITB_HELM_ORIGIN = "helm-origin.osgdev.test.io"
TEST_SC_ORIGIN = "sc-origin.test.wisc.edu"

TEST_ORIGIN_AUTH2000 = "origin-auth2000.test.wisc.edu"
TEST_ISSUER = "https://test.wisc.edu"
TEST_BASEPATH = "/testvo"

# Some DNs I can use for testing and the hashes they map to.
# All of these were generated with osg-ca-generator on alma8
Expand All @@ -45,7 +53,8 @@
MOCK_DN_LIST = list(MOCK_DNS_AND_HASHES.keys())


def get_test_global_data(global_data: models.GlobalData) -> models.GlobalData:
@pytest.fixture
def test_global_data() -> models.GlobalData:
"""Get a copy of the global data with some entries created for testing"""
new_global_data = copy.deepcopy(global_data)

Expand Down Expand Up @@ -105,8 +114,7 @@ def test_allowedVO_excludes_LIGO_and_ANY_for_ligo_inclusion(self, client: flask.

assert spy.call_count == 0

def test_scitokens_issuer_sections(self, client: flask.Flask):
test_global_data = get_test_global_data(global_data)
def test_scitokens_issuer_sections(self, test_global_data):
origin_scitokens_conf = stashcache.generate_origin_scitokens(
test_global_data, TEST_ITB_HELM_ORIGIN)
assert origin_scitokens_conf.strip(), "Generated scitokens.conf empty"
Expand All @@ -128,9 +136,7 @@ def test_scitokens_issuer_sections(self, client: flask.Flask):
print(f"Generated origin scitokens.conf text:\n{origin_scitokens_conf}\n", file=sys.stderr)
raise

def test_scitokens_issuer_public_read_auth_write_namespaces_info(self, client: flask.Flask):
test_global_data = get_test_global_data(global_data)

def test_scitokens_issuer_public_read_auth_write_namespaces_info(self, test_global_data):
namespaces_json = stashcache.get_namespaces_info(test_global_data)
namespaces = namespaces_json["namespaces"]
testvo_PUBLIC_namespace_list = [
Expand All @@ -145,9 +151,7 @@ def test_scitokens_issuer_public_read_auth_write_namespaces_info(self, client: f
assert ns["writebackhost"] == f"https://{TEST_SC_ORIGIN}:1095", \
"writebackhost is wrong for namespace with auth write"

def test_scitokens_issuer_public_read_auth_write_scitokens_conf(self, client: flask.Flask):
test_global_data = get_test_global_data(global_data)

def test_scitokens_issuer_public_read_auth_write_scitokens_conf(self, test_global_data):
origin_scitokens_conf = stashcache.generate_origin_scitokens(
test_global_data, TEST_SC_ORIGIN)
assert origin_scitokens_conf.strip(), "Generated scitokens.conf empty"
Expand Down Expand Up @@ -222,5 +226,127 @@ def test_cache_grid_mapfile_i2_cache(self, client: flask.Flask, mocker: MockerFi
assert num_mappings > 5, f"Too few mappings found.\nFull text:\n{text}\n"


class TestNamespaces:
    """Checks on the namespaces JSON built by stashcache.get_namespaces_info()."""

    @pytest.fixture
    def namespaces_json(self, test_global_data) -> Dict:
        """The full namespaces JSON generated from the test global data."""
        return stashcache.get_namespaces_info(test_global_data)

    @pytest.fixture
    def namespaces(self, namespaces_json) -> List[Dict]:
        """The list stored under the "namespaces" key of the namespaces JSON."""
        assert "namespaces" in namespaces_json
        return namespaces_json["namespaces"]

    @staticmethod
    def validate_cache_schema(cc):
        """Assert that a cache entry has host:port endpoints and a non-empty resource name."""
        for endpoint_key in ("auth_endpoint", "endpoint"):
            assert HOST_PORT_RE.match(cc[endpoint_key])
        resource = cc["resource"]
        assert resource and isinstance(resource, str)

    @staticmethod
    def validate_namespace_schema(ns):
        """Assert that a namespace entry has the expected field types and formats."""
        assert isinstance(ns["caches"], list)  # an empty list is a legitimate value
        assert ns["path"].startswith("/")  # also proves it is a str
        for flag_key in ("readhttps", "usetokenonread"):
            assert isinstance(ns[flag_key], bool)
        for host_key in ("dirlisthost", "writebackhost"):
            host = ns[host_key]
            assert host is None or PROTOCOL_HOST_PORT_RE.match(host)

        credgen = ns["credential_generation"]
        if credgen is None:
            return

        max_scope_depth = credgen["max_scope_depth"]
        assert isinstance(max_scope_depth, int) and max_scope_depth > -1
        assert credgen["strategy"] in CredentialGeneration.STRATEGIES
        issuer = credgen["issuer"]
        assert issuer
        issuer_url = urllib.parse.urlparse(issuer)
        assert issuer_url.netloc and issuer_url.scheme == "https"
        # These fields may be unset; only type-check them when they hold a value
        for optional_key in ("vault_server", "vault_issuer", "base_path"):
            value = credgen[optional_key]
            if value:
                assert isinstance(value, str)

    def test_caches(self, namespaces_json):
        """The top-level cache list is reasonably long and every entry is well-formed."""
        assert "caches" in namespaces_json
        all_caches = namespaces_json["caches"]
        # Have a reasonable number of caches
        assert len(all_caches) > 20
        for entry in all_caches:
            self.validate_cache_schema(entry)

    def test_namespaces(self, namespaces):
        """Every namespace (and its caches) is well-formed; at least one has credential_generation."""
        # Have a reasonable number of namespaces
        assert len(namespaces) > 15

        for entry in namespaces:
            self.validate_namespace_schema(entry)
            # validate_namespace_schema already asserted that "caches" is a list
            for cache in entry["caches"]:
                self.validate_cache_schema(cache)

        has_credgen = any(
            entry["credential_generation"] is not None for entry in namespaces
        )
        assert has_credgen, "At least one namespace with credential_generation"

    @staticmethod
    def validate_scitokens_block(sci):
        """Assert that one item of a namespace's "scitokens" list is well-formed."""
        issuer = sci["issuer"]
        assert issuer
        assert isinstance(issuer, str)
        assert "://" in issuer
        # base_path must have at least 1 entry; restricted_path may be empty
        for list_key, must_be_nonempty in (("base_path", True), ("restricted_path", False)):
            paths = sci[list_key]
            assert isinstance(paths, list)
            if must_be_nonempty:
                assert paths
            for path in paths:
                assert path.startswith("/")  # implies str
                assert "," not in path

    def test_issuers_in_namespaces(self, namespaces):
        """Each namespace carries a "scitokens" list whose items are all well-formed."""
        for entry in namespaces:
            blocks = entry["scitokens"]
            assert isinstance(blocks, list)
            for block in blocks:
                self.validate_scitokens_block(block)

    def test_testvo_public_namespace(self, namespaces):
        """Spot-check the values generated for the /testvo/PUBLIC test namespace."""
        matching = [entry for entry in namespaces if entry["path"] == "/testvo/PUBLIC"]
        ns = matching[0]

        assert ns["readhttps"] is False
        assert ns["usetokenonread"] is False
        assert TEST_SC_ORIGIN in ns["writebackhost"]
        assert len(ns["caches"]) > 10
        assert len(ns["origins"]) == 2
        assert ns["credential_generation"] is None

        scitokens = ns["scitokens"]
        assert len(scitokens) == 1
        sci = scitokens[0]
        assert sci["issuer"] == TEST_ISSUER
        assert sci["base_path"] == [TEST_BASEPATH]
        assert sci["restricted_path"] == []

    def test_testvo_namespace(self, namespaces):
        """Spot-check the values generated for the /testvo test namespace."""
        matching = [entry for entry in namespaces if entry["path"] == "/testvo"]
        ns = matching[0]

        assert ns["readhttps"] is True
        assert ns["usetokenonread"] is True
        assert TEST_ORIGIN_AUTH2000 in ns["writebackhost"]
        assert TEST_ORIGIN_AUTH2000 in ns["dirlisthost"]
        assert len(ns["caches"]) > 10
        assert len(ns["origins"]) == 1

        credgen = ns["credential_generation"]
        assert credgen["base_path"] == TEST_BASEPATH
        assert credgen["strategy"] == "OAuth2"
        assert credgen["issuer"] == TEST_ISSUER
        assert credgen["max_scope_depth"] == 3

        scitokens = ns["scitokens"]
        assert len(scitokens) == 1
        sci = scitokens[0]
        assert sci["issuer"] == TEST_ISSUER
        assert sci["base_path"] == [TEST_BASEPATH]
        assert sci["restricted_path"] == []


if __name__ == '__main__':
pytest.main()
12 changes: 12 additions & 0 deletions src/webapp/data_federation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import urllib
import urllib.parse
from collections import OrderedDict
Expand Down Expand Up @@ -25,6 +26,8 @@ def get_scitokens_conf_block(self, service_name: str):
def get_grid_mapfile_line(self):
return ""

def get_namespaces_scitokens_block(self):
    """Return this auth method's entry for the namespaces JSON `scitokens` list.

    This implementation has no issuer info to report and returns None.
    """
    return None

class NullAuth(AuthMethod):
pass
Expand Down Expand Up @@ -100,6 +103,15 @@ def get_scitokens_conf_block(self, service_name: str):

return block

def get_namespaces_scitokens_block(self):
    """Return this issuer's entry for the namespaces JSON `scitokens` list.

    The result is a dict with:
    - "issuer": ``self.issuer``, unchanged
    - "base_path": ``self.base_path`` split on commas into a list
    - "restricted_path": ``self.restricted_path`` split on commas into a
      list, or the empty list if it is unset

    Whitespace around each item is dropped and empty items (for example
    from a trailing comma) are skipped, so the lists never contain blank
    or space-padded entries.
    """

    def split_paths(paths):
        # Unset/blank values yield no paths instead of [""] or a TypeError.
        if not paths:
            return []
        # Strip first: the pattern only eats whitespace next to a comma,
        # not at the very start or end of the whole value.
        items = re.split(r"\s*,\s*", paths.strip())
        return [item for item in items if item]

    return {
        "issuer": self.issuer,
        "base_path": split_paths(self.base_path),
        "restricted_path": split_paths(self.restricted_path),
    }


# TODO Use a dataclass (https://docs.python.org/3.9/library/dataclasses.html)
# once we can ditch Python 3.6; the webapp no longer supports 3.6 but some of
Expand Down

0 comments on commit 95b3ffb

Please sign in to comment.