diff --git a/_cli/sparrow_cli/commands/up.py b/_cli/sparrow_cli/commands/up.py index db44ee87a..9c4d595c3 100644 --- a/_cli/sparrow_cli/commands/up.py +++ b/_cli/sparrow_cli/commands/up.py @@ -7,7 +7,6 @@ from sparrow_cli.help import echo_messages -from ..config.environment import validate_environment from ..config import SparrowConfig from ..util import compose, cmd, log from ..config.command_cache import get_backend_help_info @@ -40,9 +39,9 @@ def sparrow_up(ctx, container="", force_recreate=False): # Validate the presence of SPARROW_SECREY_KEY only if we are bringing # the application up. Eventually, this should be wrapped into a Python # version of the `sparrow up` command. - validate_environment() cfg = ctx.find_object(SparrowConfig) + cfg.validate_environment() echo_messages(cfg) diff --git a/_cli/sparrow_cli/config/__init__.py b/_cli/sparrow_cli/config/__init__.py index 3dc99d0f3..deca18580 100644 --- a/_cli/sparrow_cli/config/__init__.py +++ b/_cli/sparrow_cli/config/__init__.py @@ -6,7 +6,7 @@ from os import environ from macrostrat.utils.shell import git_revision_info from macrostrat.utils.logs import get_logger, setup_stderr_logs -from pydantic import BaseModel +import hashlib import docker from docker.client import DockerClient from enum import Enum @@ -133,6 +133,8 @@ def __init__(self, verbose=False, offline=False): details=f"SPARROW_PATH={environ['SPARROW_PATH']}", ) + self.enhance_secret_key() + self.project_name = self.infer_project_name() self.local_frontend = self.configure_local_frontend() @@ -172,6 +174,30 @@ def __init__(self, verbose=False, offline=False): else: self.messages += prepare_compose_overrides(self) + def enhance_secret_key(self): + secret_key = environ.get("SPARROW_SECRET_KEY") + if secret_key is None: + return + if len(secret_key) < 32: + self.add_message( + id="short-secret-key", + text="SPARROW_SECRET_KEY is shorter than 32 characters", + level=Level.WARNING, + details="This should be a randomly generated string of at least 32 characters.", + ) + environ["SPARROW_SECRET_KEY"] = hashlib.md5( + secret_key.encode("utf-8") + ).hexdigest() + + def validate_environment(self): + """Validate environment for services""" + # Ensure that the SPARROW_SECRET_KEY is set and >= 32 characters long + secret_key = environ.get("SPARROW_SECRET_KEY") + if secret_key is None: + raise SparrowCommandError( + "You must set the SPARROW_SECRET_KEY environment variable." + ) + def _setup_command_path(self): _bin = self.SPARROW_PATH / "_cli" / "bin" self.bin_directories = [_bin] @@ -277,13 +303,19 @@ def configure_local_frontend(self): return False if not has_command("node"): self.add_message( - "Cannot run frontend locally without [bold]node[/bold] available.", + id="no-node", + text="Node is not installed", + details=[ + "Cannot run frontend locally without [bold]node[/bold] available." + ], level=Level.ERROR, ) _local_frontend = False if self.is_frozen: self.add_message( - "Cannot run frontend locally in a frozen environment.", + id="frozen-env", + text="Frozen environment", + details=["Cannot run frontend locally in a frozen environment."], level=Level.ERROR, ) _local_frontend = False diff --git a/_cli/sparrow_cli/config/environment.py b/_cli/sparrow_cli/config/environment.py index 52f253fe5..bb0a55143 100644 --- a/_cli/sparrow_cli/config/environment.py +++ b/_cli/sparrow_cli/config/environment.py @@ -41,7 +41,8 @@ def prepare_docker_environment(): # Have to get rid of random printing to stdout in order to not break # logging and container ID # https://github.com/docker/scan-cli-plugin/issues/149 - environ.setdefault("DOCKER_SCAN_SUGGEST", "false") + # environ.setdefault("DOCKER_SCAN_SUGGEST", "false") + environ.setdefault("DOCKER_CLI_HINTS", "false") def prepare_compose_overrides(cfg) -> List[Message]: @@ -156,12 +157,3 @@ def add_override(name): environ["COMPOSE_FILE"] = ":".join(str(c) for c in compose_files) log.info(f"Docker compose overrides: {compose_files}") return messages - - -def validate_environment(): - # Check for failing environment - if environ.get("SPARROW_SECRET_KEY") is None: - print( - "[red]You [underline]must[/underline] set [bold]SPARROW_SECRET_KEY[/bold]. Exiting..." - ) - sys.exit(1) diff --git a/backend/conftest.py b/backend/conftest.py index 4c5b13497..1d3c62682 100644 --- a/backend/conftest.py +++ b/backend/conftest.py @@ -1,11 +1,13 @@ from pytest import fixture from os import environ from starlette.testclient import TestClient +from requests import Session as RequestsSession from sparrow.core.app import Sparrow from sparrow.core.context import _setup_context from macrostrat.database.utils import wait_for_database from sparrow.core.auth.create_user import _create_user from sparrow.tests.helpers.database import testing_database +from sqlalchemy.orm import scoped_session from sqlalchemy.orm import Session from sqlalchemy import event import logging @@ -71,6 +73,43 @@ def app(pytestconfig): yield _app +def wait_for_pgrest_api(postgrest_url): + """Wait for the PostgREST API to be available""" + from requests import get + from time import sleep + + while True: + try: + get(postgrest_url) + except Exception as err: + print(err) + sleep(0.5) + else: + break + + +@fixture(scope="session") +def pg_api(): + """PostgREST API client for testing""" + postgrest_url = environ.get("SPARROW_POSTGREST_URL") + + if postgrest_url is None: + raise ValueError("SPARROW_POSTGREST_URL not set") + + wait_for_pgrest_api(postgrest_url) + + class PostgRESTSession(RequestsSession): + def request(self, method, url, *args, **kwargs): + if url.startswith("/"): + url = postgrest_url + url + return super().request(method, url, *args, **kwargs) + + session = PostgRESTSession() + session.headers.update({"Accept": "application/json"}) + yield session + session.close() + + @fixture(scope="function") def statements(db): stmts = [] @@ -84,10 +123,14 @@ def catch_queries(conn, cursor, statement, parameters, context, executemany): @fixture(scope="class") -def db(app, pytestconfig): +def db(app, pytestconfig, request): # https://docs.sqlalchemy.org/en/13/orm/session_transaction.html # https://gist.github.com/zzzeek/8443477 - if pytestconfig.option.use_isolation: + use_isolation = pytestconfig.option.use_isolation + if request.cls is not None: + use_isolation = getattr(request.cls, "use_isolation", use_isolation) + + if use_isolation: connection = app.database.engine.connect() transaction = connection.begin() session = Session(bind=connection) @@ -114,6 +157,7 @@ def restart_savepoint(session, transaction): transaction.rollback() connection.close() else: + app.database.session = scoped_session(app.database._session_factory) yield app.database diff --git a/backend/sparrow/core/auth/api.py b/backend/sparrow/core/auth/api.py index 308218bec..bdd977cb3 100644 --- a/backend/sparrow/core/auth/api.py +++ b/backend/sparrow/core/auth/api.py @@ -30,7 +30,9 @@ async def login(request, username: str, password: str): if current_user is not None and current_user.is_correct_password(password): day = 24 * 60 * 60 - token = backend.set_cookie(None, "access", max_age=day, identity=username) + token = backend.set_cookie( + None, "access", max_age=day, identity=username, role="admin" + ) resp = JSONResponse(dict(login=True, username=username, token=token)) return backend.set_login_cookies(resp, identity=username) @@ -59,7 +61,7 @@ def refresh(request): identity = backend.get_identity(request, type="refresh") response = JSONResponse(dict(login=True, refresh=True, username=identity)) - return backend.set_access_cookie(response, identity=identity) + return backend.set_access_cookie(response, identity=identity, role="admin") @requires("admin") diff --git a/backend/sparrow/core/auth/backend.py b/backend/sparrow/core/auth/backend.py index f879bb346..39dd1fb9a 100644 --- a/backend/sparrow/core/auth/backend.py +++ b/backend/sparrow/core/auth/backend.py @@ -6,6 +6,7 @@ from typing import Tuple, Any import jwt +import warnings from starlette.authentication import ( BaseUser, SimpleUser, @@ -83,7 +84,14 @@ def get_identity(self, request: Request, type="access"): if "Authorization" not in request.headers: raise AuthenticationError(f"Could not find {name} on request") else: - value = self._decode(request.headers["Authorization"]) + header = request.headers["Authorization"] + if header.startswith("Bearer "): + header = header[7:] + else: + warnings.warn( + "Authorization header did not start with 'Bearer '. This is invalid and deprecated." + ) + value = self._decode(header) else: value = self._decode(cookie) diff --git a/backend/sparrow/core/tags/fixtures/tags.sql b/backend/sparrow/core/tags/fixtures/tags.sql index e42f53e63..526fa2de2 100644 --- a/backend/sparrow/core/tags/fixtures/tags.sql +++ b/backend/sparrow/core/tags/fixtures/tags.sql @@ -41,4 +41,11 @@ CREATE TABLE IF NOT EXISTS tags.project_tag ( tag_id integer REFERENCES tags.tag(id) ON DELETE CASCADE, project_id integer REFERENCES project(id) ON DELETE CASCADE, PRIMARY KEY (tag_id, project_id) -); \ No newline at end of file +); + +-- Add privileges +GRANT USAGE ON SCHEMA tags TO view_public; +GRANT SELECT ON ALL TABLES IN SCHEMA tags TO view_public; + +GRANT USAGE ON SCHEMA tags TO admin; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA tags TO admin; \ No newline at end of file diff --git a/backend/sparrow/database/fixtures/05-security.sql b/backend/sparrow/database/fixtures/05-security.sql index 2c939a3e7..b29c65b95 100644 --- a/backend/sparrow/database/fixtures/05-security.sql +++ b/backend/sparrow/database/fixtures/05-security.sql @@ -1,6 +1,19 @@ +-- Admin user can see all data in the database CREATE ROLE admin; + +GRANT USAGE ON SCHEMA public TO admin; +GRANT USAGE ON SCHEMA vocabulary TO admin; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA public TO admin; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA vocabulary TO admin; + +-- Public user can see only public data CREATE ROLE view_public; +GRANT USAGE ON SCHEMA public TO view_public; +GRANT USAGE ON SCHEMA vocabulary TO view_public; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO view_public; +GRANT SELECT ON ALL TABLES IN SCHEMA vocabulary TO view_public; + -- Lock down core tables ALTER TABLE datum ENABLE ROW LEVEL SECURITY; @@ -74,17 +87,6 @@ FOR SELECT TO view_public USING (is_public(project)); -/* Allow admin to do everything */ -GRANT ALL ON datum TO admin; -GRANT ALL ON analysis TO admin; -GRANT ALL ON session TO admin; -GRANT ALL ON sample TO admin; -GRANT ALL ON project TO admin; - -/* Allow public to select only */ -GRANT SELECT ON datum TO view_public; -GRANT SELECT ON analysis TO view_public; -GRANT SELECT ON session TO view_public; -GRANT SELECT ON sample TO view_public; -GRANT SELECT ON project TO view_public; + + diff --git a/backend/sparrow/database/fixtures/06-views.sql b/backend/sparrow/database/fixtures/06-views.sql index 350f9f55f..db9ac1410 100644 --- a/backend/sparrow/database/fixtures/06-views.sql +++ b/backend/sparrow/database/fixtures/06-views.sql @@ -212,7 +212,7 @@ SELECT id, description, authority FROM vocabulary.material; CREATE VIEW core_view.sample AS -SELECT DISTINCT ON (s.id) +SELECT s.id, s.igsn, s.name, diff --git a/backend/sparrow/database/fixtures/07-postgrest-api.sql b/backend/sparrow/database/fixtures/07-postgrest-api.sql index a394c7798..a54683f15 100644 --- a/backend/sparrow/database/fixtures/07-postgrest-api.sql +++ b/backend/sparrow/database/fixtures/07-postgrest-api.sql @@ -1,3 +1,9 @@ +-- API-specific roles +-- https://postgrest.org/en/stable/tutorials/tut0.html +CREATE ROLE authenticator LOGIN NOINHERIT NOCREATEDB NOCREATEROLE NOSUPERUSER; + +GRANT admin TO authenticator; +GRANT view_public TO authenticator; CREATE SCHEMA IF NOT EXISTS sparrow_api; @@ -17,8 +23,10 @@ CREATE OR REPLACE VIEW sparrow_api.session AS SELECT * FROM core_view.session; CREATE OR REPLACE VIEW sparrow_api.sample AS -SELECT * FROM core_view.sample; +SELECT * FROM sample; +CREATE OR REPLACE VIEW sparrow_api.sample_data AS +SELECT * FROM core_view.sample; CREATE OR REPLACE VIEW core_view.sample_v3 AS SELECT @@ -119,4 +127,13 @@ CREATE OR REPLACE FUNCTION sparrow_api.sample_tile( ) SELECT ST_AsMVT(grouped_features) FROM grouped_features; -$$ LANGUAGE sql IMMUTABLE; \ No newline at end of file +$$ LANGUAGE sql IMMUTABLE; + + +GRANT USAGE ON SCHEMA sparrow_api TO view_public; +GRANT SELECT ON ALL TABLES IN SCHEMA sparrow_api TO view_public; +GRANT USAGE ON SCHEMA sparrow_api TO admin; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA sparrow_api TO admin; + + +NOTIFY pgrst, 'reload schema' \ No newline at end of file diff --git a/backend/sparrow/migrations/sql/add-sample-srid.sql b/backend/sparrow/migrations/sql/add-sample-srid.sql index bfba0b57a..1caac3bff 100644 --- a/backend/sparrow/migrations/sql/add-sample-srid.sql +++ b/backend/sparrow/migrations/sql/add-sample-srid.sql @@ -1,4 +1,5 @@ DROP SCHEMA IF EXISTS core_view CASCADE; +DROP SCHEMA IF EXISTS sparrow_api CASCADE; UPDATE sample SET "location" = ST_Transform("location", 4326) diff --git a/backend/sparrow/tests/test_sample_crud.py b/backend/sparrow/tests/test_sample_crud.py new file mode 100644 index 000000000..6922a48e0 --- /dev/null +++ b/backend/sparrow/tests/test_sample_crud.py @@ -0,0 +1,122 @@ +from datetime import datetime +from sqlalchemy.exc import ProgrammingError +from psycopg2.errors import InsufficientPrivilege + + +class TestSampleCRUD: + def test_sample_loading(self, db): + """We should be able to load already-existing values with their + primary keys. + """ + data = { + "name": "Soil 003", + "location": {"type": "Point", "coordinates": [-5, 5]}, + "sessions": [ + { + "date": str(datetime.now()), + "name": "Session primary key loading", + "analysis": [ + { + "analysis_type": "Soil aliquot pyrolysis", + "session_index": 0, + "datum": [ + { + "value": 0.280, + "error": 0.021, + "type": { + "parameter": "soil water content", + "unit": "weight %", + }, + } + ], + "attributes": [ + {"name": "Soil horizon", "value": "A"}, + {"name": "Soil depth range", "value": "0-10 cm"}, + ], + } + ], + "attributes": [ + {"name": "Operator", "value": "Wendy Sørensen"}, + {"name": "Operation mode", "value": "Vacuum stabilization"}, + ], + "instrument": {"name": "Postnova AF2000"}, + } + ], + "attributes": [ + {"name": "Soil type", "value": "Sandy loam"}, + {"name": "Soil horizon", "value": "A"}, + {"name": "Soil depth range", "value": "0-10 cm"}, + ], + } + + db.load_data("sample", data) + + def test_sample_deletion_unauthorized(self, db): + """We should not be able to delete a sample without the right permissions.""" + + db.session.execute("SET ROLE 'view_public'") + Sample = db.model.sample + model = db.session.query(Sample).filter_by(name="Soil 003").one() + try: + db.session.delete(model) + except ProgrammingError as err: + assert isinstance(err.orig, InsufficientPrivilege) + assert "permission denied for table sample" in str(err.orig) + + def test_sample_deletion(self, db): + """We should be able to delete a sample and all associated data.""" + db.session.rollback() + Sample = db.model.sample + model = db.session.query(Sample).filter_by(name="Soil 003").one() + db.session.delete(model) + + +class TestSamplePostgRESTAPI: + # When we are including external APIs like PostgREST, we need to + # disable the database isolation fixture, since it will prevent + # the external API from seeing the data we have loaded. + use_isolation = False + + def test_load_data(self, client, token): + data = {"name": "test sample 1"} + res = client.put( + "/api/v2/import-data/models/sample", + headers={"Authorization": "Bearer " + token}, + json={"data": data, "filename": None}, + ) + assert res.status_code == 200 + + def test_postgrest_api_accessible(self, pg_api): + """We should be able to access the PostgREST API.""" + res = pg_api.get("/") + assert res.status_code == 200 + + def test_sample_api_get(self, pg_api): + """We should be able to delete a sample via the API.""" + res = pg_api.get("/sample", params={"name": "eq.test sample 1"}) + assert res.status_code == 200 + data = res.json() + assert len(data) == 1 + name = data[0]["name"] + assert name == "test sample 1" + + def test_sample_api_delete_failed(self, pg_api): + """We should be prevented from deleting a sample via API without the right permissions.""" + res = pg_api.delete("/sample", params={"name": "eq.test sample 1"}) + assert res.status_code == 401 + data = res.json() + assert data["message"] == "permission denied for view sample" + + def test_sample_api_delete_authenticated(self, pg_api, token): + res = pg_api.delete( + "/sample", + params={"name": "eq.test sample 1"}, + headers={"Authorization": "Bearer " + token}, + ) + assert res.status_code == 204 + + def test_sample_was_deleted(self, pg_api): + res = pg_api.get("/sample", params={"name": "eq.test sample 1"}) + assert res.status_code == 200 + data = res.json() + assert len(data) == 0 diff --git a/compose-overrides/docker-compose.production.yaml b/compose-overrides/docker-compose.production.yaml index 26905b7d3..e14b8eafe 100644 --- a/compose-overrides/docker-compose.production.yaml +++ b/compose-overrides/docker-compose.production.yaml @@ -10,3 +10,5 @@ services: restart: unless-stopped db: restart: unless-stopped + api: + restart: unless-stopped diff --git a/docker-compose.testing.yaml b/docker-compose.testing.yaml index 8335850ba..8bf5dcea9 100644 --- a/docker-compose.testing.yaml +++ b/docker-compose.testing.yaml @@ -6,22 +6,28 @@ version: "3.6" x-backend: &x-backend - build: - context: ./backend - target: testing + image: ${SPARROW_BACKEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/backend} # Make sure we get colorized terminal output depends_on: - db + - pg_api environment: - - SPARROW_SECRET_KEY=test_secret + - SPARROW_SECRET_KEY=test_secret_must_be_at_least_32_characters - SPARROW_CACHE_DATABASE_MODELS # Whether we should enable a worker pool for running tasks. # If we don't, tasks must be run via the command line. - SPARROW_TASK_WORKER=0 - SPARROW_TASK_BROKER=redis://broker:6379/0 + - SPARROW_POSTGREST_URL=http://pg_api:3000 services: backend: <<: *x-backend + # We place the build context here rather than in the shared section + # so we don't build the backend image twice before starting tests. + build: + context: backend + cache_from: + - ${SPARROW_BACKEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/backend} command: pytest /app/sparrow_tests --teardown tty: true volumes: @@ -36,9 +42,29 @@ services: expose: - 5432 environment: - - POSTGRES_DB=sparrow + - POSTGRES_DB=sparrow_test - PGUSER=postgres - POSTGRES_HOST_AUTH_METHOD=trust + healthcheck: + test: ["CMD", "pg_isready", "-U", "postgres"] + interval: 10s + timeout: 5s + retries: 3 + pg_api: + image: postgrest/postgrest:v11.2.0 + environment: + - PGRST_DB_URI=postgresql://authenticator:@db:5432/sparrow_test + - PGRST_DB_SCHEMAS=sparrow_api + - PGRST_DB_ANON_ROLE=view_public + - PGRST_JWT_SECRET=test_secret_must_be_at_least_32_characters + - PGRST_DB_MAX_ROWS=100 + - PGRST_SERVER_PORT=3000 + expose: + - 3000 + depends_on: + - db + ports: + - "${SPARROW_TEST_POSTGREST_PORT:-3001}:3000" # Additional testing of broker-based functionality. # Broker and worker are for testing task-based functionality # We disable this for minimal tests, but we can test with the brokers diff --git a/docker-compose.yaml b/docker-compose.yaml index 75ad6bfda..cd021dbf7 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -133,13 +133,14 @@ services: interval: 10s timeout: 5s retries: 3 - api: - image: postgrest/postgrest:v10.0.0 + pg_api: + image: postgrest/postgrest:v11.2.0 environment: - - PGRST_DB_URI=postgresql://postgres:@db:5432/sparrow + - PGRST_DB_URI=postgresql://authenticator:@db:5432/sparrow - PGRST_DB_SCHEMAS=sparrow_api - - PGRST_DB_ANON_ROLE=postgres - - PGRST_DB_MAX_ROWS=1000 + - PGRST_DB_ANON_ROLE=view_public + - PGRST_JWT_SECRET=${SPARROW_SECRET_KEY} + - PGRST_DB_MAX_ROWS=100 profiles: - core depends_on: diff --git a/docs/content/authentication.md b/docs/content/authentication.md new file mode 100644 index 000000000..9b002eee1 --- /dev/null +++ b/docs/content/authentication.md @@ -0,0 +1,5 @@ +# Authentication and Security + +Sparrow's authentication system is based on [PostgREST's][postgrest] JWT system. + +[postgrest]: https://postgrest.org/en/stable/tutorials/tut0.html diff --git a/nginx-config/locations/core.conf b/nginx-config/locations/core.conf index 125ea58ee..f4b0383f8 100644 --- a/nginx-config/locations/core.conf +++ b/nginx-config/locations/core.conf @@ -85,7 +85,7 @@ location /error/ { # Configuration for PostgREST based internal API location ~ ^/api/v3/(.*)$ { resolver 127.0.0.11 valid=30s; # Docker's DNS server - set $upstream http://api:3000/$1$is_args$args; + set $upstream http://pg_api:3000/$1$is_args$args; proxy_pass $upstream; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade;