From 1d2921cf1fceebc16cc2baae3504eacbf7dd7a79 Mon Sep 17 00:00:00 2001 From: Josh Borrow Date: Fri, 16 Feb 2024 13:18:46 -0500 Subject: [PATCH] Add admin endpoint for migration --- hera_librarian/client.py | 73 +++++++++++++++ hera_librarian/models/admin.py | 48 ++++++++++ librarian_server/__init__.py | 2 + librarian_server/api/__init__.py | 1 + librarian_server/api/admin.py | 89 +++++++++++++++++++ .../test_database_reconstruction.py | 2 - tests/server_unit_test/test_admin.py | 50 +++++++++++ 7 files changed, 263 insertions(+), 2 deletions(-) create mode 100644 hera_librarian/models/admin.py create mode 100644 librarian_server/api/admin.py create mode 100644 tests/server_unit_test/test_admin.py diff --git a/hera_librarian/client.py b/hera_librarian/client.py index 9fd425f..75b2eaa 100644 --- a/hera_librarian/client.py +++ b/hera_librarian/client.py @@ -13,6 +13,11 @@ from .deletion import DeletionPolicy from .errors import ErrorCategory, ErrorSeverity from .exceptions import LibrarianError, LibrarianHTTPError +from .models.admin import ( + AdminCreateFileRequest, + AdminCreateFileResponse, + AdminRequestFailedResponse, +) from .models.errors import ( ErrorClearRequest, ErrorClearResponse, @@ -660,3 +665,71 @@ def get_user(self, username: str) -> UserAdministrationGetResponse: raise ValueError(e.reason) else: # pragma: no cover raise e + + def add_file_row( + self, + name: str, + create_time: datetime, + size: int, + checksum: str, + uploader: str, + path: str, + store_name: str, + ) -> AdminCreateFileResponse: + """ + Add a file row for an already existing file on the store. + This is useful in the case that you need to re-build the + librarian database in place. This is inherently a lossy process. + + Parameters + ---------- + name : str + The unique filename of this file. + create_time : datetime + The time at which this file was placed on the store. + size : int + Size in bytes of the file + checksum : str + Checksum (MD5 hash) of the file. + uploader : str + Uploader of the file. 
+ path : str + Path to the instance (full) on the store. + store_name : str + The name of the store that this file is on. + + Returns + ------- + AdminCreateFileResponse + The response from the server. + + Raises + ------ + LibrarianError + If the store does not exist, the file is not present at the given path, or the request otherwise fails. + """ + + try: + response: AdminCreateFileResponse = self.post( + endpoint="admin/add_file", + request=AdminCreateFileRequest( + name=name, + create_time=create_time, + size=size, + checksum=checksum, + uploader=uploader, + source=self.user, + path=path, + store_name=store_name, + ), + response=AdminCreateFileResponse, + ) + except LibrarianHTTPError as e: + if e.status_code == 400 and "Store" in e.reason: + raise LibrarianError(e.reason) from e + if e.status_code == 400 and "File" in e.reason: + raise LibrarianError(e.reason) from e + else: + raise LibrarianError(f"Unknown error. {e}") from e + + return response diff --git a/hera_librarian/models/admin.py b/hera_librarian/models/admin.py new file mode 100644 index 0000000..63ce5eb --- /dev/null +++ b/hera_librarian/models/admin.py @@ -0,0 +1,48 @@ +""" +Pydantic models for the admin endpoints +""" + +from datetime import datetime + +from pydantic import BaseModel + + +class AdminCreateFileRequest(BaseModel): + # File properties + name: str + "The unique filename of this file." + create_time: datetime + "The time at which this file was placed on the store." + size: int + "Size in bytes of the file" + checksum: str + "Checksum (MD5 hash) of the file." + + uploader: str + "Uploader of the file." + source: str + "Source of the file." + + # Instance properties + path: str + "Path to the instance (full) on the store." + store_name: str + "The name of the store that this file is on." + + +class AdminCreateFileResponse(BaseModel): + already_exists: bool = False + "In the case that the file already exists, this will be true." + + file_exists: bool = False + "If the file exists or not." + + success: bool = False + "Whether we were totally successful." 
+ + +class AdminRequestFailedResponse(BaseModel): + reason: str + "The reason why the request failed." + suggested_remedy: str + "A suggested remedy for the failure." diff --git a/librarian_server/__init__.py b/librarian_server/__init__.py index ca83890..3277c65 100644 --- a/librarian_server/__init__.py +++ b/librarian_server/__init__.py @@ -21,6 +21,7 @@ def main() -> FastAPI: log.debug("Adding API router.") from .api import ( + admin_router, clone_router, error_router, ping_router, @@ -35,5 +36,6 @@ def main() -> FastAPI: app.include_router(search_router) app.include_router(error_router) app.include_router(users_router) + app.include_router(admin_router) return app diff --git a/librarian_server/api/__init__.py b/librarian_server/api/__init__.py index 7e38b02..b5b45b1 100644 --- a/librarian_server/api/__init__.py +++ b/librarian_server/api/__init__.py @@ -5,6 +5,7 @@ these endpoints with pydantic models. """ +from .admin import router as admin_router from .clone import router as clone_router from .errors import router as error_router from .ping import router as ping_router diff --git a/librarian_server/api/admin.py b/librarian_server/api/admin.py new file mode 100644 index 0000000..23aaf64 --- /dev/null +++ b/librarian_server/api/admin.py @@ -0,0 +1,89 @@ +""" +Administration endpoints. Used for managing the librarian server, +and handling in-place updates to the server (e.g. adding File and +Instance objects to the database, updating the database, etc. without +actually ingesting files). 
+""" + +from pathlib import Path + +from fastapi import APIRouter, Depends, Response, status +from sqlalchemy import select +from sqlalchemy.orm import Session + +from hera_librarian.deletion import DeletionPolicy +from hera_librarian.models.admin import ( + AdminCreateFileRequest, + AdminCreateFileResponse, + AdminRequestFailedResponse, +) + +from ..database import yield_session +from ..orm import File, Instance, StoreMetadata +from .auth import AdminUserDependency + +router = APIRouter(prefix="/api/v2/admin") + + +@router.post("/add_file") +def add_file( + request: AdminCreateFileRequest, + user: AdminUserDependency, + response: Response, + session: Session = Depends(yield_session), +): + """ + Creates File and Instance rows for a file that is already on disk. + Responds 400 if the named store is unknown or the path does not exist; + reports already_exists (changing nothing) if the File row is present. + """ + + # Look up the store by name; an unknown store is reported as a 400. + store = ( + session.query(StoreMetadata).filter_by(name=request.store_name).one_or_none() + ) + + if store is None: + response.status_code = status.HTTP_400_BAD_REQUEST + return AdminRequestFailedResponse( + reason=f"Store {request.store_name} does not exist.", + suggested_remedy="Create the store first. Maybe you need to run DB migration?", + ) + + # A File row with this name already exists: report it and change nothing. + existing_file = session.get(File, request.name) + + if existing_file is not None: + return AdminCreateFileResponse(already_exists=True) + + # Verify the given path exists on the filesystem visible to this server. + full_path = Path(request.path) + + if not full_path.exists(): + response.status_code = status.HTTP_400_BAD_REQUEST + return AdminRequestFailedResponse( + reason=f"File {full_path} does not exist.", + suggested_remedy="Create the file first, or make sure that you are using a local store.", + ) + + # Create the file and instance. NOTE(review): request.create_time is not passed to File.new_file - confirm whether the original timestamp should be kept. 
+ new_file = File.new_file( + filename=request.name, + size=request.size, + checksum=request.checksum, + uploader=request.uploader, + source=request.source, + ) + + new_instance = Instance.new_instance( + path=request.path, + file=new_file, + deletion_policy=DeletionPolicy.DISALLOWED, + store=store, + ) + + session.add_all([new_file, new_instance]) + + session.commit() + + return AdminCreateFileResponse(success=True, file_exists=True) diff --git a/tests/script_tests/test_database_reconstruction.py b/tests/script_tests/test_database_reconstruction.py index 7784230..aa290af 100644 --- a/tests/script_tests/test_database_reconstruction.py +++ b/tests/script_tests/test_database_reconstruction.py @@ -6,8 +6,6 @@ import subprocess import sys -from librarian_server_scripts.librarian_server_rebuild_database import run_migration - def test_database_reconstruction(test_database_reconstruction_server): setup, get_session, orm = test_database_reconstruction_server diff --git a/tests/server_unit_test/test_admin.py b/tests/server_unit_test/test_admin.py new file mode 100644 index 0000000..b39782e --- /dev/null +++ b/tests/server_unit_test/test_admin.py @@ -0,0 +1,50 @@ +""" +Tests for admin endpoints. +""" + +import shutil + +from hera_librarian.deletion import DeletionPolicy +from hera_librarian.models.admin import ( + AdminCreateFileRequest, + AdminCreateFileResponse, + AdminRequestFailedResponse, +) +from hera_librarian.utils import get_md5_from_path, get_size_from_path + + +def test_add_file(test_client, test_server, garbage_file): + """ + Tests that a file placed on the store without a database row can be registered via the admin endpoint. + """ + + # Locate the store directory used by the test server. + setup = test_server[2] + + store = setup.store_directory + + full_path = store / "test_upload_without_uploading.txt" + + # Create the file in the store. 
+ shutil.copy2(garbage_file, full_path) + + request = AdminCreateFileRequest( + name="test_upload_without_uploading.txt", + create_time=garbage_file.stat().st_ctime, + size=garbage_file.stat().st_size, + checksum=get_md5_from_path(full_path), + uploader="test", + source="test", + path=str(full_path), + store_name="local_store", + ) + + response = test_client.post_with_auth( + "/api/v2/admin/add_file", content=request.model_dump_json() + ) + + assert response.status_code == 200 + + response = AdminCreateFileResponse.model_validate_json(response.content) + + assert response.success