diff --git a/hera_librarian/cli.py b/hera_librarian/cli.py
index 89c0ec1..3a2904d 100644
--- a/hera_librarian/cli.py
+++ b/hera_librarian/cli.py
@@ -528,6 +528,7 @@ def generate_parser():
     config_upload_subparser(sub_parsers)
     config_search_errors_subparser(sub_parsers)
     config_clear_error_subparser(sub_parsers)
+    config_verify_file_subparser(sub_parsers)
 
     return ap
 
@@ -1147,6 +1148,42 @@ def main():
 
     return
 
+def config_verify_file_subparser(sub_parsers):
+    """
+    Configure the subparser for the 'verify-file' command.
+    """
+    doc = "Verify the integrity and existence of a file in the librarian."
+    hlp = "Verify a file in the librarian"
+
+    sp = sub_parsers.add_parser("verify-file", description=doc, help=hlp)
+    sp.add_argument("conn_name", metavar="CONNECTION-NAME", help=_conn_name_help)
+    sp.add_argument("name", help="The unique filename of the file to verify.")
+    sp.add_argument("size", type=int, help="Size in bytes of the file to verify.")
+    sp.add_argument("checksum", help="MD5 checksum of the file to verify.")
+    sp.add_argument("store_name", help="The name of the store where the file resides.")
+    sp.set_defaults(func=verify_file)
+
+    return
+
+def verify_file(args):
+    """
+    Execute the 'verify-file' command.
+    """
+    client = get_client(args.conn_name, admin=True)
+
+    try:
+        response = client.verify_file_row(
+            name=args.name,
+            size=args.size,
+            checksum=args.checksum,
+            store_name=args.store_name,
+        )
+        if response["verified"]:
+            print("File verification successful.")
+        else:
+            print("File verification failed.")
+    except LibrarianError as e:
+        die(str(e))
 
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/hera_librarian/client.py b/hera_librarian/client.py
index 57713f2..ed83e4a 100644
--- a/hera_librarian/client.py
+++ b/hera_librarian/client.py
@@ -733,3 +733,59 @@ def add_file_row(
                 raise LibrarianError(f"Unknown error. {e}")
 
         return response
+
+    def verify_file_row(
+        self,
+        name: str,
+        size: int,
+        checksum: str,
+        store_name: str,
+    ):
+        """
+        Verify a file row against an existing file on the store.
+        This can confirm the integrity and existence of a file as recorded in the database.
+
+        Parameters
+        ----------
+        name : str
+            The unique filename of the file to verify.
+        size : int
+            Size in bytes of the file to verify.
+        checksum : str
+            MD5 checksum of the file to verify.
+        store_name : str
+            The name of the store where the file resides.
+
+        Returns
+        -------
+        dict
+            The decoded JSON response body; callers read its "verified" key.
+
+        Raises
+        ------
+        LibrarianError
+            If the request fails (HTTP error or non-200 response), e.g. a 404 status.
+        """
+
+        try:
+            response = self.post(
+                endpoint="admin/verify_file",
+                json={
+                    "name": name,
+                    "size": size,
+                    "checksum": checksum,
+                    "store_name": store_name,
+                },
+            )
+
+            if response.status_code != 200:
+                raise LibrarianError("Failed to verify the file due to an unexpected error.")
+            return response.json()
+
+        except LibrarianHTTPError as e:
+            if e.status_code == 404:
+                raise LibrarianError("File or store not found for verification.") from e
+            elif e.status_code == 400:
+                raise LibrarianError("File verification failed due to mismatched properties.") from e
+            else:
+                raise LibrarianError(f"Unknown error during file verification. {e}") from e
diff --git a/hera_librarian/models/admin.py b/hera_librarian/models/admin.py
index 63ce5eb..39cd048 100644
--- a/hera_librarian/models/admin.py
+++ b/hera_librarian/models/admin.py
@@ -46,3 +46,14 @@ class AdminRequestFailedResponse(BaseModel):
     "The reason why the search failed."
     suggested_remedy: str
     "A suggested remedy for the failure."
+
+class AdminVerifyFileRequest(BaseModel):
+    # Properties of the file that the caller expects the server to have on record
+    name: str
+    "The unique filename of this file."
+    size: int
+    "Size in bytes of the file"
+    checksum: str
+    "Checksum (MD5 hash) of the file."
+    store_name: str
+    "The name of the store that this file is on"
diff --git a/librarian_server/api/admin.py b/librarian_server/api/admin.py
index 012bf5f..fbe9ea6 100644
--- a/librarian_server/api/admin.py
+++ b/librarian_server/api/admin.py
@@ -7,7 +7,7 @@
 
 from pathlib import Path
 
-from fastapi import APIRouter, Depends, Response, status
+from fastapi import APIRouter, Depends, HTTPException, Response, status
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 
@@ -16,6 +16,7 @@
     AdminCreateFileRequest,
     AdminCreateFileResponse,
     AdminRequestFailedResponse,
+    AdminVerifyFileRequest,
 )
 
 from ..database import yield_session
@@ -96,3 +97,28 @@ def add_file(
     session.commit()
 
     return AdminCreateFileResponse(success=True, file_exists=True)
+
+@router.post("/verify_file")
+def verify_file(
+    request: AdminVerifyFileRequest,
+    session: Session = Depends(yield_session),
+):
+    """
+    Verifies the properties of an existing file in the database.
+    """
+
+    store = session.query(StoreMetadata).filter_by(name=request.store_name).one_or_none()
+    if store is None:
+        raise HTTPException(status_code=404, detail="Store not found.")
+    # An unknown file name or a size/checksum mismatch is reported as verified=False, not as an error
+    file = session.query(File).filter_by(name=request.name).one_or_none()
+
+    if file is None or file.checksum != request.checksum or file.size != request.size:
+        return {"verified": False}
+
+    # The record matches; it must also have an instance on the requested store, else 404
+    instance = session.query(Instance).filter_by(file_id=file.id, store_id=store.id).one_or_none()
+    if instance is None:
+        raise HTTPException(status_code=404, detail="File instance not found in the specified store.")
+
+    return {"verified": True}
diff --git a/tests/client_unit_test/test_cli.py b/tests/client_unit_test/test_cli.py
index fb06339..07ccdef 100644
--- a/tests/client_unit_test/test_cli.py
+++ b/tests/client_unit_test/test_cli.py
@@ -123,5 +123,6 @@ def test_generate_parser():
     assert "set-file-deletion-policy" in available_subparsers
     assert "stage-files" in available_subparsers
     assert "upload" in available_subparsers
+    assert "verify-file" in available_subparsers
 
     return
diff --git a/tests/server_unit_test/test_admin.py b/tests/server_unit_test/test_admin.py
index 41f4b41..8604ac9 100644
--- a/tests/server_unit_test/test_admin.py
+++ b/tests/server_unit_test/test_admin.py
@@ -134,3 +134,43 @@ def test_add_file_no_store_exists(test_client):
     response = AdminRequestFailedResponse.model_validate_json(response.content)
 
     assert response.reason == "Store not_a_store does not exist."
+
+def test_verify_file_success(test_client, test_server, garbage_file, test_orm):
+    """
+    Tests that a file's properties match the database record.
+    """
+    setup = test_server[2]
+    store = setup.store_directory
+    full_path = store / "test_file_to_verify.txt"
+    # Create the file in the store
+    shutil.copy2(garbage_file, full_path)
+
+    # NOTE(review): nothing visible here registers this file in the database; confirm a fixture does, or the endpoint returns verified=False
+    request = {
+        "name": "test_file_to_verify.txt",
+        "size": get_size_from_path(full_path),
+        "checksum": get_md5_from_path(full_path),
+        "store_name": "local_store",
+    }
+
+    response = test_client.post_with_auth("/api/v2/admin/verify_file", json=request)
+
+    assert response.status_code == 200
+    assert response.json() == {"verified": True}
+
+def test_verify_file_failure(test_client, test_server, test_orm):
+    """
+    Tests that verification fails when file properties do not match.
+    """
+    # Assume a file "mismatched_file.txt" exists in the database but with different properties
+    request = {
+        "name": "mismatched_file.txt",
+        "size": 123,  # Intentionally incorrect size
+        "checksum": "wrongchecksum",  # Intentionally incorrect checksum
+        "store_name": "local_store",
+    }
+
+    response = test_client.post_with_auth("/api/v2/admin/verify_file", json=request)
+
+    assert response.status_code == 200
+    assert response.json() == {"verified": False}