From 4c506c24eebedabe82269d4454cad01ff615fc81 Mon Sep 17 00:00:00 2001 From: Damien Date: Tue, 5 Nov 2024 15:56:30 +0000 Subject: [PATCH] Update build docs script to enable --local option for local build on current branch. Move endpoint documentation to pydantic models. Add responses to path operators and add models to exceptions. Modify client accordingly Moved example queries out of default values to model configs. Update docummentation of routes/utils methods. --- .../libraries/smartnoise_synth.py | 5 +- client/lomas_client/utils.py | 31 +- client/notebooks/Demo_Client_Notebook.ipynb | 290 +++++------ core/lomas_core/error_handler.py | 41 +- core/lomas_core/models/constants.py | 29 ++ core/lomas_core/models/exceptions.py | 85 ++++ core/lomas_core/models/requests.py | 134 ++++- .../lomas_core/models/requests_examples.py | 74 +-- core/lomas_core/models/responses.py | 35 +- docs/build_docs.py | 97 ++-- docs/source/api.rst | 1 + docs/source/core_api.rst | 8 + docs/source/index.rst | 8 +- docs/source/server_cli.rst | 2 +- server/docker-compose.yml | 3 +- server/lomas_server/constants.py | 109 ---- .../lomas_server/dp_queries/dummy_dataset.py | 3 +- server/lomas_server/routes/routes_admin.py | 8 +- server/lomas_server/routes/routes_dp.py | 471 +++++++----------- server/lomas_server/routes/utils.py | 63 +-- server/lomas_server/tests/test_api.py | 247 +++++---- .../tests/test_api_diffprivlib.py | 70 +-- .../tests/test_api_smartnoise_synth.py | 122 +++-- .../tests/test_dummy_generation.py | 2 +- 24 files changed, 1064 insertions(+), 874 deletions(-) create mode 100644 core/lomas_core/models/exceptions.py rename server/lomas_server/utils/query_examples.py => core/lomas_core/models/requests_examples.py (71%) create mode 100644 docs/source/core_api.rst diff --git a/client/lomas_client/libraries/smartnoise_synth.py b/client/lomas_client/libraries/smartnoise_synth.py index 91b40fb8..ea156a1e 100644 --- a/client/lomas_client/libraries/smartnoise_synth.py +++ 
b/client/lomas_client/libraries/smartnoise_synth.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List, Optional, Type from lomas_core.models.requests import ( SmartnoiseSynthDummyQueryModel, @@ -38,6 +38,7 @@ def cost( constraints: dict = {}, ) -> Optional[CostResponse]: """This function estimates the cost of executing a SmartNoise query. + Args: synth_name (str): name of the Synthesizer model to use. Available synthesizer are @@ -113,6 +114,7 @@ def query( seed: int = DUMMY_SEED, ) -> Optional[QueryResponse]: """This function executes a SmartNoise Synthetic query. + Args: synth_name (str): name of the Synthesizer model to use. Available synthesizer are @@ -181,6 +183,7 @@ def query( "condition": condition, "nb_samples": nb_samples, } + request_model: Type[SmartnoiseSynthRequestModel] if dummy: endpoint = "dummy_smartnoise_synth_query" body_dict["dummy_nb_rows"] = nb_rows diff --git a/client/lomas_client/utils.py b/client/lomas_client/utils.py index 28b8fa8e..c991f5ae 100644 --- a/client/lomas_client/utils.py +++ b/client/lomas_client/utils.py @@ -10,6 +10,13 @@ InvalidQueryException, UnauthorizedAccessException, ) +from lomas_core.models.exceptions import ( + ExternalLibraryExceptionModel, + InternalServerExceptionModel, + InvalidQueryExceptionModel, + LomasServerExceptionTypeAdapter, + UnauthorizedAccessExceptionModel, +) def raise_error(response: requests.Response) -> str: @@ -21,18 +28,18 @@ def raise_error(response: requests.Response) -> str: Raise: Server Error """ - error_message = response.json() - if response.status_code == status.HTTP_400_BAD_REQUEST: - raise InvalidQueryException(error_message["InvalidQueryException"]) - if response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY: - raise ExternalLibraryException( - error_message["library"], error_message["ExternalLibraryException"] - ) - if response.status_code == status.HTTP_403_FORBIDDEN: - raise UnauthorizedAccessException(error_message["UnauthorizedAccessException"]) - 
if response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR: - raise InternalServerException(error_message["InternalServerException"]) - raise InternalServerException(f"Unknown {InternalServerException}") + error_model = LomasServerExceptionTypeAdapter.validate_json(response.json()) + match error_model: + case InvalidQueryExceptionModel(): + raise InvalidQueryException(error_model.message) + case ExternalLibraryExceptionModel(): + raise ExternalLibraryException(error_model.library, error_model.message) + case UnauthorizedAccessExceptionModel(): + raise UnauthorizedAccessException(error_model.message) + case InternalServerExceptionModel(): + raise InternalServerException("Internal Server Exception.") + case _: + raise InternalServerException(f"Unknown {InternalServerException}") def validate_synthesizer(synth_name: str, return_model: bool = False): diff --git a/client/notebooks/Demo_Client_Notebook.ipynb b/client/notebooks/Demo_Client_Notebook.ipynb index 3c7c0874..c05d04ee 100644 --- a/client/notebooks/Demo_Client_Notebook.ipynb +++ b/client/notebooks/Demo_Client_Notebook.ipynb @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 1, "id": "23bb4f13-7800-41b2-b429-68c2d02243d0", "metadata": {}, "outputs": [ @@ -33,7 +33,7 @@ "" ] }, - "execution_count": 30, + "execution_count": 1, "metadata": { "image/png": { "width": 800 @@ -147,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 2, "id": "6fb569fc", "metadata": {}, "outputs": [], @@ -175,7 +175,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 3, "id": "941991f7", "metadata": {}, "outputs": [], @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 4, "id": "0fdebac9-57fc-4410-878b-5a77425af634", "metadata": {}, "outputs": [ @@ -289,7 +289,7 @@ " 'categories': ['MALE', 'FEMALE']}}}" ] }, - "execution_count": 34, + "execution_count": 4, "metadata": {}, "output_type": 
"execute_result" } @@ -309,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 5, "id": "8719c070-16a3-4228-a09f-944178aa1ba7", "metadata": {}, "outputs": [], @@ -334,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 6, "id": "01f4365a", "metadata": {}, "outputs": [], @@ -345,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 7, "id": "3f553b29", "metadata": {}, "outputs": [ @@ -457,7 +457,7 @@ "4 3614.604018 MALE " ] }, - "execution_count": 37, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -491,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 8, "id": "9bd99db9-9de9-4b25-8718-989fea27b15a", "metadata": {}, "outputs": [ @@ -501,7 +501,7 @@ "InitialBudgetResponse(initial_epsilon=10.0, initial_delta=0.005)" ] }, - "execution_count": 38, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -522,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 9, "id": "99a4dd26-53af-412e-bcd1-f06fff57e6a4", "metadata": {}, "outputs": [ @@ -532,7 +532,7 @@ "SpentBudgetResponse(total_spent_epsilon=1.0, total_spent_delta=4.999999999999449e-05)" ] }, - "execution_count": 39, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -551,7 +551,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 10, "id": "f67e0596-5f96-4c8b-a843-3fbaef02bab1", "metadata": {}, "outputs": [ @@ -561,7 +561,7 @@ "RemainingBudgetResponse(remaining_epsilon=9.0, remaining_delta=0.004950000000000006)" ] }, - "execution_count": 40, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -620,7 +620,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 11, "id": "a0de0cfa-af54-46f7-9144-8778fb1a66c5", "metadata": {}, "outputs": [], @@ -644,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 42, + 
"execution_count": 12, "id": "47663a1f-2b91-4f8a-8565-b3d7c9667e76", "metadata": {}, "outputs": [], @@ -655,7 +655,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 13, "id": "e1a2b948-cf11-4325-a05e-147a0b4aaa30", "metadata": {}, "outputs": [ @@ -665,7 +665,7 @@ "CostResponse(epsilon=1.0, delta=4.999999999999449e-05)" ] }, - "execution_count": 43, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -681,7 +681,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 14, "id": "4547b70f-0623-4ae6-93f1-9eaca724e514", "metadata": {}, "outputs": [ @@ -737,7 +737,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 15, "id": "90cf2a6d", "metadata": {}, "outputs": [], @@ -755,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 16, "id": "a30f277e", "metadata": {}, "outputs": [ @@ -763,7 +763,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Average bill length in remote dummy: 48.55mm.\n" + "Average bill length in remote dummy: 48.58mm.\n" ] } ], @@ -792,7 +792,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 17, "id": "19e60263", "metadata": {}, "outputs": [ @@ -802,7 +802,7 @@ "RemainingBudgetResponse(remaining_epsilon=9.0, remaining_delta=0.004950000000000006)" ] }, - "execution_count": 47, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -813,7 +813,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 18, "id": "69767fac", "metadata": {}, "outputs": [], @@ -828,7 +828,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 19, "id": "6dbbdf93", "metadata": {}, "outputs": [ @@ -836,7 +836,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Average bill length of penguins in real data: 44.6mm.\n" + "Average bill length of penguins in real data: 43.52mm.\n" ] } ], @@ -855,7 +855,7 @@ }, { "cell_type": "code", - 
"execution_count": 50, + "execution_count": 20, "id": "39701fe5", "metadata": {}, "outputs": [ @@ -865,7 +865,7 @@ "RemainingBudgetResponse(remaining_epsilon=8.0, remaining_delta=0.004900000000000011)" ] }, - "execution_count": 50, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -884,7 +884,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 21, "id": "487f835f", "metadata": {}, "outputs": [ @@ -894,7 +894,7 @@ "SpentBudgetResponse(total_spent_epsilon=2.0, total_spent_delta=9.999999999998899e-05)" ] }, - "execution_count": 51, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -921,7 +921,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 22, "id": "b9685226", "metadata": {}, "outputs": [], @@ -944,7 +944,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 23, "id": "4331d86f", "metadata": {}, "outputs": [ @@ -1013,7 +1013,7 @@ " 'categories': ['MALE', 'FEMALE']}}" ] }, - "execution_count": 53, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1032,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 24, "id": "ff8cb7b6", "metadata": {}, "outputs": [], @@ -1042,7 +1042,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 25, "id": "70b2bdb1", "metadata": {}, "outputs": [ @@ -1052,7 +1052,7 @@ "(30.0, 65.0)" ] }, - "execution_count": 55, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1073,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 26, "id": "75e4933b", "metadata": {}, "outputs": [], @@ -1098,22 +1098,24 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 27, "id": "8041a647", "metadata": {}, "outputs": [ { - "ename": "InvalidQueryException", - "evalue": "The pipeline provided is not a measurement. 
It cannot be processed in this server.", + "ename": "ValidationError", + "evalue": "1 validation error for tagged-union[InvalidQueryExceptionModel,ExternalLibraryExceptionModel,UnauthorizedAccessExceptionModel,InternalServerExceptionModel]\n JSON input should be string, bytes or bytearray [type=json_type, input_value={'type': 'InvalidQueryExc...cessed in this server.'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/json_type", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInvalidQueryException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[57], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# No instruction for noise addition mechanism: Expect to fail !!!\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopendp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mopendp_pipeline\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mbill_length_transformation_pipeline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mdummy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 5\u001b[0m \u001b[43m)\u001b[49m\n", + "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[27], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# No instruction for noise addition mechanism: Expect to fail !!!\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopendp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mopendp_pipeline\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mbill_length_transformation_pipeline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mdummy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 5\u001b[0m \u001b[43m)\u001b[49m\n", "File \u001b[0;32m/code/lomas_client/libraries/opendp.py:105\u001b[0m, in \u001b[0;36mOpenDPClient.query\u001b[0;34m(self, opendp_pipeline, fixed_delta, dummy, nb_rows, seed)\u001b[0m\n\u001b[1;32m 102\u001b[0m body \u001b[38;5;241m=\u001b[39m request_model\u001b[38;5;241m.\u001b[39mmodel_validate(body_dict)\n\u001b[1;32m 103\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhttp_client\u001b[38;5;241m.\u001b[39mpost(endpoint, body)\n\u001b[0;32m--> 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvalidate_model_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mres\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mQueryResponse\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/code/lomas_client/utils.py:83\u001b[0m, in \u001b[0;36mvalidate_model_response\u001b[0;34m(response, response_model)\u001b[0m\n\u001b[1;32m 80\u001b[0m r_model \u001b[38;5;241m=\u001b[39m response_model\u001b[38;5;241m.\u001b[39mmodel_validate_json(data)\n\u001b[1;32m 81\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r_model\n\u001b[0;32m---> 83\u001b[0m \u001b[43mraise_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m/code/lomas_client/utils.py:26\u001b[0m, in \u001b[0;36mraise_error\u001b[0;34m(response)\u001b[0m\n\u001b[1;32m 24\u001b[0m error_message \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mjson()\n\u001b[1;32m 25\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m status\u001b[38;5;241m.\u001b[39mHTTP_400_BAD_REQUEST:\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidQueryException(error_message[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalidQueryException\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m status\u001b[38;5;241m.\u001b[39mHTTP_422_UNPROCESSABLE_ENTITY:\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ExternalLibraryException(\n\u001b[1;32m 29\u001b[0m error_message[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlibrary\u001b[39m\u001b[38;5;124m\"\u001b[39m], error_message[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExternalLibraryException\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 30\u001b[0m )\n", - "\u001b[0;31mInvalidQueryException\u001b[0m: The pipeline provided is not a measurement. It cannot be processed in this server." 
+ "File \u001b[0;32m/code/lomas_client/utils.py:90\u001b[0m, in \u001b[0;36mvalidate_model_response\u001b[0;34m(response, response_model)\u001b[0m\n\u001b[1;32m 87\u001b[0m r_model \u001b[38;5;241m=\u001b[39m response_model\u001b[38;5;241m.\u001b[39mmodel_validate_json(data)\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r_model\n\u001b[0;32m---> 90\u001b[0m \u001b[43mraise_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/code/lomas_client/utils.py:31\u001b[0m, in \u001b[0;36mraise_error\u001b[0;34m(response)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_error\u001b[39m(response: requests\u001b[38;5;241m.\u001b[39mResponse) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 23\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Raise error message based on the HTTP response.\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \n\u001b[1;32m 25\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124;03m Server Error\u001b[39;00m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 31\u001b[0m error_model \u001b[38;5;241m=\u001b[39m \u001b[43mLomasServerExceptionTypeAdapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mmatch\u001b[39;00m error_model:\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mcase\u001b[39;00m InvalidQueryExceptionModel():\n", + "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/pydantic/type_adapter.py:135\u001b[0m, in 
\u001b[0;36m_frame_depth..wrapper..wrapped\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 133\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;28mself\u001b[39m: TypeAdapterT, \u001b[38;5;241m*\u001b[39margs: P\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: P\u001b[38;5;241m.\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m R:\n\u001b[1;32m 134\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_with_frame_depth(depth \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m): \u001b[38;5;66;03m# depth + 1 for the wrapper function\u001b[39;00m\n\u001b[0;32m--> 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/pydantic/type_adapter.py:384\u001b[0m, in \u001b[0;36mTypeAdapter.validate_json\u001b[0;34m(self, data, strict, context)\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[38;5;129m@_frame_depth\u001b[39m(\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalidate_json\u001b[39m(\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28mself\u001b[39m, data: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28mbytes\u001b[39m, \u001b[38;5;241m/\u001b[39m, \u001b[38;5;241m*\u001b[39m, strict: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, context: 
\u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 371\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 372\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Usage docs: https://docs.pydantic.dev/2.9/concepts/json/#json-parsing\u001b[39;00m\n\u001b[1;32m 373\u001b[0m \n\u001b[1;32m 374\u001b[0m \u001b[38;5;124;03m Validate a JSON string or bytes against the model.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[38;5;124;03m The validated object.\u001b[39;00m\n\u001b[1;32m 383\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_json\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstrict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontext\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mValidationError\u001b[0m: 1 validation error for tagged-union[InvalidQueryExceptionModel,ExternalLibraryExceptionModel,UnauthorizedAccessExceptionModel,InternalServerExceptionModel]\n JSON input should be string, bytes or bytearray [type=json_type, input_value={'type': 'InvalidQueryExc...cessed in this server.'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/json_type" ] } ], @@ -1135,7 +1137,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 28, "id": "b8162859", "metadata": {}, "outputs": [], @@ -1156,7 +1158,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 29, "id": 
"df61bce0", "metadata": {}, "outputs": [ @@ -1164,7 +1166,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Dummy result for variance: 32.28\n" + "Dummy result for variance: 42.6\n" ] } ], @@ -1194,7 +1196,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 30, "id": "7ae7f735", "metadata": {}, "outputs": [ @@ -1204,7 +1206,7 @@ "CostResponse(epsilon=0.7122093023265228, delta=0.0)" ] }, - "execution_count": 60, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1234,7 +1236,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 31, "id": "085555a5", "metadata": {}, "outputs": [], @@ -1246,7 +1248,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 32, "id": "674332e7", "metadata": {}, "outputs": [ @@ -1254,7 +1256,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Variance of bill length: 25.97 (from opendp query).\n" + "Variance of bill length: 35.06 (from opendp query).\n" ] } ], @@ -1281,7 +1283,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 33, "id": "f72b19d0", "metadata": {}, "outputs": [ @@ -1289,7 +1291,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Standard error of bill length: 0.27.\n" + "Standard error of bill length: 0.32.\n" ] } ], @@ -1301,7 +1303,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 34, "id": "62630a03", "metadata": {}, "outputs": [ @@ -1309,7 +1311,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "The 95% confidence interval of the bill length of all penguins is [44.06, 45.14].\n" + "The 95% confidence interval of the bill length of all penguins is [42.89, 44.15].\n" ] } ], @@ -1331,7 +1333,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 35, "id": "9cbcd0cf-4211-4a55-aa71-0679e7b2fa63", "metadata": {}, "outputs": [], @@ -1359,7 +1361,7 @@ }, { "cell_type": "code", - "execution_count": 66, + 
"execution_count": 36, "id": "b91b694d-2256-4c43-ac4f-091c6afb290a", "metadata": {}, "outputs": [], @@ -1370,7 +1372,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 37, "id": "0bf7ea1f-873c-4068-ae8b-edee16316a08", "metadata": {}, "outputs": [], @@ -1383,7 +1385,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 38, "id": "869d409c-1ee9-4eca-8189-976f844de284", "metadata": {}, "outputs": [ @@ -1393,7 +1395,7 @@ "([30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0])" ] }, - "execution_count": 68, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1405,7 +1407,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 39, "id": "6114c5f4-f8b1-4a8c-9770-e2a0ed7f180d", "metadata": {}, "outputs": [], @@ -1422,7 +1424,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 40, "id": "389f3ca5-66c0-41d2-86bd-e7131bbe9184", "metadata": {}, "outputs": [ @@ -1860,7 +1862,7 @@ " epsilon=2.0))])" ] }, - "execution_count": 70, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1888,7 +1890,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 41, "id": "e9f29610-52fc-4f0e-84fd-8d85cf52eea4", "metadata": {}, "outputs": [ @@ -1898,7 +1900,7 @@ "CostResponse(epsilon=2.0, delta=0.0)" ] }, - "execution_count": 71, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -1924,7 +1926,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 42, "id": "62538ac0-c6aa-4950-82e8-f53510f17d77", "metadata": {}, "outputs": [], @@ -1941,7 +1943,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 43, "id": "7727b13a-cd2f-4f97-b550-88a4360fd601", "metadata": {}, "outputs": [], @@ -1952,17 +1954,17 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 44, "id": "600d6c6e-7567-4564-92ad-d1538ac10af5", "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ - "'The model has a mean accuracy of 0.41. It is a harsh metric because we are in a multi-label classification case.'" + "'The model has a mean accuracy of 0.32. It is a harsh metric because we are in a multi-label classification case.'" ] }, - "execution_count": 74, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1973,7 +1975,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 45, "id": "1824eea1-6ad8-4d2f-86d5-456a89318fef", "metadata": {}, "outputs": [ @@ -2411,7 +2413,7 @@ " epsilon=2.0))])" ] }, - "execution_count": 75, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -2423,17 +2425,17 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 46, "id": "1e4a56bd-95bf-4355-81a6-eaf7d16f69c8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'For these feature values, the predicted species is is Gentoo.'" + "'For these feature values, the predicted species is is Adelie.'" ] }, - "execution_count": 76, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -2473,7 +2475,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 47, "id": "ed292cec-8497-4e5b-b3bf-cca9227abf7d", "metadata": {}, "outputs": [], @@ -2489,7 +2491,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 48, "id": "6fab49d9-f5c5-437e-b70f-234b84c2f7e5", "metadata": {}, "outputs": [ @@ -2523,32 +2525,32 @@ " \n", " 0\n", " Dream\n", - " 45.625530\n", - " 19.924271\n", + " 59.189283\n", + " 18.603612\n", " \n", " \n", " 1\n", " Dream\n", - " 50.310276\n", - " 15.932499\n", + " 45.893746\n", + " 17.438823\n", " \n", " \n", " 2\n", " Dream\n", - " 38.179427\n", - " 17.864330\n", + " 48.175088\n", + " 16.088652\n", " \n", " \n", " 3\n", " Dream\n", - " 52.805474\n", - " 20.588617\n", + " 57.958338\n", + " 17.529872\n", " \n", " \n", " 4\n", " Dream\n", - " 48.936304\n", - " 17.846829\n", + " 
51.722943\n", + " 19.493872\n", " \n", " \n", "\n", @@ -2556,14 +2558,14 @@ ], "text/plain": [ " island bill_length_mm bill_depth_mm\n", - "0 Dream 45.625530 19.924271\n", - "1 Dream 50.310276 15.932499\n", - "2 Dream 38.179427 17.864330\n", - "3 Dream 52.805474 20.588617\n", - "4 Dream 48.936304 17.846829" + "0 Dream 59.189283 18.603612\n", + "1 Dream 45.893746 17.438823\n", + "2 Dream 48.175088 16.088652\n", + "3 Dream 57.958338 17.529872\n", + "4 Dream 51.722943 19.493872" ] }, - "execution_count": 78, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -2582,7 +2584,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 49, "id": "82ab4aee-b4af-4fbd-93cc-b8171f7f9a52", "metadata": {}, "outputs": [ @@ -2592,7 +2594,7 @@ "CostResponse(epsilon=1.0, delta=0.00015673368198174188)" ] }, - "execution_count": 79, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -2616,7 +2618,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 50, "id": "fbc8b354-e4db-4472-957b-468e768eddc4", "metadata": {}, "outputs": [], @@ -2632,7 +2634,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 51, "id": "550fa89d-9537-4daf-9f96-42fa71f242b9", "metadata": {}, "outputs": [ @@ -2666,32 +2668,32 @@ " \n", " 0\n", " Dream\n", - " 51.809153\n", - " 15.684234\n", + " 52.053493\n", + " 15.699640\n", " \n", " \n", " 1\n", " Dream\n", - " 47.662356\n", - " 17.220457\n", + " 47.705728\n", + " 17.678879\n", " \n", " \n", " 2\n", " Torgersen\n", - " 56.764018\n", - " 16.225140\n", + " 56.875727\n", + " 16.196799\n", " \n", " \n", " 3\n", " Biscoe\n", - " 37.432851\n", - " 19.486586\n", + " 38.807937\n", + " 19.253387\n", " \n", " \n", " 4\n", " Dream\n", - " 45.584773\n", - " 15.340890\n", + " 46.332477\n", + " 15.361980\n", " \n", " \n", "\n", @@ -2699,14 +2701,14 @@ ], "text/plain": [ " island bill_length_mm bill_depth_mm\n", - "0 Dream 51.809153 15.684234\n", - "1 Dream 
47.662356 17.220457\n", - "2 Torgersen 56.764018 16.225140\n", - "3 Biscoe 37.432851 19.486586\n", - "4 Dream 45.584773 15.340890" + "0 Dream 52.053493 15.699640\n", + "1 Dream 47.705728 17.678879\n", + "2 Torgersen 56.875727 16.196799\n", + "3 Biscoe 38.807937 19.253387\n", + "4 Dream 46.332477 15.361980" ] }, - "execution_count": 81, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -2725,7 +2727,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 52, "id": "7d5a336e-80f0-48fa-84a3-33d0e51a2d3b", "metadata": {}, "outputs": [], @@ -2736,7 +2738,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 53, "id": "e890a8d9-0c7b-4805-be8a-81e66d5fa7ca", "metadata": {}, "outputs": [ @@ -2744,8 +2746,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "The average with Smartnoise-SQL on private data was 44.6.\n", - "The average with Smartnoise-Synth on synthetic data is 44.4.\n" + "The average with Smartnoise-SQL on private data was 43.52.\n", + "The average with Smartnoise-Synth on synthetic data is 44.53.\n" ] } ], @@ -2758,7 +2760,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 54, "id": "11a14d9f-0fe3-4a5e-b425-d2192acd1e84", "metadata": {}, "outputs": [ @@ -2766,8 +2768,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "The variance with opendp on private data was 25.97.\n", - "The variance with Smartnoise-Synth on synthetic data is 35.68.\n" + "The variance with opendp on private data was 35.06.\n", + "The variance with Smartnoise-Synth on synthetic data is 36.25.\n" ] } ], @@ -2796,7 +2798,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 55, "id": "008fd230-cdfd-4e03-91ce-5a60b06c106d", "metadata": {}, "outputs": [ @@ -2806,7 +2808,7 @@ "5" ] }, - "execution_count": 85, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -2818,7 +2820,7 @@ }, { "cell_type": "code", - "execution_count": 86, + 
"execution_count": 56, "id": "b712b269-64f2-4c7e-b8bf-d1a608933eff", "metadata": {}, "outputs": [ @@ -2840,13 +2842,13 @@ " 'result': {'res_type': 'smartnoise_sql',\n", " 'df': {'index': [0],\n", " 'columns': ['avg_bill_length_mm'],\n", - " 'data': [[44.42478864249474]],\n", + " 'data': [[43.75587056284081]],\n", " 'index_names': [None],\n", " 'column_names': [None]}}},\n", - " 'timestamp': 1729156298.0845885}" + " 'timestamp': 1732024136.80644}" ] }, - "execution_count": 86, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -2859,7 +2861,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 57, "id": "8dfaf2b6-2b6c-480b-bcd7-250b0b2806a6", "metadata": {}, "outputs": [ @@ -2881,13 +2883,13 @@ " 'result': {'res_type': 'smartnoise_sql',\n", " 'df': {'index': [0],\n", " 'columns': ['avg_bill_length_mm'],\n", - " 'data': [[44.59811070566323]],\n", + " 'data': [[43.52246114623609]],\n", " 'index_names': [None],\n", " 'column_names': [None]}}},\n", - " 'timestamp': 1729156317.1647706}" + " 'timestamp': 1732024185.9737833}" ] }, - "execution_count": 87, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -2900,7 +2902,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 58, "id": "376315ec-6f38-4919-959e-d6bf244a4952", "metadata": {}, "outputs": [ @@ -2919,11 +2921,11 @@ " 'response': {'epsilon': 0.7122093023265228,\n", " 'delta': 0.0,\n", " 'requested_by': 'Dr. 
Antartica',\n", - " 'result': {'res_type': 'opendp', 'value': 25.96771493027437}},\n", - " 'timestamp': 1729156327.9024074}" + " 'result': {'res_type': 'opendp', 'value': 35.063144457712596}},\n", + " 'timestamp': 1732024233.7957816}" ] }, - "execution_count": 88, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -2936,7 +2938,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 59, "id": "638817e0-d88f-407a-8136-210309651cf2", "metadata": {}, "outputs": [ @@ -2947,7 +2949,7 @@ " 'dataset_name': 'PENGUIN',\n", " 'dp_librairy': 'diffprivlib',\n", " 'client_input': {'dataset_name': 'PENGUIN',\n", - " 'diffprivlib_json': '{\"module\": \"diffprivlib\", \"version\": \"0.6.4\", \"pipeline\": [{\"type\": \"_dpl_type:RandomForestClassifier\", \"name\": \"rf\", \"params\": {\"n_estimators\": 10, \"n_jobs\": 1, \"random_state\": null, \"verbose\": 0, \"warm_start\": false, \"max_depth\": 5, \"epsilon\": 2.0, \"bounds\": {\"_tuple\": true, \"_items\": [[30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0]]}, \"classes\": [\"Adelie\", \"Chinstrap\", \"Gentoo\"], \"shuffle\": false, \"accountant\": \"_dpl_instance:BudgetAccountant\"}}]}',\n", + " 'diffprivlib_json': '{\"module\": \"diffprivlib\", \"version\": \"0.6.5\", \"pipeline\": [{\"type\": \"_dpl_type:RandomForestClassifier\", \"name\": \"rf\", \"params\": {\"n_estimators\": 10, \"n_jobs\": 1, \"random_state\": null, \"verbose\": 0, \"warm_start\": false, \"max_depth\": 5, \"epsilon\": 2.0, \"bounds\": {\"_tuple\": true, \"_items\": [[30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0]]}, \"classes\": [\"Adelie\", \"Chinstrap\", \"Gentoo\"], \"shuffle\": false, \"accountant\": \"_dpl_instance:BudgetAccountant\"}}]}',\n", " 'feature_columns': ['bill_length_mm',\n", " 'bill_depth_mm',\n", " 'flipper_length_mm',\n", @@ -2960,7 +2962,7 @@ " 'delta': 0.0,\n", " 'requested_by': 'Dr. 
Antartica',\n", " 'result': {'res_type': 'diffprivlib',\n", - " 'score': 0.4117647058823529,\n", + " 'score': 0.3235294117647059,\n", " 'model': Pipeline(steps=[('rf',\n", " RandomForestClassifier(accountant=BudgetAccountant(spent_budget=[(2.0, 0)]),\n", " bounds=(array([ 30., 13., 150., 2000.]),\n", @@ -2968,10 +2970,10 @@ " classes=['Adelie', 'Chinstrap',\n", " 'Gentoo'],\n", " epsilon=2.0))])}},\n", - " 'timestamp': 1729156329.9130852}" + " 'timestamp': 1732024235.0577705}" ] }, - "execution_count": 89, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } diff --git a/core/lomas_core/error_handler.py b/core/lomas_core/error_handler.py index 593fae8f..79e3ee65 100644 --- a/core/lomas_core/error_handler.py +++ b/core/lomas_core/error_handler.py @@ -1,11 +1,18 @@ -from typing import Type +from typing import Any, Type from fastapi import FastAPI, Request, status +from fastapi.encoders import jsonable_encoder from fastapi.responses import JSONResponse from pymongo.errors import WriteConcernError -from lomas_core.constants import INTERNAL_SERVER_ERROR +from lomas_core.constants import DPLibraries from lomas_core.logger import LOG +from lomas_core.models.exceptions import ( + ExternalLibraryExceptionModel, + InternalServerExceptionModel, + InvalidQueryExceptionModel, + UnauthorizedAccessExceptionModel, +) class InvalidQueryException(Exception): @@ -34,7 +41,7 @@ class ExternalLibraryException(Exception): external libraries (smartnoise-sql, opendp, diffprivlib) """ - def __init__(self, library: str, error_message: str) -> None: + def __init__(self, library: DPLibraries, error_message: str) -> None: """External Query Exception initialisation. 
Args: @@ -88,7 +95,9 @@ async def invalid_query_exception_handler( LOG.info(f"InvalidQueryException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_400_BAD_REQUEST, - content={"InvalidQueryException": exc.error_message}, + content=jsonable_encoder( + InvalidQueryExceptionModel(message=exc.error_message) + ), ) @app.exception_handler(ExternalLibraryException) @@ -98,10 +107,11 @@ async def external_library_exception_handler( LOG.info(f"ExternalLibraryException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - content={ - "ExternalLibraryException": exc.error_message, - "library": exc.library, - }, + content=jsonable_encoder( + ExternalLibraryExceptionModel( + message=exc.error_message, library=exc.library + ) + ), ) @app.exception_handler(UnauthorizedAccessException) @@ -111,7 +121,9 @@ async def unauthorized_access_exception_handler( LOG.info(f"UnauthorizedAccessException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_403_FORBIDDEN, - content={"UnauthorizedAccessException": exc.error_message}, + content=jsonable_encoder( + UnauthorizedAccessExceptionModel(message=exc.error_message) + ), ) @app.exception_handler(InternalServerException) @@ -121,5 +133,14 @@ async def internal_server_exception_handler( LOG.info(f"InternalServerException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - content={"InternalServerException": INTERNAL_SERVER_ERROR}, + content=jsonable_encoder(InternalServerExceptionModel()), ) + + +# Server error responses for DP queries +SERVER_QUERY_ERROR_RESPONSES: dict[int | str, dict[str, Any]] = { + status.HTTP_400_BAD_REQUEST: {"model": InvalidQueryExceptionModel}, + status.HTTP_422_UNPROCESSABLE_ENTITY: {"model": ExternalLibraryExceptionModel}, + status.HTTP_403_FORBIDDEN: {"model": UnauthorizedAccessExceptionModel}, + status.HTTP_500_INTERNAL_SERVER_ERROR: {"model": 
InternalServerExceptionModel}, +} diff --git a/core/lomas_core/models/constants.py b/core/lomas_core/models/constants.py index 0d74892c..76abaabf 100644 --- a/core/lomas_core/models/constants.py +++ b/core/lomas_core/models/constants.py @@ -1,5 +1,7 @@ from enum import IntEnum, StrEnum +import pkg_resources + # Field names # ----------------------------------------------------------------------------- @@ -7,6 +9,17 @@ TYPE_FIELD = "type" CARDINALITY_FIELD = "cardinality" +JSON_SCHEMA_EXAMPLES = "examples" + + +# Requests +# ----------------------------------------------------------------------------- + +DUMMY_NB_ROWS = 100 +DUMMY_SEED = 42 + +OPENDP_VERSION = pkg_resources.get_distribution("opendp").version +DIFFPRIVLIB_VERSION = pkg_resources.get_distribution("diffprivlib").version # Metadata # ----------------------------------------------------------------------------- @@ -65,3 +78,19 @@ class PrivateDatabaseType(StrEnum): PATH = "PATH_DB" S3 = "S3_DB" + + +# Exceptions +# ----------------------------------------------------------------------------- + + +class ExceptionType(StrEnum): + """Lomas server exception types. 
+ + To be used as discriminator when parsing corresponding models + """ + + INVALID_QUERY = "InvalidQueryException" + EXTERNAL_LIBRARY = "ExternalLibraryException" + UNAUTHORIZED_ACCESS = "UnauthorizedAccessException" + INTERNAL_SERVER = "InternalServerException" diff --git a/core/lomas_core/models/exceptions.py b/core/lomas_core/models/exceptions.py new file mode 100644 index 00000000..f77e3e10 --- /dev/null +++ b/core/lomas_core/models/exceptions.py @@ -0,0 +1,85 @@ +from typing import Annotated, Literal, Union + +from pydantic import BaseModel, ConfigDict, Field, TypeAdapter + +from lomas_core.constants import DPLibraries +from lomas_core.models.constants import ExceptionType + + +class LomasServerExceptionModel(BaseModel): + """Base model for lomas server exceptions.""" + + model_config = ConfigDict(use_attribute_docstrings=True) + type: str + """Exception type.""" + + +class InvalidQueryExceptionModel(LomasServerExceptionModel): + """Exception directly related to the query. + + For example if it does not contain a DP mechanism or there is not enough DP budget. + """ + + type: Literal[ExceptionType.INVALID_QUERY] = ExceptionType.INVALID_QUERY + """Exception type.""" + message: str + """Exception error message. + + This is for exceptions directly related to the query. + For example if it does not contain a DP mechanism or + there is not enough DP budget. + """ + # Note: we duplicate the class docstring to show it in the openapi doc. + + +class ExternalLibraryExceptionModel(LomasServerExceptionModel): + """For exceptions from libraries external to the lomas packages.""" + + type: Literal[ExceptionType.EXTERNAL_LIBRARY] = ExceptionType.EXTERNAL_LIBRARY + """Exception type.""" + library: DPLibraries + """The external library that caused the exception.""" + message: str + """Exception error message. + + For exceptions from libraries external to the lomas packages. 
+ """ + + +class UnauthorizedAccessExceptionModel(LomasServerExceptionModel): + """Exception related to rights with regards to the query. + + (e.g. no user access for this dataset). + """ + + type: Literal[ExceptionType.UNAUTHORIZED_ACCESS] = ExceptionType.UNAUTHORIZED_ACCESS + """Exception type.""" + message: str + """Exception error message. + + Exception related to rights with regards to the query. + (e.g. no user access for this dataset). + """ + + +class InternalServerExceptionModel(LomasServerExceptionModel): + """For any unforseen internal exception.""" + + type: Literal[ExceptionType.INTERNAL_SERVER] = ExceptionType.INTERNAL_SERVER + """Exception type. + + For any unforseen internal exception. + """ + + +LomasServerExceptionTypeAdapter: TypeAdapter = TypeAdapter( + Annotated[ + Union[ + InvalidQueryExceptionModel, + ExternalLibraryExceptionModel, + UnauthorizedAccessExceptionModel, + InternalServerExceptionModel, + ], + Field(discriminator="type"), + ] +) diff --git a/core/lomas_core/models/requests.py b/core/lomas_core/models/requests.py index 05ee77b2..45a57587 100644 --- a/core/lomas_core/models/requests.py +++ b/core/lomas_core/models/requests.py @@ -8,6 +8,19 @@ SSynthMarginalSynthesizer, ) from lomas_core.error_handler import InternalServerException +from lomas_core.models.constants import JSON_SCHEMA_EXAMPLES +from lomas_core.models.requests_examples import ( + example_diffprivlib, + example_dummy_diffprivlib, + example_dummy_opendp, + example_dummy_smartnoise_sql, + example_dummy_smartnoise_synth_query, + example_opendp, + example_smartnoise_sql, + example_smartnoise_sql_cost, + example_smartnoise_synth_cost, + example_smartnoise_synth_query, +) class LomasRequestModel(BaseModel): @@ -20,14 +33,18 @@ class LomasRequestModel(BaseModel): dataset (or a potentially a dummy). 
""" + model_config = ConfigDict(use_attribute_docstrings=True) dataset_name: str + """The name of the dataset the request is aimed at.""" class GetDummyDataset(LomasRequestModel): """Model input to get a dummy dataset.""" dummy_nb_rows: int = Field(..., gt=0) + """The number of dummy rows to generate.""" dummy_seed: int + """The seed for the random generation of the dummy dataset.""" class QueryModel(LomasRequestModel): @@ -46,7 +63,9 @@ class DummyQueryModel(QueryModel): """Input model for a query on a dummy dataset.""" dummy_nb_rows: int = Field(..., gt=0) + """The number of rows in the dummy dataset.""" dummy_seed: int + """The seed to set at the start of the dummy dataset generation.""" # SmartnoiseSQL @@ -54,51 +73,110 @@ class DummyQueryModel(QueryModel): class SmartnoiseSQLRequestModel(LomasRequestModel): """Base input model for a smarnoise-sql request.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_smartnoise_sql_cost]} + ) + query_str: str + """The SQL query to execute. + + NOTE: the table name is \"df\", the query must end with \"FROM df\" + """ epsilon: float = Field(..., gt=0) - delta: float = Field(..., gt=0) + """Privacy parameter (e.g., 0.1).""" + delta: float = Field(..., ge=0) + """Privacy parameter (e.g., 1e-5).""" mechanisms: dict + """ + Dictionary of mechanisms for the query. + + See Smartnoise-SQL mechanisms documentation at + https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms. + """ class SmartnoiseSQLQueryModel(SmartnoiseSQLRequestModel, QueryModel): """Base input model for a smartnoise-sql query.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_smartnoise_sql]} + ) + postprocess: bool + """ + Whether to postprocess the query results (default: True). + + See Smartnoise-SQL postprocessing documentation + https://docs.smartnoise.org/sql/advanced.html#postprocess. 
+ """ class SmartnoiseSQLDummyQueryModel(SmartnoiseSQLQueryModel, DummyQueryModel): """Input model for a smartnoise-sql query on a dummy dataset.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_dummy_smartnoise_sql]} + ) + # SmartnoiseSynth # ---------------------------------------------------------------------------- class SmartnoiseSynthRequestModel(LomasRequestModel): """Base input model for a SmartnoiseSynth request.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_smartnoise_synth_cost]} + ) + synth_name: Union[SSynthMarginalSynthesizer, SSynthGanSynthesizer] + """Name of the synthesizer model to use.""" epsilon: float = Field(..., gt=0) - delta: Optional[float] = None + """Privacy parameter (e.g., 0.1).""" + delta: Optional[float] = Field(..., ge=0) + """Privacy parameter (e.g., 1e-5).""" select_cols: List + """List of columns to select.""" synth_params: dict + """ + Keyword arguments to pass to the synthesizer constructor. + + See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide + all parameters of the model except `epsilon` and `delta`. + """ nullable: bool + """True if some data cells may be null.""" constraints: str + """ + Dictionnary for custom table transformer constraints. + + Column that are not specified will be inferred based on metadata. + """ class SmartnoiseSynthQueryModel(SmartnoiseSynthRequestModel, QueryModel): """Base input model for a smarnoise-synth query.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_smartnoise_synth_query]} + ) + return_model: bool + """True to get Synthesizer model, False to get samples.""" condition: str + """Sampling condition in `model.sample` (only relevant if return_model is False).""" nb_samples: int + """Number of samples to generate. 
+ + (only relevant if return_model is False) + """ class SmartnoiseSynthDummyQueryModel(SmartnoiseSynthQueryModel, DummyQueryModel): """Input model for a smarnoise-synth query on a dummy dataset.""" - # Same as normal query. - return_model: bool - condition: str - nb_samples: int + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_dummy_smartnoise_synth_query]} + ) # OpenDP @@ -106,40 +184,78 @@ class SmartnoiseSynthDummyQueryModel(SmartnoiseSynthQueryModel, DummyQueryModel) class OpenDPRequestModel(LomasRequestModel): """Base input model for an opendp request.""" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict( + use_attribute_docstrings=True, + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_opendp]}, + ) + opendp_json: str - """Opendp pipeline.""" - fixed_delta: Optional[float] = None + """The OpenDP pipeline for the query.""" + fixed_delta: Optional[float] = Field(..., ge=0) + """ + If the pipeline measurement is of type "ZeroConcentratedDivergence". + + (e.g. with "make_gaussian") then it is converted to "SmoothedMaxDivergence" + with "make_zCDP_to_approxDP" (see "opendp measurements documentation at + https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301 + In that case a "fixed_delta" must be provided by the user. 
+ """ class OpenDPQueryModel(OpenDPRequestModel, QueryModel): """Base input model for an opendp query.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_opendp]} + ) + class OpenDPDummyQueryModel(OpenDPRequestModel, DummyQueryModel): """Input model for an opendp query on a dummy dataset.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_dummy_opendp]} + ) + # DiffPrivLib # ---------------------------------------------------------------------------- class DiffPrivLibRequestModel(LomasRequestModel): """Base input model for a diffprivlib request.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_diffprivlib]} + ) + diffprivlib_json: str + """The DiffPrivLib pipeline for the query (See diffprivlib_logger package.).""" feature_columns: list + """The list of feature columns to train.""" target_columns: Optional[list] + """The list of target columns to predict.""" test_size: float = Field(..., gt=0.0, lt=1.0) + """The proportion of the test set.""" test_train_split_seed: int + """The seed for the random train/test split.""" imputer_strategy: str + """The imputation strategy.""" class DiffPrivLibQueryModel(DiffPrivLibRequestModel, QueryModel): """Base input model for a diffprivlib query.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_diffprivlib]} + ) + class DiffPrivLibDummyQueryModel(DiffPrivLibQueryModel, DummyQueryModel): """Input model for a DiffPrivLib query on a dummy dataset.""" + model_config = ConfigDict( + json_schema_extra={JSON_SCHEMA_EXAMPLES: [example_dummy_diffprivlib]} + ) + # Utils # ---------------------------------------------------------------------------- diff --git a/server/lomas_server/utils/query_examples.py b/core/lomas_core/models/requests_examples.py similarity index 71% rename from server/lomas_server/utils/query_examples.py rename to core/lomas_core/models/requests_examples.py index 94136488..5dcbf6d4 
100644 --- a/server/lomas_server/utils/query_examples.py +++ b/core/lomas_core/models/requests_examples.py @@ -1,6 +1,9 @@ -from lomas_core.constants import SSynthGanSynthesizer +from typing import Dict + +from pydantic import JsonValue -from lomas_server.constants import ( +from lomas_core.constants import SSynthGanSynthesizer +from lomas_core.models.constants import ( DIFFPRIVLIB_VERSION, DUMMY_NB_ROWS, DUMMY_SEED, @@ -8,25 +11,25 @@ ) # Query constants -PENGUIN_DATASET = "PENGUIN" -QUERY_EPSILON = 0.1 -QUERY_DELTA = 0.00001 -SQL_QUERY = "SELECT COUNT(*) AS NB_ROW FROM df" -DP_MECHANISM = {"count": "gaussian"} -FEATURE_COLUMNS = [ +PENGUIN_DATASET: str = "PENGUIN" +QUERY_EPSILON: float = 0.1 +QUERY_DELTA: float = 0.00001 +SQL_QUERY: str = "SELECT COUNT(*) AS NB_ROW FROM df" +DP_MECHANISM: JsonValue = {"count": "gaussian"} +FEATURE_COLUMNS: JsonValue = [ "bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g", ] -TARGET_COLUMNS = ["species"] -SPLIT_SEED = 4 -TEST_SIZE = 0.2 -IMPUTER_STRATEGY = "drop" -SNSYNTH_NB_SAMPLES = 200 +TARGET_COLUMNS: JsonValue = ["species"] +SPLIT_SEED: int = 4 +TEST_SIZE: float = 0.2 +IMPUTER_STRATEGY: str = "drop" +SNSYNTH_NB_SAMPLES: int = 200 -def make_dummy(example_query): +def make_dummy(example_query: Dict[str, JsonValue]) -> Dict[str, JsonValue]: """Make dummy example dummy query based on example query.""" example_query_dummy = dict(example_query) example_query_dummy["dummy_nb_rows"] = DUMMY_NB_ROWS @@ -35,18 +38,22 @@ def make_dummy(example_query): # Lomas logic -example_get_admin_db_data = { +# ----------------------------------------------------------------------------- + +example_get_admin_db_data: Dict[str, JsonValue] = { "dataset_name": PENGUIN_DATASET, } -example_get_dummy_dataset = { +example_get_dummy_dataset: Dict[str, JsonValue] = { "dataset_name": PENGUIN_DATASET, "dummy_nb_rows": DUMMY_NB_ROWS, "dummy_seed": DUMMY_SEED, } # Smartnoise-SQL -example_smartnoise_sql_cost = { +# 
----------------------------------------------------------------------------- + +example_smartnoise_sql_cost: Dict[str, JsonValue] = { "query_str": SQL_QUERY, "dataset_name": PENGUIN_DATASET, "epsilon": QUERY_EPSILON, @@ -54,13 +61,15 @@ def make_dummy(example_query): "mechanisms": DP_MECHANISM, } -example_smartnoise_sql = dict(example_smartnoise_sql_cost) +example_smartnoise_sql: Dict[str, JsonValue] = dict(example_smartnoise_sql_cost) example_smartnoise_sql["postprocess"] = True -example_dummy_smartnoise_sql = make_dummy(example_smartnoise_sql) +example_dummy_smartnoise_sql: Dict[str, JsonValue] = make_dummy(example_smartnoise_sql) # Smartnoise-Synth -example_smartnoise_synth_cost = { +# ----------------------------------------------------------------------------- + +example_smartnoise_synth_cost: Dict[str, JsonValue] = { "dataset_name": PENGUIN_DATASET, "synth_name": SSynthGanSynthesizer.DP_CTGAN, "epsilon": QUERY_EPSILON, @@ -74,18 +83,21 @@ def make_dummy(example_query): "nullable": True, "constraints": "", } -example_smartnoise_synth_query = dict(example_smartnoise_synth_cost) +example_smartnoise_synth_query: Dict[str, JsonValue] = dict( + example_smartnoise_synth_cost +) example_smartnoise_synth_query["return_model"] = True example_smartnoise_synth_query["condition"] = "" example_smartnoise_synth_query["nb_samples"] = SNSYNTH_NB_SAMPLES -example_dummy_smartnoise_synth_query = make_dummy(example_smartnoise_synth_query) +example_dummy_smartnoise_synth_query: Dict[str, JsonValue] = make_dummy( + example_smartnoise_synth_query +) # OpenDP - -# Example inputs # ----------------------------------------------------------------------------- -OPENDP_PIPELINE = ( + +OPENDP_PIPELINE: str = ( f'{{"version": "{OPENDP_VERSION}", ' '"ast": {' '"_type": "partial_chain", "lhs": {' @@ -142,15 +154,17 @@ def make_dummy(example_query): "}}}" ) -example_opendp = { +example_opendp: Dict[str, JsonValue] = { "dataset_name": PENGUIN_DATASET, "opendp_json": OPENDP_PIPELINE, 
"fixed_delta": QUERY_DELTA, } -example_dummy_opendp = make_dummy(example_opendp) +example_dummy_opendp: Dict[str, JsonValue] = make_dummy(example_opendp) # DiffPrivLib -DIFFPRIVLIB_PIPELINE = ( +# ----------------------------------------------------------------------------- + +DIFFPRIVLIB_PIPELINE: str = ( '{"module": "diffprivlib", ' f'"version": "{DIFFPRIVLIB_VERSION}", ' '"pipeline": [' @@ -191,7 +205,7 @@ def make_dummy(example_query): "}" ) -example_diffprivlib = { +example_diffprivlib: Dict[str, JsonValue] = { "dataset_name": PENGUIN_DATASET, "diffprivlib_json": DIFFPRIVLIB_PIPELINE, "feature_columns": FEATURE_COLUMNS, @@ -200,4 +214,4 @@ def make_dummy(example_query): "test_train_split_seed": SPLIT_SEED, "imputer_strategy": IMPUTER_STRATEGY, } -example_dummy_diffprivlib = make_dummy(example_diffprivlib) +example_dummy_diffprivlib: Dict[str, JsonValue] = make_dummy(example_diffprivlib) diff --git a/core/lomas_core/models/responses.py b/core/lomas_core/models/responses.py index 25c5c84c..b6d1e0a2 100644 --- a/core/lomas_core/models/responses.py +++ b/core/lomas_core/models/responses.py @@ -30,30 +30,40 @@ class InitialBudgetResponse(ResponseModel): """Model for responses to initial budget queries.""" initial_epsilon: float + """The initial epsilon privacy loss budget.""" initial_delta: float + """The initial delta privacy loss budget.""" class SpentBudgetResponse(ResponseModel): """Model for responses to spent budget queries.""" total_spent_epsilon: float + """The total spent epsilon privacy loss budget.""" total_spent_delta: float + """The total spent delta privacy loss budget.""" class RemainingBudgetResponse(ResponseModel): """Model for responses to remaining budget queries.""" remaining_epsilon: float + """The remaining epsilon privacy loss budget.""" remaining_delta: float + """The remaining delta privacy loss budget.""" class DummyDsResponse(ResponseModel): """Model for responses to dummy dataset requests.""" model_config = 
ConfigDict(arbitrary_types_allowed=True) + dtypes: Dict[str, str] + """The dummy_df column data types.""" datetime_columns: List[str] + """The list of columns with datetime type.""" dummy_df: Annotated[pd.DataFrame, PlainSerializer(dataframe_to_dict)] + """The dummy dataframe.""" @field_validator("dummy_df", mode="before") @classmethod @@ -83,10 +93,14 @@ def deserialize_dummy_df( class CostResponse(ResponseModel): - """Model for responses to cost estimation requests.""" + """Model for responses to cost estimation requests or queries.""" + + model_config = ConfigDict(use_attribute_docstrings=True) epsilon: float + """The epsilon cost of the query.""" delta: float + """The delta cost of the query.""" # Query Responses @@ -98,13 +112,17 @@ class DiffPrivLibQueryResult(BaseModel): """Model for diffprivlib query result.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal[DPLibraries.DIFFPRIVLIB] = DPLibraries.DIFFPRIVLIB + """Result type description.""" score: float + """The trained model score.""" model: Annotated[ DiffprivlibMixin, PlainSerializer(serialize_model), PlainValidator(deserialize_model), ] + """The trained model.""" # SmartnoiseSQL @@ -112,12 +130,15 @@ class SmartnoiseSQLQueryResult(BaseModel): """Type for smartnoise_sql result type.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal[DPLibraries.SMARTNOISE_SQL] = DPLibraries.SMARTNOISE_SQL + """Result type description.""" df: Annotated[ pd.DataFrame, PlainSerializer(dataframe_to_dict), PlainValidator(dataframe_from_dict), ] + """Dataframe containing the query result.""" # SmartnoiseSynth @@ -125,22 +146,28 @@ class SmartnoiseSynthModel(BaseModel): """Type for smartnoise_synth result when it is a pickled model.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal[DPLibraries.SMARTNOISE_SYNTH] = DPLibraries.SMARTNOISE_SYNTH + """Result type description.""" model: Annotated[ Synthesizer, PlainSerializer(serialize_model), 
PlainValidator(deserialize_model) ] + """Synthetic data generator model.""" class SmartnoiseSynthSamples(BaseModel): """Type for smartnoise_synth result when it is a dataframe of samples.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal["sn_synth_samples"] = "sn_synth_samples" + """Result type description.""" df_samples: Annotated[ pd.DataFrame, PlainSerializer(dataframe_to_dict), PlainValidator(dataframe_from_dict), ] + """Dataframe containing the generated synthetic samples.""" # OpenDP @@ -148,7 +175,9 @@ class OpenDPQueryResult(BaseModel): """Type for opendp result.""" res_type: Literal[DPLibraries.OPENDP] = DPLibraries.OPENDP + """Result type description.""" value: Union[int, float, List[Union[int, float]]] + """The result value of the query.""" # Response object @@ -162,10 +191,12 @@ class OpenDPQueryResult(BaseModel): class QueryResponse(CostResponse): - """Model for responses to queries.""" + """Response to Lomas queries.""" requested_by: str + """The user that triggered the query.""" result: Annotated[ QueryResultTypeAlias, Discriminator("res_type"), ] + """The query result object.""" diff --git a/docs/build_docs.py b/docs/build_docs.py index d5cea36c..93605355 100644 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -11,6 +11,7 @@ The Sphinx templates are configured such that they render the correct hmtl to create links between versions. """ +import argparse import os import subprocess import yaml @@ -51,6 +52,7 @@ def git_ref_exists(git_ref: str) -> bool: def build_doc(version: str, language: str, tag: str, + local: bool = False ): """ Builds the documention for the given tag (git ref). 
@@ -65,6 +67,7 @@ def build_doc(version: str, version (str): Version to display language (str): Language (for formatting) tag (str): git ref + local (bool): whether to build on the local branch only """ start_branch_cmd = subprocess.run("git branch --show-current", stdout=subprocess.PIPE, shell=True, text=True) start_branch = start_branch_cmd.stdout.strip() @@ -73,19 +76,20 @@ def build_doc(version: str, os.environ["current_version"] = version os.environ["current_language"] = language - if not git_ref_exists(tag): + if not local and not git_ref_exists(tag): # Replace index if tag does not exist subprocess.run("mv source/index.rst source/index.rst.old", shell=True) subprocess.run("mv source/index_under_construction.rst source/index.rst", shell=True) else: - # Fetch and checkout branch to document - subprocess.run(f"git fetch origin {tag}:{tag}", shell=True) - subprocess.run(f"git checkout {tag}", shell=True) - - # Versions and conf.py always from calling branch - subprocess.run(f"git checkout {start_branch} -- source/conf.py", shell=True) - subprocess.run(f"git checkout {start_branch} -- versions.yaml", shell=True) + if not local: + # Fetch and checkout branch to document + subprocess.run(f"git fetch origin {tag}:{tag}", shell=True) + subprocess.run(f"git checkout {tag}", shell=True) + + # Versions and conf.py always from calling branch + subprocess.run(f"git checkout {start_branch} -- source/conf.py", shell=True) + subprocess.run(f"git checkout {start_branch} -- versions.yaml", shell=True) # Copy relevant sources and generate code docs rsts. 
subprocess.run("mkdir -p ./source/_static", shell=True) @@ -94,6 +98,7 @@ def build_doc(version: str, subprocess.run("cp ../CONTRIBUTING.md ./source/CONTRIBUTING.md", shell=True) subprocess.run("cp ../client/CONTRIBUTING.md ./source/CONTRIBUTING_CLIENT.md", shell=True) subprocess.run("cp ../server/CONTRIBUTING.md ./source/CONTRIBUTING_SERVER.md", shell=True) + subprocess.run("sphinx-apidoc -o ./source ../core/lomas_core/ --tocfile core_modules", shell=True) subprocess.run("sphinx-apidoc -o ./source ../client/lomas_client/ --tocfile client_modules", shell=True) subprocess.run("sphinx-apidoc -o ./source ../server/lomas_server/ --tocfile server_modules", shell=True) subprocess.run("mkdir -p ./source/notebooks", shell=True) @@ -110,11 +115,12 @@ def build_doc(version: str, subprocess.run("make html", shell=True) # Make things as they were before - if not git_ref_exists(tag): - subprocess.run("git reset --hard && git clean -f -d", shell=True) - else: - # Go back to calling branch - subprocess.run(f"git checkout {start_branch}", shell=True) + if not local: + if not git_ref_exists(tag): + subprocess.run("git reset --hard && git clean -f -d", shell=True) + else: + # Go back to calling branch + subprocess.run(f"git checkout {start_branch}", shell=True) return @@ -135,28 +141,43 @@ def move_dir(src: str, dst: str) -> None: if __name__ == "__main__": - # Set arguments to conf.py - # to separate a single local build from all builds we have a flag, see conf.py - os.environ["build_all_docs"] = str(True) - os.environ["pages_root"] = "https://dscc-admin-ch.github.io/lomas-docs" - - # manually build the master branch - build_doc("stable", "en", "master") - move_dir("./build/html/", "../pages/") - r = subprocess.run(["ls", "-al", "../pages"], text=True, stdout=subprocess.PIPE) - print(r.stdout) - - # reading the yaml file - with open("versions.yaml", "r") as yaml_file: - docs = yaml.safe_load(yaml_file) - - # and looping over all values to call our build with version, language and 
its tag - for version, details in docs.items(): - if version == "stable": - continue - tag = details.get('tag', '') - for language in details.get('languages', []): - build_doc(version, language, tag) - move_dir("./build/html/", "../pages/"+version+'/'+language+'/') - r = subprocess.run(["ls", "-al", "../pages"], text=True, stdout=subprocess.PIPE) - print(r.stdout) \ No newline at end of file + parser = argparse.ArgumentParser() + parser.add_argument("-l", "--local", action="store_true", help="local build on current branch") + + args = parser.parse_args() + + if args.local: + # Set arguments to conf.py + # to separate a single local build from all builds we have a flag, see conf.py + os.environ["build_all_docs"] = str(False) + os.environ["pages_root"] = "./build/html" + start_branch_cmd = subprocess.run("git branch --show-current", stdout=subprocess.PIPE, shell=True, text=True) + start_branch = start_branch_cmd.stdout.strip() + build_doc("stable", "en", "", True) + + else: + # Set arguments to conf.py + # to separate a single local build from all builds we have a flag, see conf.py + os.environ["build_all_docs"] = str(True) + os.environ["pages_root"] = "https://dscc-admin-ch.github.io/lomas-docs" + + # manually build the master branch + build_doc("stable", "en", "master") + move_dir("./build/html/", "../pages/") + r = subprocess.run(["ls", "-al", "../pages"], text=True, stdout=subprocess.PIPE) + print(r.stdout) + + # reading the yaml file + with open("versions.yaml", "r") as yaml_file: + docs = yaml.safe_load(yaml_file) + + # and looping over all values to call our build with version, language and its tag + for version, details in docs.items(): + if version == "stable": + continue + tag = details.get('tag', '') + for language in details.get('languages', []): + build_doc(version, language, tag) + move_dir("./build/html/", "../pages/"+version+'/'+language+'/') + r = subprocess.run(["ls", "-al", "../pages"], text=True, stdout=subprocess.PIPE) + print(r.stdout) \ No 
newline at end of file diff --git a/docs/source/api.rst index 142f59e6..dced2a18 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -4,5 +4,6 @@ API Documentation .. toctree:: :maxdepth: 2 + core_api server_api client_api \ No newline at end of file diff --git a/docs/source/core_api.rst new file mode 100644 index 00000000..a07c516b --- /dev/null +++ b/docs/source/core_api.rst @@ -0,0 +1,8 @@ +Core API +================== + +.. toctree:: + :maxdepth: 2 + :glob: + + core_modules \ No newline at end of file diff --git a/docs/source/index.rst index bd5037f3..ec5f5653 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -71,7 +71,13 @@ MongoDB database) and a client development environment in a Kubernetes cluster. For extensive informations about how to deploy, please refer to :doc:`Deployment ` documentation. -## Disclaimer +Core +======== +The ``lomas_core`` library serves as a base for both the ``lomas_client`` and ``lomas_server`` libraries. +It contains code that both these libraries rely on, such as request and response models. + +Disclaimer +========== Lomas is a Proof of Concept that is still under development. The overall infrastructure security is not our current priority. While attention has been given to the 'logical' aspects within the server, many security aspects are not handled. For example, user authentication is not implemented. However, Lomas can be integrated into other secure infrastructures. diff --git a/docs/source/server_cli.rst index 030b1cf7..f92878bd 100644 --- a/docs/source/server_cli.rst +++ b/docs/source/server_cli.rst @@ -11,7 +11,7 @@ Overview The CLI allows data owners to interact with the administrative database directly from the command line. It provides functionalities for managing users, datasets, metatada and queries_archive collections within the MongoDB environment.
-NOTE: it is possible to use a :doc:`streamlit app ` to interact with the database instead. +NOTE: it is possible to use a :doc:`streamlit app ` to interact with the database instead. MongoDB Connection ------------------ diff --git a/server/docker-compose.yml b/server/docker-compose.yml index e7e1744e..e622a3ec 100644 --- a/server/docker-compose.yml +++ b/server/docker-compose.yml @@ -116,7 +116,8 @@ services: ports: - 8888:8888 volumes: - - ../client/lomas_client:/code/lomas_client + - ../client/lomas_client/:/code/lomas_client + - ../core/lomas_core/:/code/lomas_core - ../client/configs/:/root/.jupyter/ - ../client/notebooks/:/code/notebooks/ - ./data/:/data/ diff --git a/server/lomas_server/constants.py b/server/lomas_server/constants.py index cae02535..bfb0fb75 100644 --- a/server/lomas_server/constants.py +++ b/server/lomas_server/constants.py @@ -2,8 +2,6 @@ import string from enum import StrEnum -import pkg_resources - # Config # ----------------------------------------------------------------------------- @@ -36,8 +34,6 @@ DELTA_LIMIT: float = 0.01 # Dummy dataset generation -DUMMY_NB_ROWS = 100 -DUMMY_SEED = 42 RANDOM_STRINGS = list(string.ascii_lowercase + string.ascii_uppercase + string.digits) NB_RANDOM_NONE = 5 # if nullable, how many random none to add @@ -101,108 +97,3 @@ class OpenDPDatasetInputMetric(StrEnum): HAMMING_DISTANCE = "HammingDistance" INT_DISTANCE = "u32" # opendp type for distance between datasets - - -# Example pipeline inputs -OPENDP_VERSION = pkg_resources.get_distribution("opendp").version -DIFFPRIVLIB_VERSION = pkg_resources.get_distribution("diffprivlib").version - -# Example inputs -# ----------------------------------------------------------------------------- -OPENDP_PIPELINE = ( - f'{{"version": "{OPENDP_VERSION}", ' - '"ast": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - 
'"_type": "constructor", ' - '"func": "make_chain_tt", ' - '"module": "combinators", ' - '"args": [' - "{" - '"_type": "constructor", ' - '"func": "make_select_column", ' - '"module": "transformations", ' - '"kwargs": {"key": "bill_length_mm", "TOA": "String"}' - "}, {" - '"_type": "constructor", ' - '"func": "make_split_dataframe", ' - '"module": "transformations", ' - '"kwargs": {"separator": ",", "col_names": {"_type": ' - '"list", "_items": ["species", "island", ' - '"bill_length_mm", "bill_depth_mm", "flipper_length_' - 'mm", "body_mass_g", "sex"]}}' - "}]}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_cast_default", ' - '"module": "transformations", ' - '"kwargs": {"TOA": "f64"}' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_clamp", ' - '"module": "transformations", ' - '"kwargs": {"bounds": [30.0, 65.0]}' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_resize", ' - '"module": "transformations", ' - '"kwargs": {"size": 346, "constant": 43.61}' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_variance", ' - '"module": "transformations"' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_laplace", ' - '"module": "measurements", ' - '"kwargs": {"scale": 5.0}' - "}}}" -) - -DIFFPRIVLIB_PIPELINE = ( - '{"module": "diffprivlib", ' - f'"version": "{DIFFPRIVLIB_VERSION}", ' - '"pipeline": [' - "{" - '"type": "_dpl_type:StandardScaler", ' - '"name": "scaler", ' - '"params": {' - '"with_mean": true, ' - '"with_std": true, ' - '"copy": true, ' - '"epsilon": 0.5, ' - '"bounds": {' - '"_tuple": true, ' - '"_items": [[30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0]]' - "}, " - '"random_state": null, ' - '"accountant": "_dpl_instance:BudgetAccountant"' - "}" - "}, " - "{" - '"type": "_dpl_type:LogisticRegression", ' - '"name": "classifier", ' - '"params": {' - '"tol": 0.0001, ' - '"C": 1.0, ' - '"fit_intercept": true, ' - '"random_state": null, ' - '"max_iter": 100, ' - 
'"verbose": 0, ' - '"warm_start": false, ' - '"n_jobs": null, ' - '"epsilon": 1.0, ' - '"data_norm": 83.69469642643347, ' - '"accountant": "_dpl_instance:BudgetAccountant"' - "}" - "}" - "]" - "}" -) diff --git a/server/lomas_server/dp_queries/dummy_dataset.py b/server/lomas_server/dp_queries/dummy_dataset.py index c20b70ad..d1b5e294 100644 --- a/server/lomas_server/dp_queries/dummy_dataset.py +++ b/server/lomas_server/dp_queries/dummy_dataset.py @@ -10,12 +10,11 @@ Metadata, StrMetadata, ) +from lomas_core.models.constants import DUMMY_NB_ROWS, DUMMY_SEED from lomas_core.models.requests import DummyQueryModel from lomas_server.admin_database.admin_database import AdminDatabase from lomas_server.constants import ( - DUMMY_NB_ROWS, - DUMMY_SEED, NB_RANDOM_NONE, RANDOM_STRINGS, ) diff --git a/server/lomas_server/routes/routes_admin.py b/server/lomas_server/routes/routes_admin.py index 3350ac7d..0f9c4993 100644 --- a/server/lomas_server/routes/routes_admin.py +++ b/server/lomas_server/routes/routes_admin.py @@ -7,6 +7,10 @@ ) from lomas_core.models.collections import Metadata from lomas_core.models.requests import GetDummyDataset, LomasRequestModel +from lomas_core.models.requests_examples import ( + example_get_admin_db_data, + example_get_dummy_dataset, +) from lomas_core.models.responses import ( DummyDsResponse, InitialBudgetResponse, @@ -17,10 +21,6 @@ from lomas_server.data_connector.data_connector import get_column_dtypes from lomas_server.dp_queries.dummy_dataset import make_dummy_dataset from lomas_server.routes.utils import server_live -from lomas_server.utils.query_examples import ( - example_get_admin_db_data, - example_get_dummy_dataset, -) router = APIRouter() diff --git a/server/lomas_server/routes/routes_dp.py b/server/lomas_server/routes/routes_dp.py index 3d120327..9dc9aa2f 100644 --- a/server/lomas_server/routes/routes_dp.py +++ b/server/lomas_server/routes/routes_dp.py @@ -1,5 +1,8 @@ -from fastapi import APIRouter, Body, Depends, Header, Request 
+from typing import Annotated + +from fastapi import APIRouter, Depends, Header, Request from lomas_core.constants import DPLibraries +from lomas_core.error_handler import SERVER_QUERY_ERROR_RESPONSES from lomas_core.models.requests import ( DiffPrivLibDummyQueryModel, DiffPrivLibQueryModel, @@ -22,54 +25,33 @@ handle_query_on_private_dataset, server_live, ) -from lomas_server.utils.query_examples import ( - example_diffprivlib, - example_dummy_diffprivlib, - example_dummy_opendp, - example_dummy_smartnoise_sql, - example_dummy_smartnoise_synth_query, - example_opendp, - example_smartnoise_sql, - example_smartnoise_sql_cost, - example_smartnoise_synth_cost, - example_smartnoise_synth_query, -) router = APIRouter() +# Smartnoise SQL +# ----------------------------------------------------------------------------- + @router.post( "/smartnoise_sql_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def smartnoise_sql_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSQLQueryModel = Body(example_smartnoise_sql), - user_name: str = Header(None), + smartnoise_sql_query: SmartnoiseSQLQueryModel, ) -> QueryResponse: """ Handles queries for the SmartNoiseSQL library. + \f Args: + user_name (str): The user name. request (Request): Raw request object - query_json (SmartnoiseSQLModel): A JSON object containing: - - query: The SQL query to execute. NOTE: the table name is "df", - the query must end with "FROM df". - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - - mechanisms (dict, optional): Dictionary of mechanisms for the - query (default: {}). See "Smartnoise-SQL mechanisms documentation - https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms. - - postprocess (bool, optional): Whether to postprocess the query - results (default: True). 
- See "Smartnoise-SQL postprocessing documentation - https://docs.smartnoise.org/sql/advanced.html#postprocess. - - Defaults to Body(example_smartnoise_sql). - - user_name (str, optional): The user name. - Defaults to Header(None). + smartnoise_sql_query (SmartnoiseSQLQueryModel): The smartnoise_sql query body. Raises: ExternalLibraryException: For exceptions from libraries @@ -81,54 +63,34 @@ def smartnoise_sql_handler( the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing a SmartnoiseSQLQueryResult. """ return handle_query_on_private_dataset( - request, query_json, user_name, DPLibraries.SMARTNOISE_SQL + request, smartnoise_sql_query, user_name, DPLibraries.SMARTNOISE_SQL ) -# Smartnoise SQL Dummy query @router.post( "/dummy_smartnoise_sql_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_DUMMY"], ) def dummy_smartnoise_sql_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSQLDummyQueryModel = Body(example_dummy_smartnoise_sql), - user_name: str = Header(None), + smartnoise_sql_query: SmartnoiseSQLDummyQueryModel, ) -> QueryResponse: """ Handles queries on dummy datasets for the SmartNoiseSQL library. + \f Args: + user_name (str): The user name. request (Request): Raw request object - query_json (DummySmartnoiseSQLModel, optional): A JSON object containing: - - query: The SQL query to execute. NOTE: the table name is "df", - the query must end with "FROM df". - - epsilon (float): Privacy parameter (e.g., 0.1). 
- - delta (float): Privacy parameter (e.g., 1e-5). - - mechanisms (dict, optional): Dictionary of mechanisms for the - query (default: {}). See Smartnoise-SQL mechanisms documentation - https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms. - - postprocess (bool, optional): Whether to postprocess the query - results (default: True). - See Smartnoise-SQL postprocessing documentation - https://docs.smartnoise.org/sql/advanced.html#postprocess. - - nb_rows (int, optional): The number of rows in the dummy dataset - (default: 100). - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - - Defaults to Body(example_dummy_smartnoise_sql). + smartnoise_sql_query (SmartnoiseSQLDummyQueryModel): + The smartnoise_sql query body. Raises: ExternalLibraryException: For exceptions from libraries @@ -136,96 +98,81 @@ def dummy_smartnoise_sql_handler( InternalServerException: For any other unforseen exceptions. InvalidQueryException: If there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - query_response (pd.DataFrame): a DataFrame containing - the query response. + QueryResponse: A query response containing a SmartnoiseSQLQueryResult. 
""" return handle_query_on_dummy_dataset( - request, query_json, user_name, DPLibraries.SMARTNOISE_SQL + request, smartnoise_sql_query, user_name, DPLibraries.SMARTNOISE_SQL ) @router.post( "/estimate_smartnoise_sql_cost", dependencies=[Depends(server_live)], + response_model=CostResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_smartnoise_sql_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSQLRequestModel = Body(example_smartnoise_sql_cost), - user_name: str = Header(None), + smartnoise_sql_query: SmartnoiseSQLRequestModel, ) -> CostResponse: """ Estimates the privacy loss budget cost of a SmartNoiseSQL query. + \f Args: + user_name (str): The user name. request (Request): Raw request object - query_json (SmartnoiseSQLRequestModel, optional): - A JSON object containing the following: - - query: The SQL query to estimate the cost for. - NOTE: the table name is "df", the query must end with "FROM df". - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - - mechanisms (dict, optional): Dictionary of mechanisms - for the query (default: {}). - See Smartnoise-SQL mechanisms documentation - https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms. - - Defaults to Body(example_smartnoise_sql_cost). + smartnoise_sql_query (SmartnoiseSQLRequestModel): + The smartnoise_sql request body. Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. InvalidQueryException: The dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: The privacy loss cost of the input query. 
""" - return handle_cost_query(request, query_json, user_name, DPLibraries.SMARTNOISE_SQL) + return handle_cost_query( + request, smartnoise_sql_query, user_name, DPLibraries.SMARTNOISE_SQL + ) + + +# Smartnoise Synth +# ----------------------------------------------------------------------------- @router.post( "/smartnoise_synth_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def smartnoise_synth_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSynthQueryModel = Body(example_smartnoise_synth_query), - user_name: str = Header(None), + smartnoise_synth_query: SmartnoiseSynthQueryModel, ) -> QueryResponse: """ - Handles queries for the SmartNoise Synth library. + Handles queries for the SmartNoiseSynth library. + \f Args: - request (Request): Raw request object - query_json (SmartnoiseSynthQueryModel): A JSON object containing: - - synth_name (str): name of the Synthesizer model to use. - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - mechanisms (dict[str, str], optional): Dictionary of mechanisms for the\ - query `See Smartnoise-SQL postprocessing documentation. - `__ - - select_cols (List[str]): List of columns to select. - - synth_params (dict): Keyword arguments to pass to the synthesizer - constructor. - See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide - all parameters of the model except `epsilon` and `delta`. - - nullable (bool): True if some data cells may be null - - constraints (dict): Dictionnary for custom table transformer constraints. - Column that are not specified will be inferred based on metadata. 
- - return_model (bool): True to get Synthesizer model, False to get samples - - condition (Optional[str]): sampling condition in `model.sample` - (only relevant if return_model is False) - - nb_samples (Optional[int]): number of samples to generate. - (only relevant if return_model is False) - - Defaults to Body(example_smartnoise_synth). user_name (str): The user name. + request (Request): Raw request object + smartnoise_synth_query (SmartnoiseSynthQueryModel): + The smartnoise_synth query body. + Raises: ExternalLibraryException: For exceptions from libraries external to this package. @@ -234,65 +181,38 @@ def smartnoise_synth_handler( does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. + Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing a SmartnoiseSynthModel + or SmartnoiseSynthSamples. """ return handle_query_on_private_dataset( - request, query_json, user_name, DPLibraries.SMARTNOISE_SYNTH + request, smartnoise_synth_query, user_name, DPLibraries.SMARTNOISE_SYNTH ) @router.post( "/dummy_smartnoise_synth_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def dummy_smartnoise_synth_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSynthDummyQueryModel = Body( - example_dummy_smartnoise_synth_query - ), - user_name: str = Header(None), + smartnoise_synth_query: SmartnoiseSynthDummyQueryModel, ) -> QueryResponse: """ - Handles queries for the SmartNoise Synth library. 
+ Handles queries on dummy datasets for the SmartNoiseSynth library. + \f Args: - request (Request): Raw request object - query_json (SmartnoiseSynthDummyQueryModel): A JSON object containing: - - synth_name (str): name of the Synthesizer model to use. - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - mechanisms (dict[str, str], optional): Dictionary of mechanisms for the\ - query `See Smartnoise-SQL postprocessing documentation. - `__ - - select_cols (List[str]): List of columns to select. - - synth_params (dict): Keyword arguments to pass to the synthesizer - constructor. - See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide - all parameters of the model except `epsilon` and `delta`. - - nullable (bool): True if some data cells may be null - - constraints (dict): Dictionnary for custom table transformer constraints. - Column that are not specified will be inferred based on metadata. - - return_model (bool): True to get Synthesizer model, False to get samples - - condition (Optional[str]): sampling condition in `model.sample` - (only relevant if return_model is False) - - nb_samples (Optional[int]): number of samples to generate. - (only relevant if return_model is False) - - nb_rows (int, optional): The number of rows in the dummy dataset - (default: 100). - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - - Defaults to Body(example_smartnoise_synth). user_name (str): The user name. + request (Request): Raw request object + smartnoise_synth_query (SmartnoiseSynthDummyQueryModel): + The smartnoise_synth query body. + Raises: ExternalLibraryException: For exceptions from libraries external to this package. @@ -301,56 +221,38 @@ def dummy_smartnoise_synth_handler( does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. 
+ Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing a SmartnoiseSynthModel + or SmartnoiseSynthSamples. """ return handle_query_on_dummy_dataset( - request, query_json, user_name, DPLibraries.SMARTNOISE_SYNTH + request, smartnoise_synth_query, user_name, DPLibraries.SMARTNOISE_SYNTH ) @router.post( "/estimate_smartnoise_synth_cost", dependencies=[Depends(server_live)], + response_model=CostResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_smartnoise_synth_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSynthRequestModel = Body(example_smartnoise_synth_cost), - user_name: str = Header(None), + smartnoise_synth_query: SmartnoiseSynthRequestModel, ) -> CostResponse: """ - Handles queries for the SmartNoise Synth library. + Computes the privacy loss budget cost of a SmartNoiseSynth query. + \f Args: - request (Request): Raw request object - query_json (SmartnoiseSynthRequestModel): A JSON object containing: - - synth_name (str): name of the Synthesizer model to use. - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - mechanisms (dict[str, str], optional): Dictionary of mechanisms for the\ - query `See Smartnoise-SQL postprocessing documentation. - `__ - - select_cols (List[str]): List of columns to select. - - synth_params (dict): Keyword arguments to pass to the synthesizer - constructor. - See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide - all parameters of the model except `epsilon` and `delta`. 
- - nullable (bool): True if some data cells may be null - - constraints - - nb_rows (int, optional): The number of rows in the dummy dataset - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - - Defaults to Body(example_smartnoise_synth). user_name (str): The user name. + request (Request): Raw request object + smartnoise_synth_query (SmartnoiseSynthRequestModel): + The smartnoise_synth query body. + Raises: ExternalLibraryException: For exceptions from libraries external to this package. @@ -359,44 +261,39 @@ def estimate_smartnoise_synth_cost( does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. + Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: The privacy loss cost of the input query. """ return handle_cost_query( - request, query_json, user_name, DPLibraries.SMARTNOISE_SYNTH + request, smartnoise_synth_query, user_name, DPLibraries.SMARTNOISE_SYNTH ) +# OpenDP +# ----------------------------------------------------------------------------- + + @router.post( "/opendp_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def opendp_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: OpenDPQueryModel = Body(example_opendp), - user_name: str = Header(None), + opendp_query: OpenDPQueryModel, ) -> QueryResponse: """ Handles queries for the OpenDP Library. + \f Args: + user_name (str): The user name. request (Request): Raw request object. - query_json (OpenDPQueryModel, optional): A JSON object containing the following: - - opendp_pipeline: The OpenDP pipeline for the query. - - fixed_delta: If the pipeline measurement is of type - "ZeroConcentratedDivergence" (e.g. 
with "make_gaussian") then it is - converted to "SmoothedMaxDivergence" with "make_zCDP_to_approxDP" - (see "opendp measurements documentation at - https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301 - In that case a "fixed_delta" must be provided by the user. - - Defaults to Body(example_opendp). - - user_name (str, optional): The user name. - Defaults to Header(None). + opendp_query (OpenDPQueryModel): The opendp query object. Raises: ExternalLibraryException: For exceptions from libraries @@ -408,185 +305,149 @@ def opendp_query_handler( the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing an OpenDPQueryResult. """ return handle_query_on_private_dataset( - request, query_json, user_name, DPLibraries.OPENDP + request, opendp_query, user_name, DPLibraries.OPENDP ) @router.post( "/dummy_opendp_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_DUMMY"], ) def dummy_opendp_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: OpenDPDummyQueryModel = Body(example_dummy_opendp), - user_name: str = Header(None), + opendp_query: OpenDPDummyQueryModel, ) -> QueryResponse: """ Handles queries on dummy datasets for the OpenDP library. + \f Args: + user_name (str): The user name. request (Request): Raw request object. - query_json (OpenDPDummyQueryModel, optional): Model for opendp dummy query. 
- A JSON object containing the following: - - opendp_pipeline: Open - - fixed_delta: If the pipeline measurement is of type\ - "ZeroConcentratedDivergence" (e.g. with "make_gaussian") then - it is converted to "SmoothedMaxDivergence" with - "make_zCDP_to_approxDP" (see opendp measurements documentation at - https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301 - In that case a "fixed_delta" must be provided by the user. - - nb_rows (int, optional): The number of rows - in the dummy dataset (default: 100). - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - - Defaults to Body(example_dummy_opendp). + opendp_query (OpenDPQueryModel): The opendp query object. Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: If there is not enough budget or the dataset - does not exist. + InvalidQueryException: The pipeline does not contain a "measurement", + there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - query_response (pd.DataFrame): a DataFrame containing - the query response. + QueryResponse: A query response containing an OpenDPQueryResult. 
""" return handle_query_on_dummy_dataset( - request, query_json, user_name, DPLibraries.OPENDP + request, opendp_query, user_name, DPLibraries.OPENDP ) @router.post( "/estimate_opendp_cost", dependencies=[Depends(server_live)], + response_model=CostResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_opendp_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: OpenDPRequestModel = Body(example_opendp), - user_name: str = Header(None), + opendp_query: OpenDPRequestModel, ) -> CostResponse: """ Estimates the privacy loss budget cost of an OpenDP query. + \f Args: - request (Request): Raw request object - query_json (OpenDPRequestModel, optional): - A JSON object containing the following: - - "opendp_pipeline": The OpenDP pipeline for the query. - - Defaults to Body(example_opendp). + user_name (str): The user name. + request (Request): Raw request object. + opendp_query (OpenDPRequestModel): The opendp query object. Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: The dataset does not exist or the - pipeline does not contain a measurement. + InvalidQueryException: The pipeline does not contain a "measurement", + there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: The privacy loss cost of the input query. 
""" - return handle_cost_query(request, query_json, user_name, DPLibraries.OPENDP) + return handle_cost_query(request, opendp_query, user_name, DPLibraries.OPENDP) + + +# DiffPrivLib +# ----------------------------------------------------------------------------- @router.post( "/diffprivlib_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def diffprivlib_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: DiffPrivLibQueryModel = Body(example_diffprivlib), - user_name: str = Header(None), + diffprivlib_query: DiffPrivLibQueryModel, ): """ Handles queries for the DiffPrivLib Library. + \f Args: - request (Request): Raw request object. - query_json (DiffPrivLibQueryModel, optional): - A JSON object containing the following: - - pipeline: The DiffPrivLib pipeline for the query. - - feature_columns: the list of feature column to train - - target_columns: the list of target column to predict - - test_size: proportion of the test set - - test_train_split_seed: seed for the random train test split, - - imputer_strategy: imputation strategy - - Defaults to Body(example_diffprivlib). - - user_name (str, optional): The user name. - Defaults to Header(None). + user_name (str): The user name. + request (Request): Raw request object + diffprivlib_query (DiffPrivLibQueryModel): The diffprivlib query body. Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: The pipeline does not contain a "measurement", - there is not enough budget or the dataset does not exist. + InvalidQueryException: If there is not enough budget or the dataset + does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. 
Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing a DiffPrivLibQueryResult. """ return handle_query_on_private_dataset( - request, query_json, user_name, DPLibraries.DIFFPRIVLIB + request, diffprivlib_query, user_name, DPLibraries.DIFFPRIVLIB ) @router.post( "/dummy_diffprivlib_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_DUMMY"], ) def dummy_diffprivlib_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: DiffPrivLibDummyQueryModel = Body(example_dummy_diffprivlib), - user_name: str = Header(None), + query_json: DiffPrivLibDummyQueryModel, ) -> QueryResponse: """ Handles queries on dummy datasets for the DiffPrivLib library. + \f Args: - request (Request): Raw request object. - query_json (DiffPrivLibDummyQueryModel, optional): - A JSON object containing the following: - - pipeline: The DiffPrivLib pipeline for the query. - - feature_columns: the list of feature column to train - - target_columns: the list of target column to predict - - test_size: proportion of the test set - - test_train_split_seed: seed for the random train test split, - - imputer_strategy: imputation strategy - - nb_rows (int, optional): - The number of rows in the dummy dataset (default: 100). - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - Defaults to Body(example_dummy_diffprivlib) + user_name (str): The user name. + request (Request): Raw request object + diffprivlib_query (DiffPrivLibDummyQueryModel): The diffprivlib query body. 
Raises: ExternalLibraryException: For exceptions from libraries @@ -594,11 +455,11 @@ def dummy_diffprivlib_query_handler( InternalServerException: For any other unforseen exceptions. InvalidQueryException: If there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - query_response (pd.DataFrame): a DataFrame containing - the query response. + QueryResponse: A query response containing a DiffPrivLibQueryResult. """ return handle_query_on_dummy_dataset( request, query_json, user_name, DPLibraries.DIFFPRIVLIB @@ -608,19 +469,23 @@ def dummy_diffprivlib_query_handler( @router.post( "/estimate_diffprivlib_cost", dependencies=[Depends(server_live)], + response_model=CostResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_diffprivlib_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: DiffPrivLibRequestModel = Body(example_diffprivlib), - user_name: str = Header(None), + diffprivlib_query: DiffPrivLibRequestModel, ) -> CostResponse: """ Estimates the privacy loss budget cost of an DiffPrivLib query. + \f Args: + user_name (str): The user name. request (Request): Raw request object - query_json (DiffPrivLibRequestModel, optional): + diffprivlib_query (DiffPrivLibRequestModel): The diffprivlib query body. A JSON object containing the following: - pipeline: The DiffPrivLib pipeline for the query. - feature_columns: the list of feature column to train @@ -635,12 +500,14 @@ def estimate_diffprivlib_cost( ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: The dataset does not exist or the - pipeline does not contain a measurement. 
+ InvalidQueryException: If there is not enough budget or the dataset + does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: The privacy loss cost of the input query. """ - return handle_cost_query(request, query_json, user_name, DPLibraries.DIFFPRIVLIB) + return handle_cost_query( + request, diffprivlib_query, user_name, DPLibraries.DIFFPRIVLIB + ) diff --git a/server/lomas_server/routes/utils.py b/server/lomas_server/routes/utils.py index e38b641a..9f21766b 100644 --- a/server/lomas_server/routes/utils.py +++ b/server/lomas_server/routes/utils.py @@ -80,13 +80,14 @@ def handle_query_on_private_dataset( dp_library: DPLibraries, ) -> QueryResponse: """ - Handles queries for the SmartNoiseSQL library. + Handles queries on private datasets for all supported libraries. Args: request (Request): Raw request object - query_json (BaseModel): A JSON object containing the user request + query_model (DummyQueryModel): An instance of DummyQueryModel, + specific to the library. user_name (str): The user name - dp_library: Name of the DP library to use for the query + dp_library (DPLibraries): Name of the DP library to use for the request Raises: ExternalLibraryException: For exceptions from libraries @@ -98,14 +99,8 @@ def handle_query_on_private_dataset( the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. 
+ QueryResponse: A QueryResponse model containing the result of the query + (specific to the library) as well as the cost of the query. """ app = request.app @@ -131,18 +126,19 @@ def handle_query_on_private_dataset( def handle_query_on_dummy_dataset( request: Request, - query_json: DummyQueryModel, + query_model: DummyQueryModel, user_name: str, dp_library: DPLibraries, ) -> QueryResponse: """ - Handles queries for the SmartNoiseSQL library. + Handles queries on dummy datasets for all supported libraries. Args: request (Request): Raw request object - query_json (BaseModel): A JSON object containing the user request + query_model (DummyQueryModel): An instance of DummyQueryModel, + specific to the library. user_name (str): The user name - dp_library: Name of the DP library to use for the query + dp_library (DPLibraries): Name of the DP library to use for the request Raises: ExternalLibraryException: For exceptions from libraries @@ -150,20 +146,24 @@ def handle_query_on_dummy_dataset( InternalServerException: For any other unforseen exceptions. InvalidQueryException: If there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing the query response. + QueryResponse: A QueryResponse model containing the result of the query + (specific to the library) as well as the cost of such a query if it was + executed on a private dataset. 
""" app = request.app - dataset_name = query_json.dataset_name + dataset_name = query_model.dataset_name if not app.state.admin_database.has_user_access_to_dataset(user_name, dataset_name): raise UnauthorizedAccessException( f"{user_name} does not have access to {dataset_name}.", ) ds_data_connector = get_dummy_dataset_for_query( - app.state.admin_database, query_json + app.state.admin_database, query_model ) dummy_querier = querier_factory( dp_library, @@ -172,8 +172,8 @@ def handle_query_on_dummy_dataset( ) try: - eps_cost, delta_cost = dummy_querier.cost(query_json) - result = dummy_querier.query(query_json) + eps_cost, delta_cost = dummy_querier.cost(query_model) + result = dummy_querier.query(query_model) response = QueryResponse( requested_by=user_name, result=result, epsilon=eps_cost, delta=delta_cost ) @@ -188,7 +188,7 @@ def handle_query_on_dummy_dataset( @timing_protection def handle_cost_query( request: Request, - query_json: LomasRequestModel, + request_model: LomasRequestModel, user_name: str, dp_library: DPLibraries, ) -> CostResponse: @@ -197,31 +197,34 @@ def handle_cost_query( Args: request (Request): Raw request object - query_json (BaseModel): A JSON object containing the user request + request_model (LomasRequestModel): An instance of LomasRequestModel, + specific to the library. user_name (str): The user name - dp_library: Name of the DP library to use for the query + dp_library (DPLibraries): Name of the DP library to use for the request Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: The dataset does not exist. + InvalidQueryException: If there is not enough budget or the dataset + does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. 
Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: A cost response containing the epsilon and delta + privacy-loss budget cost for the request. """ app = request.app - dataset_name = query_json.dataset_name + dataset_name = request_model.dataset_name if not app.state.admin_database.has_user_access_to_dataset(user_name, dataset_name): raise UnauthorizedAccessException( f"{user_name} does not have access to {dataset_name}.", ) data_connector = data_connector_factory( - query_json.dataset_name, + request_model.dataset_name, app.state.admin_database, app.state.private_credentials, ) @@ -231,7 +234,7 @@ def handle_cost_query( admin_database=app.state.admin_database, ) try: - eps_cost, delta_cost = dp_querier.cost(query_json) + eps_cost, delta_cost = dp_querier.cost(request_model) except KNOWN_EXCEPTIONS as e: raise e except Exception as e: diff --git a/server/lomas_server/tests/test_api.py b/server/lomas_server/tests/test_api.py index 1335722c..23679550 100644 --- a/server/lomas_server/tests/test_api.py +++ b/server/lomas_server/tests/test_api.py @@ -10,6 +10,24 @@ from lomas_core.constants import DPLibraries from lomas_core.error_handler import InternalServerException from lomas_core.models.config import DBConfig +from lomas_core.models.exceptions import ( + ExternalLibraryExceptionModel, + InvalidQueryExceptionModel, + UnauthorizedAccessExceptionModel, +) +from lomas_core.models.requests_examples import ( + DUMMY_NB_ROWS, + PENGUIN_DATASET, + QUERY_DELTA, + QUERY_EPSILON, + example_dummy_opendp, + example_dummy_smartnoise_sql, + example_get_admin_db_data, + example_get_dummy_dataset, + example_opendp, + example_smartnoise_sql, + example_smartnoise_sql_cost, +) from lomas_core.models.responses import ( CostResponse, DummyDsResponse, @@ -38,19 +56,6 @@ TRUE_VALUES, ) from lomas_server.utils.config import CONFIG_LOADER -from 
lomas_server.utils.query_examples import ( - DUMMY_NB_ROWS, - PENGUIN_DATASET, - QUERY_DELTA, - QUERY_EPSILON, - example_dummy_opendp, - example_dummy_smartnoise_sql, - example_get_admin_db_data, - example_get_dummy_dataset, - example_opendp, - example_smartnoise_sql, - example_smartnoise_sql_cost, -) INITAL_EPSILON = 10 INITIAL_DELTA = 0.005 @@ -183,10 +188,13 @@ def test_get_dataset_metadata(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": f"Dataset {fake_dataset} does not " - + "exist. Please, verify the client object initialisation." - } + assert ( + response.json() + == InvalidQueryExceptionModel( + message=f"Dataset {fake_dataset} does not " + + "exist. Please, verify the client object initialisation." + ).model_dump() + ) # Expect to fail: user does have access to dataset other_dataset = "IRIS" @@ -196,10 +204,12 @@ def test_get_dataset_metadata(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to {other_dataset}." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to {other_dataset}." + ).model_dump() + ) def test_get_dummy_dataset(self) -> None: """Test_get_dummy_dataset.""" @@ -246,10 +256,13 @@ def test_get_dummy_dataset(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": f"Dataset {fake_dataset} does not " - + "exist. Please, verify the client object initialisation." - } + assert ( + response.json() + == InvalidQueryExceptionModel( + message=f"Dataset {fake_dataset} does not " + + "exist. Please, verify the client object initialisation." 
+ ).model_dump() + ) # Expect to fail: missing argument dummy_nb_rows response = client.post( @@ -273,10 +286,12 @@ def test_get_dummy_dataset(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to {other_dataset}." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to {other_dataset}." + ).model_dump() + ) # Expect to fail: user does not exist fake_user = "fake_user" @@ -288,10 +303,13 @@ def test_get_dummy_dataset(self) -> None: headers=new_headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": f"User {fake_user} does not " - + "exist. Please, verify the client object initialisation." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"User {fake_user} does not " + + "exist. Please, verify the client object initialisation." + ).model_dump() + ) # Expect to work with datetimes and another user fake_user = "BirthdayGirl" @@ -367,14 +385,17 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error obtaining cost: " - + "Noise scale is too large using epsilon=1e-09 " - + "and bounds (0, 1) with Mechanism.gaussian. " - + "Try preprocessing to reduce senstivity, " - + "or try different privacy parameters.", - "library": "smartnoise_sql", - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="Error obtaining cost: " + + "Noise scale is too large using epsilon=1e-09 " + + "and bounds (0, 1) with Mechanism.gaussian. 
" + + "Try preprocessing to reduce senstivity, " + + "or try different privacy parameters.", + library="smartnoise_sql", + ).model_dump() + ) # Expect to fail: query does not make sense input_smartnoise = dict(example_smartnoise_sql) @@ -387,11 +408,13 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error obtaining cost: " - + "Column cannot be found bill", - "library": "smartnoise_sql", - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="Error obtaining cost: " + "Column cannot be found bill", + library="smartnoise_sql", + ).model_dump() + ) # Expect to fail: dataset without access input_smartnoise = dict(example_smartnoise_sql) @@ -402,10 +425,12 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + "Dr. Antartica does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message="Dr. Antartica does not have access to IRIS." + ).model_dump() + ) # Expect to fail: dataset does not exist input_smartnoise = dict(example_smartnoise_sql) @@ -416,11 +441,13 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "Dataset I_do_not_exist does not exist. " - + "Please, verify the client object initialisation." - } + assert ( + response.json() + == InvalidQueryExceptionModel( + message="Dataset I_do_not_exist does not exist. " + + "Please, verify the client object initialisation." 
+ ).model_dump() + ) # Expect to fail: user does not exist new_headers = self.headers @@ -431,11 +458,13 @@ def test_smartnoise_sql_query(self) -> None: headers=new_headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + "User I_do_not_exist does not exist. " - + "Please, verify the client object initialisation." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message="User I_do_not_exist does not exist. " + + "Please, verify the client object initialisation." + ).model_dump() + ) def test_smartnoise_sql_query_parameters(self) -> None: """Test smartnoise-sql query parameters.""" @@ -546,11 +575,10 @@ def test_dummy_smartnoise_sql_query(self) -> None: "/dummy_smartnoise_sql_query", json=example_dummy_smartnoise_sql, ) - assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "User None does not exist." - + " Please, verify the client object initialisation." - } + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + response_dict = json.loads(response.content.decode("utf8"))["detail"] + assert response_dict[0]["type"] == "missing" + assert response_dict[0]["loc"] == ["header", "user-name"] # Should fail: user does not have access to dataset body = dict(example_dummy_smartnoise_sql) @@ -561,10 +589,12 @@ def test_dummy_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." 
+ ).model_dump() + ) def test_smartnoise_sql_cost(self) -> None: """Test_smartnoise_sql_cost.""" @@ -591,10 +621,9 @@ def test_smartnoise_sql_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ) def test_opendp_query(self) -> None: # pylint: disable=R0915 """Test_opendp_query.""" @@ -641,14 +670,18 @@ def test_opendp_query(self) -> None: # pylint: disable=R0915 "/opendp_query", json={ "dataset_name": PENGUIN_DATASET, + "fixed_delta": None, "opendp_json": transformation_pipeline.to_json(), }, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "The pipeline provided is not a " - + "measurement. It cannot be processed in this server." - } + assert ( + response.json() + == InvalidQueryExceptionModel( + message="The pipeline provided is not a " + + "measurement. It cannot be processed in this server." + ).model_dump() + ) # Test MAX_DIVERGENCE (pure DP) md_pipeline = transformation_pipeline >> dp_p.m.then_laplace(scale=5.0) @@ -656,6 +689,7 @@ def test_opendp_query(self) -> None: # pylint: disable=R0915 "/opendp_query", json={ "dataset_name": PENGUIN_DATASET, + "fixed_delta": None, "opendp_json": md_pipeline.to_json(), }, ) @@ -675,15 +709,18 @@ def test_opendp_query(self) -> None: # pylint: disable=R0915 json_obj = { "dataset_name": PENGUIN_DATASET, "opendp_json": zcd_pipeline.to_json(), + "fixed_delta": None, } # Should error because missing fixed_delta response = client.post("/opendp_query", json=json_obj) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "fixed_delta must be set for smooth max divergence" - + " and zero concentrated divergence." 
- } + assert ( + response.json() + == InvalidQueryExceptionModel( + message="fixed_delta must be set for smooth max divergence" + + " and zero concentrated divergence." + ).model_dump() + ) # Should work because fixed_delta is set json_obj["fixed_delta"] = 1e-6 response = client.post("/opendp_query", json=json_obj) @@ -703,15 +740,18 @@ def test_opendp_query(self) -> None: # pylint: disable=R0915 json_obj = { "dataset_name": PENGUIN_DATASET, "opendp_json": sm_pipeline.to_json(), + "fixed_delta": None, } # Should error because missing fixed_delta response = client.post("/opendp_query", json=json_obj) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "fixed_delta must be set for smooth max divergence" - + " and zero concentrated divergence." - } + assert ( + response.json() + == InvalidQueryExceptionModel( + message="fixed_delta must be set for smooth max divergence" + + " and zero concentrated divergence." + ).model_dump() + ) # Should work because fixed_delta is set json_obj["fixed_delta"] = 1e-6 @@ -775,10 +815,12 @@ def test_dummy_opendp_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ).model_dump() + ) def test_opendp_cost(self) -> None: """Test_opendp_cost.""" @@ -805,10 +847,12 @@ def test_opendp_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." 
+ ).model_dump() + ) def test_get_initial_budget(self) -> None: """Test_get_initial_budget.""" @@ -1009,8 +1053,11 @@ def test_subsequent_budget_limit_logic(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "Not enough budget for this query " - + "epsilon remaining 2.0, " - + "delta remaining 0.004970000100000034." - } + assert ( + response.json() + == InvalidQueryExceptionModel( + message="Not enough budget for this query " + + "epsilon remaining 2.0, " + + "delta remaining 0.004970000100000034." + ).model_dump() + ) diff --git a/server/lomas_server/tests/test_api_diffprivlib.py b/server/lomas_server/tests/test_api_diffprivlib.py index 29065c1c..9624f8ca 100644 --- a/server/lomas_server/tests/test_api_diffprivlib.py +++ b/server/lomas_server/tests/test_api_diffprivlib.py @@ -10,6 +10,15 @@ from fastapi import status from fastapi.testclient import TestClient from lomas_core.constants import DPLibraries +from lomas_core.models.exceptions import ( + ExternalLibraryExceptionModel, + InvalidQueryExceptionModel, + UnauthorizedAccessExceptionModel, +) +from lomas_core.models.requests_examples import ( + example_diffprivlib, + example_dummy_diffprivlib, +) from lomas_core.models.responses import ( CostResponse, DiffPrivLibQueryResult, @@ -19,10 +28,6 @@ from lomas_server.app import app from lomas_server.tests.test_api import TestRootAPIEndpoint -from lomas_server.utils.query_examples import ( - example_diffprivlib, - example_dummy_diffprivlib, -) def validate_pipeline(response) -> QueryResponse: @@ -83,10 +88,12 @@ def test_imputation(diffprivlib_body, imputer_strategy): # Should not work unknow imputation strategy response = test_imputation(example_diffprivlib, "i_do_not_exist") assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "Imputation strategy i_do_not_exist not supported." 
- } + assert ( + response.json() + == InvalidQueryExceptionModel( + message="Imputation strategy i_do_not_exist not supported." + ).model_dump() + ) # Should not work: Privacy Leak Warning warnings.simplefilter("error", PrivacyLeakWarning) @@ -105,18 +112,21 @@ def test_imputation(diffprivlib_body, imputer_strategy): headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "PrivacyLeakWarning: " - + "Bounds parameter hasn't been specified, so falling back to " - + "determining bounds from the data.\n " - + "This will result in additional privacy leakage. " - + "To ensure differential privacy with no additional privacy " - + "loss, specify `bounds` for each valued returned by " - + "np.mean().. " - + "Lomas server cannot fit pipeline on data, " - + "PrivacyLeakWarning is a blocker.", - "library": DPLibraries.DIFFPRIVLIB, - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="PrivacyLeakWarning: " + + "Bounds parameter hasn't been specified, so falling back to " + + "determining bounds from the data.\n " + + "This will result in additional privacy leakage. " + + "To ensure differential privacy with no additional privacy " + + "loss, specify `bounds` for each valued returned by " + + "np.mean().. " + + "Lomas server cannot fit pipeline on data, " + + "PrivacyLeakWarning is a blocker.", + library=DPLibraries.DIFFPRIVLIB, + ).model_dump() + ) # Should not work: Compatibility Warning warnings.simplefilter("error", DiffprivlibCompatibilityWarning) @@ -345,10 +355,12 @@ def test_dummy_diffprivlib_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." 
+ ).model_dump() + ) def test_diffprivlib_cost(self) -> None: """Test_diffprivlib_cost.""" @@ -375,7 +387,9 @@ def test_diffprivlib_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ).model_dump() + ) diff --git a/server/lomas_server/tests/test_api_smartnoise_synth.py b/server/lomas_server/tests/test_api_smartnoise_synth.py index cadd0db5..936fd37d 100644 --- a/server/lomas_server/tests/test_api_smartnoise_synth.py +++ b/server/lomas_server/tests/test_api_smartnoise_synth.py @@ -2,6 +2,15 @@ from fastapi import status from fastapi.testclient import TestClient +from lomas_core.models.exceptions import ( + ExternalLibraryExceptionModel, + UnauthorizedAccessExceptionModel, +) +from lomas_core.models.requests_examples import ( + example_dummy_smartnoise_synth_query, + example_smartnoise_synth_cost, + example_smartnoise_synth_query, +) from lomas_core.models.responses import ( CostResponse, QueryResponse, @@ -19,11 +28,6 @@ from lomas_server.app import app from lomas_server.tests.constants import PENGUIN_COLUMNS, PUMS_COLUMNS from lomas_server.tests.test_api import TestRootAPIEndpoint -from lomas_server.utils.query_examples import ( - example_dummy_smartnoise_synth_query, - example_smartnoise_synth_cost, - example_smartnoise_synth_query, -) def validate_response(response) -> QueryResponse: @@ -76,14 +80,17 @@ def test_smartnoise_synth_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error fitting model: " - + "sample_rate=1.4534883720930232 is not a valid value. " - + "Please provide a float between 0 and 1. 
" - + "Try decreasing batch_size in " - + "synth_params (default batch_size=500).", - "library": "smartnoise_synth", - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="Error fitting model: " + + "sample_rate=1.4534883720930232 is not a valid value. " + + "Please provide a float between 0 and 1. " + + "Try decreasing batch_size in " + + "synth_params (default batch_size=500).", + library="smartnoise_synth", + ).model_dump() + ) def test_smartnoise_synth_query_samples(self) -> None: """Test smartnoise synth query return samples.""" @@ -154,7 +161,7 @@ def test_smartnoise_synth_query_select_cols(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json()["InvalidQueryException"].startswith( + assert response.json()["message"].startswith( "Error while selecting provided select_cols: " ) @@ -240,6 +247,7 @@ def test_smartnoise_synth_query_delta_none(self) -> None: json=body, headers=self.headers, ) + r_model = validate_response(response) assert r_model.requested_by == self.user_name @@ -273,10 +281,12 @@ def test_dummy_smartnoise_synth_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ).model_dump() + ) def test_smartnoise_synth_cost(self) -> None: """Test_smartnoise_synth_cost.""" @@ -303,10 +313,12 @@ def test_smartnoise_synth_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert ( + response.json() + == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." 
+ ).model_dump() + ) def test_smartnoise_synth_query_datetime(self) -> None: """Test smartnoise synth query on other dataset for datetime columns.""" @@ -378,7 +390,7 @@ def test_smartnoise_synth_query_mwem(self) -> None: """Test smartnoise synth query MWEM Synthesizer.""" with TestClient(app) as client: - # Expect to fail: delta + # Expected to fail: delta body = dict(example_smartnoise_synth_query) body["synth_name"] = "mwem" body["synth_params"] = {} @@ -389,14 +401,17 @@ def test_smartnoise_synth_query_mwem(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error creating model: " - + "MWEMSynthesizer.__init__() got an " - + "unexpected keyword argument 'delta'", - "library": "smartnoise_synth", - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="Error creating model: " + + "MWEMSynthesizer.__init__() got an " + + "unexpected keyword argument 'delta'", + library="smartnoise_synth", + ).model_dump() + ) - # Expect to work: limited columns and delta None + # Expected to work: limited columns and delta None body["delta"] = None response = client.post( "/smartnoise_synth_query", @@ -459,7 +474,7 @@ def test_smartnoise_synth_query_mst(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json()["InvalidQueryException"].startswith( + assert response.json()["message"].startswith( "mst synthesizer cannot be returned, only samples. " + "Please, change model or set `return_model=False`" ) @@ -480,7 +495,7 @@ def test_smartnoise_synth_query_pacsynth(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json()["InvalidQueryException"].startswith( + assert response.json()["message"].startswith( "pacsynth synthesizer not supported due to Rust panic. " + "Please select another Synthesizer." 
) @@ -499,12 +514,15 @@ def test_smartnoise_synth_query_patectgan(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error fitting model: " - + "Inputted epsilon parameter is too small to create a private" - + " dataset. Try increasing epsilon and rerunning.", - "library": "smartnoise_synth", - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="Error fitting model: " + + "Inputted epsilon parameter is too small to create a private" + + " dataset. Try increasing epsilon and rerunning.", + library="smartnoise_synth", + ).model_dump() + ) # Expect to work body["epsilon"] = 1.0 @@ -536,10 +554,13 @@ def test_smartnoise_synth_query_pategan(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "pategan not reliable with this dataset.", - "library": "smartnoise_synth", - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="pategan not reliable with this dataset.", + library="smartnoise_synth", + ).model_dump() + ) def test_smartnoise_synth_query_dpgan(self) -> None: """Test smartnoise synth query dpgan Synthesizer.""" @@ -555,13 +576,16 @@ def test_smartnoise_synth_query_dpgan(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error fitting model: " - + "Inputted epsilon and sigma parameters " - + "are too small to create a private dataset. " - + "Try increasing either parameter and rerunning.", - "library": "smartnoise_synth", - } + assert ( + response.json() + == ExternalLibraryExceptionModel( + message="Error fitting model: " + + "Inputted epsilon and sigma parameters " + + "are too small to create a private dataset. 
" + + "Try increasing either parameter and rerunning.", + library="smartnoise_synth", + ).model_dump() + ) body["epsilon"] = 1.0 response = client.post( diff --git a/server/lomas_server/tests/test_dummy_generation.py b/server/lomas_server/tests/test_dummy_generation.py index f9aa97dd..525e200f 100644 --- a/server/lomas_server/tests/test_dummy_generation.py +++ b/server/lomas_server/tests/test_dummy_generation.py @@ -2,8 +2,8 @@ from typing import Any from lomas_core.models.collections import Metadata +from lomas_core.models.constants import DUMMY_NB_ROWS, DUMMY_SEED -from lomas_server.constants import DUMMY_NB_ROWS, DUMMY_SEED from lomas_server.dp_queries.dummy_dataset import make_dummy_dataset