From c157a3c1dbd7c31d5c15c31547e31b1d90f122b3 Mon Sep 17 00:00:00 2001 From: Damien Date: Wed, 13 Nov 2024 16:44:55 +0000 Subject: [PATCH] Moved example queries out of default values to model configs --- core/lomas_core/error_handler.py | 11 +- core/lomas_core/models/requests.py | 104 ++++- core/lomas_core/models/responses.py | 26 +- .../dp_libraries/smartnoise_synth.py | 4 +- server/lomas_server/routes/routes_admin.py | 8 +- server/lomas_server/routes/routes_dp.py | 398 +++++++----------- server/lomas_server/tests/test_api.py | 167 ++++---- .../tests/test_api_diffprivlib.py | 54 ++- .../tests/test_api_smartnoise_synth.py | 95 ++--- server/lomas_server/utils/query_examples.py | 203 --------- 10 files changed, 426 insertions(+), 644 deletions(-) delete mode 100644 server/lomas_server/utils/query_examples.py diff --git a/core/lomas_core/error_handler.py b/core/lomas_core/error_handler.py index 699dc33a..c709e03a 100644 --- a/core/lomas_core/error_handler.py +++ b/core/lomas_core/error_handler.py @@ -1,6 +1,7 @@ from typing import Any, Type from fastapi import FastAPI, Request, status +from fastapi.encoders import jsonable_encoder from fastapi.responses import JSONResponse from pymongo.errors import WriteConcernError @@ -94,7 +95,7 @@ async def invalid_query_exception_handler( LOG.info(f"InvalidQueryException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_400_BAD_REQUEST, - content=InvalidQueryExceptionModel(message=exc.error_message), + content=jsonable_encoder(InvalidQueryExceptionModel(message=exc.error_message)), ) @app.exception_handler(ExternalLibraryException) @@ -104,9 +105,9 @@ async def external_library_exception_handler( LOG.info(f"ExternalLibraryException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - content=ExternalLibraryExceptionModel( + content=jsonable_encoder(ExternalLibraryExceptionModel( message=exc.error_message, library=exc.library - ), + )), ) @app.exception_handler(UnauthorizedAccessException) @@ -116,7 +117,7 @@ async def unauthorized_access_exception_handler( LOG.info(f"UnauthorizedAccessException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_403_FORBIDDEN, - content=UnauthorizedAccessExceptionModel(message=exc.error_message), + content=jsonable_encoder(UnauthorizedAccessExceptionModel(message=exc.error_message)), ) @app.exception_handler(InternalServerException) @@ -126,7 +127,7 @@ async def internal_server_exception_handler( LOG.info(f"InternalServerException raised: {exc.error_message}") return JSONResponse( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - content=InternalServerExceptionModel(), + content=jsonable_encoder(InternalServerExceptionModel()), ) diff --git a/core/lomas_core/models/requests.py b/core/lomas_core/models/requests.py index d8658f74..293407c3 100644 --- a/core/lomas_core/models/requests.py +++ b/core/lomas_core/models/requests.py @@ -8,6 +8,18 @@ SSynthMarginalSynthesizer, ) from lomas_core.error_handler import InternalServerException +from lomas_core.models.requests_examples import ( + example_diffprivlib, + example_dummy_diffprivlib, + example_dummy_opendp, + example_dummy_smartnoise_sql, + example_dummy_smartnoise_synth_query, + example_opendp, + example_smartnoise_sql, + example_smartnoise_sql_cost, + example_smartnoise_synth_cost, + example_smartnoise_synth_query, +) class LomasRequestModel(BaseModel): @@ -29,7 +41,9 @@ class GetDummyDataset(LomasRequestModel): """Model input to get a dummy dataset.""" dummy_nb_rows: int 
= Field(..., gt=0) + """The number of dummy rows to generate.""" dummy_seed: int + """The seed for the random generation of the dummy dataset.""" class QueryModel(LomasRequestModel): @@ -58,6 +72,10 @@ class DummyQueryModel(QueryModel): class SmartnoiseSQLRequestModel(LomasRequestModel): """Base input model for a smarnoise-sql request.""" + model_config = ConfigDict( + json_schema_extra={"examples": [example_smartnoise_sql_cost]} + ) + query_str: str """The SQL query to execute. @@ -65,11 +83,11 @@ class SmartnoiseSQLRequestModel(LomasRequestModel): """ epsilon: float = Field(..., gt=0) """Privacy parameter (e.g., 0.1).""" - delta: float = Field(..., gt=0) + delta: float = Field(..., ge=0) """Privacy parameter (e.g., 1e-5).""" - mechanisms: dict = {} + mechanisms: dict """ - Dictionary of mechanisms for the query (default: {}). + Dictionary of mechanisms for the query. See Smartnoise-SQL mechanisms documentation at https://docs.smartnoise.org/sql/advanced.html#overriding-mechanisms. @@ -79,6 +97,8 @@ class SmartnoiseSQLRequestModel(LomasRequestModel): class SmartnoiseSQLQueryModel(SmartnoiseSQLRequestModel, QueryModel): """Base input model for a smartnoise-sql query.""" + model_config = ConfigDict(json_schema_extra={"examples": [example_smartnoise_sql]}) + postprocess: bool """ Whether to postprocess the query results (default: True). @@ -91,36 +111,69 @@ class SmartnoiseSQLQueryModel(SmartnoiseSQLRequestModel, QueryModel): class SmartnoiseSQLDummyQueryModel(SmartnoiseSQLQueryModel, DummyQueryModel): """Input model for a smartnoise-sql query on a dummy dataset.""" + model_config = ConfigDict( + json_schema_extra={"examples": [example_dummy_smartnoise_sql]} + ) + # SmartnoiseSynth # ---------------------------------------------------------------------------- class SmartnoiseSynthRequestModel(LomasRequestModel): """Base input model for a SmartnoiseSynth request.""" + model_config = ConfigDict( + json_schema_extra={"examples": [example_smartnoise_synth_cost]} + ) + synth_name: Union[SSynthMarginalSynthesizer, SSynthGanSynthesizer] + """Name of the synthesizer model to use.""" epsilon: float = Field(..., gt=0) - delta: Optional[float] = None + """Privacy parameter (e.g., 0.1).""" + delta: float = Field(..., ge=0) + """Privacy parameter (e.g., 1e-5).""" select_cols: List + """List of columns to select.""" synth_params: dict + """ + Keyword arguments to pass to the synthesizer constructor. + + See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide + all parameters of the model except `epsilon` and `delta`. + """ nullable: bool + """True if some data cells may be null.""" constraints: str + """ + Dictionnary for custom table transformer constraints. + + Column that are not specified will be inferred based on metadata. + """ class SmartnoiseSynthQueryModel(SmartnoiseSynthRequestModel, QueryModel): """Base input model for a smarnoise-synth query.""" + model_config = ConfigDict( + json_schema_extra={"examples": [example_smartnoise_synth_query]} + ) + return_model: bool + """True to get Synthesizer model, False to get samples.""" condition: str + """Sampling condition in `model.sample` (only relevant if return_model is False).""" nb_samples: int + """Number of samples to generate. + + (only relevant if return_model is False) + """ class SmartnoiseSynthDummyQueryModel(SmartnoiseSynthQueryModel, DummyQueryModel): """Input model for a smarnoise-synth query on a dummy dataset.""" - # Same as normal query. 
- return_model: bool - condition: str - nb_samples: int + model_config = ConfigDict( + json_schema_extra={"examples": [example_dummy_smartnoise_synth_query]} + ) # OpenDP @@ -128,40 +181,69 @@ class SmartnoiseSynthDummyQueryModel(SmartnoiseSynthQueryModel, DummyQueryModel) class OpenDPRequestModel(LomasRequestModel): """Base input model for an opendp request.""" - model_config = ConfigDict(use_attribute_docstrings=True) + model_config = ConfigDict( + use_attribute_docstrings=True, json_schema_extra={"examples": [example_opendp]} + ) + opendp_json: str - """Opendp pipeline.""" - fixed_delta: Optional[float] = None + """The OpenDP pipeline for the query.""" + fixed_delta: Optional[float] = Field(..., ge=0) + """ + If the pipeline measurement is of type "ZeroConcentratedDivergence". + + (e.g. with "make_gaussian") then it is converted to "SmoothedMaxDivergence" + with "make_zCDP_to_approxDP" (see "opendp measurements documentation at + https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301 + In that case a "fixed_delta" must be provided by the user. + """ class OpenDPQueryModel(OpenDPRequestModel, QueryModel): """Base input model for an opendp query.""" + model_config = ConfigDict(json_schema_extra={"examples": [example_opendp]}) + class OpenDPDummyQueryModel(OpenDPRequestModel, DummyQueryModel): """Input model for an opendp query on a dummy dataset.""" + model_config = ConfigDict(json_schema_extra={"examples": [example_dummy_opendp]}) + # DiffPrivLib # ---------------------------------------------------------------------------- class DiffPrivLibRequestModel(LomasRequestModel): """Base input model for a diffprivlib request.""" + model_config = ConfigDict(json_schema_extra={"examples": [example_diffprivlib]}) + diffprivlib_json: str + """The DiffPrivLib pipeline for the query (See diffprivlib_logger package.).""" feature_columns: list + """The list of feature columns to train.""" target_columns: Optional[list] + """The list of target columns to predict.""" test_size: float = Field(..., gt=0.0, lt=1.0) + """The proportion of the test set.""" test_train_split_seed: int + """The seed for the random train/test split.""" imputer_strategy: str + """The imputation strategy.""" class DiffPrivLibQueryModel(DiffPrivLibRequestModel, QueryModel): """Base input model for a diffprivlib query.""" + model_config = ConfigDict(json_schema_extra={"examples": [example_diffprivlib]}) + class DiffPrivLibDummyQueryModel(DiffPrivLibQueryModel, DummyQueryModel): """Input model for a DiffPrivLib query on a dummy dataset.""" + model_config = ConfigDict( + json_schema_extra={"examples": [example_dummy_diffprivlib]} + ) + # Utils # ---------------------------------------------------------------------------- diff --git a/core/lomas_core/models/responses.py b/core/lomas_core/models/responses.py index db9d7b5f..c3f0bafe 100644 --- a/core/lomas_core/models/responses.py +++ b/core/lomas_core/models/responses.py @@ -37,23 +37,31 @@ class SpentBudgetResponse(ResponseModel): """Model for responses to spent budget queries.""" total_spent_epsilon: float + """The total spent epsilon privacy loss budget.""" total_spent_delta: float + """The total spent delta privacy loss budget.""" class RemainingBudgetResponse(ResponseModel): """Model for responses to remaining budget queries.""" remaining_epsilon: float + """The remaining epsilon privacy loss budget.""" remaining_delta: float + """The remaining delta privacy loss budget.""" class 
DummyDsResponse(ResponseModel): """Model for responses to dummy dataset requests.""" model_config = ConfigDict(arbitrary_types_allowed=True) + dtypes: Dict[str, str] + """The dummy_df column data types.""" datetime_columns: List[str] + """The list of columns with datetime type.""" dummy_df: Annotated[pd.DataFrame, PlainSerializer(dataframe_to_dict)] + """The dummy dataframe.""" @field_validator("dummy_df", mode="before") @classmethod @@ -86,6 +94,7 @@ class CostResponse(ResponseModel): """Model for responses to cost estimation requests or queries.""" model_config = ConfigDict(use_attribute_docstrings=True) + epsilon: float """The epsilon cost of the query.""" delta: float @@ -101,13 +110,17 @@ class DiffPrivLibQueryResult(BaseModel): """Model for diffprivlib query result.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal[DPLibraries.DIFFPRIVLIB] = DPLibraries.DIFFPRIVLIB + """Result type description.""" score: float + """The trained model score.""" model: Annotated[ DiffprivlibMixin, PlainSerializer(serialize_model), PlainValidator(deserialize_model), ] + """The trained model.""" # SmartnoiseSQL @@ -115,12 +128,15 @@ class SmartnoiseSQLQueryResult(BaseModel): """Type for smartnoise_sql result type.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal[DPLibraries.SMARTNOISE_SQL] = DPLibraries.SMARTNOISE_SQL + """Result type description.""" df: Annotated[ pd.DataFrame, PlainSerializer(dataframe_to_dict), PlainValidator(dataframe_from_dict), ] + """Dataframe containing the query result.""" # SmartnoiseSynth @@ -128,22 +144,28 @@ class SmartnoiseSynthModel(BaseModel): """Type for smartnoise_synth result when it is a pickled model.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal[DPLibraries.SMARTNOISE_SYNTH] = DPLibraries.SMARTNOISE_SYNTH + """Result type description.""" model: Annotated[ Synthesizer, PlainSerializer(serialize_model), PlainValidator(deserialize_model) ] + """Synthetic data generator model.""" class SmartnoiseSynthSamples(BaseModel): """Type for smartnoise_synth result when it is a dataframe of samples.""" model_config = ConfigDict(arbitrary_types_allowed=True) + res_type: Literal["sn_synth_samples"] = "sn_synth_samples" + """Result type description.""" df_samples: Annotated[ pd.DataFrame, PlainSerializer(dataframe_to_dict), PlainValidator(dataframe_from_dict), ] + """Dataframe containing the generated synthetic samples.""" # OpenDP @@ -151,7 +173,9 @@ class OpenDPQueryResult(BaseModel): """Type for opendp result.""" res_type: Literal[DPLibraries.OPENDP] = DPLibraries.OPENDP + """Result type description.""" value: Union[int, float, List[Union[int, float]]] + """The result value of the query.""" # Response object @@ -173,4 +197,4 @@ class QueryResponse(CostResponse): QueryResultTypeAlias, Discriminator("res_type"), ] - """The query result.""" + """The query result object.""" diff --git a/server/lomas_server/dp_queries/dp_libraries/smartnoise_synth.py b/server/lomas_server/dp_queries/dp_libraries/smartnoise_synth.py index 01f13630..4c9fbb58 100644 --- a/server/lomas_server/dp_queries/dp_libraries/smartnoise_synth.py +++ b/server/lomas_server/dp_queries/dp_libraries/smartnoise_synth.py @@ -297,7 +297,9 @@ def _get_fit_model( Returns: Synthesizer: Fitted synthesizer model """ - if query_json.delta is not None: + if query_json.synth_name != SSynthMarginalSynthesizer.MWEM: + # delta parameter is ignored for this synthesizer. + # TODO improve on this.... 
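+            # Note: MWEM's constructor does not accept a `delta` keyword argument
+            # (passing one previously failed with "unexpected keyword argument 'delta'"),
+            # so delta is only forwarded to the other synthesizers here.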
query_json.synth_params["delta"] = query_json.delta if query_json.synth_name == SSynthGanSynthesizer.DP_CTGAN: diff --git a/server/lomas_server/routes/routes_admin.py b/server/lomas_server/routes/routes_admin.py index 3350ac7d..0f9c4993 100644 --- a/server/lomas_server/routes/routes_admin.py +++ b/server/lomas_server/routes/routes_admin.py @@ -7,6 +7,10 @@ ) from lomas_core.models.collections import Metadata from lomas_core.models.requests import GetDummyDataset, LomasRequestModel +from lomas_core.models.requests_examples import ( + example_get_admin_db_data, + example_get_dummy_dataset, +) from lomas_core.models.responses import ( DummyDsResponse, InitialBudgetResponse, @@ -17,10 +21,6 @@ from lomas_server.data_connector.data_connector import get_column_dtypes from lomas_server.dp_queries.dummy_dataset import make_dummy_dataset from lomas_server.routes.utils import server_live -from lomas_server.utils.query_examples import ( - example_get_admin_db_data, - example_get_dummy_dataset, -) router = APIRouter() diff --git a/server/lomas_server/routes/routes_dp.py b/server/lomas_server/routes/routes_dp.py index c78ea28d..9dc9aa2f 100644 --- a/server/lomas_server/routes/routes_dp.py +++ b/server/lomas_server/routes/routes_dp.py @@ -1,4 +1,6 @@ -from fastapi import APIRouter, Body, Depends, Header, Request +from typing import Annotated + +from fastapi import APIRouter, Depends, Header, Request from lomas_core.constants import DPLibraries from lomas_core.error_handler import SERVER_QUERY_ERROR_RESPONSES from lomas_core.models.requests import ( @@ -23,24 +25,13 @@ handle_query_on_private_dataset, server_live, ) -from lomas_server.utils.query_examples import ( - example_diffprivlib, - example_dummy_diffprivlib, - example_dummy_opendp, - example_dummy_smartnoise_sql, - example_dummy_smartnoise_synth_query, - example_opendp, - example_smartnoise_sql, - example_smartnoise_sql_cost, - example_smartnoise_synth_cost, - example_smartnoise_synth_query, -) router = APIRouter() # Smartnoise SQL # ----------------------------------------------------------------------------- + @router.post( "/smartnoise_sql_query", dependencies=[Depends(server_live)], @@ -49,20 +40,18 @@ tags=["USER_QUERY"], ) def smartnoise_sql_handler( + user_name: Annotated[str, Header()], request: Request, - smartnoise_sql_query: SmartnoiseSQLQueryModel = Body(example_smartnoise_sql), - user_name: str = Header(None), + smartnoise_sql_query: SmartnoiseSQLQueryModel, ) -> QueryResponse: """ Handles queries for the SmartNoiseSQL library. \f Args: + user_name (str): The user name. request (Request): Raw request object smartnoise_sql_query (SmartnoiseSQLQueryModel): The smartnoise_sql query body. - Defaults to Body(example_smartnoise_sql). - user_name (str, optional): The user name. - Defaults to Header(None). Raises: ExternalLibraryException: For exceptions from libraries @@ -81,7 +70,6 @@ def smartnoise_sql_handler( ) -# Smartnoise SQL Dummy query @router.post( "/dummy_smartnoise_sql_query", dependencies=[Depends(server_live)], @@ -90,20 +78,19 @@ def smartnoise_sql_handler( tags=["USER_DUMMY"], ) def dummy_smartnoise_sql_handler( + user_name: Annotated[str, Header()], request: Request, - smartnoise_sql_query: SmartnoiseSQLDummyQueryModel = Body(example_dummy_smartnoise_sql), - user_name: str = Header(None), + smartnoise_sql_query: SmartnoiseSQLDummyQueryModel, ) -> QueryResponse: """ Handles queries on dummy datasets for the SmartNoiseSQL library. \f Args: + user_name (str): The user name. 
request (Request): Raw request object - smartnoise_sql_query (SmartnoiseSQLDummyQueryModel): The smartnoise_sql query body. - Defaults to Body(example_dummy_smartnoise_sql). - user_name (str, optional): The user name. - Defaults to Header(None). + smartnoise_sql_query (SmartnoiseSQLDummyQueryModel): + The smartnoise_sql query body. Raises: ExternalLibraryException: For exceptions from libraries @@ -125,26 +112,25 @@ def dummy_smartnoise_sql_handler( @router.post( "/estimate_smartnoise_sql_cost", dependencies=[Depends(server_live)], - response_model=QueryResponse, + response_model=CostResponse, responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_smartnoise_sql_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSQLRequestModel = Body(example_smartnoise_sql_cost), - user_name: str = Header(None), + smartnoise_sql_query: SmartnoiseSQLRequestModel, ) -> CostResponse: """ Estimates the privacy loss budget cost of a SmartNoiseSQL query. \f Args: + user_name (str): The user name. request (Request): Raw request object - smartnoise_sql_query (SmartnoiseSQLRequestModel): The smartnoise_sql request body. - Defaults to Body(example_smartnoise_sql_cost). - user_name (str, optional): The user name. - Defaults to Header(None). - + smartnoise_sql_query (SmartnoiseSQLRequestModel): + The smartnoise_sql request body. + Raises: ExternalLibraryException: For exceptions from libraries external to this package. @@ -154,13 +140,17 @@ def estimate_smartnoise_sql_cost( the user does not exist or does not have access to the dataset. Returns: - QueryResponse: A query response containing a SmartnoiseSQLQueryResult. + CostResponse: The privacy loss cost of the input query. """ - return handle_cost_query(request, query_json, user_name, DPLibraries.SMARTNOISE_SQL) + return handle_cost_query( + request, smartnoise_sql_query, user_name, DPLibraries.SMARTNOISE_SQL + ) # Smartnoise Synth # ----------------------------------------------------------------------------- + + @router.post( "/smartnoise_synth_query", dependencies=[Depends(server_live)], @@ -169,38 +159,20 @@ def estimate_smartnoise_sql_cost( tags=["USER_QUERY"], ) def smartnoise_synth_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSynthQueryModel = Body(example_smartnoise_synth_query), - user_name: str = Header(None), + smartnoise_synth_query: SmartnoiseSynthQueryModel, ) -> QueryResponse: """ - Handles queries for the SmartNoise Synth library. + Handles queries for the SmartNoiseSynth library. \f Args: - request (Request): Raw request object - query_json (SmartnoiseSynthQueryModel): A JSON object containing: - - synth_name (str): name of the Synthesizer model to use. - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - mechanisms (dict[str, str], optional): Dictionary of mechanisms for the\ - query `See Smartnoise-SQL postprocessing documentation. - `__ - - select_cols (List[str]): List of columns to select. - - synth_params (dict): Keyword arguments to pass to the synthesizer - constructor. - See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide - all parameters of the model except `epsilon` and `delta`. - - nullable (bool): True if some data cells may be null - - constraints (dict): Dictionnary for custom table transformer constraints. - Column that are not specified will be inferred based on metadata. 
- - return_model (bool): True to get Synthesizer model, False to get samples - - condition (Optional[str]): sampling condition in `model.sample` - (only relevant if return_model is False) - - nb_samples (Optional[int]): number of samples to generate. - (only relevant if return_model is False) - - Defaults to Body(example_smartnoise_synth). user_name (str): The user name. + request (Request): Raw request object + smartnoise_synth_query (SmartnoiseSynthQueryModel): + The smartnoise_synth query body. + Raises: ExternalLibraryException: For exceptions from libraries external to this package. @@ -209,65 +181,38 @@ def smartnoise_synth_handler( does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. + Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing a SmartnoiseSynthModel + or SmartnoiseSynthSamples. """ return handle_query_on_private_dataset( - request, query_json, user_name, DPLibraries.SMARTNOISE_SYNTH + request, smartnoise_synth_query, user_name, DPLibraries.SMARTNOISE_SYNTH ) @router.post( "/dummy_smartnoise_synth_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def dummy_smartnoise_synth_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSynthDummyQueryModel = Body( - example_dummy_smartnoise_synth_query - ), - user_name: str = Header(None), + smartnoise_synth_query: SmartnoiseSynthDummyQueryModel, ) -> QueryResponse: """ - Handles queries for the SmartNoise Synth library. + Handles queries on dummy datasets for the SmartNoiseSynth library. + \f Args: - request (Request): Raw request object - query_json (SmartnoiseSynthDummyQueryModel): A JSON object containing: - - synth_name (str): name of the Synthesizer model to use. - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - mechanisms (dict[str, str], optional): Dictionary of mechanisms for the\ - query `See Smartnoise-SQL postprocessing documentation. - `__ - - select_cols (List[str]): List of columns to select. - - synth_params (dict): Keyword arguments to pass to the synthesizer - constructor. - See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide - all parameters of the model except `epsilon` and `delta`. - - nullable (bool): True if some data cells may be null - - constraints (dict): Dictionnary for custom table transformer constraints. - Column that are not specified will be inferred based on metadata. - - return_model (bool): True to get Synthesizer model, False to get samples - - condition (Optional[str]): sampling condition in `model.sample` - (only relevant if return_model is False) - - nb_samples (Optional[int]): number of samples to generate. - (only relevant if return_model is False) - - nb_rows (int, optional): The number of rows in the dummy dataset - (default: 100). - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - - Defaults to Body(example_smartnoise_synth). user_name (str): The user name. 
+ request (Request): Raw request object + smartnoise_synth_query (SmartnoiseSynthDummyQueryModel): + The smartnoise_synth query body. + Raises: ExternalLibraryException: For exceptions from libraries external to this package. @@ -276,56 +221,38 @@ def dummy_smartnoise_synth_handler( does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. + Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing a SmartnoiseSynthModel + or SmartnoiseSynthSamples. """ return handle_query_on_dummy_dataset( - request, query_json, user_name, DPLibraries.SMARTNOISE_SYNTH + request, smartnoise_synth_query, user_name, DPLibraries.SMARTNOISE_SYNTH ) @router.post( "/estimate_smartnoise_synth_cost", dependencies=[Depends(server_live)], + response_model=CostResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_smartnoise_synth_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: SmartnoiseSynthRequestModel = Body(example_smartnoise_synth_cost), - user_name: str = Header(None), + smartnoise_synth_query: SmartnoiseSynthRequestModel, ) -> CostResponse: """ - Handles queries for the SmartNoise Synth library. + Computes the privacy loss budget cost of a SmartNoiseSynth query. + \f Args: - request (Request): Raw request object - query_json (SmartnoiseSynthRequestModel): A JSON object containing: - - synth_name (str): name of the Synthesizer model to use. - - epsilon (float): Privacy parameter (e.g., 0.1). - - delta (float): Privacy parameter (e.g., 1e-5). - mechanisms (dict[str, str], optional): Dictionary of mechanisms for the\ - query `See Smartnoise-SQL postprocessing documentation. - `__ - - select_cols (List[str]): List of columns to select. - - synth_params (dict): Keyword arguments to pass to the synthesizer - constructor. - See https://docs.smartnoise.org/synth/synthesizers/index.html#, provide - all parameters of the model except `epsilon` and `delta`. - - nullable (bool): True if some data cells may be null - - constraints - - nb_rows (int, optional): The number of rows in the dummy dataset - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - - Defaults to Body(example_smartnoise_synth). user_name (str): The user name. + request (Request): Raw request object + smartnoise_synth_query (SmartnoiseSynthRequestModel): + The smartnoise_synth query body. + Raises: ExternalLibraryException: For exceptions from libraries external to this package. @@ -334,44 +261,39 @@ def estimate_smartnoise_synth_cost( does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. + Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: The privacy loss cost of the input query. 
""" return handle_cost_query( - request, query_json, user_name, DPLibraries.SMARTNOISE_SYNTH + request, smartnoise_synth_query, user_name, DPLibraries.SMARTNOISE_SYNTH ) +# OpenDP +# ----------------------------------------------------------------------------- + + @router.post( "/opendp_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def opendp_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: OpenDPQueryModel = Body(example_opendp), - user_name: str = Header(None), + opendp_query: OpenDPQueryModel, ) -> QueryResponse: """ Handles queries for the OpenDP Library. + \f Args: + user_name (str): The user name. request (Request): Raw request object. - query_json (OpenDPQueryModel, optional): A JSON object containing the following: - - opendp_pipeline: The OpenDP pipeline for the query. - - fixed_delta: If the pipeline measurement is of type - "ZeroConcentratedDivergence" (e.g. with "make_gaussian") then it is - converted to "SmoothedMaxDivergence" with "make_zCDP_to_approxDP" - (see "opendp measurements documentation at - https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301 - In that case a "fixed_delta" must be provided by the user. - - Defaults to Body(example_opendp). - - user_name (str, optional): The user name. - Defaults to Header(None). + opendp_query (OpenDPQueryModel): The opendp query object. Raises: ExternalLibraryException: For exceptions from libraries @@ -383,185 +305,149 @@ def opendp_query_handler( the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing an OpenDPQueryResult. """ return handle_query_on_private_dataset( - request, query_json, user_name, DPLibraries.OPENDP + request, opendp_query, user_name, DPLibraries.OPENDP ) @router.post( "/dummy_opendp_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_DUMMY"], ) def dummy_opendp_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: OpenDPDummyQueryModel = Body(example_dummy_opendp), - user_name: str = Header(None), + opendp_query: OpenDPDummyQueryModel, ) -> QueryResponse: """ Handles queries on dummy datasets for the OpenDP library. + \f Args: + user_name (str): The user name. request (Request): Raw request object. - query_json (OpenDPDummyQueryModel, optional): Model for opendp dummy query. - A JSON object containing the following: - - opendp_pipeline: Open - - fixed_delta: If the pipeline measurement is of type\ - "ZeroConcentratedDivergence" (e.g. with "make_gaussian") then - it is converted to "SmoothedMaxDivergence" with - "make_zCDP_to_approxDP" (see opendp measurements documentation at - https://docs.opendp.org/en/stable/api/python/opendp.combinators.html#opendp.combinators.make_zCDP_to_approxDP). # noqa # pylint: disable=C0301 - In that case a "fixed_delta" must be provided by the user. - - nb_rows (int, optional): The number of rows - in the dummy dataset (default: 100). 
- - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - - Defaults to Body(example_dummy_opendp). + opendp_query (OpenDPQueryModel): The opendp query object. Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: If there is not enough budget or the dataset - does not exist. + InvalidQueryException: The pipeline does not contain a "measurement", + there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - query_response (pd.DataFrame): a DataFrame containing - the query response. + QueryResponse: A query response containing an OpenDPQueryResult. """ return handle_query_on_dummy_dataset( - request, query_json, user_name, DPLibraries.OPENDP + request, opendp_query, user_name, DPLibraries.OPENDP ) @router.post( "/estimate_opendp_cost", dependencies=[Depends(server_live)], + response_model=CostResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_opendp_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: OpenDPRequestModel = Body(example_opendp), - user_name: str = Header(None), + opendp_query: OpenDPRequestModel, ) -> CostResponse: """ Estimates the privacy loss budget cost of an OpenDP query. + \f Args: - request (Request): Raw request object - query_json (OpenDPRequestModel, optional): - A JSON object containing the following: - - "opendp_pipeline": The OpenDP pipeline for the query. - - Defaults to Body(example_opendp). + user_name (str): The user name. + request (Request): Raw request object. + opendp_query (OpenDPRequestModel): The opendp query object. Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: The dataset does not exist or the - pipeline does not contain a measurement. + InvalidQueryException: The pipeline does not contain a "measurement", + there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: The privacy loss cost of the input query. """ - return handle_cost_query(request, query_json, user_name, DPLibraries.OPENDP) + return handle_cost_query(request, opendp_query, user_name, DPLibraries.OPENDP) + + +# DiffPrivLib +# ----------------------------------------------------------------------------- @router.post( "/diffprivlib_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def diffprivlib_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: DiffPrivLibQueryModel = Body(example_diffprivlib), - user_name: str = Header(None), + diffprivlib_query: DiffPrivLibQueryModel, ): """ Handles queries for the DiffPrivLib Library. + \f Args: - request (Request): Raw request object. - query_json (DiffPrivLibQueryModel, optional): - A JSON object containing the following: - - pipeline: The DiffPrivLib pipeline for the query. 
- - feature_columns: the list of feature column to train - - target_columns: the list of target column to predict - - test_size: proportion of the test set - - test_train_split_seed: seed for the random train test split, - - imputer_strategy: imputation strategy - - Defaults to Body(example_diffprivlib). - - user_name (str, optional): The user name. - Defaults to Header(None). + user_name (str): The user name. + request (Request): Raw request object + diffprivlib_query (DiffPrivLibQueryModel): The diffprivlib query body. Raises: ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: The pipeline does not contain a "measurement", - there is not enough budget or the dataset does not exist. + InvalidQueryException: If there is not enough budget or the dataset + does not exist. UnauthorizedAccessException: A query is already ongoing for this user, the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing the following: - - requested_by (str): The user name. - - query_response (pd.DataFrame): A DataFrame containing - the query response. - - spent_epsilon (float): The amount of epsilon budget spent - for the query. - - spent_delta (float): The amount of delta budget spent - for the query. + QueryResponse: A query response containing a DiffPrivLibQueryResult. """ return handle_query_on_private_dataset( - request, query_json, user_name, DPLibraries.DIFFPRIVLIB + request, diffprivlib_query, user_name, DPLibraries.DIFFPRIVLIB ) @router.post( "/dummy_diffprivlib_query", dependencies=[Depends(server_live)], + response_model=QueryResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_DUMMY"], ) def dummy_diffprivlib_query_handler( + user_name: Annotated[str, Header()], request: Request, - query_json: DiffPrivLibDummyQueryModel = Body(example_dummy_diffprivlib), - user_name: str = Header(None), + query_json: DiffPrivLibDummyQueryModel, ) -> QueryResponse: """ Handles queries on dummy datasets for the DiffPrivLib library. + \f Args: - request (Request): Raw request object. - query_json (DiffPrivLibDummyQueryModel, optional): - A JSON object containing the following: - - pipeline: The DiffPrivLib pipeline for the query. - - feature_columns: the list of feature column to train - - target_columns: the list of target column to predict - - test_size: proportion of the test set - - test_train_split_seed: seed for the random train test split, - - imputer_strategy: imputation strategy - - nb_rows (int, optional): - The number of rows in the dummy dataset (default: 100). - - seed (int, optional): The random seed for generating - the dummy dataset (default: 42). - Defaults to Body(example_dummy_diffprivlib) + user_name (str): The user name. + request (Request): Raw request object + diffprivlib_query (DiffPrivLibDummyQueryModel): The diffprivlib query body. Raises: ExternalLibraryException: For exceptions from libraries @@ -569,11 +455,11 @@ def dummy_diffprivlib_query_handler( InternalServerException: For any other unforseen exceptions. InvalidQueryException: If there is not enough budget or the dataset does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - query_response (pd.DataFrame): a DataFrame containing - the query response. 
+ QueryResponse: A query response containing a DiffPrivLibQueryResult. """ return handle_query_on_dummy_dataset( request, query_json, user_name, DPLibraries.DIFFPRIVLIB @@ -583,19 +469,23 @@ def dummy_diffprivlib_query_handler( @router.post( "/estimate_diffprivlib_cost", dependencies=[Depends(server_live)], + response_model=CostResponse, + responses=SERVER_QUERY_ERROR_RESPONSES, tags=["USER_QUERY"], ) def estimate_diffprivlib_cost( + user_name: Annotated[str, Header()], request: Request, - query_json: DiffPrivLibRequestModel = Body(example_diffprivlib), - user_name: str = Header(None), + diffprivlib_query: DiffPrivLibRequestModel, ) -> CostResponse: """ Estimates the privacy loss budget cost of an DiffPrivLib query. + \f Args: + user_name (str): The user name. request (Request): Raw request object - query_json (DiffPrivLibRequestModel, optional): + diffprivlib_query (DiffPrivLibRequestModel): The diffprivlib query body. A JSON object containing the following: - pipeline: The DiffPrivLib pipeline for the query. - feature_columns: the list of feature column to train @@ -610,12 +500,14 @@ def estimate_diffprivlib_cost( ExternalLibraryException: For exceptions from libraries external to this package. InternalServerException: For any other unforseen exceptions. - InvalidQueryException: The dataset does not exist or the - pipeline does not contain a measurement. + InvalidQueryException: If there is not enough budget or the dataset + does not exist. + UnauthorizedAccessException: A query is already ongoing for this user, + the user does not exist or does not have access to the dataset. Returns: - JSONResponse: A JSON object containing: - - epsilon_cost (float): The estimated epsilon cost. - - delta_cost (float): The estimated delta cost. + CostResponse: The privacy loss cost of the input query. """ - return handle_cost_query(request, query_json, user_name, DPLibraries.DIFFPRIVLIB) + return handle_cost_query( + request, diffprivlib_query, user_name, DPLibraries.DIFFPRIVLIB + ) diff --git a/server/lomas_server/tests/test_api.py b/server/lomas_server/tests/test_api.py index 1335722c..91fa7071 100644 --- a/server/lomas_server/tests/test_api.py +++ b/server/lomas_server/tests/test_api.py @@ -10,6 +10,7 @@ from lomas_core.constants import DPLibraries from lomas_core.error_handler import InternalServerException from lomas_core.models.config import DBConfig +from lomas_core.models.exceptions import ExternalLibraryExceptionModel, InvalidQueryExceptionModel, UnauthorizedAccessExceptionModel from lomas_core.models.responses import ( CostResponse, DummyDsResponse, @@ -38,11 +39,13 @@ TRUE_VALUES, ) from lomas_server.utils.config import CONFIG_LOADER -from lomas_server.utils.query_examples import ( +from lomas_core.models.requests_examples import ( DUMMY_NB_ROWS, PENGUIN_DATASET, QUERY_DELTA, - QUERY_EPSILON, + QUERY_EPSILON +) +from lomas_core.models.requests_examples import ( example_dummy_opendp, example_dummy_smartnoise_sql, example_get_admin_db_data, @@ -183,10 +186,10 @@ def test_get_dataset_metadata(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": f"Dataset {fake_dataset} does not " - + "exist. Please, verify the client object initialisation." - } + assert response.json() == InvalidQueryExceptionModel( + message=f"Dataset {fake_dataset} does not " + + "exist. Please, verify the client object initialisation." 
+ ).model_dump() # Expect to fail: user does have access to dataset other_dataset = "IRIS" @@ -196,10 +199,9 @@ def test_get_dataset_metadata(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to {other_dataset}." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to {other_dataset}." + ).model_dump() def test_get_dummy_dataset(self) -> None: """Test_get_dummy_dataset.""" @@ -246,10 +248,10 @@ def test_get_dummy_dataset(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": f"Dataset {fake_dataset} does not " - + "exist. Please, verify the client object initialisation." - } + assert response.json() == InvalidQueryExceptionModel( + message=f"Dataset {fake_dataset} does not " + + "exist. Please, verify the client object initialisation." + ).model_dump() # Expect to fail: missing argument dummy_nb_rows response = client.post( @@ -273,10 +275,9 @@ def test_get_dummy_dataset(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to {other_dataset}." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to {other_dataset}." + ).model_dump() # Expect to fail: user does not exist fake_user = "fake_user" @@ -288,10 +289,10 @@ def test_get_dummy_dataset(self) -> None: headers=new_headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": f"User {fake_user} does not " - + "exist. Please, verify the client object initialisation." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"User {fake_user} does not " + + "exist. Please, verify the client object initialisation." + ).model_dump() # Expect to work with datetimes and another user fake_user = "BirthdayGirl" @@ -367,14 +368,14 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error obtaining cost: " - + "Noise scale is too large using epsilon=1e-09 " - + "and bounds (0, 1) with Mechanism.gaussian. " - + "Try preprocessing to reduce senstivity, " - + "or try different privacy parameters.", - "library": "smartnoise_sql", - } + assert response.json() == ExternalLibraryExceptionModel( + message="Error obtaining cost: " + + "Noise scale is too large using epsilon=1e-09 " + + "and bounds (0, 1) with Mechanism.gaussian. 
" + + "Try preprocessing to reduce senstivity, " + + "or try different privacy parameters.", + library="smartnoise_sql" + ).model_dump() # Expect to fail: query does not make sense input_smartnoise = dict(example_smartnoise_sql) @@ -387,11 +388,11 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error obtaining cost: " + assert response.json() == ExternalLibraryExceptionModel( + message="Error obtaining cost: " + "Column cannot be found bill", - "library": "smartnoise_sql", - } + library="smartnoise_sql" + ).model_dump() # Expect to fail: dataset without access input_smartnoise = dict(example_smartnoise_sql) @@ -402,10 +403,9 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + "Dr. Antartica does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message="Dr. Antartica does not have access to IRIS." + ).model_dump() # Expect to fail: dataset does not exist input_smartnoise = dict(example_smartnoise_sql) @@ -416,11 +416,10 @@ def test_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "Dataset I_do_not_exist does not exist. " - + "Please, verify the client object initialisation." - } + assert response.json() == InvalidQueryExceptionModel( + message="Dataset I_do_not_exist does not exist. " + + "Please, verify the client object initialisation." + ).model_dump() # Expect to fail: user does not exist new_headers = self.headers @@ -431,11 +430,10 @@ def test_smartnoise_sql_query(self) -> None: headers=new_headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + "User I_do_not_exist does not exist. " - + "Please, verify the client object initialisation." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message="User I_do_not_exist does not exist. " + + "Please, verify the client object initialisation." + ).model_dump() def test_smartnoise_sql_query_parameters(self) -> None: """Test smartnoise-sql query parameters.""" @@ -546,11 +544,10 @@ def test_dummy_smartnoise_sql_query(self) -> None: "/dummy_smartnoise_sql_query", json=example_dummy_smartnoise_sql, ) - assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "User None does not exist." - + " Please, verify the client object initialisation." - } + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + response_dict = json.loads(response.content.decode("utf8"))["detail"] + assert response_dict[0]["type"] == "missing" + assert response_dict[0]["loc"] == ["header", "user-name"] # Should fail: user does not have access to dataset body = dict(example_dummy_smartnoise_sql) @@ -561,10 +558,9 @@ def test_dummy_smartnoise_sql_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." 
+ ).model_dump() def test_smartnoise_sql_cost(self) -> None: """Test_smartnoise_sql_cost.""" @@ -591,10 +587,9 @@ def test_smartnoise_sql_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ) def test_opendp_query(self) -> None: # pylint: disable=R0915 """Test_opendp_query.""" @@ -645,10 +640,10 @@ def test_opendp_query(self) -> None: # pylint: disable=R0915 }, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "The pipeline provided is not a " - + "measurement. It cannot be processed in this server." - } + assert response.json() == InvalidQueryExceptionModel( + message="The pipeline provided is not a " + + "measurement. It cannot be processed in this server." + ).model_dump() # Test MAX_DIVERGENCE (pure DP) md_pipeline = transformation_pipeline >> dp_p.m.then_laplace(scale=5.0) @@ -679,11 +674,10 @@ def test_opendp_query(self) -> None: # pylint: disable=R0915 # Should error because missing fixed_delta response = client.post("/opendp_query", json=json_obj) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "fixed_delta must be set for smooth max divergence" - + " and zero concentrated divergence." - } + assert response.json() == InvalidQueryExceptionModel( + message="fixed_delta must be set for smooth max divergence" + + " and zero concentrated divergence." + ).model_dump() # Should work because fixed_delta is set json_obj["fixed_delta"] = 1e-6 response = client.post("/opendp_query", json=json_obj) @@ -707,11 +701,10 @@ def test_opendp_query(self) -> None: # pylint: disable=R0915 # Should error because missing fixed_delta response = client.post("/opendp_query", json=json_obj) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "fixed_delta must be set for smooth max divergence" - + " and zero concentrated divergence." - } + assert response.json() == InvalidQueryExceptionModel( + message="fixed_delta must be set for smooth max divergence" + + " and zero concentrated divergence." + ).model_dump() # Should work because fixed_delta is set json_obj["fixed_delta"] = 1e-6 @@ -775,10 +768,9 @@ def test_dummy_opendp_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ).model_dump() def test_opendp_cost(self) -> None: """Test_opendp_cost.""" @@ -805,10 +797,9 @@ def test_opendp_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." 
+ ).model_dump() def test_get_initial_budget(self) -> None: """Test_get_initial_budget.""" @@ -1009,8 +1000,8 @@ def test_subsequent_budget_limit_logic(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "Not enough budget for this query " - + "epsilon remaining 2.0, " - + "delta remaining 0.004970000100000034." - } + assert response.json() == InvalidQueryExceptionModel( + message="Not enough budget for this query " + + "epsilon remaining 2.0, " + + "delta remaining 0.004970000100000034." + ).model_dump() \ No newline at end of file diff --git a/server/lomas_server/tests/test_api_diffprivlib.py b/server/lomas_server/tests/test_api_diffprivlib.py index 29065c1c..96dcf2b7 100644 --- a/server/lomas_server/tests/test_api_diffprivlib.py +++ b/server/lomas_server/tests/test_api_diffprivlib.py @@ -10,6 +10,11 @@ from fastapi import status from fastapi.testclient import TestClient from lomas_core.constants import DPLibraries +from lomas_core.models.exceptions import ExternalLibraryExceptionModel, InvalidQueryExceptionModel, UnauthorizedAccessExceptionModel +from lomas_core.models.requests_examples import ( + example_diffprivlib, + example_dummy_diffprivlib, +) from lomas_core.models.responses import ( CostResponse, DiffPrivLibQueryResult, @@ -19,10 +24,6 @@ from lomas_server.app import app from lomas_server.tests.test_api import TestRootAPIEndpoint -from lomas_server.utils.query_examples import ( - example_diffprivlib, - example_dummy_diffprivlib, -) def validate_pipeline(response) -> QueryResponse: @@ -83,10 +84,9 @@ def test_imputation(diffprivlib_body, imputer_strategy): # Should not work unknow imputation strategy response = test_imputation(example_diffprivlib, "i_do_not_exist") assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json() == { - "InvalidQueryException": "" - + "Imputation strategy i_do_not_exist not supported." - } + assert response.json() == InvalidQueryExceptionModel( + message="Imputation strategy i_do_not_exist not supported." + ).model_dump() # Should not work: Privacy Leak Warning warnings.simplefilter("error", PrivacyLeakWarning) @@ -105,18 +105,18 @@ def test_imputation(diffprivlib_body, imputer_strategy): headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "PrivacyLeakWarning: " - + "Bounds parameter hasn't been specified, so falling back to " - + "determining bounds from the data.\n " - + "This will result in additional privacy leakage. " - + "To ensure differential privacy with no additional privacy " - + "loss, specify `bounds` for each valued returned by " - + "np.mean().. " - + "Lomas server cannot fit pipeline on data, " - + "PrivacyLeakWarning is a blocker.", - "library": DPLibraries.DIFFPRIVLIB, - } + assert response.json() == ExternalLibraryExceptionModel( + message="PrivacyLeakWarning: " + + "Bounds parameter hasn't been specified, so falling back to " + + "determining bounds from the data.\n " + + "This will result in additional privacy leakage. " + + "To ensure differential privacy with no additional privacy " + + "loss, specify `bounds` for each valued returned by " + + "np.mean().. 
" + + "Lomas server cannot fit pipeline on data, " + + "PrivacyLeakWarning is a blocker.", + library=DPLibraries.DIFFPRIVLIB + ).model_dump() # Should not work: Compatibility Warning warnings.simplefilter("error", DiffprivlibCompatibilityWarning) @@ -345,10 +345,9 @@ def test_dummy_diffprivlib_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ).model_dump() def test_diffprivlib_cost(self) -> None: """Test_diffprivlib_cost.""" @@ -375,7 +374,6 @@ def test_diffprivlib_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ).model_dump() diff --git a/server/lomas_server/tests/test_api_smartnoise_synth.py b/server/lomas_server/tests/test_api_smartnoise_synth.py index cadd0db5..dac85695 100644 --- a/server/lomas_server/tests/test_api_smartnoise_synth.py +++ b/server/lomas_server/tests/test_api_smartnoise_synth.py @@ -1,7 +1,14 @@ import json +from attr import validate from fastapi import status from fastapi.testclient import TestClient +from lomas_core.models.exceptions import ExternalLibraryExceptionModel, UnauthorizedAccessExceptionModel +from lomas_core.models.requests_examples import ( + example_dummy_smartnoise_synth_query, + example_smartnoise_synth_cost, + example_smartnoise_synth_query, +) from lomas_core.models.responses import ( CostResponse, QueryResponse, @@ -19,11 +26,6 @@ from lomas_server.app import app from lomas_server.tests.constants import PENGUIN_COLUMNS, PUMS_COLUMNS from lomas_server.tests.test_api import TestRootAPIEndpoint -from lomas_server.utils.query_examples import ( - example_dummy_smartnoise_synth_query, - example_smartnoise_synth_cost, - example_smartnoise_synth_query, -) def validate_response(response) -> QueryResponse: @@ -76,14 +78,14 @@ def test_smartnoise_synth_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error fitting model: " - + "sample_rate=1.4534883720930232 is not a valid value. " - + "Please provide a float between 0 and 1. " - + "Try decreasing batch_size in " - + "synth_params (default batch_size=500).", - "library": "smartnoise_synth", - } + assert response.json() == ExternalLibraryExceptionModel( + message="Error fitting model: " + + "sample_rate=1.4534883720930232 is not a valid value. " + + "Please provide a float between 0 and 1. 
" + + "Try decreasing batch_size in " + + "synth_params (default batch_size=500).", + library="smartnoise_synth", + ).model_dump() def test_smartnoise_synth_query_samples(self) -> None: """Test smartnoise synth query return samples.""" @@ -154,7 +156,7 @@ def test_smartnoise_synth_query_select_cols(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json()["InvalidQueryException"].startswith( + assert response.json()["message"].startswith( "Error while selecting provided select_cols: " ) @@ -273,10 +275,9 @@ def test_dummy_smartnoise_synth_query(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + message=f"{self.user_name} does not have access to IRIS." + ).model_dump() def test_smartnoise_synth_cost(self) -> None: """Test_smartnoise_synth_cost.""" @@ -303,10 +304,9 @@ def test_smartnoise_synth_cost(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_403_FORBIDDEN - assert response.json() == { - "UnauthorizedAccessException": "" - + f"{self.user_name} does not have access to IRIS." - } + assert response.json() == UnauthorizedAccessExceptionModel( + f"{self.user_name} does not have access to IRIS." + ).model_dump() def test_smartnoise_synth_query_datetime(self) -> None: """Test smartnoise synth query on other dataset for datetime columns.""" @@ -378,7 +378,8 @@ def test_smartnoise_synth_query_mwem(self) -> None: """Test smartnoise synth query MWEM Synthesizer.""" with TestClient(app) as client: - # Expect to fail: delta + # Delta is simply ignored. + # This behaviour is reflected in the cost: delta=0 is returned body = dict(example_smartnoise_synth_query) body["synth_name"] = "mwem" body["synth_params"] = {} @@ -388,16 +389,10 @@ def test_smartnoise_synth_query_mwem(self) -> None: json=body, headers=self.headers, ) - assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert response.json() == { - "ExternalLibraryException": "Error creating model: " - + "MWEMSynthesizer.__init__() got an " - + "unexpected keyword argument 'delta'", - "library": "smartnoise_synth", - } + r_model = validate_response(response) # Expect to work: limited columns and delta None - body["delta"] = None + body["delta"] = 0 response = client.post( "/smartnoise_synth_query", json=body, @@ -459,7 +454,7 @@ def test_smartnoise_synth_query_mst(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json()["InvalidQueryException"].startswith( + assert response.json()["message"].startswith( "mst synthesizer cannot be returned, only samples. " + "Please, change model or set `return_model=False`" ) @@ -480,7 +475,7 @@ def test_smartnoise_synth_query_pacsynth(self) -> None: headers=self.headers, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json()["InvalidQueryException"].startswith( + assert response.json()["message"].startswith( "pacsynth synthesizer not supported due to Rust panic. " + "Please select another Synthesizer." 
             )
@@ -499,12 +494,12 @@ def test_smartnoise_synth_query_patectgan(self) -> None:
                 headers=self.headers,
             )
             assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
-            assert response.json() == {
-                "ExternalLibraryException": "Error fitting model: "
-                + "Inputted epsilon parameter is too small to create a private"
-                + " dataset. Try increasing epsilon and rerunning.",
-                "library": "smartnoise_synth",
-            }
+            assert response.json() == ExternalLibraryExceptionModel(
+                message="Error fitting model: "
+                + "Inputted epsilon parameter is too small to create a private"
+                + " dataset. Try increasing epsilon and rerunning.",
+                library="smartnoise_synth",
+            ).model_dump()

             # Expect to work
             body["epsilon"] = 1.0
             response = client.post(
@@ -536,10 +531,10 @@ def test_smartnoise_synth_query_pategan(self) -> None:
                 headers=self.headers,
             )
             assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
-            assert response.json() == {
-                "ExternalLibraryException": "pategan not reliable with this dataset.",
-                "library": "smartnoise_synth",
-            }
+            assert response.json() == ExternalLibraryExceptionModel(
+                message="pategan not reliable with this dataset.",
+                library="smartnoise_synth",
+            ).model_dump()

     def test_smartnoise_synth_query_dpgan(self) -> None:
         """Test smartnoise synth query dpgan Synthesizer."""
@@ -555,13 +550,13 @@ def test_smartnoise_synth_query_dpgan(self) -> None:
                 headers=self.headers,
             )
             assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
-            assert response.json() == {
-                "ExternalLibraryException": "Error fitting model: "
-                + "Inputted epsilon and sigma parameters "
-                + "are too small to create a private dataset. "
-                + "Try increasing either parameter and rerunning.",
-                "library": "smartnoise_synth",
-            }
+            assert response.json() == ExternalLibraryExceptionModel(
+                message="Error fitting model: "
+                + "Inputted epsilon and sigma parameters "
+                + "are too small to create a private dataset. 
" + + "Try increasing either parameter and rerunning.", + library="smartnoise_synth", + ).model_dump() body["epsilon"] = 1.0 response = client.post( diff --git a/server/lomas_server/utils/query_examples.py b/server/lomas_server/utils/query_examples.py deleted file mode 100644 index 94136488..00000000 --- a/server/lomas_server/utils/query_examples.py +++ /dev/null @@ -1,203 +0,0 @@ -from lomas_core.constants import SSynthGanSynthesizer - -from lomas_server.constants import ( - DIFFPRIVLIB_VERSION, - DUMMY_NB_ROWS, - DUMMY_SEED, - OPENDP_VERSION, -) - -# Query constants -PENGUIN_DATASET = "PENGUIN" -QUERY_EPSILON = 0.1 -QUERY_DELTA = 0.00001 -SQL_QUERY = "SELECT COUNT(*) AS NB_ROW FROM df" -DP_MECHANISM = {"count": "gaussian"} -FEATURE_COLUMNS = [ - "bill_length_mm", - "bill_depth_mm", - "flipper_length_mm", - "body_mass_g", -] -TARGET_COLUMNS = ["species"] -SPLIT_SEED = 4 -TEST_SIZE = 0.2 -IMPUTER_STRATEGY = "drop" -SNSYNTH_NB_SAMPLES = 200 - - -def make_dummy(example_query): - """Make dummy example dummy query based on example query.""" - example_query_dummy = dict(example_query) - example_query_dummy["dummy_nb_rows"] = DUMMY_NB_ROWS - example_query_dummy["dummy_seed"] = DUMMY_SEED - return example_query_dummy - - -# Lomas logic -example_get_admin_db_data = { - "dataset_name": PENGUIN_DATASET, -} - -example_get_dummy_dataset = { - "dataset_name": PENGUIN_DATASET, - "dummy_nb_rows": DUMMY_NB_ROWS, - "dummy_seed": DUMMY_SEED, -} - -# Smartnoise-SQL -example_smartnoise_sql_cost = { - "query_str": SQL_QUERY, - "dataset_name": PENGUIN_DATASET, - "epsilon": QUERY_EPSILON, - "delta": QUERY_DELTA, - "mechanisms": DP_MECHANISM, -} - -example_smartnoise_sql = dict(example_smartnoise_sql_cost) -example_smartnoise_sql["postprocess"] = True - -example_dummy_smartnoise_sql = make_dummy(example_smartnoise_sql) - -# Smartnoise-Synth -example_smartnoise_synth_cost = { - "dataset_name": PENGUIN_DATASET, - "synth_name": SSynthGanSynthesizer.DP_CTGAN, - "epsilon": QUERY_EPSILON, - "delta": QUERY_DELTA, - "select_cols": [], - "synth_params": { - "embedding_dim": 128, - "batch_size": 50, - "epochs": 5, - }, - "nullable": True, - "constraints": "", -} -example_smartnoise_synth_query = dict(example_smartnoise_synth_cost) -example_smartnoise_synth_query["return_model"] = True -example_smartnoise_synth_query["condition"] = "" -example_smartnoise_synth_query["nb_samples"] = SNSYNTH_NB_SAMPLES - -example_dummy_smartnoise_synth_query = make_dummy(example_smartnoise_synth_query) - -# OpenDP - -# Example inputs -# ----------------------------------------------------------------------------- -OPENDP_PIPELINE = ( - f'{{"version": "{OPENDP_VERSION}", ' - '"ast": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "partial_chain", "lhs": {' - '"_type": "constructor", ' - '"func": "make_chain_tt", ' - '"module": "combinators", ' - '"args": [' - "{" - '"_type": "constructor", ' - '"func": "make_select_column", ' - '"module": "transformations", ' - '"kwargs": {"key": "bill_length_mm", "TOA": "String"}' - "}, {" - '"_type": "constructor", ' - '"func": "make_split_dataframe", ' - '"module": "transformations", ' - '"kwargs": {"separator": ",", "col_names": {"_type": ' - '"list", "_items": ["species", "island", ' - '"bill_length_mm", "bill_depth_mm", "flipper_length_' - 'mm", "body_mass_g", "sex"]}}' - "}]}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_cast_default", ' - '"module": 
"transformations", ' - '"kwargs": {"TOA": "f64"}' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_clamp", ' - '"module": "transformations", ' - '"kwargs": {"bounds": [30.0, 65.0]}' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_resize", ' - '"module": "transformations", ' - '"kwargs": {"size": 346, "constant": 43.61}' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_variance", ' - '"module": "transformations"' - "}}, " - '"rhs": {' - '"_type": "constructor", ' - '"func": "then_laplace", ' - '"module": "measurements", ' - '"kwargs": {"scale": 5.0}' - "}}}" -) - -example_opendp = { - "dataset_name": PENGUIN_DATASET, - "opendp_json": OPENDP_PIPELINE, - "fixed_delta": QUERY_DELTA, -} -example_dummy_opendp = make_dummy(example_opendp) - -# DiffPrivLib -DIFFPRIVLIB_PIPELINE = ( - '{"module": "diffprivlib", ' - f'"version": "{DIFFPRIVLIB_VERSION}", ' - '"pipeline": [' - "{" - '"type": "_dpl_type:StandardScaler", ' - '"name": "scaler", ' - '"params": {' - '"with_mean": true, ' - '"with_std": true, ' - '"copy": true, ' - '"epsilon": 0.5, ' - '"bounds": {' - '"_tuple": true, ' - '"_items": [[30.0, 13.0, 150.0, 2000.0], [65.0, 23.0, 250.0, 7000.0]]' - "}, " - '"random_state": null, ' - '"accountant": "_dpl_instance:BudgetAccountant"' - "}" - "}, " - "{" - '"type": "_dpl_type:LogisticRegression", ' - '"name": "classifier", ' - '"params": {' - '"tol": 0.0001, ' - '"C": 1.0, ' - '"fit_intercept": true, ' - '"random_state": null, ' - '"max_iter": 100, ' - '"verbose": 0, ' - '"warm_start": false, ' - '"n_jobs": null, ' - '"epsilon": 1.0, ' - '"data_norm": 83.69469642643347, ' - '"accountant": "_dpl_instance:BudgetAccountant"' - "}" - "}" - "]" - "}" -) - -example_diffprivlib = { - "dataset_name": PENGUIN_DATASET, - "diffprivlib_json": DIFFPRIVLIB_PIPELINE, - "feature_columns": FEATURE_COLUMNS, - "target_columns": TARGET_COLUMNS, - "test_size": TEST_SIZE, - "test_train_split_seed": SPLIT_SEED, - "imputer_strategy": IMPUTER_STRATEGY, -} -example_dummy_diffprivlib = make_dummy(example_diffprivlib)