From dc6ab89e4e8521c2a1defb1a5a3c9567f40a3e89 Mon Sep 17 00:00:00 2001
From: Ram Senthamarai
Date: Wed, 11 Sep 2024 09:32:14 -0700
Subject: [PATCH 1/2] chore(anomaly-detection): Return meaningful errors on failure of save endpoint

---
 .../anomaly_detection/anomaly_detection.py    | 39 +++++++++++++------
 src/seer/anomaly_detection/models/external.py |  1 +
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/seer/anomaly_detection/anomaly_detection.py b/src/seer/anomaly_detection/anomaly_detection.py
index 0417ef453..7cd2a57cc 100644
--- a/src/seer/anomaly_detection/anomaly_detection.py
+++ b/src/seer/anomaly_detection/anomaly_detection.py
@@ -253,7 +253,9 @@ def store_data(
                     "minimum_required": min_len,
                 },
             )
-            raise Exception(f"Insufficient time series data for alert {request.alert.id}")
+            return StoreDataResponse(
+                success=False, message=f"Insufficient time series data for alert {request.alert.id}"
+            )
 
         logger.info(
             "store_alert_request",
@@ -264,14 +266,27 @@ def store_data(
                 "num_datapoints": len(request.timeseries),
             },
         )
-        ts, anomalies = self._batch_detect(request.timeseries, request.config)
-        alert_data_accessor.save_alert(
-            organization_id=request.organization_id,
-            project_id=request.project_id,
-            external_alert_id=request.alert.id,
-            config=request.config,
-            timeseries=ts,
-            anomalies=anomalies,
-            anomaly_algo_data={"window_size": anomalies.window_size},
-        )
-        return StoreDataResponse(success=True)
+        try:
+            ts, anomalies = self._batch_detect(request.timeseries, request.config)
+            alert_data_accessor.save_alert(
+                organization_id=request.organization_id,
+                project_id=request.project_id,
+                external_alert_id=request.alert.id,
+                config=request.config,
+                timeseries=ts,
+                anomalies=anomalies,
+                anomaly_algo_data={"window_size": anomalies.window_size},
+            )
+            return StoreDataResponse(success=True)
+        except Exception as e:
+            sentry_sdk.capture_exception(e)
+            logger.error(
+                "error_saving_alert",
+                extra={
+                    "organization_id": request.organization_id,
+                    "project_id": request.project_id,
+                    "external_alert_id": request.alert.id,
+                },
+                exc_info=e,
+            )
+            return StoreDataResponse(success=False, message=str(e))
diff --git a/src/seer/anomaly_detection/models/external.py b/src/seer/anomaly_detection/models/external.py
index 30cd553d3..5c224aeca 100644
--- a/src/seer/anomaly_detection/models/external.py
+++ b/src/seer/anomaly_detection/models/external.py
@@ -85,3 +85,4 @@ class StoreDataRequest(BaseModel):
 
 class StoreDataResponse(BaseModel):
     success: bool
+    message: Optional[str] = Field(None)
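The net effect of this first patch is that a failed save is reported in the response body instead of surfacing as an unhandled exception. As a rough illustration of the resulting contract, here is a small, self-contained sketch; the model below is a local stand-in that mirrors the fields added above and is not part of the patch itself:

    from typing import Optional

    from pydantic import BaseModel, Field

    # Local stand-in mirroring the StoreDataResponse fields added in this patch.
    class StoreDataResponse(BaseModel):
        success: bool
        message: Optional[str] = Field(None)

    # The endpoint now converts a save failure into a structured response:
    try:
        raise ValueError("duplicate alert id")  # stand-in for a failure inside save_alert
    except Exception as e:
        response = StoreDataResponse(success=False, message=str(e))

    assert response.success is False
    assert response.message == "duplicate alert id"

A caller should therefore check success (and message) rather than relying on the HTTP status alone.
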
From bbf74617893b6afeabae0c7222d4bb61134474ba Mon Sep 17 00:00:00 2001
From: Ram Senthamarai
Date: Wed, 11 Sep 2024 12:02:41 -0700
Subject: [PATCH 2/2] chore(anomaly-detection): Return meaningful errors for client side errors

---
 .../anomaly_detection/anomaly_detection.py    | 63 +++++++++----------
 .../detectors/anomaly_detectors.py            |  3 +-
 .../anomaly_detection/detectors/mp_utils.py   |  5 +-
 src/seer/anomaly_detection/models/external.py |  4 +-
 src/seer/app.py                               | 13 +++-
 src/seer/exceptions.py                        | 10 +++
 6 files changed, 59 insertions(+), 39 deletions(-)
 create mode 100644 src/seer/exceptions.py

diff --git a/src/seer/anomaly_detection/anomaly_detection.py b/src/seer/anomaly_detection/anomaly_detection.py
index 7cd2a57cc..73d708c96 100644
--- a/src/seer/anomaly_detection/anomaly_detection.py
+++ b/src/seer/anomaly_detection/anomaly_detection.py
@@ -23,6 +23,7 @@
     TimeSeriesWithHistory,
 )
 from seer.dependency_injection import inject, injected
+from seer.exceptions import ClientError, ServerError
 
 anomaly_detection_module.enable()
 logger = logging.getLogger(__name__)
@@ -107,10 +108,23 @@ def _online_detect(
         # Retrieve historic data
         historic = alert_data_accessor.query(alert.id)
         if historic is None:
-            raise Exception(f"Invalid alert id {alert.id}")
+
+            logger.error(
+                "no_stored_history_data",
+                extra={
+                    "alert_id": alert.id,
+                },
+            )
+            raise ClientError("No timeseries data found for alert")
 
         if not isinstance(historic.anomalies, MPTimeSeriesAnomalies):
-            raise Exception("Invalid state")
+            logger.error(
+                "invalid_state",
+                extra={
+                    "note": f"Expecting object of type MPTimeSeriesAnomalies but found {type(historic.anomalies)}"
+                },
+            )
+            raise ServerError("Invalid state")
         anomalies: MPTimeSeriesAnomalies = historic.anomalies
 
         # TODO: Need to check the time gap between historic data and the new datapoint against the alert configuration
@@ -168,7 +182,7 @@ def _combo_detect(
                     "minimum_required": min_len,
                 },
             )
-            raise Exception("Insufficient history data")
+            raise ClientError("Insufficient history data")
 
         logger.info(
             f"Detecting anomalies for time series with {len(ts_with_history.current)} datapoints and history of {len(ts_with_history.history)} datapoints"
@@ -194,8 +208,6 @@ def _combo_detect(
         return ts_external, streamed_anomalies
 
     def _update_anomalies(self, ts_external: List[TimeSeriesPoint], anomalies: TimeSeriesAnomalies):
-        if anomalies is None:
-            raise Exception("No anomalies available for the timeseries.")
         for i, point in enumerate(ts_external):
             point.anomaly = Anomaly(
                 anomaly_score=anomalies.scores[i],
@@ -227,7 +239,7 @@ def detect_anomalies(self, request: DetectAnomaliesRequest) -> DetectAnomaliesRe
         else:
             ts, anomalies = self._batch_detect(request.context, request.config)
         self._update_anomalies(ts, anomalies)
-        return DetectAnomaliesResponse(timeseries=ts)
+        return DetectAnomaliesResponse(success=True, timeseries=ts)
 
     @inject
     def store_data(
@@ -253,9 +265,7 @@ def store_data(
                     "minimum_required": min_len,
                 },
             )
-            return StoreDataResponse(
-                success=False, message=f"Insufficient time series data for alert {request.alert.id}"
-            )
+            raise ClientError("Insufficient time series data for alert")
 
         logger.info(
             "store_alert_request",
@@ -266,27 +276,14 @@ def store_data(
                 "num_datapoints": len(request.timeseries),
             },
         )
-        try:
-            ts, anomalies = self._batch_detect(request.timeseries, request.config)
-            alert_data_accessor.save_alert(
-                organization_id=request.organization_id,
-                project_id=request.project_id,
-                external_alert_id=request.alert.id,
-                config=request.config,
-                timeseries=ts,
-                anomalies=anomalies,
-                anomaly_algo_data={"window_size": anomalies.window_size},
-            )
-            return StoreDataResponse(success=True)
-        except Exception as e:
-            sentry_sdk.capture_exception(e)
-            logger.error(
-                "error_saving_alert",
-                extra={
-                    "organization_id": request.organization_id,
-                    "project_id": request.project_id,
-                    "external_alert_id": request.alert.id,
-                },
-                exc_info=e,
-            )
-            return StoreDataResponse(success=False, message=str(e))
+        ts, anomalies = self._batch_detect(request.timeseries, request.config)
+        alert_data_accessor.save_alert(
+            organization_id=request.organization_id,
+            project_id=request.project_id,
+            external_alert_id=request.alert.id,
+            config=request.config,
+            timeseries=ts,
+            anomalies=anomalies,
+            anomaly_algo_data={"window_size": anomalies.window_size},
+        )
+        return StoreDataResponse(success=True)
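The hunks above establish the convention the rest of this patch relies on: problems the caller can correct (no stored history, too little data) are raised as ClientError, while violated internal invariants stay as ServerError. A minimal sketch of that split follows; it assumes the patched seer package is importable, and the two helper functions are hypothetical, not code from the change:

    from seer.exceptions import ClientError, ServerError

    def check_history_length(num_points: int, min_len: int) -> None:
        # Caller-correctable input problem -> ClientError, reported back to the caller.
        if num_points < min_len:
            raise ClientError("Insufficient history data")

    def check_window_size(window_size: int) -> None:
        # Violated internal invariant -> ServerError, left to propagate and alert.
        if window_size <= 0:
            raise ServerError("Invalid window size")
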
diff --git a/src/seer/anomaly_detection/detectors/anomaly_detectors.py b/src/seer/anomaly_detection/detectors/anomaly_detectors.py
index 8064f92ad..b3e8af558 100644
--- a/src/seer/anomaly_detection/detectors/anomaly_detectors.py
+++ b/src/seer/anomaly_detection/detectors/anomaly_detectors.py
@@ -18,6 +18,7 @@
     TimeSeriesAnomalies,
 )
 from seer.dependency_injection import inject, injected
+from seer.exceptions import ServerError
 
 logger = logging.getLogger(__name__)
 
@@ -85,7 +86,7 @@ def _compute_matrix_profile(
         logger.debug(f"window_size: {window_size}")
         if window_size <= 0:
             # TODO: Add sentry logging of this error
-            raise Exception("Invalid window size")
+            raise ServerError("Invalid window size")
         # Get the matrix profile for the time series
         mp = stumpy.stump(
             ts_values,
diff --git a/src/seer/anomaly_detection/detectors/mp_utils.py b/src/seer/anomaly_detection/detectors/mp_utils.py
index 5dfe2571d..d921398f7 100644
--- a/src/seer/anomaly_detection/detectors/mp_utils.py
+++ b/src/seer/anomaly_detection/detectors/mp_utils.py
@@ -5,6 +5,7 @@
 from seer.anomaly_detection.detectors.mp_config import MPConfig
 from seer.anomaly_detection.detectors.normalizers import Normalizer
 from seer.dependency_injection import inject, injected
+from seer.exceptions import ServerError
 
 
 class MPUtils(BaseModel):
@@ -38,12 +39,12 @@ def get_mp_dist_from_mp(
         mp_dist = mp[:, 0]
         if mp_config is not None and mp_config.normalize_mp:
             if normalizer is None:
-                raise Exception("Need normalizer to normalize MP")
+                raise ServerError("Need normalizer to normalize MP")
             mp_dist = normalizer.normalize(mp_dist)
 
         if pad_to_len is not None:
             if pad_to_len < len(mp_dist):
-                raise Exception(
+                raise ServerError(
                     "Requested length should be greater than or equal to current mp_dist"
                 )
             nan_value_count = np.empty(pad_to_len - len(mp_dist))
diff --git a/src/seer/anomaly_detection/models/external.py b/src/seer/anomaly_detection/models/external.py
index 5c224aeca..c32753228 100644
--- a/src/seer/anomaly_detection/models/external.py
+++ b/src/seer/anomaly_detection/models/external.py
@@ -72,7 +72,9 @@ class DetectAnomaliesRequest(BaseModel):
 
 
 class DetectAnomaliesResponse(BaseModel):
-    timeseries: List[TimeSeriesPoint]
+    success: bool
+    message: Optional[str] = Field(None)
+    timeseries: Optional[List[TimeSeriesPoint]] = Field(None)
 
 
 class StoreDataRequest(BaseModel):
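With timeseries now optional on DetectAnomaliesResponse, a caller needs to branch on success before using it. A small sketch of that handling follows; the TimeSeriesPoint stand-in is simplified for illustration and is not the real model:

    from typing import List, Optional

    from pydantic import BaseModel, Field

    class TimeSeriesPoint(BaseModel):  # simplified stand-in for the real model
        timestamp: float
        value: float

    # Mirrors the updated response model from the hunk above.
    class DetectAnomaliesResponse(BaseModel):
        success: bool
        message: Optional[str] = Field(None)
        timeseries: Optional[List[TimeSeriesPoint]] = Field(None)

    def unwrap(resp: DetectAnomaliesResponse) -> List[TimeSeriesPoint]:
        # Only a successful response is guaranteed to carry a timeseries.
        if not resp.success:
            raise RuntimeError(resp.message or "detection failed")
        return resp.timeseries or []

    assert unwrap(DetectAnomaliesResponse(success=True, timeseries=[])) == []
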
diff --git a/src/seer/app.py b/src/seer/app.py
index c5ade46e9..9ed0a45f4 100644
--- a/src/seer/app.py
+++ b/src/seer/app.py
@@ -52,6 +52,7 @@
 from seer.bootup import bootup, module
 from seer.configuration import AppConfig
 from seer.dependency_injection import inject, injected, resolve
+from seer.exceptions import ClientError
 from seer.grouping.grouping import (
     BulkCreateGroupingRecordsResponse,
     CreateGroupingRecordsRequest,
@@ -252,7 +253,11 @@ def summarize_issue_endpoint(data: SummarizeIssueRequest) -> SummarizeIssueRespo
 def detect_anomalies_endpoint(data: DetectAnomaliesRequest) -> DetectAnomaliesResponse:
     sentry_sdk.set_tag("organization_id", data.organization_id)
     sentry_sdk.set_tag("project_id", data.project_id)
-    return anomaly_detection().detect_anomalies(data)
+    try:
+        response = anomaly_detection().detect_anomalies(data)
+    except ClientError as e:
+        response = DetectAnomaliesResponse(success=False, message=str(e))
+    return response
 
 
 @json_api(blueprint, "/v1/anomaly-detection/store")
@@ -260,7 +265,11 @@ def detect_anomalies_endpoint(data: DetectAnomaliesRequest) -> DetectAnomaliesRe
 def store_data_endpoint(data: StoreDataRequest) -> StoreDataResponse:
     sentry_sdk.set_tag("organization_id", data.organization_id)
     sentry_sdk.set_tag("project_id", data.project_id)
-    return anomaly_detection().store_data(data)
+    try:
+        response = anomaly_detection().store_data(data)
+    except ClientError as e:
+        response = StoreDataResponse(success=False, message=str(e))
+    return response
 
 
 @blueprint.route("/health/live", methods=["GET"])
diff --git a/src/seer/exceptions.py b/src/seer/exceptions.py
new file mode 100644
index 000000000..2b66a432a
--- /dev/null
+++ b/src/seer/exceptions.py
@@ -0,0 +1,10 @@
+class ClientError(Exception):
+    def __init__(self, message):
+        # Call the base class constructor with the parameters it needs
+        super().__init__(message)
+
+
+class ServerError(Exception):
+    def __init__(self, message):
+        # Call the base class constructor with the parameters it needs
+        super().__init__(message)
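Note that only ClientError is translated at the endpoint layer; ServerError and any other exception still propagate as before, so unexpected failures continue to surface as server errors and presumably still reach Sentry through the existing app-level integration. The same mapping could be factored into a small helper, sketched here as a possible follow-up rather than part of the patch (the helper name is hypothetical, and the snippet assumes the patched seer package is importable):

    from typing import Callable, TypeVar

    from seer.exceptions import ClientError

    R = TypeVar("R")

    def run_mapping_client_errors(fn: Callable[[], R], on_client_error: Callable[[str], R]) -> R:
        # Run the endpoint body; turn caller-correctable failures into a
        # structured error response and let everything else propagate.
        try:
            return fn()
        except ClientError as e:
            return on_client_error(str(e))

    # Hypothetical usage mirroring store_data_endpoint:
    # response = run_mapping_client_errors(
    #     lambda: anomaly_detection().store_data(data),
    #     lambda msg: StoreDataResponse(success=False, message=msg),
    # )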