Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/getsentry/seer into rvinnak…
Browse files Browse the repository at this point in the history
…ota/add-codecov-client
  • Loading branch information
rohitvinnakota-codecov committed Sep 17, 2024
2 parents 39f8d2c + 31ce656 commit 5f2e47e
Show file tree
Hide file tree
Showing 21 changed files with 68,016 additions and 42 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,4 @@ langfuse @ git+https://github.com/jennmueng/langfuse-python.git@9d9350de1e4e84fa
watchdog
stumpy==1.13.0
pytest_alembic==0.11.1
cryptography==43.0.0
cryptography==43.0.1
20 changes: 19 additions & 1 deletion src/seer/anomaly_detection/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from seer.anomaly_detection.models.external import AnomalyDetectionConfig, TimeSeriesPoint
from seer.db import DbDynamicAlert, DbDynamicAlertTimeSeries, Session
from seer.exceptions import ClientError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -49,6 +50,10 @@ def save_timepoint(
):
return NotImplemented

@abc.abstractmethod
def delete_alert_data(self, external_alert_id: int):
    """
    Delete the data held for a single alert.

    Abstract hook: concrete accessors override this. The base body only
    returns the ``NotImplemented`` sentinel.

    Parameters:
    external_alert_id: int
        Identifier of the alert whose data should be deleted.
    """
    return NotImplemented


class DbAlertDataAccessor(AlertDataAccessor):

Expand Down Expand Up @@ -179,7 +184,7 @@ def save_timepoint(
.one_or_none()
)
if existing is None:
raise Exception(f"Alert with id {external_alert_id} not found")
raise ClientError(f"Alert with id {external_alert_id} not found")

new_record = DbDynamicAlertTimeSeries(
dynamic_alert_id=existing.id,
Expand All @@ -191,3 +196,16 @@ def save_timepoint(
)
session.add(new_record)
session.commit()

@sentry_sdk.trace
def delete_alert_data(self, external_alert_id: int):
    """
    Delete the DbDynamicAlert row matching the given external alert id.

    Parameters:
    external_alert_id: int
        Value matched against DbDynamicAlert.external_alert_id.

    Raises:
    ClientError
        If no alert with that external id exists.
    """
    with Session() as session:
        alert_query = session.query(DbDynamicAlert).filter_by(
            external_alert_id=external_alert_id
        )
        alert_row = alert_query.one_or_none()
        if alert_row is None:
            raise ClientError(f"Alert with id {external_alert_id} not found")
        # NOTE(review): only the alert row is deleted explicitly here; whether its
        # DbDynamicAlertTimeSeries rows are removed depends on the model's cascade
        # configuration, which is not visible from this method - confirm.
        session.delete(alert_row)
        session.commit()
17 changes: 17 additions & 0 deletions src/seer/anomaly_detection/anomaly_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
AlertInSeer,
Anomaly,
AnomalyDetectionConfig,
DeleteAlertDataRequest,
DeleteAlertDataResponse,
DetectAnomaliesRequest,
DetectAnomaliesResponse,
StoreDataRequest,
Expand Down Expand Up @@ -233,6 +235,7 @@ def detect_anomalies(self, request: DetectAnomaliesRequest) -> DetectAnomaliesRe
sentry_sdk.set_tag("ad_mode", mode)

if isinstance(request.context, AlertInSeer):
sentry_sdk.set_tag("alert_id", request.context.id)
ts, anomalies = self._online_detect(request.context, request.config)
elif isinstance(request.context, TimeSeriesWithHistory):
ts, anomalies = self._combo_detect(request.context, request.config)
Expand Down Expand Up @@ -287,3 +290,17 @@ def store_data(
anomaly_algo_data={"window_size": anomalies.window_size},
)
return StoreDataResponse(success=True)

@inject
def delete_alert_data(
    self, request: DeleteAlertDataRequest, alert_data_accessor: AlertDataAccessor = injected
) -> DeleteAlertDataResponse:
    """
    Main entry point for deleting data related to an alert.

    Parameters:
    request: DeleteAlertDataRequest
        Alert to clear
    alert_data_accessor: AlertDataAccessor
        Accessor that performs the deletion; supplied by injection by default.

    Returns:
    DeleteAlertDataResponse
        Response with success=True once the accessor call returns. Exceptions
        raised by the accessor are not caught here.
    """
    alert_id = request.alert.id
    alert_data_accessor.delete_alert_data(external_alert_id=alert_id)
    return DeleteAlertDataResponse(success=True)
13 changes: 12 additions & 1 deletion src/seer/anomaly_detection/models/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

AnomalyFlags = Literal["none", "anomaly_lower_confidence", "anomaly_higher_confidence", "no_data"]
Sensitivities = Literal["low", "medium", "high"]
TimePeriods = Literal[15, 30, 60]
TimePeriods = Literal[5, 15, 30, 60]
Directions = Literal["up", "down", "both"]
Seasonalities = Literal["hourly", "daily", "weekly", "auto"]

Expand Down Expand Up @@ -88,3 +88,14 @@ class StoreDataRequest(BaseModel):
class StoreDataResponse(BaseModel):
success: bool
message: Optional[str] = Field(None)


# Request payload for the delete-alert-data endpoint.
class DeleteAlertDataRequest(BaseModel):
    # Organization and project the request is made for.
    organization_id: int
    project_id: int

    # Alert whose stored data should be deleted; its `id` is used for the lookup.
    alert: AlertInSeer


# Response payload for the delete-alert-data endpoint.
class DeleteAlertDataResponse(BaseModel):
    # True when the deletion went through, False when it was rejected.
    success: bool

    # Optional explanation, e.g. the error text when success is False.
    message: Optional[str] = Field(default=None)
18 changes: 18 additions & 0 deletions src/seer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from sentry_sdk.integrations.logging import LoggingIntegration

from seer.anomaly_detection.models.external import (
DeleteAlertDataRequest,
DeleteAlertDataResponse,
DetectAnomaliesRequest,
DetectAnomaliesResponse,
StoreDataRequest,
Expand Down Expand Up @@ -269,13 +271,29 @@ def detect_anomalies_endpoint(data: DetectAnomaliesRequest) -> DetectAnomaliesRe
def store_data_endpoint(data: StoreDataRequest) -> StoreDataResponse:
sentry_sdk.set_tag("organization_id", data.organization_id)
sentry_sdk.set_tag("project_id", data.project_id)
sentry_sdk.set_tag("alert_id", data.alert.id)
try:
response = anomaly_detection().store_data(data)
except ClientError as e:
response = StoreDataResponse(success=False, message=str(e))
return response


@json_api(blueprint, "/v1/anomaly-detection/delete-alert-data")
@sentry_sdk.trace
def delete_alert__data_endpoint(
    data: DeleteAlertDataRequest,
) -> DeleteAlertDataResponse:
    """
    Handle a request to delete the data stored for one alert.

    Sets the organization, project and alert ids as Sentry tags, then delegates
    to the anomaly detection service. A ClientError is turned into a response
    with success=False and the error text as message; any other exception
    propagates to the caller.
    """
    # NOTE(review): the double underscore in the function name looks unintentional;
    # it is left as-is because the name may be used as the route's endpoint name.
    sentry_sdk.set_tag("organization_id", data.organization_id)
    sentry_sdk.set_tag("project_id", data.project_id)
    sentry_sdk.set_tag("alert_id", data.alert.id)
    try:
        return anomaly_detection().delete_alert_data(data)
    except ClientError as err:
        return DeleteAlertDataResponse(success=False, message=str(err))


@blueprint.route("/health/live", methods=["GET"])
def health_check():
from seer.inference_models import models_loading_status
Expand Down
78 changes: 41 additions & 37 deletions src/seer/grouping/grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,46 +424,50 @@ def insert_batch_grouping_records(
)
with Session() as session:
for i, entry in enumerate(data.data):
embedding = embeddings[i].astype("float32")
nearest_neighbor = self.query_nearest_k_neighbors(
session,
embedding,
entry.project_id,
entry.hash,
data.threshold,
data.k,
data.hnsw_candidates,
data.hnsw_distance,
data.use_reranking,
)

if nearest_neighbor:
neighbor, distance = nearest_neighbor[0][0], nearest_neighbor[0][1]
message_similarity_score = difflib.SequenceMatcher(
None, entry.message, neighbor.message
).ratio()
response = GroupingResponse(
parent_hash=neighbor.hash,
stacktrace_distance=distance,
message_distance=1.0 - message_similarity_score,
should_group=True,
)
groups_with_neighbor[str(entry.group_id)] = response
else:
insert_stmt = insert(DbGroupingRecord).values(
project_id=entry.project_id,
message=entry.message,
error_type=entry.exception_type,
hash=entry.hash,
stacktrace_embedding=embedding,
with sentry_sdk.start_span(
op="seer.grouping", description="insert single grouping record"
) as span:
span.set_data("stacktrace_len", len(data.stacktrace_list[i]))
embedding = embeddings[i].astype("float32")
nearest_neighbor = self.query_nearest_k_neighbors(
session,
embedding,
entry.project_id,
entry.hash,
data.threshold,
data.k,
data.hnsw_candidates,
data.hnsw_distance,
data.use_reranking,
)

session.execute(
insert_stmt.on_conflict_do_nothing(
index_elements=(DbGroupingRecord.project_id, DbGroupingRecord.hash)
if nearest_neighbor:
neighbor, distance = nearest_neighbor[0][0], nearest_neighbor[0][1]
message_similarity_score = difflib.SequenceMatcher(
None, entry.message, neighbor.message
).ratio()
response = GroupingResponse(
parent_hash=neighbor.hash,
stacktrace_distance=distance,
message_distance=1.0 - message_similarity_score,
should_group=True,
)
)
session.commit()
groups_with_neighbor[str(entry.group_id)] = response
else:
insert_stmt = insert(DbGroupingRecord).values(
project_id=entry.project_id,
message=entry.message,
error_type=entry.exception_type,
hash=entry.hash,
stacktrace_embedding=embedding,
)

session.execute(
insert_stmt.on_conflict_do_nothing(
index_elements=(DbGroupingRecord.project_id, DbGroupingRecord.hash)
)
)
session.commit()

return groups_with_neighbor

Expand Down
Empty file.
125 changes: 125 additions & 0 deletions tests/seer/anomaly_detection/detectors/test_anomaly_detectors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import unittest
from unittest.mock import MagicMock, patch

import numpy as np

from seer.anomaly_detection.detectors.anomaly_detectors import (
MPBatchAnomalyDetector,
MPStreamAnomalyDetector,
)
from seer.anomaly_detection.detectors.mp_config import MPConfig
from seer.anomaly_detection.models import MPTimeSeriesAnomalies
from seer.anomaly_detection.models.external import AnomalyDetectionConfig
from seer.anomaly_detection.models.timeseries import TimeSeries
from tests.seer.anomaly_detection.test_utils import convert_synthetic_ts


class TestMPBatchAnomalyDetector(unittest.TestCase):
    """Exercises MPBatchAnomalyDetector._compute_matrix_profile with mocked collaborators."""

    def setUp(self):
        # Window-size selector, scorer and matrix-profile utilities are plain mocks;
        # each test configures the return values it needs.
        self.ws_selector = MagicMock()
        self.scorer = MagicMock()
        self.mp_utils = MagicMock()
        self.mp_config = MPConfig(ignore_trivial=False, normalize_mp=False)

        # TODO: Placeholder values as not used in detection yet
        self.config = AnomalyDetectionConfig(
            time_period=15, sensitivity="low", direction="up", expected_seasonality="auto"
        )

        self.detector = MPBatchAnomalyDetector()

    @patch("stumpy.stump")
    def test_compute_matrix_profile(self, mock_stump):
        """The result carries the scorer's scores/flags and each collaborator is called once."""
        expected_scores = [0.1, 6.5, 4.8, 0.2]
        expected_flags = [
            "none",
            "anomaly_higher_confidence",
            "anomaly_higher_confidence",
            "none",
        ]

        # Mock to return dummy values
        mock_stump.return_value = np.array([1, 2, 3, 4])
        # Hand the scorer its own copies so the comparison below is not an identity check.
        self.scorer.batch_score = MagicMock(
            return_value=(list(expected_scores), list(expected_flags))
        )

        all_series, all_mp_dists, all_window_sizes = convert_synthetic_ts(
            "tests/seer/anomaly_detection/test_data/synthetic_series", as_ts_datatype=False
        )
        # Only the first synthetic series is used.
        series_values = all_series[0]
        baseline_mp_dist = all_mp_dists[0]
        chosen_window = all_window_sizes[0]

        self.ws_selector.optimal_window_size = MagicMock(return_value=chosen_window)
        self.mp_utils.get_mp_dist_from_mp = MagicMock(return_value=baseline_mp_dist)

        result = self.detector._compute_matrix_profile(
            TimeSeries(timestamps=np.array([]), values=series_values),
            self.config,
            ws_selector=self.ws_selector,
            mp_config=self.mp_config,
            scorer=self.scorer,
            mp_utils=self.mp_utils,
        )

        # Shape of the result object.
        assert isinstance(result, MPTimeSeriesAnomalies)
        assert isinstance(result.flags, list)
        assert isinstance(result.scores, list)
        assert isinstance(result.matrix_profile, np.ndarray)
        assert isinstance(result.window_size, int)

        # Scores and flags come straight from the mocked scorer.
        assert result.scores == expected_scores
        assert result.flags == expected_flags

        # Every collaborator is consulted exactly once.
        for collaborator in (
            mock_stump,
            self.scorer.batch_score,
            self.ws_selector.optimal_window_size,
            self.mp_utils.get_mp_dist_from_mp,
        ):
            collaborator.assert_called_once()


class TestMPStreamAnomalyDetector(unittest.TestCase):
    """Exercises MPStreamAnomalyDetector.detect with stumpy.stumpi and the scorer mocked."""

    def setUp(self):
        # TODO: Placeholder values as not used in detection yet
        self.config = AnomalyDetectionConfig(
            time_period=15, sensitivity="low", direction="up", expected_seasonality="auto"
        )
        # Three new points to stream through the detector.
        self.timeseries = TimeSeries(
            timestamps=np.array([1, 2, 3]), values=np.array([1.1, 2.1, 3.1])
        )
        self.detector = MPStreamAnomalyDetector(
            base_timestamps=np.array([1, 2, 3]),
            base_values=np.array([1.0, 2.0, 3.0]),
            base_mp=np.array([0.1, 0.2, 0.3, 0.4]),
            window_size=2,
        )

    @patch("stumpy.stumpi")
    @patch("seer.anomaly_detection.detectors.MPScorer")
    @patch("seer.anomaly_detection.detectors.MPUtils")
    def test_detect(self, MockMPUtils, MockMPScorer, MockStumpi):
        """detect() yields one score, flag and matrix-profile entry per streamed point."""
        scorer_mock = MockMPScorer.return_value
        utils_mock = MockMPUtils.return_value

        # Stand-in for the object returned by stumpy.stumpi.
        stream_mock = MagicMock()
        stream_mock.P_ = np.array([0.1, 0.2])
        stream_mock.I_ = np.array([0, 1])
        stream_mock.left_I_ = np.array([0, 1])
        stream_mock.T_ = np.array([1.1, 2.1])
        MockStumpi.return_value = stream_mock

        utils_mock.get_mp_dist_from_mp.return_value = np.array([0.1, 0.2])
        scorer_mock.stream_score.return_value = ([0.5], ["none"])

        anomalies = self.detector.detect(self.timeseries, self.config, scorer_mock, utils_mock)

        assert isinstance(anomalies, MPTimeSeriesAnomalies)
        assert isinstance(anomalies.flags, list)
        assert isinstance(anomalies.scores, list)
        assert isinstance(anomalies.matrix_profile, np.ndarray)
        assert isinstance(anomalies.window_size, int)

        # One entry per point in self.timeseries.
        for per_point in (anomalies.scores, anomalies.flags, anomalies.matrix_profile):
            assert len(per_point) == 3

        scorer_mock.stream_score.assert_called()
        stream_mock.update.assert_called()
Loading

0 comments on commit 5f2e47e

Please sign in to comment.