From 26e4fa5efff448d624c9c74ddf9322def4ca2f3a Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Sun, 29 Sep 2024 18:04:06 +0000 Subject: [PATCH 01/10] add merlion ad --- THIRD_PARTY_LICENSES.txt | 5 + ads/opctl/operator/lowcode/anomaly/const.py | 84 ++++++++++ .../lowcode/anomaly/model/anomaly_merlion.py | 155 ++++++++++++++++++ .../operator/lowcode/anomaly/model/factory.py | 2 + .../lowcode/anomaly/model/randomcutforest.py | 2 +- .../operator/lowcode/anomaly/schema.yaml | 1 + pyproject.toml | 3 +- .../operators/anomaly/test_anomaly_simple.py | 2 +- 8 files changed, 251 insertions(+), 3 deletions(-) create mode 100644 ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 418d831bb..0d3700538 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -471,6 +471,11 @@ rrcf * Source code: https://github.com/kLabUM/rrcf * Project home: https://github.com/kLabUM/rrcf +Merlion +* Copyright 2021 Salesforce.com Inc +* License: BSD-3 Clause License +* Source code: https://github.com/salesforce/Merlion +* Project Home: https://github.com/salesforce/Merlion =============================== Licenses =============================== ------------------------------------------------------------------------ diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 73bac624b..da402e32b 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -4,8 +4,12 @@ # Copyright (c) 2023 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import random from ads.common.extended_enum import ExtendedEnumMeta from ads.opctl.operator.lowcode.common.const import DataColumns +from merlion.models.anomaly import autoencoder, deep_point_anomaly_detector, isolation_forest, spectral_residual, windstats, windstats_monthly +from merlion.models.anomaly.change_point import bocpd +from merlion.models.forecast import prophet class SupportedModels(str, metaclass=ExtendedEnumMeta): @@ -14,6 +18,7 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta): AutoMLX = "automlx" AutoTS = "autots" Auto = "auto" + MerilonAD = "merlion_ad" # TODS = "tods" class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta): @@ -56,6 +61,84 @@ class TODSSubModels(str, metaclass=ExtendedEnumMeta): } +class MerlionADSubmodels(str, metaclass=ExtendedEnumMeta): + """Supported Merlion AD sub models.""" + + # point anomaly + AUTOENCODER = "autoencoder" + DAGMM = "dagmm" + DBL = "dbl" + DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" + ISOLATION_FOREST = "isolation_forest" + LOF = "lof" + LSTM_ED = "lstm_ed" + # RANDOM_CUT_FOREST = "random_cut_forest" + SPECTRAL_RESIDUAL = "spectral_residual" + STAT_RESIDUAL = "stat_residual" + VAE = "vae" + WINDSTATS = "windstats" + WINDSTATS_MONTHLY = "windstats_monthly" + ZMS = "zms" + + # forecast_based + ARIMA = "arima" + ETS = "ets" + MSES = "mses" + PROPHET = "prophet" + SARIMA = "sarima" + + #changepoint + BOCPD = "bocpd" + + +MERLIONAD_IMPORT_MODEL_MAP = { + MerlionADSubmodels.AUTOENCODER: ".autoendcoder", + MerlionADSubmodels.DAGMM: ".dagmm", + MerlionADSubmodels.DBL: ".dbl", + MerlionADSubmodels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector", + MerlionADSubmodels.ISOLATION_FOREST: ".isolation_forest", + MerlionADSubmodels.LOF: ".lof", + MerlionADSubmodels.LSTM_ED: ".lstm_ed", + # MerlionADSubmodels.RANDOM_CUT_FOREST: ".random_cut_forest", + MerlionADSubmodels.SPECTRAL_RESIDUAL: ".spectral_residual", + MerlionADSubmodels.STAT_RESIDUAL: ".stat_residual", + MerlionADSubmodels.VAE: ".vae", + MerlionADSubmodels.WINDSTATS: ".windstats", + MerlionADSubmodels.WINDSTATS_MONTHLY: ".windstats_monthly", + MerlionADSubmodels.ZMS: ".zms", + MerlionADSubmodels.ARIMA: ".forecast_based.arima", + MerlionADSubmodels.ETS: ".forecast_based.ets", + MerlionADSubmodels.MSES: ".forecast_based.mses", + MerlionADSubmodels.PROPHET: ".forecast_based.prophet", + MerlionADSubmodels.SARIMA: ".forecast_based.sarima", + MerlionADSubmodels.BOCPD: ".change_point.bocpd", +} + + +MERLIONAD_MODEL_MAP = { + MerlionADSubmodels.AUTOENCODER: "AutoEncoder", + MerlionADSubmodels.DAGMM: "DAGMM", + MerlionADSubmodels.DBL: "DynamicBaseline", + MerlionADSubmodels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector", + MerlionADSubmodels.ISOLATION_FOREST: "IsolationForest", + MerlionADSubmodels.LOF: "LOF", + MerlionADSubmodels.LSTM_ED: "LSTMED", + # MerlionADSubmodels.RANDOM_CUT_FOREST: "RandomCutForest", + MerlionADSubmodels.SPECTRAL_RESIDUAL: "SpectralResidual", + MerlionADSubmodels.STAT_RESIDUAL: "StatThreshold", + MerlionADSubmodels.VAE: "VAE", + MerlionADSubmodels.WINDSTATS: "WindStats", + MerlionADSubmodels.WINDSTATS_MONTHLY: "MonthlyWindStats", + MerlionADSubmodels.ZMS: "ZMS", + MerlionADSubmodels.ARIMA: "ArimaDetector", + MerlionADSubmodels.ETS: "ETSDetector", + MerlionADSubmodels.MSES: "MSESDetector", + MerlionADSubmodels.PROPHET: "ProphetDetector", + MerlionADSubmodels.SARIMA: "SarimaDetector", + MerlionADSubmodels.BOCPD: "BOCPD", +} + + class SupportedMetrics(str, metaclass=ExtendedEnumMeta): UNSUPERVISED_UNIFY95 = "unsupervised_unify95" UNSUPERVISED_UNIFY95_LOG_LOSS = "unsupervised_unify95_log_loss" @@ -94,5 +177,6 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta): Series = DataColumns.Series +MERLION_DEFAULT_MODEL = "prophet" TODS_DEFAULT_MODEL = "ocsvm" SUBSAMPLE_THRESHOLD = 1000 diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py new file mode 100644 index 000000000..8efe0f8de --- /dev/null +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*-- + +# Copyright (c) 2023, 2024 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +import importlib +from collections import defaultdict + +import numpy as np +import pandas as pd +from merlion.utils import TimeSeries + +from ads.common.decorator.runtime_dependency import runtime_dependency +from ads.opctl.operator.lowcode.anomaly.const import ( + MERLION_DEFAULT_MODEL, + MERLIONAD_IMPORT_MODEL_MAP, + MERLIONAD_MODEL_MAP, + OutputColumns, +) +from tests.integration import other + +from .anomaly_dataset import AnomalyOutput +from .base_model import AnomalyOperatorBaseModel + + +class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel): + """Class representing Merlion Anomaly Detection operator model.""" + + @runtime_dependency( + module="merlion", + err_msg=( + "Please run `pip3 install salesforce-merlion[all]` to " + "install the required packages." + ), + ) + def _get_config_model(self, model_list): + """ + Returns a dictionary with model names as keys and a list of model config and model object as values. + + Parameters + ---------- + model_list : list + A list of model names. + + Returns + ------- + dict + A dictionary with model names as keys and a list of model config and model object as values. + """ + model_config_map = {} + for model_name in model_list: + model_module = importlib.import_module( + name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name), + package="merlion.models.anomaly", + ) + model_config = getattr( + model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config" + ) + model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name)) + model_config_map[model_name] = [model_config, model] + return model_config_map + + def _build_model(self) -> AnomalyOutput: + """ + Builds a Merlion anomaly detection model and trains it using the given data. + + Parameters + ---------- + None + + Returns + ------- + AnomalyOutput + An AnomalyOutput object containing the anomaly detection results. + """ + model_kwargs = self.spec.model_kwargs + anomaly_output = AnomalyOutput(date_column="index") + anomaly_threshold = model_kwargs.get("anomaly_threshold", 95) + model_config_map = {} + if model_kwargs.get("sub_model", None): + model_config_map = self._get_config_model(model_kwargs.get("sub_model")) + else: + from merlion.models.anomaly.forecast_based.prophet import ( # noqa: I001 + ProphetDetector, + ProphetDetectorConfig, + ) + + model_config_map[MERLION_DEFAULT_MODEL] = [ + ProphetDetectorConfig, + ProphetDetector, + ] + + date_column = self.spec.datetime_column.name + + anomaly_output = AnomalyOutput(date_column=date_column) + # model_objects = defaultdict(list) + for target, df in self.datasets.full_data_dict.items(): + data = df.set_index(date_column) + data = TimeSeries.from_pd(data) + for model_name, (model_config, model) in model_config_map.items(): + model_config = model_config(**self.spec.model_kwargs) + model = model(model_config) + + + scores = model.train(train_data=data, anomaly_labels=None) + + try: + y_pred = model.get_anomaly_label(data) + y_pred =(y_pred.to_pd().reset_index()["anom_score"] > 0).astype(int) + except Exception as e: + y_pred = ( + scores.to_pd().reset_index()["anom_score"] + > np.percentile( + scores.to_pd().reset_index()["anom_score"], anomaly_threshold + ) + ).astype(int) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + { + index_col: df[index_col], + OutputColumns.SCORE_COL: scores.to_pd().reset_index()[ + "anom_score" + ], + } + ).reset_index(drop=True) + # model_objects[model_name].append(model) + + anomaly_output.add_output(target, anomaly, score) + return anomaly_output + + def _generate_report(self): + """Genreates a report for the model.""" + import report_creator as rc + + other_sections = [ + rc.Heading("Selected Models Overview", level=2), + rc.Text( + "The following tables provide information regarding the chosen model." + ), + ] + + model_description = rc.Text( + "The Merlion anomaly detection model is a full-stack automated machine learning system for anomaly detection." + ) + + return ( + model_description, + other_sections, + ) diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 64028cba2..a4df08db0 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -8,6 +8,7 @@ from ..const import NonTimeADSupportedModels, SupportedModels from ..operator_config import AnomalyOperatorConfig from .anomaly_dataset import AnomalyDatasets +from .anomaly_merlion import AnomalyMerlionOperatorModel from .automlx import AutoMLXOperatorModel from .autots import AutoTSOperatorModel @@ -48,6 +49,7 @@ class AnomalyOperatorModelFactory: SupportedModels.AutoMLX: AutoMLXOperatorModel, # SupportedModels.TODS: TODSOperatorModel, SupportedModels.AutoTS: AutoTSOperatorModel, + SupportedModels.MerilonAD: AnomalyMerlionOperatorModel } _NonTime_MAP = { diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index e2b8b9d5a..17f19351d 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -36,7 +36,7 @@ def _build_model(self) -> AnomalyOutput: # Set tree parameters num_trees = model_kwargs.get("num_trees", 200) shingle_size = model_kwargs.get("shingle_size", None) - anomaly_threshold = model_kwargs.get("anamoly_threshold", 95) + anomaly_threshold = model_kwargs.get("anomaly_threshold", 95) for target, df in self.datasets.full_data_dict.items(): try: diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index bb5caa6ec..e692847b1 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -364,6 +364,7 @@ spec: - oneclasssvm - isolationforest - randomcutforest + - merlion_ad meta: description: "The model to be used for anomaly detection" diff --git a/pyproject.toml b/pyproject.toml index e25d8f91c..8a29392a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -179,7 +179,8 @@ anomaly = [ "oracledb", "report-creator==1.0.9", "rrcf==0.4.4", - "scikit-learn" + "scikit-learn", + "salesforce-merlion[all]==2.0.4" ] recommender = [ "oracle_ads[opctl]", diff --git a/tests/operators/anomaly/test_anomaly_simple.py b/tests/operators/anomaly/test_anomaly_simple.py index aac4dad3e..d7e319c49 100644 --- a/tests/operators/anomaly/test_anomaly_simple.py +++ b/tests/operators/anomaly/test_anomaly_simple.py @@ -52,7 +52,7 @@ for d in DATASETS: parameters_short.append((m, d)) -MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest"] +MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest", "merlion_ad"] @pytest.mark.parametrize("model", ["autots"]) def test_artificial_big(model): From fdaa337b7f181f0cb9dceab2cf9794693c9b86e1 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 30 Sep 2024 04:49:32 +0000 Subject: [PATCH 02/10] run for big dataset --- tests/operators/anomaly/test_anomaly_simple.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/operators/anomaly/test_anomaly_simple.py b/tests/operators/anomaly/test_anomaly_simple.py index d7e319c49..2a3c26e4b 100644 --- a/tests/operators/anomaly/test_anomaly_simple.py +++ b/tests/operators/anomaly/test_anomaly_simple.py @@ -17,7 +17,7 @@ from ads.opctl.operator.cmd import run -MODELS = ["autots"] # "automlx", +MODELS = ["autots", "merlion_ad"] # "automlx", # Mandatory YAML parameters TEMPLATE_YAML = { @@ -52,9 +52,9 @@ for d in DATASETS: parameters_short.append((m, d)) -MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest", "merlion_ad"] +MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest"] -@pytest.mark.parametrize("model", ["autots"]) +@pytest.mark.parametrize("model", ["autots", "merlion_ad"]) def test_artificial_big(model): all_data = [] TARGET_COLUMN = "sensor" From c284f0f446f5b77b1dbbde1e01f323e8b530d792 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 30 Sep 2024 05:46:53 +0000 Subject: [PATCH 03/10] tests enable --- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 8efe0f8de..212a2a8c2 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -18,7 +18,6 @@ MERLIONAD_MODEL_MAP, OutputColumns, ) -from tests.integration import other from .anomaly_dataset import AnomalyOutput from .base_model import AnomalyOperatorBaseModel @@ -100,6 +99,8 @@ def _build_model(self) -> AnomalyOutput: data = TimeSeries.from_pd(data) for model_name, (model_config, model) in model_config_map.items(): model_config = model_config(**self.spec.model_kwargs) + if hasattr(model_config, "target_seq_index"): + model_config.target_seq_index = df.columns.get_loc(self.spec.target_column) model = model(model_config) From 746850378ffb836a204a03af9ae2422879aa74d5 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 30 Sep 2024 06:03:50 +0000 Subject: [PATCH 04/10] tests enable --- tests/operators/anomaly/test_anomaly_simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/operators/anomaly/test_anomaly_simple.py b/tests/operators/anomaly/test_anomaly_simple.py index 2a3c26e4b..20e392aa2 100644 --- a/tests/operators/anomaly/test_anomaly_simple.py +++ b/tests/operators/anomaly/test_anomaly_simple.py @@ -17,7 +17,7 @@ from ads.opctl.operator.cmd import run -MODELS = ["autots", "merlion_ad"] # "automlx", +MODELS = ["autots"] # "automlx", # Mandatory YAML parameters TEMPLATE_YAML = { From 467908203fb3f7153f63d2ecbed1a7706c33de02 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 30 Sep 2024 06:17:15 +0000 Subject: [PATCH 05/10] format with ruff --- .../lowcode/anomaly/model/anomaly_merlion.py | 14 ++++++++------ tests/operators/anomaly/test_anomaly_simple.py | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 212a2a8c2..f1ea87738 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -1,11 +1,9 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*-- # Copyright (c) 2023, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import importlib -from collections import defaultdict import numpy as np import pandas as pd @@ -100,20 +98,24 @@ def _build_model(self) -> AnomalyOutput: for model_name, (model_config, model) in model_config_map.items(): model_config = model_config(**self.spec.model_kwargs) if hasattr(model_config, "target_seq_index"): - model_config.target_seq_index = df.columns.get_loc(self.spec.target_column) + model_config.target_seq_index = df.columns.get_loc( + self.spec.target_column + ) model = model(model_config) - scores = model.train(train_data=data, anomaly_labels=None) try: y_pred = model.get_anomaly_label(data) - y_pred =(y_pred.to_pd().reset_index()["anom_score"] > 0).astype(int) + y_pred = ( + y_pred.to_pd().reset_index()["anom_score"] > 0 + ).astype(int) except Exception as e: y_pred = ( scores.to_pd().reset_index()["anom_score"] > np.percentile( - scores.to_pd().reset_index()["anom_score"], anomaly_threshold + scores.to_pd().reset_index()["anom_score"], + anomaly_threshold, ) ).astype(int) diff --git a/tests/operators/anomaly/test_anomaly_simple.py b/tests/operators/anomaly/test_anomaly_simple.py index 20e392aa2..6d3e7daea 100644 --- a/tests/operators/anomaly/test_anomaly_simple.py +++ b/tests/operators/anomaly/test_anomaly_simple.py @@ -54,6 +54,7 @@ MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest"] + @pytest.mark.parametrize("model", ["autots", "merlion_ad"]) def test_artificial_big(model): all_data = [] From 2e53e128059e64739ddab23587d1584b64cac055 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Wed, 16 Oct 2024 11:16:22 +0000 Subject: [PATCH 06/10] organize imports, fix format, missing commas --- .../operator/lowcode/anomaly/model/autots.py | 45 ++++++++++++------- .../operator/lowcode/anomaly/model/factory.py | 9 ++-- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/autots.py b/ads/opctl/operator/lowcode/anomaly/model/autots.py index 724aa2cae..c795440de 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/autots.py +++ b/ads/opctl/operator/lowcode/anomaly/model/autots.py @@ -5,15 +5,17 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from ads.common.decorator.runtime_dependency import runtime_dependency +from ads.opctl import logger from ads.opctl.operator.lowcode.anomaly.const import OutputColumns + +from ..const import SupportedModels from .anomaly_dataset import AnomalyOutput from .base_model import AnomalyOperatorBaseModel -from ..const import SupportedModels -from ads.opctl import logger class AutoTSOperatorModel(AnomalyOperatorBaseModel): """Class representing AutoTS Anomaly Detection operator model.""" + model_mapping = { "isolationforest": "IsolationForest", "lof": "LOF", @@ -22,30 +24,43 @@ class AutoTSOperatorModel(AnomalyOperatorBaseModel): "rolling_zscore": "rolling_zscore", "mad": "mad", "minmax": "minmax", - "iqr": "IQR" + "iqr": "IQR", } @runtime_dependency( module="autots", err_msg=( - "Please run `pip3 install autots` to " - "install the required dependencies for AutoTS." + "Please run `pip3 install autots` to " + "install the required dependencies for AutoTS." ), ) def _build_model(self) -> AnomalyOutput: from autots.evaluator.anomaly_detector import AnomalyDetector - method = SupportedModels.ISOLATIONFOREST if self.spec.model == SupportedModels.AutoTS else self.spec.model - model_params = {"method": self.model_mapping[method], - "transform_dict": self.spec.model_kwargs.get("transform_dict", {}), - "output": self.spec.model_kwargs.get("output", "univariate"), "method_params": {}} + method = ( + SupportedModels.ISOLATIONFOREST + if self.spec.model == SupportedModels.AutoTS + else self.spec.model + ) + model_params = { + "method": self.model_mapping[method], + "transform_dict": self.spec.model_kwargs.get("transform_dict", {}), + "output": self.spec.model_kwargs.get("output", "univariate"), + "method_params": {}, + } # Supported methods with contamination param - if method in [SupportedModels.ISOLATIONFOREST, SupportedModels.LOF, SupportedModels.EE]: - model_params["method_params"][ - "contamination"] = self.spec.contamination if self.spec.contamination else 0.01 - else: - if self.spec.contamination: - raise ValueError(f"The contamination parameter is not supported for the selected model \"{method}\"") + if method in [ + SupportedModels.ISOLATIONFOREST, + SupportedModels.LOF, + SupportedModels.EE, + ]: + model_params["method_params"]["contamination"] = ( + self.spec.contamination if self.spec.contamination else 0.01 + ) + elif self.spec.contamination: + raise ValueError( + f'The contamination parameter is not supported for the selected model "{method}"' + ) logger.info(f"model params: {model_params}") model = AnomalyDetector(**model_params) diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index a47f17405..71dbaef70 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -4,6 +4,9 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model + +from ..const import NonTimeADSupportedModels, SupportedModels +from ..operator_config import AnomalyOperatorConfig from .anomaly_dataset import AnomalyDatasets from .anomaly_merlion import AnomalyMerlionOperatorModel from .autots import AutoTSOperatorModel @@ -11,8 +14,6 @@ from .isolationforest import IsolationForestOperatorModel from .oneclasssvm import OneClassSVMOperatorModel from .randomcutforest import RandomCutForestOperatorModel -from ..const import NonTimeADSupportedModels, SupportedModels -from ..operator_config import AnomalyOperatorConfig class UnSupportedModelError(Exception): @@ -49,8 +50,8 @@ class AnomalyOperatorModelFactory: SupportedModels.ZSCORE: AutoTSOperatorModel, SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel, SupportedModels.EE: AutoTSOperatorModel, - SupportedModels.MAD: AutoTSOperatorModel - SupportedModels.MerilonAD: AnomalyMerlionOperatorModel + SupportedModels.MAD: AutoTSOperatorModel, + SupportedModels.MerilonAD: AnomalyMerlionOperatorModel, } _NonTime_MAP = { From cb7634b48c3dd56d16dbf5929202f9b3cfc8b60b Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 17 Oct 2024 08:04:49 +0000 Subject: [PATCH 07/10] model segregation, score to be normalized, tests enabled, overlapping meethods removed --- ads/opctl/operator/lowcode/anomaly/const.py | 99 ++++++------- .../lowcode/anomaly/model/anomaly_merlion.py | 135 ++++++++---------- .../operator/lowcode/anomaly/model/factory.py | 11 +- .../operator/lowcode/anomaly/schema.yaml | 11 +- ads/opctl/operator/lowcode/anomaly/utils.py | 3 + .../operators/anomaly/test_anomaly_simple.py | 79 ++++++++-- 6 files changed, 192 insertions(+), 146 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 07d5b5233..5e53979e1 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -5,11 +5,9 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import random + from ads.common.extended_enum import ExtendedEnumMeta from ads.opctl.operator.lowcode.common.const import DataColumns -from merlion.models.anomaly import autoencoder, deep_point_anomaly_detector, isolation_forest, spectral_residual, windstats, windstats_monthly -from merlion.models.anomaly.change_point import bocpd -from merlion.models.forecast import prophet class SupportedModels(str, metaclass=ExtendedEnumMeta): @@ -17,7 +15,6 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta): AutoTS = "autots" Auto = "auto" - MerilonAD = "merlion_ad" IQR = "iqr" LOF = "lof" ZSCORE = "zscore" @@ -26,6 +23,23 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta): EE = "ee" ISOLATIONFOREST = "isolationforest" + # point anomaly + DAGMM = "dagmm" + DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" + LSTM_ED = "lstm_ed" + SPECTRAL_RESIDUAL = "spectral_residual" + VAE = "vae" + + # forecast_based + ARIMA = "arima" + ETS = "ets" + PROPHET = "prophet" + SARIMA = "sarima" + + # changepoint + BOCPD = "bocpd" + + class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta): """Supported non time-based anomaly detection models.""" @@ -34,7 +48,7 @@ class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta): RandomCutForest = "randomcutforest" # TODO : Add DBScan # DBScan = "dbscan" - + class TODSSubModels(str, metaclass=ExtendedEnumMeta): """Supported TODS sub models.""" @@ -66,81 +80,51 @@ class TODSSubModels(str, metaclass=ExtendedEnumMeta): } -class MerlionADSubmodels(str, metaclass=ExtendedEnumMeta): +class MerlionADModels(str, metaclass=ExtendedEnumMeta): """Supported Merlion AD sub models.""" # point anomaly - AUTOENCODER = "autoencoder" DAGMM = "dagmm" - DBL = "dbl" DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector" - ISOLATION_FOREST = "isolation_forest" - LOF = "lof" LSTM_ED = "lstm_ed" - # RANDOM_CUT_FOREST = "random_cut_forest" SPECTRAL_RESIDUAL = "spectral_residual" - STAT_RESIDUAL = "stat_residual" VAE = "vae" - WINDSTATS = "windstats" - WINDSTATS_MONTHLY = "windstats_monthly" - ZMS = "zms" # forecast_based ARIMA = "arima" ETS = "ets" - MSES = "mses" PROPHET = "prophet" SARIMA = "sarima" - #changepoint + # changepoint BOCPD = "bocpd" MERLIONAD_IMPORT_MODEL_MAP = { - MerlionADSubmodels.AUTOENCODER: ".autoendcoder", - MerlionADSubmodels.DAGMM: ".dagmm", - MerlionADSubmodels.DBL: ".dbl", - MerlionADSubmodels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector", - MerlionADSubmodels.ISOLATION_FOREST: ".isolation_forest", - MerlionADSubmodels.LOF: ".lof", - MerlionADSubmodels.LSTM_ED: ".lstm_ed", - # MerlionADSubmodels.RANDOM_CUT_FOREST: ".random_cut_forest", - MerlionADSubmodels.SPECTRAL_RESIDUAL: ".spectral_residual", - MerlionADSubmodels.STAT_RESIDUAL: ".stat_residual", - MerlionADSubmodels.VAE: ".vae", - MerlionADSubmodels.WINDSTATS: ".windstats", - MerlionADSubmodels.WINDSTATS_MONTHLY: ".windstats_monthly", - MerlionADSubmodels.ZMS: ".zms", - MerlionADSubmodels.ARIMA: ".forecast_based.arima", - MerlionADSubmodels.ETS: ".forecast_based.ets", - MerlionADSubmodels.MSES: ".forecast_based.mses", - MerlionADSubmodels.PROPHET: ".forecast_based.prophet", - MerlionADSubmodels.SARIMA: ".forecast_based.sarima", - MerlionADSubmodels.BOCPD: ".change_point.bocpd", + MerlionADModels.DAGMM: ".dagmm", + MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector", + MerlionADModels.LSTM_ED: ".lstm_ed", + MerlionADModels.SPECTRAL_RESIDUAL: ".spectral_residual", + MerlionADModels.VAE: ".vae", + MerlionADModels.ARIMA: ".forecast_based.arima", + MerlionADModels.ETS: ".forecast_based.ets", + MerlionADModels.PROPHET: ".forecast_based.prophet", + MerlionADModels.SARIMA: ".forecast_based.sarima", + MerlionADModels.BOCPD: ".change_point.bocpd", } MERLIONAD_MODEL_MAP = { - MerlionADSubmodels.AUTOENCODER: "AutoEncoder", - MerlionADSubmodels.DAGMM: "DAGMM", - MerlionADSubmodels.DBL: "DynamicBaseline", - MerlionADSubmodels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector", - MerlionADSubmodels.ISOLATION_FOREST: "IsolationForest", - MerlionADSubmodels.LOF: "LOF", - MerlionADSubmodels.LSTM_ED: "LSTMED", - # MerlionADSubmodels.RANDOM_CUT_FOREST: "RandomCutForest", - MerlionADSubmodels.SPECTRAL_RESIDUAL: "SpectralResidual", - MerlionADSubmodels.STAT_RESIDUAL: "StatThreshold", - MerlionADSubmodels.VAE: "VAE", - MerlionADSubmodels.WINDSTATS: "WindStats", - MerlionADSubmodels.WINDSTATS_MONTHLY: "MonthlyWindStats", - MerlionADSubmodels.ZMS: "ZMS", - MerlionADSubmodels.ARIMA: "ArimaDetector", - MerlionADSubmodels.ETS: "ETSDetector", - MerlionADSubmodels.MSES: "MSESDetector", - MerlionADSubmodels.PROPHET: "ProphetDetector", - MerlionADSubmodels.SARIMA: "SarimaDetector", - MerlionADSubmodels.BOCPD: "BOCPD", + MerlionADModels.DAGMM: "DAGMM", + MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector", + MerlionADModels.LSTM_ED: "LSTMED", + MerlionADModels.SPECTRAL_RESIDUAL: "SpectralResidual", + MerlionADModels.VAE: "VAE", + MerlionADModels.ARIMA: "ArimaDetector", + MerlionADModels.ETS: "ETSDetector", + MerlionADModels.PROPHET: "ProphetDetector", + MerlionADModels.SARIMA: "SarimaDetector", + MerlionADModels.BOCPD: "BOCPD", } @@ -182,6 +166,5 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta): Series = DataColumns.Series -MERLION_DEFAULT_MODEL = "prophet" TODS_DEFAULT_MODEL = "ocsvm" SUBSAMPLE_THRESHOLD = 1000 diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index f1ea87738..1a64ff6f9 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -11,7 +11,6 @@ from ads.common.decorator.runtime_dependency import runtime_dependency from ads.opctl.operator.lowcode.anomaly.const import ( - MERLION_DEFAULT_MODEL, MERLIONAD_IMPORT_MODEL_MAP, MERLIONAD_MODEL_MAP, OutputColumns, @@ -31,14 +30,14 @@ class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel): "install the required packages." ), ) - def _get_config_model(self, model_list): + def _get_config_model(self, model_name): """ Returns a dictionary with model names as keys and a list of model config and model object as values. Parameters ---------- - model_list : list - A list of model names. + model_name : str + model name from the Merlion model list. Returns ------- @@ -46,16 +45,15 @@ def _get_config_model(self, model_list): A dictionary with model names as keys and a list of model config and model object as values. """ model_config_map = {} - for model_name in model_list: - model_module = importlib.import_module( - name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name), - package="merlion.models.anomaly", - ) - model_config = getattr( - model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config" - ) - model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name)) - model_config_map[model_name] = [model_config, model] + model_module = importlib.import_module( + name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name), + package="merlion.models.anomaly", + ) + model_config = getattr( + model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config" + ) + model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name)) + model_config_map[model_name] = [model_config, model] return model_config_map def _build_model(self) -> AnomalyOutput: @@ -75,66 +73,57 @@ def _build_model(self) -> AnomalyOutput: anomaly_output = AnomalyOutput(date_column="index") anomaly_threshold = model_kwargs.get("anomaly_threshold", 95) model_config_map = {} - if model_kwargs.get("sub_model", None): - model_config_map = self._get_config_model(model_kwargs.get("sub_model")) - else: - from merlion.models.anomaly.forecast_based.prophet import ( # noqa: I001 - ProphetDetector, - ProphetDetectorConfig, - ) - - model_config_map[MERLION_DEFAULT_MODEL] = [ - ProphetDetectorConfig, - ProphetDetector, - ] - - date_column = self.spec.datetime_column.name - - anomaly_output = AnomalyOutput(date_column=date_column) - # model_objects = defaultdict(list) - for target, df in self.datasets.full_data_dict.items(): - data = df.set_index(date_column) - data = TimeSeries.from_pd(data) - for model_name, (model_config, model) in model_config_map.items(): - model_config = model_config(**self.spec.model_kwargs) - if hasattr(model_config, "target_seq_index"): - model_config.target_seq_index = df.columns.get_loc( - self.spec.target_column + model_config_map = self._get_config_model(self.spec.model) + + date_column = self.spec.datetime_column.name + + anomaly_output = AnomalyOutput(date_column=date_column) + # model_objects = defaultdict(list) + for target, df in self.datasets.full_data_dict.items(): + data = df.set_index(date_column) + data = TimeSeries.from_pd(data) + for model_name, (model_config, model) in model_config_map.items(): + model_config = model_config(**self.spec.model_kwargs) + if hasattr(model_config, "target_seq_index"): + model_config.target_seq_index = df.columns.get_loc( + self.spec.target_column + ) + model = model(model_config) + + scores = model.train(train_data=data, anomaly_labels=None) + scores = scores.to_pd().reset_index() + scores["anom_score"] = ( + scores["anom_score"] - scores["anom_score"].min() + ) / (scores["anom_score"].max() - scores["anom_score"].min()) + + try: + y_pred = model.get_anomaly_label(data) + y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype( + int + ) + except Exception as e: + y_pred = ( + scores["anom_score"] + > np.percentile( + scores["anom_score"], + anomaly_threshold, ) - model = model(model_config) - - scores = model.train(train_data=data, anomaly_labels=None) - - try: - y_pred = model.get_anomaly_label(data) - y_pred = ( - y_pred.to_pd().reset_index()["anom_score"] > 0 - ).astype(int) - except Exception as e: - y_pred = ( - scores.to_pd().reset_index()["anom_score"] - > np.percentile( - scores.to_pd().reset_index()["anom_score"], - anomaly_threshold, - ) - ).astype(int) - - index_col = df.columns[0] - - anomaly = pd.DataFrame( - {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} - ).reset_index(drop=True) - score = pd.DataFrame( - { - index_col: df[index_col], - OutputColumns.SCORE_COL: scores.to_pd().reset_index()[ - "anom_score" - ], - } - ).reset_index(drop=True) - # model_objects[model_name].append(model) - - anomaly_output.add_output(target, anomaly, score) + ).astype(int) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + { + index_col: df[index_col], + OutputColumns.SCORE_COL: scores["anom_score"], + } + ).reset_index(drop=True) + # model_objects[model_name].append(model) + + anomaly_output.add_output(target, anomaly, score) return anomaly_output def _generate_report(self): diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 71dbaef70..10df5733c 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -51,7 +51,16 @@ class AnomalyOperatorModelFactory: SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel, SupportedModels.EE: AutoTSOperatorModel, SupportedModels.MAD: AutoTSOperatorModel, - SupportedModels.MerilonAD: AnomalyMerlionOperatorModel, + SupportedModels.DAGMM: AnomalyMerlionOperatorModel, + SupportedModels.DEEP_POINT_ANOMALY_DETECTOR: AnomalyMerlionOperatorModel, + SupportedModels.LSTM_ED: AnomalyMerlionOperatorModel, + SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel, + SupportedModels.VAE: AnomalyMerlionOperatorModel, + SupportedModels.ARIMA: AnomalyMerlionOperatorModel, + SupportedModels.ETS: AnomalyMerlionOperatorModel, + SupportedModels.PROPHET: AnomalyMerlionOperatorModel, + SupportedModels.SARIMA: AnomalyMerlionOperatorModel, + SupportedModels.BOCPD: AnomalyMerlionOperatorModel, } _NonTime_MAP = { diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index 82d831f6c..aba6c4e82 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -370,7 +370,16 @@ spec: - rolling_zscore - mad - ee - - merlion_ad + - dagmm + - deep_point_anomaly_detector + - lstm_ed + - spectral_residual + - vae + - arima + - ets + - sarima + - bocpd + - prophet meta: description: "The model to be used for anomaly detection" diff --git a/ads/opctl/operator/lowcode/anomaly/utils.py b/ads/opctl/operator/lowcode/anomaly/utils.py index d93b69810..902e7f186 100644 --- a/ads/opctl/operator/lowcode/anomaly/utils.py +++ b/ads/opctl/operator/lowcode/anomaly/utils.py @@ -5,6 +5,7 @@ import os +import numpy as np import pandas as pd from ads.opctl import logger @@ -27,6 +28,8 @@ def _build_metrics_df(y_true, y_pred, column_name): ) metrics = {} + np.nan_to_num(y_true, copy=False) + np.nan_to_num(y_pred, copy=False) metrics[SupportedMetrics.RECALL] = recall_score(y_true, y_pred) metrics[SupportedMetrics.PRECISION] = precision_score(y_true, y_pred) metrics[SupportedMetrics.ACCURACY] = accuracy_score(y_true, y_pred) diff --git a/tests/operators/anomaly/test_anomaly_simple.py b/tests/operators/anomaly/test_anomaly_simple.py index 083e3fd59..658d292a5 100644 --- a/tests/operators/anomaly/test_anomaly_simple.py +++ b/tests/operators/anomaly/test_anomaly_simple.py @@ -3,20 +3,44 @@ # Copyright (c) 2023, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -from ads.opctl.operator.lowcode.anomaly.const import NonTimeADSupportedModels, SupportedModels -import yaml +import os import subprocess -import pandas as pd -import pytest -from time import sleep -from copy import deepcopy import tempfile -import os -import numpy as np +from copy import deepcopy from datetime import datetime -from ads.opctl.operator.cmd import run +from time import sleep -MODELS = ["autots", "iqr", "lof", "zscore", "rolling_zscore", "mad", "ee", "isolationforest"] +import numpy as np +import pandas as pd +import pytest +import yaml + +from ads.opctl.operator.cmd import run +from ads.opctl.operator.lowcode.anomaly.const import ( + NonTimeADSupportedModels, + SupportedModels, +) + +MODELS = [ + "autots", + "iqr", + "lof", + "zscore", + "rolling_zscore", + "mad", + "ee", + "isolationforest", + "dagmm", + "deep_point_anomaly_detector", + "lstm_ed", + "spectral_residual", + "vae", + "arima", + "ets", + "prophet", + "sarima", + "bocpd", +] # Mandatory YAML parameters TEMPLATE_YAML = { @@ -51,10 +75,28 @@ for d in DATASETS: parameters_short.append((m, d)) -MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest"] +# "autoencoder", "stat_residual", "mses",, "dbl", +# "windstats", "windstats_monthly", "zms", + +MODELS = [ + "autots", + "oneclasssvm", + "isolationforest", + "randomcutforest", + "dagmm", + "deep_point_anomaly_detector", + "lstm_ed", + "spectral_residual", + "vae", + "arima", + "ets", + "prophet", + "sarima", + "bocpd", +] -@pytest.mark.parametrize("model", ["autots", "merlion_ad"]) +@pytest.mark.parametrize("model", ["autots"]) def test_artificial_big(model): all_data = [] TARGET_COLUMN = "sensor" @@ -125,6 +167,10 @@ def test_artificial_small(model): np.concatenate([d1, d2, outliers], axis=0), columns=["val_1", "val_2"] ) d = d.reset_index().rename({"index": "ds"}, axis=1) + if model not in NonTimeADSupportedModels.values(): + d["ds"] = pd.date_range( + datetime.today(), periods=d.shape[0], freq="1D" + ).strftime("%Y-%m-%d") with tempfile.TemporaryDirectory() as tmpdirname: anomaly_yaml_filename = f"{tmpdirname}/anomaly.yaml" input_data = f"{tmpdirname}/data.csv" @@ -139,6 +185,7 @@ def test_artificial_small(model): yaml_i["spec"]["contamination"] = 0.3 if model in NonTimeADSupportedModels.values(): del yaml_i["spec"]["datetime_column"] + yaml_i["spec"]["target_column"] = "val_1" # run(yaml_i, debug=False) @@ -172,6 +219,9 @@ def test_validation(model): ) if model not in NonTimeADSupportedModels.values(): d = d.reset_index().rename({"index": "ds"}, axis=1) + d["ds"] = pd.date_range( + datetime.today(), periods=d.shape[0], freq="1D" + ).strftime("%Y-%m-%d") anomaly_col["ds"] = d["ds"] v = d.copy() v["anomaly"] = anomaly_col["anomaly"] @@ -218,11 +268,14 @@ def test_load_datasets(model, data_dict): yaml_i = deepcopy(TEMPLATE_YAML) yaml_i["spec"]["model"] = model yaml_i["spec"]["input_data"]["url"] = data_dict["url"] - if model in set(NonTimeADSupportedModels.values()) - set(SupportedModels.values()): + if model in set(NonTimeADSupportedModels.values()) - set( + SupportedModels.values() + ): del yaml_i["spec"]["datetime_column"] else: yaml_i["spec"]["datetime_column"]["name"] = data_dict["dt_col"] yaml_i["spec"]["output_directory"]["url"] = output_dirname + yaml_i["spec"]["target_column"] = data_dict["target"] # run(yaml_i, backend="operator.local", debug=False) From a2c82578a7972ed8eba542a30f7078879e526f25 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 17 Oct 2024 10:35:33 +0000 Subject: [PATCH 08/10] remove py38 as eol, add 3.11 --- .github/workflows/run-forecast-unit-tests.yml | 2 +- .github/workflows/run-operators-unit-tests.yml | 2 +- ads/opctl/operator/lowcode/anomaly/const.py | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-forecast-unit-tests.yml b/.github/workflows/run-forecast-unit-tests.yml index 1501862f5..7826c5b7c 100644 --- a/.github/workflows/run-forecast-unit-tests.yml +++ b/.github/workflows/run-forecast-unit-tests.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/run-operators-unit-tests.yml b/.github/workflows/run-operators-unit-tests.yml index d0fe4d9c7..239ee56c5 100644 --- a/.github/workflows/run-operators-unit-tests.yml +++ b/.github/workflows/run-operators-unit-tests.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4 diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index 5e53979e1..ea2bb7509 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -4,8 +4,6 @@ # Copyright (c) 2023 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import random - from ads.common.extended_enum import ExtendedEnumMeta from ads.opctl.operator.lowcode.common.const import DataColumns From 22203519bea69da10afdb1783a190f6770e45a80 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 17 Oct 2024 12:28:38 +0000 Subject: [PATCH 09/10] add alm_threshold --- .../lowcode/anomaly/model/anomaly_merlion.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 1a64ff6f9..6a0f056a4 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +from merlion.post_process.threshold import AggregateAlarms from merlion.utils import TimeSeries from ads.common.decorator.runtime_dependency import runtime_dependency @@ -83,7 +84,16 @@ def _build_model(self) -> AnomalyOutput: data = df.set_index(date_column) data = TimeSeries.from_pd(data) for model_name, (model_config, model) in model_config_map.items(): - model_config = model_config(**self.spec.model_kwargs) + model_config = model_config( + **{ + **self.spec.model_kwargs, + "threshold": AggregateAlarms( + alm_threshold=model_kwargs.get("alm_threshold") + if model_kwargs.get("alm_threshold") + else None + ), + } + ) if hasattr(model_config, "target_seq_index"): model_config.target_seq_index = df.columns.get_loc( self.spec.target_column From b01e0229386b7b85cc1936208b9032a25b5f3b42 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 17 Oct 2024 16:42:00 +0000 Subject: [PATCH 10/10] add alm_threshold --- .../lowcode/anomaly/model/anomaly_merlion.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py index 6a0f056a4..cc1e80b52 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +++ b/ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py @@ -15,6 +15,7 @@ MERLIONAD_IMPORT_MODEL_MAP, MERLIONAD_MODEL_MAP, OutputColumns, + SupportedModels, ) from .anomaly_dataset import AnomalyOutput @@ -84,16 +85,19 @@ def _build_model(self) -> AnomalyOutput: data = df.set_index(date_column) data = TimeSeries.from_pd(data) for model_name, (model_config, model) in model_config_map.items(): - model_config = model_config( - **{ - **self.spec.model_kwargs, - "threshold": AggregateAlarms( - alm_threshold=model_kwargs.get("alm_threshold") - if model_kwargs.get("alm_threshold") - else None - ), - } - ) + if self.spec.model == SupportedModels.BOCPD: + model_config = model_config(**self.spec.model_kwargs) + else: + model_config = model_config( + **{ + **self.spec.model_kwargs, + "threshold": AggregateAlarms( + alm_threshold=model_kwargs.get("alm_threshold") + if model_kwargs.get("alm_threshold") + else None + ), + } + ) if hasattr(model_config, "target_seq_index"): model_config.target_seq_index = df.columns.get_loc( self.spec.target_column