From 077ec398a6ff3db3efa037bc2998fccf13faac27 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Fri, 2 Aug 2024 07:40:59 +0000 Subject: [PATCH 01/14] intial commit rcf --- ads/opctl/operator/lowcode/anomaly/const.py | 1 + .../operator/lowcode/anomaly/model/factory.py | 2 + .../lowcode/anomaly/model/randomcutforest.py | 81 +++++++++++++++++++ .../operator/lowcode/anomaly/schema.yaml | 1 + pyproject.toml | 2 + test-requirements-operators.txt | 1 + 6 files changed, 88 insertions(+) create mode 100644 ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index ff0e0fd22..73bac624b 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -21,6 +21,7 @@ class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta): OneClassSVM = "oneclasssvm" IsolationForest = "isolationforest" + RandomCutForest = "randomcutforest" # TODO : Add DBScan # DBScan = "dbscan" diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 49adfb04f..64028cba2 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -15,6 +15,7 @@ from .base_model import AnomalyOperatorBaseModel from .isolationforest import IsolationForestOperatorModel from .oneclasssvm import OneClassSVMOperatorModel +from .randomcutforest import RandomCutForestOperatorModel class UnSupportedModelError(Exception): @@ -52,6 +53,7 @@ class AnomalyOperatorModelFactory: _NonTime_MAP = { NonTimeADSupportedModels.OneClassSVM: OneClassSVMOperatorModel, NonTimeADSupportedModels.IsolationForest: IsolationForestOperatorModel, + NonTimeADSupportedModels.RandomCutForest: RandomCutForestOperatorModel, # TODO: Add DBScan model for non time based anomaly # NonTimeADSupportedModels.DBScan: DBScanOperatorModel, } diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py new file mode 100644 index 000000000..8c7483912 --- /dev/null +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Copyright (c) 2023, 2024 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +import numpy as np +import pandas as pd + +from ads.common.decorator.runtime_dependency import runtime_dependency +from ads.opctl.operator.lowcode.anomaly.const import OutputColumns + +from .anomaly_dataset import AnomalyOutput +from .base_model import AnomalyOperatorBaseModel + + +class RandomCutForestOperatorModel(AnomalyOperatorBaseModel): + """ + Class representing Random Cut Forest Anomaly Detection operator model. + """ + + @runtime_dependency( + module="rrcf", + err_msg=( + "Please run `pip install rrcf` to " + "install the required dependencies for RandomCutForest." 
+ ), + ) + def _build_model(self) -> AnomalyOutput: + from rrcf import RCTree + + model_kwargs = self.spec.model_kwargs + # map the output as per anomaly dataset class, 1: outlier, 0: inlier + self.outlier_map = {1: 0, -1: 1} + + anomaly_output = AnomalyOutput(date_column="index") + #TODO: PDB + import pdb + + pdb.set_trace() + + for target, df in self.datasets.full_data_dict.items(): + model = RCTree(**model_kwargs) + model.fit(df) + y_pred = model.predict(df) + y_pred = np.vectorize(self.outlier_map.get)(y_pred) + + scores = model.score_samples(df) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + {"index": df[index_col], OutputColumns.SCORE_COL: scores} + ).reset_index(drop=True) + + anomaly_output.add_output(target, anomaly, score) + + return anomaly_output + + def _generate_report(self): + """Generates the report.""" + import report_creator as rc + + other_sections = [ + rc.Heading("Selected Models Overview", level=2), + rc.Text( + "The following tables provide information regarding the chosen model." + ), + ] + + model_description = rc.Text( + "The Random Cut Forest (RCF) is an unsupervised machine learning algorithm that is used for anomaly detection." + " It works by building an ensemble of binary trees (random cut trees) and using them to compute anomaly scores for data points." + ) + + return ( + model_description, + other_sections, + ) diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index e6c6cd998..bb5caa6ec 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -363,6 +363,7 @@ spec: - auto - oneclasssvm - isolationforest + - randomcutforest meta: description: "The model to be used for anomaly detection" diff --git a/pyproject.toml b/pyproject.toml index fcc1310f4..4416fd34d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,6 +175,8 @@ anomaly = [ "autots", "oracledb", "report-creator==1.0.9", + "rrcf==0.4.4", + "scikit-learn" ] recommender = [ "oracle_ads[opctl]", diff --git a/test-requirements-operators.txt b/test-requirements-operators.txt index 838418393..64b1ba683 100644 --- a/test-requirements-operators.txt +++ b/test-requirements-operators.txt @@ -1,5 +1,6 @@ -r test-requirements.txt -e ".[forecast]" +-e ".[anomaly]" -e ".[recommender]" -e ".[feature-store-marketplace]" plotly From 97c78b0d6e07913fb4dd7c7bcb5548b52e1096f7 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 22 Aug 2024 04:13:31 +0000 Subject: [PATCH 02/14] draft commit rcf --- .../lowcode/anomaly/model/randomcutforest.py | 64 ++++++++++++++----- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 8c7483912..818042b02 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -30,32 +30,62 @@ def _build_model(self) -> AnomalyOutput: model_kwargs = self.spec.model_kwargs # map the output as per anomaly dataset class, 1: outlier, 0: inlier - self.outlier_map = {1: 0, -1: 1} + # self.outlier_map = {1: 0, -1: 1} anomaly_output = AnomalyOutput(date_column="index") - #TODO: PDB - import pdb + # TODO: PDB - pdb.set_trace() + # Set tree parameters + num_trees = model_kwargs.get("num_trees", 200) + shingle_size = 
model_kwargs.get("shingle_size", 1) + tree_size = model_kwargs.get("tree_size", 1000) for target, df in self.datasets.full_data_dict.items(): - model = RCTree(**model_kwargs) - model.fit(df) - y_pred = model.predict(df) - y_pred = np.vectorize(self.outlier_map.get)(y_pred) + df_values = df[self.spec.target_column].astype(float).values + points = np.vstack(list(rrcf.shingle(df_values, size=4))) - scores = model.score_samples(df) + sample_size_range = (1, 6) + n = points.shape[0] + avg_codisp = pd.Series(0.0, index=np.arange(n)) + index = np.zeros(n) - index_col = df.columns[0] + forest = [] + while len(forest) < num_trees: + ixs = np.random.choice(n, size=sample_size_range, replace=False) + trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs] + forest.extend(trees) + print(len(forest)) - anomaly = pd.DataFrame( - {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} - ).reset_index(drop=True) - score = pd.DataFrame( - {"index": df[index_col], OutputColumns.SCORE_COL: scores} - ).reset_index(drop=True) + for tree in forest: + codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves}) + avg_codisp[codisp.index] += codisp + np.add.at(index, codisp.index.values, 1) - anomaly_output.add_output(target, anomaly, score) + avg_codisp /= index + avg_codisp.index = df.iloc[(4 - 1) :].index + avg_codisp = (avg_codisp - avg_codisp.min()) / ( + avg_codisp.max() - avg_codisp.min() + ) + + y_pred = (avg_codisp > np.percentile(avg_codisp, 95)).astype(int) + + import pdb + + pdb.set_trace() + print("Done") + + # scores = model.score_samples(df) + + # index_col = df.columns[0] + + # anomaly = pd.DataFrame( + # {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + # ).reset_index(drop=True) + # score = pd.DataFrame( + # {"index": df[index_col], OutputColumns.SCORE_COL: scores} + # ).reset_index(drop=True) + + # anomaly_output.add_output(target, anomaly, score) return anomaly_output From 8dbccaec0481f0c944e8cf0f2ddcdd53e0a773f0 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 22 Aug 2024 04:17:17 +0000 Subject: [PATCH 03/14] draft commit rcf --- .../operator/lowcode/anomaly/model/randomcutforest.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 818042b02..38d1c224e 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -33,7 +33,6 @@ def _build_model(self) -> AnomalyOutput: # self.outlier_map = {1: 0, -1: 1} anomaly_output = AnomalyOutput(date_column="index") - # TODO: PDB # Set tree parameters num_trees = model_kwargs.get("num_trees", 200) @@ -42,8 +41,11 @@ def _build_model(self) -> AnomalyOutput: for target, df in self.datasets.full_data_dict.items(): df_values = df[self.spec.target_column].astype(float).values + + # TODO: Update size to log logic points = np.vstack(list(rrcf.shingle(df_values, size=4))) + # TODO: remove hardcode sample_size_range = (1, 6) n = points.shape[0] avg_codisp = pd.Series(0.0, index=np.arange(n)) @@ -62,16 +64,19 @@ def _build_model(self) -> AnomalyOutput: np.add.at(index, codisp.index.values, 1) avg_codisp /= index + # TODO: remove hardcode avg_codisp.index = df.iloc[(4 - 1) :].index avg_codisp = (avg_codisp - avg_codisp.min()) / ( avg_codisp.max() - avg_codisp.min() ) + # TODO: use model kwargs for percentile threshold y_pred = (avg_codisp > np.percentile(avg_codisp, 95)).astype(int) - import 
pdb + # TODO: rem pdb + # import pdb - pdb.set_trace() + # pdb.set_trace() print("Done") # scores = model.score_samples(df) From 8b5c6995c932229c53cb3b2e21cd250dca806bea Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Fri, 23 Aug 2024 07:26:24 +0000 Subject: [PATCH 04/14] update the todos, complete implementation --- .../lowcode/anomaly/model/base_model.py | 87 ++++++++------ .../lowcode/anomaly/model/randomcutforest.py | 110 +++++++++--------- 2 files changed, 106 insertions(+), 91 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/base_model.py b/ads/opctl/operator/lowcode/anomaly/model/base_model.py index e909976d8..983228ba5 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py +++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py @@ -55,6 +55,7 @@ def __init__(self, config: AnomalyOperatorConfig, datasets: AnomalyDatasets): def generate_report(self): """Generates the report.""" import matplotlib.pyplot as plt + plt.rcParams.update({'figure.max_open_warning': 0}) import report_creator as rc start_time = time.time() @@ -87,43 +88,57 @@ def generate_report(self): self.spec.datetime_column.name if self.spec.datetime_column else "index" ) + ( + model_description, + other_sections, + ) = self._generate_report() + blocks = [] for target, df in self.datasets.full_data_dict.items(): - figure_blocks = [] - time_col = df[date_column].reset_index(drop=True) - anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[ - OutputColumns.ANOMALY_COL - ] - anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1] - downsampled_time_col = time_col - selected_indices = list(range(len(time_col))) - if self.spec.subsample_report_data: - non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices] - # Downsample non-anomalous data if it exceeds the threshold (1000) - if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD: - downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD] - selected_indices = anomaly_indices + downsampled_non_anomaly_indices - selected_indices.sort() - downsampled_time_col = time_col[selected_indices] - - columns = set(df.columns).difference({date_column}) - for col in columns: - y = df[col].reset_index(drop=True) - - downsampled_y = y[selected_indices] - - fig, ax = plt.subplots(figsize=(8, 3), layout="constrained") - ax.grid() - ax.plot(downsampled_time_col, downsampled_y, color="black") - # Plot anomalies - for i in anomaly_indices: - ax.scatter(time_col[i], y[i], color="red", marker="o") - plt.xlabel(date_column) - plt.ylabel(col) - plt.title(f"`{col}` with reference to anomalies") - figure_blocks.append(rc.Widget(ax)) - - blocks.append(rc.Group(*figure_blocks, label=target)) + if target in anomaly_output.list_categories(): + figure_blocks = [] + time_col = df[date_column].reset_index(drop=True) + anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[ + OutputColumns.ANOMALY_COL + ] + anomaly_indices = [ + i for i, index in enumerate(anomaly_col) if index == 1 + ] + downsampled_time_col = time_col + selected_indices = list(range(len(time_col))) + if self.spec.subsample_report_data: + non_anomaly_indices = [ + i for i in range(len(time_col)) if i not in anomaly_indices + ] + # Downsample non-anomalous data if it exceeds the threshold (1000) + if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD: + downsampled_non_anomaly_indices = non_anomaly_indices[ + :: len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD + ] + selected_indices = ( + 
anomaly_indices + downsampled_non_anomaly_indices + ) + selected_indices.sort() + downsampled_time_col = time_col[selected_indices] + + columns = set(df.columns).difference({date_column}) + for col in columns: + y = df[col].reset_index(drop=True) + + downsampled_y = y[selected_indices] + + fig, ax = plt.subplots(figsize=(8, 3), layout="constrained") + ax.grid() + ax.plot(downsampled_time_col, downsampled_y, color="black") + # Plot anomalies + for i in anomaly_indices: + ax.scatter(time_col[i], y[i], color="red", marker="o") + plt.xlabel(date_column) + plt.ylabel(col) + plt.title(f"`{col}` with reference to anomalies") + figure_blocks.append(rc.Widget(ax)) + + blocks.append(rc.Group(*figure_blocks, label=target)) plots = rc.Select(blocks) report_sections = [] @@ -133,7 +148,7 @@ def generate_report(self): yaml_appendix = rc.Yaml(self.config.to_dict()) summary = rc.Block( rc.Group( - rc.Text(f"You selected the **`{self.spec.model}`** model."), + rc.Text(f"You selected the **`{self.spec.model}`** model.\n{model_description.text}\n"), rc.Text( "Based on your dataset, you could have also selected " f"any of the models: `{'`, `'.join(SupportedModels.keys() if self.spec.datetime_column else NonTimeADSupportedModels.keys())}`." diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 38d1c224e..730938d03 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -7,6 +7,7 @@ import pandas as pd from ads.common.decorator.runtime_dependency import runtime_dependency +from ads.opctl import logger from ads.opctl.operator.lowcode.anomaly.const import OutputColumns from .anomaly_dataset import AnomalyOutput @@ -29,68 +30,67 @@ def _build_model(self) -> AnomalyOutput: from rrcf import RCTree model_kwargs = self.spec.model_kwargs - # map the output as per anomaly dataset class, 1: outlier, 0: inlier - # self.outlier_map = {1: 0, -1: 1} anomaly_output = AnomalyOutput(date_column="index") # Set tree parameters num_trees = model_kwargs.get("num_trees", 200) - shingle_size = model_kwargs.get("shingle_size", 1) - tree_size = model_kwargs.get("tree_size", 1000) + shingle_size = model_kwargs.get("shingle_size", None) + anamoly_threshold = model_kwargs.get("anamoly_threshold", 95) for target, df in self.datasets.full_data_dict.items(): - df_values = df[self.spec.target_column].astype(float).values - - # TODO: Update size to log logic - points = np.vstack(list(rrcf.shingle(df_values, size=4))) - - # TODO: remove hardcode - sample_size_range = (1, 6) - n = points.shape[0] - avg_codisp = pd.Series(0.0, index=np.arange(n)) - index = np.zeros(n) - - forest = [] - while len(forest) < num_trees: - ixs = np.random.choice(n, size=sample_size_range, replace=False) - trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs] - forest.extend(trees) - print(len(forest)) - - for tree in forest: - codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves}) - avg_codisp[codisp.index] += codisp - np.add.at(index, codisp.index.values, 1) - - avg_codisp /= index - # TODO: remove hardcode - avg_codisp.index = df.iloc[(4 - 1) :].index - avg_codisp = (avg_codisp - avg_codisp.min()) / ( - avg_codisp.max() - avg_codisp.min() - ) - - # TODO: use model kwargs for percentile threshold - y_pred = (avg_codisp > np.percentile(avg_codisp, 95)).astype(int) - - # TODO: rem pdb - # import pdb - - # pdb.set_trace() - print("Done") - - # scores = 
model.score_samples(df) - - # index_col = df.columns[0] - - # anomaly = pd.DataFrame( - # {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} - # ).reset_index(drop=True) - # score = pd.DataFrame( - # {"index": df[index_col], OutputColumns.SCORE_COL: scores} - # ).reset_index(drop=True) - - # anomaly_output.add_output(target, anomaly, score) + try: + if df.shape[0] == 1: + raise ValueError("Dataset size must be greater than 1") + df_values = df[self.spec.target_column].astype(float).values + + cal_shingle_size = ( + shingle_size + if shingle_size + else int(2 ** np.floor(np.log2(df.shape[0])) / 2) + ) + points = np.vstack(list(rrcf.shingle(df_values, size=cal_shingle_size))) + + sample_size_range = (1, points.shape[0]) + n = points.shape[0] + avg_codisp = pd.Series(0.0, index=np.arange(n)) + index = np.zeros(n) + + forest = [] + while len(forest) < num_trees: + ixs = np.random.choice(n, size=sample_size_range, replace=False) + trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs] + forest.extend(trees) + + for tree in forest: + codisp = pd.Series( + {leaf: tree.codisp(leaf) for leaf in tree.leaves} + ) + avg_codisp[codisp.index] += codisp + np.add.at(index, codisp.index.values, 1) + + avg_codisp /= index + avg_codisp.index = df.iloc[(cal_shingle_size - 1) :].index + avg_codisp = (avg_codisp - avg_codisp.min()) / ( + avg_codisp.max() - avg_codisp.min() + ) + + y_pred = ( + avg_codisp > np.percentile(avg_codisp, anamoly_threshold) + ).astype(int) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: y_pred.index, OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + {"index": avg_codisp.index, OutputColumns.SCORE_COL: avg_codisp} + ).reset_index(drop=True) + + anomaly_output.add_output(target, anomaly, score) + except Exception as e: + logger.warn(f"Encountered Error: {e}. 
Skipping series {target}.") return anomaly_output From 8f4f3629c9f9621b755b607f42eea2cff0be5fd5 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 26 Aug 2024 07:21:42 +0000 Subject: [PATCH 05/14] update third party licenses --- THIRD_PARTY_LICENSES.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 1bd2e036c..6ee168932 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -453,6 +453,12 @@ mlforecast * Source code: https://github.com/Nixtla/mlforecast * Project home: https://github.com/Nixtla/mlforecast +rrcf +* Copyright 2018 kLabUM +* License: MIT License +* Source code: https://github.com/kLabUM/rrcf +* Project home: https://github.com/kLabUM/rrcf + ======= =============================== Licenses =============================== ------------------------------------------------------------------------ From c0d05651c20f903cfedf3bc9b00f8792f5dc4537 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 26 Aug 2024 07:41:01 +0000 Subject: [PATCH 06/14] fix typo --- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 730938d03..e2b8b9d5a 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -36,7 +36,7 @@ def _build_model(self) -> AnomalyOutput: # Set tree parameters num_trees = model_kwargs.get("num_trees", 200) shingle_size = model_kwargs.get("shingle_size", None) - anamoly_threshold = model_kwargs.get("anamoly_threshold", 95) + anomaly_threshold = model_kwargs.get("anamoly_threshold", 95) for target, df in self.datasets.full_data_dict.items(): try: @@ -76,7 +76,7 @@ def _build_model(self) -> AnomalyOutput: ) y_pred = ( - avg_codisp > np.percentile(avg_codisp, anamoly_threshold) + avg_codisp > np.percentile(avg_codisp, anomaly_threshold) ).astype(int) index_col = df.columns[0] From c54e341154601ca7d1a8938e036e4643a6dc2a44 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Tue, 3 Sep 2024 07:13:06 +0000 Subject: [PATCH 07/14] enable tests --- ads/opctl/operator/lowcode/anomaly/model/base_model.py | 10 ++++++++-- tests/operators/anomaly/test_anomaly_simple.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/base_model.py b/ads/opctl/operator/lowcode/anomaly/model/base_model.py index 983228ba5..e8de5213e 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py +++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py @@ -16,7 +16,11 @@ from ads.common.object_storage_details import ObjectStorageDetails from ads.opctl import logger -from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD +from ads.opctl.operator.lowcode.anomaly.const import ( + SUBSAMPLE_THRESHOLD, + OutputColumns, + SupportedMetrics, +) from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer from ads.opctl.operator.lowcode.common.utils import ( disable_print, @@ -137,8 +141,10 @@ def generate_report(self): plt.ylabel(col) plt.title(f"`{col}` with reference to anomalies") figure_blocks.append(rc.Widget(ax)) + else: + figure_blocks = None - blocks.append(rc.Group(*figure_blocks, label=target)) + blocks.append(rc.Group(*figure_blocks, label=target)) if figure_blocks else None plots = rc.Select(blocks) 
report_sections = [] diff --git a/tests/operators/anomaly/test_anomaly_simple.py b/tests/operators/anomaly/test_anomaly_simple.py index b94c49007..aac4dad3e 100644 --- a/tests/operators/anomaly/test_anomaly_simple.py +++ b/tests/operators/anomaly/test_anomaly_simple.py @@ -52,7 +52,7 @@ for d in DATASETS: parameters_short.append((m, d)) -MODELS = ["autots", "oneclasssvm", "isolationforest"] +MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest"] @pytest.mark.parametrize("model", ["autots"]) def test_artificial_big(model): From d53a46e633069da8963999b182d80dc28cc4cd3b Mon Sep 17 00:00:00 2001 From: Mayoor Rao Date: Mon, 16 Sep 2024 21:34:22 -0700 Subject: [PATCH 08/14] Update manifest update --- ads/opctl/conda/cmds.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ads/opctl/conda/cmds.py b/ads/opctl/conda/cmds.py index d24c4f0d1..0fd34ccff 100644 --- a/ads/opctl/conda/cmds.py +++ b/ads/opctl/conda/cmds.py @@ -181,29 +181,29 @@ def _create( logger.info( f"Preparing manifest. Manifest in the environment: {conda_dep.get('manifest')}" ) - manifest = _fetch_manifest_template() + manifest_template = _fetch_manifest_template() if "name" not in manifest: - manifest["manifest"]["name"] = name - manifest["manifest"]["slug"] = slug + manifest_template["manifest"]["name"] = name + manifest_template["manifest"]["slug"] = slug if "type" not in manifest: logger.info("Setting manifest to published") - manifest["manifest"]["type"] = "published" + manifest_template["manifest"]["type"] = "published" if "version" not in manifest: - manifest["manifest"]["version"] = version - manifest["manifest"]["arch_type"] = "GPU" if gpu else "CPU" + manifest_template["manifest"]["version"] = version + manifest_template["manifest"]["arch_type"] = "GPU" if gpu else "CPU" - manifest["manifest"]["create_date"] = datetime.utcnow().strftime( + manifest_template["manifest"]["create_date"] = datetime.utcnow().strftime( "%a, %b %d, %Y, %H:%M:%S %Z UTC" ) if not "manifest_version" in manifest: - manifest["manifest"]["manifest_version"] = "1.0" + manifest_template["manifest"]["manifest_version"] = "1.0" logger.info(f"Creating conda environment {slug}") manifest_dict = { - k: manifest["manifest"][k] - for k in manifest["manifest"] - if manifest["manifest"][k] + k: manifest_template["manifest"][k] + for k in manifest_template["manifest"] + if manifest_template["manifest"][k] } if "manifest" in conda_dep: conda_dep["manifest"].update(manifest_dict) From 7e2337b5994f798cc3f5a5b2c3be02995400d2cc Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 14:05:19 -0400 Subject: [PATCH 09/14] Update release note for 2.11.18. --- docs/source/release_notes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 42af9030f..6d45c3ed0 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -2,6 +2,12 @@ Release Notes ============= +2.11.18 +------- +Release date: September 19, 2024 + +* Added ``with_artifact()`` in ``ContainerRuntime`` class to support running container job with additional artifact. + 2.11.17 ------- Release date: August 9, 2024 From 141f10c1fcf6a41493b186309eab60d33078c0f3 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 14:05:33 -0400 Subject: [PATCH 10/14] Update release version to 2.11.18. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 630902cff..75cbd1b9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi" # Required name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below -version = "2.11.17" +version = "2.11.18" # Optional description = "Oracle Accelerated Data Science SDK" From 37794526f2e185e0bcbac2aebf89a28e2426ec12 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 19:54:13 -0400 Subject: [PATCH 11/14] Add requirement fiona<=1.9.6 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 75cbd1b9d..8a711f482 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ data = [ ] geo = [ "geopandas<1.0.0", # in v1.0.0 removed the built-in dataset 'naturalearth_lowres', fix when relax version of geopandas needed + "fiona<=1.9.6", "oracle_ads[viz]" ] huggingface = [ From 2db4abe775cd90d087b9720e04bb1d6c1daee4de Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 19:54:37 -0400 Subject: [PATCH 12/14] Update release notes. --- docs/source/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 6d45c3ed0..7322fe732 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -4,7 +4,7 @@ Release Notes 2.11.18 ------- -Release date: September 19, 2024 +Release date: September 20, 2024 * Added ``with_artifact()`` in ``ContainerRuntime`` class to support running container job with additional artifact. From 357d5dd6a49b741f5260826e8040dcffde23e894 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 19:55:07 -0400 Subject: [PATCH 13/14] Update container runtime docs. --- docs/source/user_guide/jobs/run_container.rst | 2 +- docs/source/user_guide/jobs/tabs/container_runtime.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/user_guide/jobs/run_container.rst b/docs/source/user_guide/jobs/run_container.rst index 9c469c555..b148831bc 100644 --- a/docs/source/user_guide/jobs/run_container.rst +++ b/docs/source/user_guide/jobs/run_container.rst @@ -22,7 +22,7 @@ Here is an example to create and run a container job: To configure ``ContainerRuntime``, you must specify the container ``image``. Similar to other runtime, you can add environment variables. -You can optionally specify the `entrypoint`, `cmd`, `image_digest` and `image_signature_id` for running the container. +You can optionally specify the `entrypoint`, `cmd`, `image_digest` and `image_signature_id` for running the container. You may also add additional artifact (file or directory) if needed. Please note that if you add a directory, it will be compressed as a zip file under `/home/datascience` and you will need to unzip if in your container. 
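For illustration, a runtime using an additional directory artifact might be configured along these lines (a sketch only: ``with_artifact()`` is the method added in this release, and the exact zip name under ``/home/datascience`` depends on your artifact):

.. code-block:: python

    from ads.jobs import ContainerRuntime

    runtime = (
        ContainerRuntime()
        .with_image("<region>.ocir.io/<tenancy>/<image>:<tag>")
        .with_environment_variable(GREETINGS="Welcome to OCI Data Science")
        .with_entrypoint(["/bin/sh", "-c"])
        # A directory artifact is delivered as a zip under /home/datascience;
        # unzip it before running your code (adjust the zip name to match).
        .with_cmd(
            "unzip /home/datascience/my_artifact.zip -d /home/datascience"
            " && echo $GREETINGS"
        )
        .with_artifact("path/to/my_artifact")
    )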
See also: diff --git a/docs/source/user_guide/jobs/tabs/container_runtime.rst b/docs/source/user_guide/jobs/tabs/container_runtime.rst index 0ef47d152..cccf7c172 100644 --- a/docs/source/user_guide/jobs/tabs/container_runtime.rst +++ b/docs/source/user_guide/jobs/tabs/container_runtime.rst @@ -33,6 +33,7 @@ .with_environment_variable(GREETINGS="Welcome to OCI Data Science") .with_entrypoint(["/bin/sh", "-c"]) .with_cmd("sleep 5 && echo $GREETINGS") + .artifact("") ) ) @@ -69,6 +70,7 @@ - name: GREETINGS value: Welcome to OCI Data Science image: .ocir.io// + scriptPathURI: path/to/artifact .. code-block:: python From abbdf010ecda00fdc53362155d72c83cb58a2286 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Fri, 20 Sep 2024 12:01:27 -0400 Subject: [PATCH 14/14] Add fiona to THIRD_PARTY_LICENSES.txt --- THIRD_PARTY_LICENSES.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 6ee168932..197066ec8 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -72,6 +72,12 @@ fastavro * Source code: https://github.com/fastavro/fastavro * Project home: https://github.com/fastavro/fastavro +fiona +* Copyright (c) 2007, Sean C. Gillies +* License: BSD 3-Clause "New" or "Revised" License +* Source code: https://github.com/Toblerity/Fiona +* Project home: https://github.com/Toblerity/Fiona + folium * Copyright (C) 2013, Rob Story * License: MIT License
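Taken together, the RandomCutForest patches shingle each series, build a forest of random cut trees with ``rrcf``, average the collusive displacement (CoDisp) of every point across the forest, min-max normalize the scores, and flag points above a percentile threshold. The standalone sketch below condenses that scoring logic on a synthetic series (assuming ``rrcf``, ``numpy`` and ``pandas`` are installed; the defaults mirror the operator's ``model_kwargs``, while the data and the helper name are illustrative):

.. code-block:: python

    import numpy as np
    import pandas as pd
    import rrcf


    def rcf_anomaly_scores(values, num_trees=200, shingle_size=None, threshold_pct=95):
        """Score a 1-D series with a random cut forest and flag outliers."""
        values = np.asarray(values, dtype=float)
        if values.shape[0] <= 1:
            raise ValueError("Dataset size must be greater than 1")

        # Default shingle size: half of the largest power of two <= len(values)
        if shingle_size is None:
            shingle_size = int(2 ** np.floor(np.log2(values.shape[0])) / 2)
        points = np.vstack(list(rrcf.shingle(values, size=shingle_size)))

        n = points.shape[0]
        avg_codisp = pd.Series(0.0, index=np.arange(n))
        counts = np.zeros(n)

        # Build the forest; each pass adds one tree over a random permutation of the points
        forest = []
        while len(forest) < num_trees:
            ixs = np.random.choice(n, size=(1, n), replace=False)
            forest.extend(rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs)

        # Accumulate CoDisp for every leaf of every tree, then average per point
        for tree in forest:
            codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves})
            avg_codisp[codisp.index] += codisp
            np.add.at(counts, codisp.index.values, 1)
        avg_codisp /= counts

        # Normalize to [0, 1] and label everything above the percentile threshold
        avg_codisp = (avg_codisp - avg_codisp.min()) / (avg_codisp.max() - avg_codisp.min())
        labels = (avg_codisp > np.percentile(avg_codisp, threshold_pct)).astype(int)
        return avg_codisp, labels


    # Example: a noisy sine wave with one injected spike
    rng = np.random.default_rng(42)
    series = np.sin(np.linspace(0, 20, 400)) + 0.05 * rng.standard_normal(400)
    series[200] += 3.0
    scores, labels = rcf_anomaly_scores(series, num_trees=50, shingle_size=8)
    print(labels.sum(), "points flagged as anomalous")

Because every tree is built over the full shingled matrix, each point receives a CoDisp estimate from all of the trees, which matches the averaging step in ``_build_model``.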