From 077ec398a6ff3db3efa037bc2998fccf13faac27 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Fri, 2 Aug 2024 07:40:59 +0000 Subject: [PATCH 01/14] intial commit rcf --- ads/opctl/operator/lowcode/anomaly/const.py | 1 + .../operator/lowcode/anomaly/model/factory.py | 2 + .../lowcode/anomaly/model/randomcutforest.py | 81 +++++++++++++++++++ .../operator/lowcode/anomaly/schema.yaml | 1 + pyproject.toml | 2 + test-requirements-operators.txt | 1 + 6 files changed, 88 insertions(+) create mode 100644 ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py diff --git a/ads/opctl/operator/lowcode/anomaly/const.py b/ads/opctl/operator/lowcode/anomaly/const.py index ff0e0fd22..73bac624b 100644 --- a/ads/opctl/operator/lowcode/anomaly/const.py +++ b/ads/opctl/operator/lowcode/anomaly/const.py @@ -21,6 +21,7 @@ class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta): OneClassSVM = "oneclasssvm" IsolationForest = "isolationforest" + RandomCutForest = "randomcutforest" # TODO : Add DBScan # DBScan = "dbscan" diff --git a/ads/opctl/operator/lowcode/anomaly/model/factory.py b/ads/opctl/operator/lowcode/anomaly/model/factory.py index 49adfb04f..64028cba2 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/factory.py +++ b/ads/opctl/operator/lowcode/anomaly/model/factory.py @@ -15,6 +15,7 @@ from .base_model import AnomalyOperatorBaseModel from .isolationforest import IsolationForestOperatorModel from .oneclasssvm import OneClassSVMOperatorModel +from .randomcutforest import RandomCutForestOperatorModel class UnSupportedModelError(Exception): @@ -52,6 +53,7 @@ class AnomalyOperatorModelFactory: _NonTime_MAP = { NonTimeADSupportedModels.OneClassSVM: OneClassSVMOperatorModel, NonTimeADSupportedModels.IsolationForest: IsolationForestOperatorModel, + NonTimeADSupportedModels.RandomCutForest: RandomCutForestOperatorModel, # TODO: Add DBScan model for non time based anomaly # NonTimeADSupportedModels.DBScan: DBScanOperatorModel, } diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py new file mode 100644 index 000000000..8c7483912 --- /dev/null +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Copyright (c) 2023, 2024 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +import numpy as np +import pandas as pd + +from ads.common.decorator.runtime_dependency import runtime_dependency +from ads.opctl.operator.lowcode.anomaly.const import OutputColumns + +from .anomaly_dataset import AnomalyOutput +from .base_model import AnomalyOperatorBaseModel + + +class RandomCutForestOperatorModel(AnomalyOperatorBaseModel): + """ + Class representing Random Cut Forest Anomaly Detection operator model. + """ + + @runtime_dependency( + module="rrcf", + err_msg=( + "Please run `pip install rrcf` to " + "install the required dependencies for RandomCutForest." 
+ ), + ) + def _build_model(self) -> AnomalyOutput: + from rrcf import RCTree + + model_kwargs = self.spec.model_kwargs + # map the output as per anomaly dataset class, 1: outlier, 0: inlier + self.outlier_map = {1: 0, -1: 1} + + anomaly_output = AnomalyOutput(date_column="index") + #TODO: PDB + import pdb + + pdb.set_trace() + + for target, df in self.datasets.full_data_dict.items(): + model = RCTree(**model_kwargs) + model.fit(df) + y_pred = model.predict(df) + y_pred = np.vectorize(self.outlier_map.get)(y_pred) + + scores = model.score_samples(df) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + {"index": df[index_col], OutputColumns.SCORE_COL: scores} + ).reset_index(drop=True) + + anomaly_output.add_output(target, anomaly, score) + + return anomaly_output + + def _generate_report(self): + """Generates the report.""" + import report_creator as rc + + other_sections = [ + rc.Heading("Selected Models Overview", level=2), + rc.Text( + "The following tables provide information regarding the chosen model." + ), + ] + + model_description = rc.Text( + "The Random Cut Forest (RCF) is an unsupervised machine learning algorithm that is used for anomaly detection." + " It works by building an ensemble of binary trees (random cut trees) and using them to compute anomaly scores for data points." + ) + + return ( + model_description, + other_sections, + ) diff --git a/ads/opctl/operator/lowcode/anomaly/schema.yaml b/ads/opctl/operator/lowcode/anomaly/schema.yaml index e6c6cd998..bb5caa6ec 100644 --- a/ads/opctl/operator/lowcode/anomaly/schema.yaml +++ b/ads/opctl/operator/lowcode/anomaly/schema.yaml @@ -363,6 +363,7 @@ spec: - auto - oneclasssvm - isolationforest + - randomcutforest meta: description: "The model to be used for anomaly detection" diff --git a/pyproject.toml b/pyproject.toml index fcc1310f4..4416fd34d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,6 +175,8 @@ anomaly = [ "autots", "oracledb", "report-creator==1.0.9", + "rrcf==0.4.4", + "scikit-learn" ] recommender = [ "oracle_ads[opctl]", diff --git a/test-requirements-operators.txt b/test-requirements-operators.txt index 838418393..64b1ba683 100644 --- a/test-requirements-operators.txt +++ b/test-requirements-operators.txt @@ -1,5 +1,6 @@ -r test-requirements.txt -e ".[forecast]" +-e ".[anomaly]" -e ".[recommender]" -e ".[feature-store-marketplace]" plotly From 97c78b0d6e07913fb4dd7c7bcb5548b52e1096f7 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 22 Aug 2024 04:13:31 +0000 Subject: [PATCH 02/14] draft commit rcf --- .../lowcode/anomaly/model/randomcutforest.py | 64 ++++++++++++++----- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 8c7483912..818042b02 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -30,32 +30,62 @@ def _build_model(self) -> AnomalyOutput: model_kwargs = self.spec.model_kwargs # map the output as per anomaly dataset class, 1: outlier, 0: inlier - self.outlier_map = {1: 0, -1: 1} + # self.outlier_map = {1: 0, -1: 1} anomaly_output = AnomalyOutput(date_column="index") - #TODO: PDB - import pdb + # TODO: PDB - pdb.set_trace() + # Set tree parameters + num_trees = model_kwargs.get("num_trees", 200) + shingle_size = 
model_kwargs.get("shingle_size", 1) + tree_size = model_kwargs.get("tree_size", 1000) for target, df in self.datasets.full_data_dict.items(): - model = RCTree(**model_kwargs) - model.fit(df) - y_pred = model.predict(df) - y_pred = np.vectorize(self.outlier_map.get)(y_pred) + df_values = df[self.spec.target_column].astype(float).values + points = np.vstack(list(rrcf.shingle(df_values, size=4))) - scores = model.score_samples(df) + sample_size_range = (1, 6) + n = points.shape[0] + avg_codisp = pd.Series(0.0, index=np.arange(n)) + index = np.zeros(n) - index_col = df.columns[0] + forest = [] + while len(forest) < num_trees: + ixs = np.random.choice(n, size=sample_size_range, replace=False) + trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs] + forest.extend(trees) + print(len(forest)) - anomaly = pd.DataFrame( - {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} - ).reset_index(drop=True) - score = pd.DataFrame( - {"index": df[index_col], OutputColumns.SCORE_COL: scores} - ).reset_index(drop=True) + for tree in forest: + codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves}) + avg_codisp[codisp.index] += codisp + np.add.at(index, codisp.index.values, 1) - anomaly_output.add_output(target, anomaly, score) + avg_codisp /= index + avg_codisp.index = df.iloc[(4 - 1) :].index + avg_codisp = (avg_codisp - avg_codisp.min()) / ( + avg_codisp.max() - avg_codisp.min() + ) + + y_pred = (avg_codisp > np.percentile(avg_codisp, 95)).astype(int) + + import pdb + + pdb.set_trace() + print("Done") + + # scores = model.score_samples(df) + + # index_col = df.columns[0] + + # anomaly = pd.DataFrame( + # {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} + # ).reset_index(drop=True) + # score = pd.DataFrame( + # {"index": df[index_col], OutputColumns.SCORE_COL: scores} + # ).reset_index(drop=True) + + # anomaly_output.add_output(target, anomaly, score) return anomaly_output From 8dbccaec0481f0c944e8cf0f2ddcdd53e0a773f0 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Thu, 22 Aug 2024 04:17:17 +0000 Subject: [PATCH 03/14] draft commit rcf --- .../operator/lowcode/anomaly/model/randomcutforest.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 818042b02..38d1c224e 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -33,7 +33,6 @@ def _build_model(self) -> AnomalyOutput: # self.outlier_map = {1: 0, -1: 1} anomaly_output = AnomalyOutput(date_column="index") - # TODO: PDB # Set tree parameters num_trees = model_kwargs.get("num_trees", 200) @@ -42,8 +41,11 @@ def _build_model(self) -> AnomalyOutput: for target, df in self.datasets.full_data_dict.items(): df_values = df[self.spec.target_column].astype(float).values + + # TODO: Update size to log logic points = np.vstack(list(rrcf.shingle(df_values, size=4))) + # TODO: remove hardcode sample_size_range = (1, 6) n = points.shape[0] avg_codisp = pd.Series(0.0, index=np.arange(n)) @@ -62,16 +64,19 @@ def _build_model(self) -> AnomalyOutput: np.add.at(index, codisp.index.values, 1) avg_codisp /= index + # TODO: remove hardcode avg_codisp.index = df.iloc[(4 - 1) :].index avg_codisp = (avg_codisp - avg_codisp.min()) / ( avg_codisp.max() - avg_codisp.min() ) + # TODO: use model kwargs for percentile threshold y_pred = (avg_codisp > np.percentile(avg_codisp, 95)).astype(int) - import 
pdb + # TODO: rem pdb + # import pdb - pdb.set_trace() + # pdb.set_trace() print("Done") # scores = model.score_samples(df) From 8b5c6995c932229c53cb3b2e21cd250dca806bea Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Fri, 23 Aug 2024 07:26:24 +0000 Subject: [PATCH 04/14] update the todos, complete implementation --- .../lowcode/anomaly/model/base_model.py | 87 ++++++++------ .../lowcode/anomaly/model/randomcutforest.py | 110 +++++++++--------- 2 files changed, 106 insertions(+), 91 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/base_model.py b/ads/opctl/operator/lowcode/anomaly/model/base_model.py index e909976d8..983228ba5 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py +++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py @@ -55,6 +55,7 @@ def __init__(self, config: AnomalyOperatorConfig, datasets: AnomalyDatasets): def generate_report(self): """Generates the report.""" import matplotlib.pyplot as plt + plt.rcParams.update({'figure.max_open_warning': 0}) import report_creator as rc start_time = time.time() @@ -87,43 +88,57 @@ def generate_report(self): self.spec.datetime_column.name if self.spec.datetime_column else "index" ) + ( + model_description, + other_sections, + ) = self._generate_report() + blocks = [] for target, df in self.datasets.full_data_dict.items(): - figure_blocks = [] - time_col = df[date_column].reset_index(drop=True) - anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[ - OutputColumns.ANOMALY_COL - ] - anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1] - downsampled_time_col = time_col - selected_indices = list(range(len(time_col))) - if self.spec.subsample_report_data: - non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices] - # Downsample non-anomalous data if it exceeds the threshold (1000) - if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD: - downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD] - selected_indices = anomaly_indices + downsampled_non_anomaly_indices - selected_indices.sort() - downsampled_time_col = time_col[selected_indices] - - columns = set(df.columns).difference({date_column}) - for col in columns: - y = df[col].reset_index(drop=True) - - downsampled_y = y[selected_indices] - - fig, ax = plt.subplots(figsize=(8, 3), layout="constrained") - ax.grid() - ax.plot(downsampled_time_col, downsampled_y, color="black") - # Plot anomalies - for i in anomaly_indices: - ax.scatter(time_col[i], y[i], color="red", marker="o") - plt.xlabel(date_column) - plt.ylabel(col) - plt.title(f"`{col}` with reference to anomalies") - figure_blocks.append(rc.Widget(ax)) - - blocks.append(rc.Group(*figure_blocks, label=target)) + if target in anomaly_output.list_categories(): + figure_blocks = [] + time_col = df[date_column].reset_index(drop=True) + anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[ + OutputColumns.ANOMALY_COL + ] + anomaly_indices = [ + i for i, index in enumerate(anomaly_col) if index == 1 + ] + downsampled_time_col = time_col + selected_indices = list(range(len(time_col))) + if self.spec.subsample_report_data: + non_anomaly_indices = [ + i for i in range(len(time_col)) if i not in anomaly_indices + ] + # Downsample non-anomalous data if it exceeds the threshold (1000) + if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD: + downsampled_non_anomaly_indices = non_anomaly_indices[ + :: len(non_anomaly_indices) // SUBSAMPLE_THRESHOLD + ] + selected_indices = ( + 
anomaly_indices + downsampled_non_anomaly_indices + ) + selected_indices.sort() + downsampled_time_col = time_col[selected_indices] + + columns = set(df.columns).difference({date_column}) + for col in columns: + y = df[col].reset_index(drop=True) + + downsampled_y = y[selected_indices] + + fig, ax = plt.subplots(figsize=(8, 3), layout="constrained") + ax.grid() + ax.plot(downsampled_time_col, downsampled_y, color="black") + # Plot anomalies + for i in anomaly_indices: + ax.scatter(time_col[i], y[i], color="red", marker="o") + plt.xlabel(date_column) + plt.ylabel(col) + plt.title(f"`{col}` with reference to anomalies") + figure_blocks.append(rc.Widget(ax)) + + blocks.append(rc.Group(*figure_blocks, label=target)) plots = rc.Select(blocks) report_sections = [] @@ -133,7 +148,7 @@ def generate_report(self): yaml_appendix = rc.Yaml(self.config.to_dict()) summary = rc.Block( rc.Group( - rc.Text(f"You selected the **`{self.spec.model}`** model."), + rc.Text(f"You selected the **`{self.spec.model}`** model.\n{model_description.text}\n"), rc.Text( "Based on your dataset, you could have also selected " f"any of the models: `{'`, `'.join(SupportedModels.keys() if self.spec.datetime_column else NonTimeADSupportedModels.keys())}`." diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 38d1c224e..730938d03 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -7,6 +7,7 @@ import pandas as pd from ads.common.decorator.runtime_dependency import runtime_dependency +from ads.opctl import logger from ads.opctl.operator.lowcode.anomaly.const import OutputColumns from .anomaly_dataset import AnomalyOutput @@ -29,68 +30,67 @@ def _build_model(self) -> AnomalyOutput: from rrcf import RCTree model_kwargs = self.spec.model_kwargs - # map the output as per anomaly dataset class, 1: outlier, 0: inlier - # self.outlier_map = {1: 0, -1: 1} anomaly_output = AnomalyOutput(date_column="index") # Set tree parameters num_trees = model_kwargs.get("num_trees", 200) - shingle_size = model_kwargs.get("shingle_size", 1) - tree_size = model_kwargs.get("tree_size", 1000) + shingle_size = model_kwargs.get("shingle_size", None) + anamoly_threshold = model_kwargs.get("anamoly_threshold", 95) for target, df in self.datasets.full_data_dict.items(): - df_values = df[self.spec.target_column].astype(float).values - - # TODO: Update size to log logic - points = np.vstack(list(rrcf.shingle(df_values, size=4))) - - # TODO: remove hardcode - sample_size_range = (1, 6) - n = points.shape[0] - avg_codisp = pd.Series(0.0, index=np.arange(n)) - index = np.zeros(n) - - forest = [] - while len(forest) < num_trees: - ixs = np.random.choice(n, size=sample_size_range, replace=False) - trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs] - forest.extend(trees) - print(len(forest)) - - for tree in forest: - codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves}) - avg_codisp[codisp.index] += codisp - np.add.at(index, codisp.index.values, 1) - - avg_codisp /= index - # TODO: remove hardcode - avg_codisp.index = df.iloc[(4 - 1) :].index - avg_codisp = (avg_codisp - avg_codisp.min()) / ( - avg_codisp.max() - avg_codisp.min() - ) - - # TODO: use model kwargs for percentile threshold - y_pred = (avg_codisp > np.percentile(avg_codisp, 95)).astype(int) - - # TODO: rem pdb - # import pdb - - # pdb.set_trace() - print("Done") - - # scores = 
model.score_samples(df) - - # index_col = df.columns[0] - - # anomaly = pd.DataFrame( - # {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred} - # ).reset_index(drop=True) - # score = pd.DataFrame( - # {"index": df[index_col], OutputColumns.SCORE_COL: scores} - # ).reset_index(drop=True) - - # anomaly_output.add_output(target, anomaly, score) + try: + if df.shape[0] == 1: + raise ValueError("Dataset size must be greater than 1") + df_values = df[self.spec.target_column].astype(float).values + + cal_shingle_size = ( + shingle_size + if shingle_size + else int(2 ** np.floor(np.log2(df.shape[0])) / 2) + ) + points = np.vstack(list(rrcf.shingle(df_values, size=cal_shingle_size))) + + sample_size_range = (1, points.shape[0]) + n = points.shape[0] + avg_codisp = pd.Series(0.0, index=np.arange(n)) + index = np.zeros(n) + + forest = [] + while len(forest) < num_trees: + ixs = np.random.choice(n, size=sample_size_range, replace=False) + trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs] + forest.extend(trees) + + for tree in forest: + codisp = pd.Series( + {leaf: tree.codisp(leaf) for leaf in tree.leaves} + ) + avg_codisp[codisp.index] += codisp + np.add.at(index, codisp.index.values, 1) + + avg_codisp /= index + avg_codisp.index = df.iloc[(cal_shingle_size - 1) :].index + avg_codisp = (avg_codisp - avg_codisp.min()) / ( + avg_codisp.max() - avg_codisp.min() + ) + + y_pred = ( + avg_codisp > np.percentile(avg_codisp, anamoly_threshold) + ).astype(int) + + index_col = df.columns[0] + + anomaly = pd.DataFrame( + {index_col: y_pred.index, OutputColumns.ANOMALY_COL: y_pred} + ).reset_index(drop=True) + score = pd.DataFrame( + {"index": avg_codisp.index, OutputColumns.SCORE_COL: avg_codisp} + ).reset_index(drop=True) + + anomaly_output.add_output(target, anomaly, score) + except Exception as e: + logger.warn(f"Encountered Error: {e}. 
Skipping series {target}.") return anomaly_output From 8f4f3629c9f9621b755b607f42eea2cff0be5fd5 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 26 Aug 2024 07:21:42 +0000 Subject: [PATCH 05/14] update third party licenses --- THIRD_PARTY_LICENSES.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 1bd2e036c..6ee168932 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -453,6 +453,12 @@ mlforecast * Source code: https://github.com/Nixtla/mlforecast * Project home: https://github.com/Nixtla/mlforecast +rrcf +* Copyright 2018 kLabUM +* License: MIT License +* Source code: https://github.com/kLabUM/rrcf +* Project home: https://github.com/kLabUM/rrcf + ======= =============================== Licenses =============================== ------------------------------------------------------------------------ From c0d05651c20f903cfedf3bc9b00f8792f5dc4537 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Mon, 26 Aug 2024 07:41:01 +0000 Subject: [PATCH 06/14] fix typo --- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py index 730938d03..e2b8b9d5a 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +++ b/ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py @@ -36,7 +36,7 @@ def _build_model(self) -> AnomalyOutput: # Set tree parameters num_trees = model_kwargs.get("num_trees", 200) shingle_size = model_kwargs.get("shingle_size", None) - anamoly_threshold = model_kwargs.get("anamoly_threshold", 95) + anomaly_threshold = model_kwargs.get("anamoly_threshold", 95) for target, df in self.datasets.full_data_dict.items(): try: @@ -76,7 +76,7 @@ def _build_model(self) -> AnomalyOutput: ) y_pred = ( - avg_codisp > np.percentile(avg_codisp, anamoly_threshold) + avg_codisp > np.percentile(avg_codisp, anomaly_threshold) ).astype(int) index_col = df.columns[0] From c54e341154601ca7d1a8938e036e4643a6dc2a44 Mon Sep 17 00:00:00 2001 From: Vikas Pandey Date: Tue, 3 Sep 2024 07:13:06 +0000 Subject: [PATCH 07/14] enable tests --- ads/opctl/operator/lowcode/anomaly/model/base_model.py | 10 ++++++++-- tests/operators/anomaly/test_anomaly_simple.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ads/opctl/operator/lowcode/anomaly/model/base_model.py b/ads/opctl/operator/lowcode/anomaly/model/base_model.py index 983228ba5..e8de5213e 100644 --- a/ads/opctl/operator/lowcode/anomaly/model/base_model.py +++ b/ads/opctl/operator/lowcode/anomaly/model/base_model.py @@ -16,7 +16,11 @@ from ads.common.object_storage_details import ObjectStorageDetails from ads.opctl import logger -from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD +from ads.opctl.operator.lowcode.anomaly.const import ( + SUBSAMPLE_THRESHOLD, + OutputColumns, + SupportedMetrics, +) from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer from ads.opctl.operator.lowcode.common.utils import ( disable_print, @@ -137,8 +141,10 @@ def generate_report(self): plt.ylabel(col) plt.title(f"`{col}` with reference to anomalies") figure_blocks.append(rc.Widget(ax)) + else: + figure_blocks = None - blocks.append(rc.Group(*figure_blocks, label=target)) + blocks.append(rc.Group(*figure_blocks, label=target)) if figure_blocks else None plots = rc.Select(blocks) 
report_sections = [] diff --git a/tests/operators/anomaly/test_anomaly_simple.py b/tests/operators/anomaly/test_anomaly_simple.py index b94c49007..aac4dad3e 100644 --- a/tests/operators/anomaly/test_anomaly_simple.py +++ b/tests/operators/anomaly/test_anomaly_simple.py @@ -52,7 +52,7 @@ for d in DATASETS: parameters_short.append((m, d)) -MODELS = ["autots", "oneclasssvm", "isolationforest"] +MODELS = ["autots", "oneclasssvm", "isolationforest", "randomcutforest"] @pytest.mark.parametrize("model", ["autots"]) def test_artificial_big(model): From d53a46e633069da8963999b182d80dc28cc4cd3b Mon Sep 17 00:00:00 2001 From: Mayoor Rao Date: Mon, 16 Sep 2024 21:34:22 -0700 Subject: [PATCH 08/14] Update manifest update --- ads/opctl/conda/cmds.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ads/opctl/conda/cmds.py b/ads/opctl/conda/cmds.py index d24c4f0d1..0fd34ccff 100644 --- a/ads/opctl/conda/cmds.py +++ b/ads/opctl/conda/cmds.py @@ -181,29 +181,29 @@ def _create( logger.info( f"Preparing manifest. Manifest in the environment: {conda_dep.get('manifest')}" ) - manifest = _fetch_manifest_template() + manifest_template = _fetch_manifest_template() if "name" not in manifest: - manifest["manifest"]["name"] = name - manifest["manifest"]["slug"] = slug + manifest_template["manifest"]["name"] = name + manifest_template["manifest"]["slug"] = slug if "type" not in manifest: logger.info("Setting manifest to published") - manifest["manifest"]["type"] = "published" + manifest_template["manifest"]["type"] = "published" if "version" not in manifest: - manifest["manifest"]["version"] = version - manifest["manifest"]["arch_type"] = "GPU" if gpu else "CPU" + manifest_template["manifest"]["version"] = version + manifest_template["manifest"]["arch_type"] = "GPU" if gpu else "CPU" - manifest["manifest"]["create_date"] = datetime.utcnow().strftime( + manifest_template["manifest"]["create_date"] = datetime.utcnow().strftime( "%a, %b %d, %Y, %H:%M:%S %Z UTC" ) if not "manifest_version" in manifest: - manifest["manifest"]["manifest_version"] = "1.0" + manifest_template["manifest"]["manifest_version"] = "1.0" logger.info(f"Creating conda environment {slug}") manifest_dict = { - k: manifest["manifest"][k] - for k in manifest["manifest"] - if manifest["manifest"][k] + k: manifest_template["manifest"][k] + for k in manifest_template["manifest"] + if manifest_template["manifest"][k] } if "manifest" in conda_dep: conda_dep["manifest"].update(manifest_dict) From 7e2337b5994f798cc3f5a5b2c3be02995400d2cc Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 14:05:19 -0400 Subject: [PATCH 09/14] Update release note for 2.11.18. --- docs/source/release_notes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 42af9030f..6d45c3ed0 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -2,6 +2,12 @@ Release Notes ============= +2.11.18 +------- +Release date: September 19, 2024 + +* Added ``with_artifact()`` in ``ContainerRuntime`` class to support running container job with additional artifact. + 2.11.17 ------- Release date: August 9, 2024 From 141f10c1fcf6a41493b186309eab60d33078c0f3 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 14:05:33 -0400 Subject: [PATCH 10/14] Update release version to 2.11.18. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 630902cff..75cbd1b9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi" # Required name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below -version = "2.11.17" +version = "2.11.18" # Optional description = "Oracle Accelerated Data Science SDK" From 37794526f2e185e0bcbac2aebf89a28e2426ec12 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 19:54:13 -0400 Subject: [PATCH 11/14] Add requirement fiona<=1.9.6 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 75cbd1b9d..8a711f482 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ data = [ ] geo = [ "geopandas<1.0.0", # in v1.0.0 removed the built-in dataset 'naturalearth_lowres', fix when relax version of geopandas needed + "fiona<=1.9.6", "oracle_ads[viz]" ] huggingface = [ From 2db4abe775cd90d087b9720e04bb1d6c1daee4de Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 19:54:37 -0400 Subject: [PATCH 12/14] Update release notes. --- docs/source/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 6d45c3ed0..7322fe732 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -4,7 +4,7 @@ Release Notes 2.11.18 ------- -Release date: September 19, 2024 +Release date: September 20, 2024 * Added ``with_artifact()`` in ``ContainerRuntime`` class to support running container job with additional artifact. From 357d5dd6a49b741f5260826e8040dcffde23e894 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Thu, 19 Sep 2024 19:55:07 -0400 Subject: [PATCH 13/14] Update container runtime docs. --- docs/source/user_guide/jobs/run_container.rst | 2 +- docs/source/user_guide/jobs/tabs/container_runtime.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/user_guide/jobs/run_container.rst b/docs/source/user_guide/jobs/run_container.rst index 9c469c555..b148831bc 100644 --- a/docs/source/user_guide/jobs/run_container.rst +++ b/docs/source/user_guide/jobs/run_container.rst @@ -22,7 +22,7 @@ Here is an example to create and run a container job: To configure ``ContainerRuntime``, you must specify the container ``image``. Similar to other runtime, you can add environment variables. -You can optionally specify the `entrypoint`, `cmd`, `image_digest` and `image_signature_id` for running the container. +You can optionally specify the `entrypoint`, `cmd`, `image_digest` and `image_signature_id` for running the container. You may also add additional artifact (file or directory) if needed. Please note that if you add a directory, it will be compressed as a zip file under `/home/datascience` and you will need to unzip if in your container. 
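For illustration, a runtime using an additional directory artifact might be configured along these lines (a sketch only: ``with_artifact()`` is the method added in this release, and the exact zip name under ``/home/datascience`` depends on your artifact):

.. code-block:: python

    from ads.jobs import ContainerRuntime

    runtime = (
        ContainerRuntime()
        .with_image("<region>.ocir.io/<tenancy>/<image>:<tag>")
        .with_environment_variable(GREETINGS="Welcome to OCI Data Science")
        .with_entrypoint(["/bin/sh", "-c"])
        # A directory artifact is delivered as a zip under /home/datascience;
        # unzip it before running your code (adjust the zip name to match).
        .with_cmd(
            "unzip /home/datascience/my_artifact.zip -d /home/datascience"
            " && echo $GREETINGS"
        )
        .with_artifact("path/to/my_artifact")
    )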
See also: diff --git a/docs/source/user_guide/jobs/tabs/container_runtime.rst b/docs/source/user_guide/jobs/tabs/container_runtime.rst index 0ef47d152..cccf7c172 100644 --- a/docs/source/user_guide/jobs/tabs/container_runtime.rst +++ b/docs/source/user_guide/jobs/tabs/container_runtime.rst @@ -33,6 +33,7 @@ .with_environment_variable(GREETINGS="Welcome to OCI Data Science") .with_entrypoint(["/bin/sh", "-c"]) .with_cmd("sleep 5 && echo $GREETINGS") + .artifact("") ) ) @@ -69,6 +70,7 @@ - name: GREETINGS value: Welcome to OCI Data Science image: .ocir.io// + scriptPathURI: path/to/artifact .. code-block:: python From abbdf010ecda00fdc53362155d72c83cb58a2286 Mon Sep 17 00:00:00 2001 From: Qiu Qin Date: Fri, 20 Sep 2024 12:01:27 -0400 Subject: [PATCH 14/14] Add fiona to THIRD_PARTY_LICENSES.txt --- THIRD_PARTY_LICENSES.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 6ee168932..197066ec8 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -72,6 +72,12 @@ fastavro * Source code: https://github.com/fastavro/fastavro * Project home: https://github.com/fastavro/fastavro +fiona +* Copyright (c) 2007, Sean C. Gillies +* License: BSD 3-Clause "New" or "Revised" License +* Source code: https://github.com/Toblerity/Fiona +* Project home: https://github.com/Toblerity/Fiona + folium * Copyright (C) 2013, Rob Story * License: MIT License
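Taken together, the RandomCutForest patches shingle each series, build a forest of random cut trees with ``rrcf``, average the collusive displacement (CoDisp) of every point across the forest, min-max normalize the scores, and flag points above a percentile threshold. The standalone sketch below condenses that scoring logic on a synthetic series (assuming ``rrcf``, ``numpy`` and ``pandas`` are installed; the defaults mirror the operator's ``model_kwargs``, while the data and the helper name are illustrative):

.. code-block:: python

    import numpy as np
    import pandas as pd
    import rrcf


    def rcf_anomaly_scores(values, num_trees=200, shingle_size=None, threshold_pct=95):
        """Score a 1-D series with a random cut forest and flag outliers."""
        values = np.asarray(values, dtype=float)
        if values.shape[0] <= 1:
            raise ValueError("Dataset size must be greater than 1")

        # Default shingle size: half of the largest power of two <= len(values)
        if shingle_size is None:
            shingle_size = int(2 ** np.floor(np.log2(values.shape[0])) / 2)
        points = np.vstack(list(rrcf.shingle(values, size=shingle_size)))

        n = points.shape[0]
        avg_codisp = pd.Series(0.0, index=np.arange(n))
        counts = np.zeros(n)

        # Build the forest; each pass adds one tree over a random permutation of the points
        forest = []
        while len(forest) < num_trees:
            ixs = np.random.choice(n, size=(1, n), replace=False)
            forest.extend(rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs)

        # Accumulate CoDisp for every leaf of every tree, then average per point
        for tree in forest:
            codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves})
            avg_codisp[codisp.index] += codisp
            np.add.at(counts, codisp.index.values, 1)
        avg_codisp /= counts

        # Normalize to [0, 1] and label everything above the percentile threshold
        avg_codisp = (avg_codisp - avg_codisp.min()) / (avg_codisp.max() - avg_codisp.min())
        labels = (avg_codisp > np.percentile(avg_codisp, threshold_pct)).astype(int)
        return avg_codisp, labels


    # Example: a noisy sine wave with one injected spike
    rng = np.random.default_rng(42)
    series = np.sin(np.linspace(0, 20, 400)) + 0.05 * rng.standard_normal(400)
    series[200] += 3.0
    scores, labels = rcf_anomaly_scores(series, num_trees=50, shingle_size=8)
    print(labels.sum(), "points flagged as anomalous")

Because every tree is built over the full shingled matrix, each point receives a CoDisp estimate from all of the trees, which matches the averaging step in ``_build_model``.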