Commit

Merge branch 'main' into bug/mlforecast_datetime_column
ahosler authored May 30, 2024
2 parents 2e39598 + dc583b3 commit 5934b73
Showing 15 changed files with 36 additions and 18 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/run-forecast-unit-tests.yml
@@ -27,7 +27,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8"]
python-version: ["3.8", "3.9", "3.10"]

steps:
- uses: actions/checkout@v4
@@ -56,4 +56,7 @@ jobs:
$CONDA/bin/conda init
source /home/runner/.bashrc
pip install -r test-requirements-operators.txt
pip install "oracle-automlx[classic]>=24.2.0"
pip install "oracle-automlx[forecasting]>=24.2.0"
pip install pandas>=2.2.0
python -m pytest -v -p no:warnings --durations=5 tests/operators/forecast
2 changes: 1 addition & 1 deletion .github/workflows/run-operators-unit-tests.yml
@@ -27,7 +27,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8"]
python-version: ["3.8", "3.9", "3.10"]

steps:
- uses: actions/checkout@v4
6 changes: 3 additions & 3 deletions ads/opctl/operator/lowcode/anomaly/__main__.py
@@ -40,11 +40,11 @@ def operate(operator_config: AnomalyOperatorConfig) -> None:
AnomalyOperatorModelFactory.get_model(
operator_config, datasets
).generate_report()
except Exception as e2:
except Exception as ee:
logger.debug(
f"Failed to backup forecast with error {e2.args}. Raising original error."
f"Failed to backup forecast with error {ee.args}. Raising original error."
)
raise e
raise ee
else:
raise e
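
For orientation, here is a condensed sketch of the fallback flow this hunk adjusts. The `AnomalyOperatorModelFactory.get_model(...).generate_report()` call and the logging text follow the diff; the logger setup, the `"auto"` guard, and the omitted factory import are assumptions made only to keep the sketch self-contained:

```python
import logging

logger = logging.getLogger(__name__)


def operate(operator_config, datasets):
    """Sketch: try the configured model, fall back to a default, re-raise sensibly."""
    try:
        # Primary attempt with the user-selected model.
        AnomalyOperatorModelFactory.get_model(operator_config, datasets).generate_report()
    except Exception as e:
        if getattr(operator_config.spec, "model", None) == "auto":  # assumed guard
            try:
                # Backup attempt with the factory's default model.
                AnomalyOperatorModelFactory.get_model(operator_config, datasets).generate_report()
            except Exception as ee:
                logger.debug(
                    f"Failed to backup forecast with error {ee.args}. Raising original error."
                )
                # After this commit the backup failure is what gets raised.
                raise ee
        else:
            raise e
```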

2 changes: 1 addition & 1 deletion ads/opctl/operator/lowcode/anomaly/model/autots.py
@@ -91,7 +91,7 @@ def _generate_report(self):
),
]
model_description = rc.Text(
"The automlx model automatically pre-processes, selects and engineers "
"The autots model automatically pre-processes, selects and engineers "
"high-quality features in your dataset, which then given to an automatically "
"chosen and optimized machine learning model.."
)
3 changes: 1 addition & 2 deletions ads/opctl/operator/lowcode/anomaly/schema.yaml
@@ -349,9 +349,8 @@ spec:
model:
type: string
required: false
default: automlx
default: autots
allowed:
- automlx
- autots
- auto
meta:
5 changes: 3 additions & 2 deletions ads/opctl/operator/lowcode/anomaly/utils.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2023 Oracle and/or its affiliates.
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import os
@@ -77,5 +77,6 @@ def default_signer(**kwargs):

return default_signer(**kwargs)


def select_auto_model(datasets, operator_config):
return SupportedModels.AutoMLX
return SupportedModels.AutoTS
1 change: 1 addition & 0 deletions ads/opctl/operator/lowcode/forecast/model/arima.py
@@ -125,6 +125,7 @@ def _train_model(self, i, s_id, df, model_kwargs):
logger.debug("===========Done===========")
except Exception as e:
self.errors_dict[s_id] = {"model_name": self.spec.model, "error": str(e)}
logger.debug(f"Encountered Error: {e}. Skipping.")

def _build_model(self) -> pd.DataFrame:
full_data_dict = self.datasets.get_data_by_series()
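
The same per-series error handling (record the failure in `errors_dict`, log it at debug level, and move on) is added to several backends in this commit — arima here, and automlx, autots, ml_forecast, and prophet below. A standalone sketch of the pattern, with the model-specific training call stubbed out as an assumption:

```python
import logging

logger = logging.getLogger(__name__)


def train_all_series(data_by_series, train_one, model_name):
    """Train each series independently; collect failures instead of aborting the run."""
    results, errors_dict = {}, {}
    for s_id, df in data_by_series.items():
        try:
            results[s_id] = train_one(s_id, df)  # stand-in for the backend's _train_model body
            logger.debug("===========Done===========")
        except Exception as e:
            errors_dict[s_id] = {"model_name": model_name, "error": str(e)}
            logger.debug(f"Encountered Error: {e}. Skipping.")
    return results, errors_dict
```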
3 changes: 2 additions & 1 deletion ads/opctl/operator/lowcode/forecast/model/automlx.py
@@ -84,7 +84,7 @@ def _build_model(self) -> pd.DataFrame:
loglevel=logging.CRITICAL,
)
except Exception as e:
logger.info("Ray already initialized")
logger.info(f"Error. Has Ray already been initialized? Skipping. {e}")

full_data_dict = self.datasets.get_data_by_series()

@@ -168,6 +168,7 @@ def _build_model(self) -> pd.DataFrame:
"model_name": self.spec.model,
"error": str(e),
}
logger.debug(f"Encountered Error: {e}. Skipping.")

logger.debug("===========Forecast Generated===========")

1 change: 1 addition & 0 deletions ads/opctl/operator/lowcode/forecast/model/autots.py
@@ -209,6 +209,7 @@ def _build_model(self) -> pd.DataFrame:
"model_name": self.spec.model,
"error": str(e),
}
logger.debug(f"Encountered Error: {e}. Skipping.")

logger.debug("===========Done===========")

7 changes: 6 additions & 1 deletion ads/opctl/operator/lowcode/forecast/model/ml_forecast.py
@@ -1,3 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import pandas as pd
import numpy as np

@@ -47,7 +52,6 @@ def preprocess(self, df, series_id):
)
def _train_model(self, data_train, data_test, model_kwargs):
try:

import lightgbm as lgb
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
@@ -159,6 +163,7 @@ def _train_model(self, data_train, data_test, model_kwargs):
"model_name": self.spec.model,
"error": str(e),
}
logger.debug(f"Encountered Error: {e}. Skipping.")

def _build_model(self) -> pd.DataFrame:
data_train = self.datasets.get_all_data_long(include_horizon=False)
10 changes: 9 additions & 1 deletion ads/opctl/operator/lowcode/forecast/model/neuralprophet.py
@@ -75,6 +75,8 @@ def _fit_model(data, params, additional_regressors, select_metric):
m = m.add_future_regressor(name=add_reg)
m.fit(df=data)
accepted_regressors_config = m.config_regressors or dict()
if hasattr(accepted_regressors_config, "regressors"):
accepted_regressors_config = accepted_regressors_config.regressors or dict()

enable_print()
return m, list(accepted_regressors_config.keys())
@@ -122,7 +124,13 @@ def _train_model(self, i, s_id, df, model_kwargs):

if self.loaded_models is not None and s_id in self.loaded_models:
model = self.loaded_models[s_id]
accepted_regressors_config = model.config_regressors or dict()
accepted_regressors_config = (
model.config_regressors.regressors or dict()
)
if hasattr(accepted_regressors_config, "regressors"):
accepted_regressors_config = (
accepted_regressors_config.regressors or dict()
)
self.accepted_regressors[s_id] = list(accepted_regressors_config.keys())
if self.loaded_trainers is not None and s_id in self.loaded_trainers:
model.trainer = self.loaded_trainers[s_id]
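
The regressor-extraction logic now appears twice in this file with the same version check. A small helper consolidating it — the assumption, taken from this diff, is that newer NeuralProphet releases wrap the accepted-regressor dict in an object exposing a `.regressors` attribute, while older ones return the dict directly:

```python
def accepted_regressor_names(model):
    """Return the regressor names a fitted NeuralProphet model accepted, across versions."""
    config = model.config_regressors or dict()
    # Newer releases: config_regressors is an object whose .regressors holds the dict.
    if hasattr(config, "regressors"):
        config = config.regressors or dict()
    return list(config.keys())
```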
1 change: 1 addition & 0 deletions ads/opctl/operator/lowcode/forecast/model/prophet.py
@@ -131,6 +131,7 @@ def _train_model(self, i, series_id, df, model_kwargs):
"model_name": self.spec.model,
"error": str(e),
}
logger.debug(f"Encountered Error: {e}. Skipping.")

def _build_model(self) -> pd.DataFrame:
full_data_dict = self.datasets.get_data_by_series()
@@ -33,13 +33,13 @@ Here is an example anomaly.yaml with every parameter specified:
* **format**: the format of the datetime string in python notation `detailed here <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_.

* **target_category_columns**: (optional) The Series ID of the target. When provided, the target data must be present for each date in the datetime_column and for each series id in the target_category_columns.
* **validation_data**: (optional) This dictionary contains the details for how to read the validation data. Validation data must contain all of the columns of input_data plus a column titles "anomaly".
* **validation_data**: (optional) This dictionary contains the details for how to read the validation data. Validation data must contain all of the columns of input_data plus a column titled "anomaly".
* **url**: Insert the uri for the dataset if it's on object storage or Data Lake using the URI pattern ``oci://<bucket>@<namespace>/path/to/data.csv``.
* **kwargs**: Insert any other args for pandas to load the data (``format``, ``options``, etc.) See full list in ``YAML Schema`` section.
* **output_directory**: (optional) This dictionary contains the details for where to put the output artifacts. The directory need not exist, but must be accessible by the Operator during runtime.
* **url**: Insert the uri for the dataset if it's on object storage or Data Lake using the URI pattern ``oci://<bucket>@<namespace>/subfolder/``.
* **kwargs**: Insert any other args for pandas to load the data (``format``, ``options``, etc.) See full list in ``YAML Schema`` section.
* **model**: (optional) The name of the model framework you want to use. Defaults to "auto". Other options are: ``arima``, ``automlx``, ``prophet``, ``neuralprophet``, ``autots``, and ``auto``.
* **model**: (optional) The name of the model framework you want to use. Defaults to "auto". Other options are: ``autots``, and ``auto``.
* **model_kwargs**: (optional) This kwargs dict passes straight through to the model framework. If you want to take direct control of the modeling, this is the best way.
* **test_data**: (optional) This dictionary contains the details for how to read the test data. Test data should contain every datetime value of the input_data, (optionally) all of the series from target_category_columns, and a column titles "anomaly" with either a 1 (non-anomalous) or 0 (anomalous).
* **url**: Insert the uri for the dataset if it's on object storage or Data Lake using the URI pattern ``oci://<bucket>@<namespace>/path/to/data.csv``.
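
To make the fields described above concrete, here is a hedged sketch that assembles an anomaly spec as a Python dict and writes it to anomaly.yaml with PyYAML. The field names echo the documentation excerpt; the kind/type/version scaffolding, column names, and URIs are placeholder assumptions, not values taken from the repository:

```python
import yaml  # PyYAML

# Placeholder spec mirroring the documented fields; adjust everything to your data.
anomaly_spec = {
    "kind": "operator",          # assumed scaffolding
    "type": "anomaly",
    "version": "v1",
    "spec": {
        "datetime_column": {"name": "ds", "format": "%Y-%m-%d"},
        "target_category_columns": ["series_id"],
        "input_data": {"url": "oci://<bucket>@<namespace>/path/to/data.csv"},
        "output_directory": {"url": "oci://<bucket>@<namespace>/subfolder/"},
        "model": "autots",       # per this commit: "autots" or "auto"
        "model_kwargs": {},      # passed straight through to the model framework
    },
}

with open("anomaly.yaml", "w") as f:
    yaml.safe_dump(anomaly_spec, f, sort_keys=False)
```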
2 changes: 0 additions & 2 deletions test-requirements-operators.txt
@@ -2,7 +2,5 @@
-e ".[forecast]"
-e ".[feature-store-marketplace]"
plotly
oracle-automlx[classic]>=24.2.0
oracle-automlx[forecasting]>=24.2.0
pandas>=2.0.0
protobuf==3.20.3
2 changes: 1 addition & 1 deletion tests/operators/anomaly/test_anomaly_simple.py
@@ -16,7 +16,7 @@
from ads.opctl.operator.cmd import run


MODELS = ["automlx", "autots"]
MODELS = ["autots"] # "automlx",

# Mandatory YAML parameters
TEMPLATE_YAML = {
