From 7c32dfafd00fae2c641a0d85115f2c9f7c1cdb06 Mon Sep 17 00:00:00 2001 From: govarsha Date: Mon, 30 Oct 2023 15:53:51 +0530 Subject: [PATCH 01/10] added boolean disables and added test_metrics.csv generation --- .../operator/lowcode/forecast/model/arima.py | 10 +- .../lowcode/forecast/model/automlx.py | 8 +- .../operator/lowcode/forecast/model/autots.py | 10 +- .../lowcode/forecast/model/base_model.py | 345 ++++++++++-------- .../lowcode/forecast/model/neuralprophet.py | 10 +- .../lowcode/forecast/model/prophet.py | 10 +- .../lowcode/forecast/operator_config.py | 18 +- .../forecast/test_model_base_model.py | 209 ++++++++++- 8 files changed, 436 insertions(+), 184 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/model/arima.py b/ads/opctl/operator/lowcode/forecast/model/arima.py index f10dbd104..ad4cf1684 100644 --- a/ads/opctl/operator/lowcode/forecast/model/arima.py +++ b/ads/opctl/operator/lowcode/forecast/model/arima.py @@ -11,11 +11,17 @@ from .. import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig class ArimaOperatorModel(ForecastOperatorBaseModel): """Class representing ARIMA operator model.""" + def __init__(self, config: ForecastOperatorConfig): + super().__init__(config) + self.train_metrics = False + self.forecast_col_name = "yhat" + def _build_model(self) -> pd.DataFrame: full_data_dict = self.full_data_dict @@ -153,8 +159,6 @@ def _generate_report(self): "it predicts future values based on past values." ) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = False ds_column_series = self.data[self.spec.datetime_column.name] ds_forecast_col = self.outputs[0].index ci_col_names = ["yhat_lower", "yhat_upper"] @@ -162,8 +166,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/automlx.py b/ads/opctl/operator/lowcode/forecast/model/automlx.py index 9cd458183..8a01cc958 100644 --- a/ads/opctl/operator/lowcode/forecast/model/automlx.py +++ b/ads/opctl/operator/lowcode/forecast/model/automlx.py @@ -29,6 +29,8 @@ def __init__(self, config: ForecastOperatorConfig): super().__init__(config) self.global_explanation = {} self.local_explanation = {} + self.train_metrics = False + self.forecast_col_name = "yhat" @runtime_dependency( module="automl", @@ -211,7 +213,7 @@ def _generate_report(self): all_sections = [selected_models_text, selected_models_section] - if self.spec.explain: + if self.spec.generate_explanations: # If the key is present, call the "explain_model" method self.explain_model() @@ -257,8 +259,6 @@ def _generate_report(self): "high-quality features in your dataset, which are then provided for further processing." ) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = False ds_column_series = self.data[self.spec.datetime_column.name] ds_forecast_col = self.outputs[0]["ds"] ci_col_names = ["yhat_lower", "yhat_upper"] @@ -266,8 +266,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/autots.py b/ads/opctl/operator/lowcode/forecast/model/autots.py index 612fe73db..659757d4f 100644 --- a/ads/opctl/operator/lowcode/forecast/model/autots.py +++ b/ads/opctl/operator/lowcode/forecast/model/autots.py @@ -12,6 +12,7 @@ from .. import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig from ads.common.decorator.runtime_dependency import runtime_dependency @@ -22,6 +23,11 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel): """Class representing AutoTS operator model.""" + def __init__(self, config: ForecastOperatorConfig): + super().__init__(config) + self.train_metrics = False + self.forecast_col_name = "yhat" + @runtime_dependency( module="autots", err_msg="Please run `pip3 install autots` to install the required dependencies for autots.", @@ -250,8 +256,6 @@ def _generate_report(self) -> tuple: ) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = False ds_column_series = pd.to_datetime(self.data[self.spec.datetime_column.name]) ds_forecast_col = self.outputs[0].index @@ -260,8 +264,6 @@ def _generate_report(self) -> tuple: return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index 3ff673d5b..b678377d6 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -69,167 +69,176 @@ def generate_report(self): result_df = self._build_model() elapsed_time = time.time() - start_time - # build the report - ( - model_description, - other_sections, - forecast_col_name, - train_metrics, - ds_column_series, - ds_forecast_col, - ci_col_names, - ) = self._generate_report() - + # Generate metrics + summary_metrics = None + test_data = None + self.eval_metrics = None + + if self.spec.generate_report or self.spec.generate_metrics: + if self.train_metrics: + self.eval_metrics = utils.evaluate_metrics( + self.target_columns, + self.data, + self.outputs, + target_col=self.forecast_col_name, + ) + if self.spec.test_data: + ( + self.test_eval_metrics, + summary_metrics, + test_data, + ) = self._test_evaluate_metrics( + target_columns=self.target_columns, + test_filename=self.spec.test_data.url, + outputs=self.outputs, + target_col=self.forecast_col_name, + elapsed_time=elapsed_time, + ) report_sections = [] - title_text = dp.Text("# Forecast Report") - - md_columns = " * ".join([f"{x} \n" for x in self.target_columns]) - first_10_rows_blocks = [ - dp.DataTable( - df.head(10).rename({col: self.spec.target_column}, axis=1), - caption="Start", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - - last_10_rows_blocks = [ - dp.DataTable( - df.tail(10).rename({col: self.spec.target_column}, axis=1), - caption="End", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - - data_summary_blocks = [ - dp.DataTable( - df.rename({col: self.spec.target_column}, axis=1).describe(), - caption="Summary Statistics", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - summary = dp.Blocks( - dp.Select( - blocks=[ - dp.Group( - dp.Text(f"You selected the **`{self.spec.model}`** model."), - model_description, - dp.Text( - "Based on your dataset, you could have also selected " - f"any of the models: `{'`, `'.join(SupportedModels.keys())}`." - ), + + if self.spec.generate_report: + # build the report + ( + model_description, + other_sections, + ds_column_series, + ds_forecast_col, + ci_col_names, + ) = self._generate_report() + + title_text = dp.Text("# Forecast Report") + + md_columns = " * ".join([f"{x} \n" for x in self.target_columns]) + first_10_rows_blocks = [ + dp.DataTable( + df.head(10).rename({col: self.spec.target_column}, axis=1), + caption="Start", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + + last_10_rows_blocks = [ + dp.DataTable( + df.tail(10).rename({col: self.spec.target_column}, axis=1), + caption="End", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + + data_summary_blocks = [ + dp.DataTable( + df.rename({col: self.spec.target_column}, axis=1).describe(), + caption="Summary Statistics", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + summary = dp.Blocks( + dp.Select( + blocks=[ dp.Group( - dp.BigNumber( - heading="Analysis was completed in ", - value=utils.human_time_friendly(elapsed_time), - ), - dp.BigNumber( - heading="Starting time index", - value=ds_column_series.min().strftime( - "%B %d, %Y" - ), # "%r" # TODO: Figure out a smarter way to format - ), - dp.BigNumber( - heading="Ending time index", - value=ds_column_series.max().strftime( - "%B %d, %Y" - ), # "%r" # TODO: Figure out a smarter way to format + dp.Text(f"You selected the **`{self.spec.model}`** model."), + model_description, + dp.Text( + "Based on your dataset, you could have also selected " + f"any of the models: `{'`, `'.join(SupportedModels.keys())}`." ), - dp.BigNumber( - heading="Num series", value=len(self.target_columns) + dp.Group( + dp.BigNumber( + heading="Analysis was completed in ", + value=utils.human_time_friendly(elapsed_time), + ), + dp.BigNumber( + heading="Starting time index", + value=ds_column_series.min().strftime( + "%B %d, %Y" + ), # "%r" # TODO: Figure out a smarter way to format + ), + dp.BigNumber( + heading="Ending time index", + value=ds_column_series.max().strftime( + "%B %d, %Y" + ), # "%r" # TODO: Figure out a smarter way to format + ), + dp.BigNumber( + heading="Num series", value=len(self.target_columns) + ), + columns=4, ), - columns=4, + dp.Text("### First 10 Rows of Data"), + dp.Select(blocks=first_10_rows_blocks) + if len(first_10_rows_blocks) > 1 + else first_10_rows_blocks[0], + dp.Text("----"), + dp.Text("### Last 10 Rows of Data"), + dp.Select(blocks=last_10_rows_blocks) + if len(last_10_rows_blocks) > 1 + else last_10_rows_blocks[0], + dp.Text("### Data Summary Statistics"), + dp.Select(blocks=data_summary_blocks) + if len(data_summary_blocks) > 1 + else data_summary_blocks[0], + label="Summary", ), - dp.Text("### First 10 Rows of Data"), - dp.Select(blocks=first_10_rows_blocks) - if len(first_10_rows_blocks) > 1 - else first_10_rows_blocks[0], - dp.Text("----"), - dp.Text("### Last 10 Rows of Data"), - dp.Select(blocks=last_10_rows_blocks) - if len(last_10_rows_blocks) > 1 - else last_10_rows_blocks[0], - dp.Text("### Data Summary Statistics"), - dp.Select(blocks=data_summary_blocks) - if len(data_summary_blocks) > 1 - else data_summary_blocks[0], - label="Summary", - ), - dp.Text( - "The following report compares a variety of metrics and plots " - f"for your target columns: \n {md_columns}.\n", - label="Target Columns", - ), - ] - ), - ) - - train_metric_sections = [] - if train_metrics: - self.eval_metrics = utils.evaluate_metrics( - self.target_columns, - self.data, - self.outputs, - target_col=forecast_col_name, + dp.Text( + "The following report compares a variety of metrics and plots " + f"for your target columns: \n {md_columns}.\n", + label="Target Columns", + ), + ] + ), ) - sec6_text = dp.Text(f"## Historical Data Evaluation Metrics") - sec6 = dp.DataTable(self.eval_metrics) - train_metric_sections = [sec6_text, sec6] - test_eval_metrics = [] - test_data = None - if self.spec.test_data: - ( - self.test_eval_metrics, - summary_metrics, - test_data, - ) = self._test_evaluate_metrics( - target_columns=self.target_columns, - test_filename=self.spec.test_data.url, - outputs=self.outputs, - target_col=forecast_col_name, - elapsed_time=elapsed_time, - ) - sec7_text = dp.Text(f"## Holdout Data Evaluation Metrics") - sec7 = dp.DataTable(self.test_eval_metrics) + train_metric_sections = [] + if self.train_metrics: + sec6_text = dp.Text(f"## Historical Data Evaluation Metrics") + sec6 = dp.DataTable(self.eval_metrics) + train_metric_sections = [sec6_text, sec6] - sec8_text = dp.Text(f"## Holdout Data Summary Metrics") - sec8 = dp.DataTable(summary_metrics) + test_eval_metrics = [] + test_data = None + if self.spec.test_data: + sec7_text = dp.Text(f"## Holdout Data Evaluation Metrics") + sec7 = dp.DataTable(self.test_eval_metrics) - test_eval_metrics = [sec7_text, sec7, sec8_text, sec8] + sec8_text = dp.Text(f"## Holdout Data Summary Metrics") + sec8 = dp.DataTable(summary_metrics) - forecast_text = dp.Text(f"## Forecasted Data Overlaying Historical") - forecast_sec = utils.get_forecast_plots( - self.data, - self.outputs, - self.target_columns, - test_data=test_data, - forecast_col_name=forecast_col_name, - ds_col=ds_column_series, - ds_forecast_col=ds_forecast_col, - ci_col_names=ci_col_names, - ci_interval_width=self.spec.confidence_interval_width, - ) - forecast_plots = [forecast_text, forecast_sec] - - yaml_appendix_title = dp.Text(f"## Reference: YAML File") - yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml") - report_sections = ( - [title_text, summary] - + forecast_plots - + other_sections - + test_eval_metrics - + train_metric_sections - + [yaml_appendix_title, yaml_appendix] - ) + test_eval_metrics = [sec7_text, sec7, sec8_text, sec8] + + forecast_text = dp.Text(f"## Forecasted Data Overlaying Historical") + forecast_sec = utils.get_forecast_plots( + self.data, + self.outputs, + self.target_columns, + test_data=test_data, + forecast_col_name=self.forecast_col_name, + ds_col=ds_column_series, + ds_forecast_col=ds_forecast_col, + ci_col_names=ci_col_names, + ci_interval_width=self.spec.confidence_interval_width, + ) + forecast_plots = [forecast_text, forecast_sec] + + yaml_appendix_title = dp.Text(f"## Reference: YAML File") + yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml") + report_sections = ( + [title_text, summary] + + forecast_plots + + other_sections + + test_eval_metrics + + train_metric_sections + + [yaml_appendix_title, yaml_appendix] + ) # save the report and result CSV self._save_report( report_sections=report_sections, result_df=result_df, - metrics_df=self.test_eval_metrics, + metrics_df=self.eval_metrics, + test_metrics_df=self.test_eval_metrics, ) def _load_data(self): @@ -374,7 +383,11 @@ def _test_evaluate_metrics( return total_metrics, summary_metrics, data def _save_report( - self, report_sections: Tuple, result_df: pd.DataFrame, metrics_df: pd.DataFrame + self, + report_sections: Tuple, + result_df: pd.DataFrame, + metrics_df: pd.DataFrame, + test_metrics_df: pd.DataFrame, ): """Saves resulting reports to the given folder.""" import datapane as dp @@ -388,17 +401,19 @@ def _save_report( output_dir ) ) - # datapane html report - with tempfile.TemporaryDirectory() as temp_dir: - report_local_path = os.path.join(temp_dir, "___report.html") - dp.save_report(report_sections, report_local_path) - with open(report_local_path) as f1: - with fsspec.open( - os.path.join(output_dir, self.spec.report_file_name), - "w", - **default_signer(), - ) as f2: - f2.write(f1.read()) + + if self.spec.generate_report: + # datapane html report + with tempfile.TemporaryDirectory() as temp_dir: + report_local_path = os.path.join(temp_dir, "___report.html") + dp.save_report(report_sections, report_local_path) + with open(report_local_path) as f1: + with fsspec.open( + os.path.join(output_dir, self.spec.report_file_name), + "w", + **default_signer(), + ) as f2: + f2.write(f1.read()) # forecast csv report utils._write_data( @@ -409,7 +424,7 @@ def _save_report( ) # metrics csv report - if metrics_df is not None: + if self.spec.generate_metrics and metrics_df is not None: utils._write_data( data=metrics_df.rename_axis("metrics").reset_index(), filename=os.path.join(output_dir, self.spec.metrics_filename), @@ -418,8 +433,18 @@ def _save_report( index=False, ) + # test_metrics csv report + if self.spec.generate_metrics and test_metrics_df is not None: + utils._write_data( + data=test_metrics_df.rename_axis("metrics").reset_index(), + filename=os.path.join(output_dir, self.spec.test_metrics_filename), + format="csv", + storage_options=default_signer(), + index=False, + ) + logger.warn( - f"The report has been successfully " + f"The outputs have been successfully " f"generated and placed to the: {output_dir}." ) diff --git a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py index e0c21a584..419f2b89a 100644 --- a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +++ b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py @@ -25,6 +25,7 @@ from ..const import DEFAULT_TRIALS from .. import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig def _get_np_metrics_dict(selected_metric): @@ -63,6 +64,11 @@ def _fit_model(data, params, additional_regressors, select_metric): class NeuralProphetOperatorModel(ForecastOperatorBaseModel): """Class representing NeuralProphet operator model.""" + def __init__(self, config: ForecastOperatorConfig): + super().__init__(config) + self.train_metrics = True + self.forecast_col_name = "yhat1" + def _build_model(self) -> pd.DataFrame: from neuralprophet import NeuralProphet @@ -327,8 +333,6 @@ def _generate_report(self): "Facebook Prophet and AR-Net." ) other_sections = all_sections - forecast_col_name = "yhat1" - train_metrics = True ds_column_series = self.data["ds"] ds_forecast_col = self.outputs[0]["ds"] ci_col_names = None @@ -336,8 +340,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/prophet.py b/ads/opctl/operator/lowcode/forecast/model/prophet.py index f4b220927..fc4301014 100644 --- a/ads/opctl/operator/lowcode/forecast/model/prophet.py +++ b/ads/opctl/operator/lowcode/forecast/model/prophet.py @@ -12,6 +12,7 @@ from ..const import DEFAULT_TRIALS from .. import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig def _add_unit(num, unit): @@ -31,6 +32,11 @@ def _fit_model(data, params, additional_regressors): class ProphetOperatorModel(ForecastOperatorBaseModel): """Class representing Prophet operator model.""" + def __init__(self, config: ForecastOperatorConfig): + super().__init__(config) + self.train_metrics = True + self.forecast_col_name = "yhat" + def _build_model(self) -> pd.DataFrame: from prophet import Prophet from prophet.diagnostics import cross_validation, performance_metrics @@ -306,8 +312,6 @@ def _generate_report(self): "data and shifts in the trend, and typically handles outliers well." ) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = True ds_column_series = self.data["ds"] ds_forecast_col = self.outputs[0]["ds"] ci_col_names = ["yhat_lower", "yhat_upper"] @@ -315,8 +319,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/operator_config.py b/ads/opctl/operator/lowcode/forecast/operator_config.py index db97afdec..75a544568 100644 --- a/ads/opctl/operator/lowcode/forecast/operator_config.py +++ b/ads/opctl/operator/lowcode/forecast/operator_config.py @@ -86,13 +86,16 @@ class ForecastOperatorSpec(DataClassSerializable): report_title: str = None report_theme: str = None metrics_filename: str = None + test_metrics_filename: str = None forecast_filename: str = None target_column: str = None preprocessing: bool = None datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn) target_category_columns: List[str] = field(default_factory=list) horizon: Horizon = field(default_factory=Horizon) - explain: bool = None + generate_report: bool = None + generate_metrics: bool = None + generate_explanations: bool = None model: str = None model_kwargs: Dict = field(default_factory=dict) confidence_interval_width: float = None @@ -107,9 +110,20 @@ def __post_init__(self): self.preprocessing = ( self.preprocessing if self.preprocessing is not None else True ) - self.explain = self.explain if self.explain is not None else False + self.generate_report = ( + self.generate_report if self.generate_report is not None else True + ) + self.generate_metrics = ( + self.generate_metrics if self.generate_metrics is not None else True + ) + self.generate_explanations = ( + self.generate_explanations + if self.generate_explanations is not None + else False + ) self.report_theme = self.report_theme or "light" self.metrics_filename = self.metrics_filename or "metrics.csv" + self.test_metrics_filename = self.test_metrics_filename or "test_metrics.csv" self.forecast_filename = self.forecast_filename or "forecast.csv" self.target_column = self.target_column or "Sales" self.model_kwargs = self.model_kwargs or dict() diff --git a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py index 3d5b62941..39735c935 100644 --- a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py +++ b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py @@ -5,7 +5,214 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -class TestForecastOperatorBaseModel: +import unittest +from unittest.mock import patch, Mock +import pandas as pd +import datapane as dp +from ads.opctl.operator.common.utils import _build_image, _parse_input_args +from ads.opctl.operator.lowcode.forecast.model.prophet import ProphetOperatorModel +from ads.opctl.operator.lowcode.forecast.model.automlx import AutoMLXOperatorModel +from ads.opctl.operator.lowcode.forecast.model.base_model import ( + ForecastOperatorBaseModel, +) +from ads.opctl.operator.lowcode.forecast.operator_config import ( + ForecastOperatorConfig, + ForecastOperatorSpec, + TestData, + DateTimeColumn, + Horizon, + OutputDirectory, +) +from ads.opctl.operator.lowcode.forecast.const import SupportedMetrics + + +class TestForecastOperatorBaseModel(unittest.TestCase): """Tests the base class for the forecasting models""" pass + + def setUp(self): + self.target_columns = ["Sales_Product Group 107", "Sales_Product Group 108"] + self.target_category_columns = ["PPG_Code"] + self.test_filename = "test.csv" + self.full_data_dict = { + "Sales_Product Group 107": pd.DataFrame( + { + "ds": ["2020-10-31", "2020-11-07"], + "yhat": [1569.536030, 1568.052261], + } + ), + "Sales_Product Group 108": pd.DataFrame( + { + "ds": ["2020-10-31", "2020-11-07"], + "yhat": [1569.536030, 1568.052261], + } + ), + } + self.outputs = [ + pd.DataFrame( + { + "ds": [ + "2020-10-31", + "2020-11-07", + "2020-11-14", + "2020-11-21", + "2020-11-28", + ], + "yhat": [ + 1569.536030, + 1568.052261, + 1566.568493, + 1565.084725, + 1563.600957, + ], + } + ), + pd.DataFrame( + { + "ds": [ + "2020-10-31", + "2020-11-07", + "2020-11-14", + "2020-11-21", + "2020-11-28", + ], + "yhat": [ + 1284.534104, + 1269.692458, + 1254.850813, + 1240.009167, + 1225.167521, + ], + } + ), + ] + self.data = pd.DataFrame({"last_day_of_week": ["2020-10-31", "2020-11-07"]}) + self.target_col = "yhat" + self.datetime_column_name = "last_day_of_week" + self.original_target_column = "Sales" + self.eval_metrics = pd.DataFrame( + {"Sales_Product Group 107": [25.07]}, index=["sMAPE"] + ) + spec = Mock(spec=ForecastOperatorSpec) + spec.target_column = self.target_col + spec.target_category_columns = self.target_category_columns + spec.target_column = self.original_target_column + spec.test_data = Mock(spec=TestData) + spec.datetime_column = Mock(spec=DateTimeColumn) + spec.datetime_column.name = self.datetime_column_name + spec.datetime_column.format = None + spec.horizon = Mock(spec=Horizon) + spec.horizon.periods = 3 + spec.tuning = None + spec.output_directory = Mock(spec=OutputDirectory) + spec.output_directory.url = "URL" + spec.forecast_filename = "forecast" + spec.metrics_filename = "metrics" + spec.test_metrics_filename = "test_metrics" + spec.report_file_name = "report" + + config = Mock(spec=ForecastOperatorConfig) + config.spec = spec + + self.config = config + + @patch("datapane.save_report") + @patch("ads.opctl.operator.lowcode.forecast.utils.get_forecast_plots") + @patch("ads.opctl.operator.lowcode.forecast.utils.evaluate_metrics") + @patch("ads.opctl.operator.lowcode.forecast.utils._write_data") + @patch( + "ads.opctl.operator.lowcode.forecast.model.base_model.ForecastOperatorBaseModel._test_evaluate_metrics" + ) + @patch( + "ads.opctl.operator.lowcode.forecast.model.base_model.ForecastOperatorBaseModel._load_data" + ) + @patch( + "ads.opctl.operator.lowcode.forecast.model.prophet.ProphetOperatorModel._build_model" + ) + @patch( + "ads.opctl.operator.lowcode.forecast.model.prophet.ProphetOperatorModel._generate_report" + ) + @patch("ads.opctl.operator.lowcode.forecast.model.base_model.open") + @patch("fsspec.open") + def test_boolean_disable( + self, + mock_fsspec_open, + mock_open, + mock__generate_report, + mock__build_model, + mock__load_data, + mock__test_evaluate_metrics, + mock__write_data, + mock_evaluate_metrics, + mock_get_forecast_plots, + mock_save_report, + ): + mock__test_evaluate_metrics.return_value = (pd.DataFrame(), None, None) + mock__generate_report.return_value = ( + dp.Text("Description"), + [dp.Text("Other Sections")], + pd.to_datetime(self.data["last_day_of_week"]), + None, + None, + ) + mock__load_data.return_value = None + mock__build_model.return_value = pd.DataFrame() + mock_evaluate_metrics.return_value = self.eval_metrics + mock_get_forecast_plots = dp.Text("Random Text") + + self.config.spec.generate_metrics = True + self.config.spec.generate_report = False + + prophet = ProphetOperatorModel(self.config) + prophet.target_columns = self.target_columns + prophet.full_data_dict = self.full_data_dict + + prophet.generate_report() + + # Metrics are generated, Report is not generated + mock__test_evaluate_metrics.assert_called_once() + mock_evaluate_metrics.assert_called_once() + self.assertTrue(mock_save_report.call_count == 0) + self.assertTrue(mock__write_data.call_count == 3) + + mock__test_evaluate_metrics.reset_mock() + mock_evaluate_metrics.reset_mock() + mock__write_data.reset_mock() + mock_save_report.reset_mock() + + self.config.spec.generate_metrics = False + self.config.spec.generate_report = True + prophet.generate_report() + + # Metrics are generated to be included in report but not saved, Report is generated + mock__test_evaluate_metrics.assert_called_once() + mock_evaluate_metrics.assert_called_once() + self.assertTrue(mock_save_report.call_count == 1) + self.assertTrue(mock__write_data.call_count == 1) + + @patch( + "ads.opctl.operator.lowcode.forecast.model.automlx.AutoMLXOperatorModel.explain_model" + ) + def test_boolean_disable_explanations(self, mock_explain_model): + self.config.spec.generate_explanations = False + + automlx = AutoMLXOperatorModel(self.config) + automlx.outputs = self.outputs + automlx.full_data_dict = {} + automlx.data = self.data + automlx.local_explanation = {"dummy": pd.DataFrame({"pt1": [1, 2, 3]})} + automlx._generate_report() + + # Explanations are not generated + mock_explain_model.assert_not_called() + + self.config.spec.generate_explanations = True + automlx._generate_report() + + # Explanations are generated + mock_explain_model.assert_called_once() + + +if __name__ == "__main__": + unittest.main() From 1660455453b9e6e3d09f7569d90e1c48e982b04e Mon Sep 17 00:00:00 2001 From: govarsha Date: Mon, 30 Oct 2023 16:23:16 +0530 Subject: [PATCH 02/10] updated metrics_filename, test_metrics_filename, forecast_filename in schema.yaml --- .../operator/lowcode/forecast/schema.yaml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/schema.yaml b/ads/opctl/operator/lowcode/forecast/schema.yaml index c20611d5e..6bf494e43 100644 --- a/ads/opctl/operator/lowcode/forecast/schema.yaml +++ b/ads/opctl/operator/lowcode/forecast/schema.yaml @@ -184,12 +184,24 @@ spec: allowed: - light - dark - report_metrics_name: + metrics_filename: required: false type: string - default: report.csv + default: metrics.csv meta: - description: "Placed into output_directory location. Defaults to report.csv" + description: "Placed into output_directory location. Defaults to metrics.csv" + test_metrics_filename: + required: false + type: string + default: test_metrics.csv + meta: + description: "Placed into output_directory location. Defaults to test_metrics.csv" + forecast_filename: + required: false + type: string + default: forecast.csv + meta: + description: "Placed into output_directory location. Defaults to forecast.csv" target_column: type: string From 6824a571857cc528feddee27f6a04581324c7706 Mon Sep 17 00:00:00 2001 From: govarsha Date: Mon, 30 Oct 2023 16:41:10 +0530 Subject: [PATCH 03/10] changed report_file_name to report_filename --- ads/opctl/operator/lowcode/forecast/model/base_model.py | 2 +- ads/opctl/operator/lowcode/forecast/operator_config.py | 4 ++-- ads/opctl/operator/lowcode/forecast/schema.yaml | 2 +- .../user_guide/operators/forecasting_operator/examples.rst | 2 +- .../with_extras/operator/forecast/test_model_base_model.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index b678377d6..61f57c6e6 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -409,7 +409,7 @@ def _save_report( dp.save_report(report_sections, report_local_path) with open(report_local_path) as f1: with fsspec.open( - os.path.join(output_dir, self.spec.report_file_name), + os.path.join(output_dir, self.spec.report_filename), "w", **default_signer(), ) as f2: diff --git a/ads/opctl/operator/lowcode/forecast/operator_config.py b/ads/opctl/operator/lowcode/forecast/operator_config.py index 75a544568..033dab565 100644 --- a/ads/opctl/operator/lowcode/forecast/operator_config.py +++ b/ads/opctl/operator/lowcode/forecast/operator_config.py @@ -82,7 +82,7 @@ class ForecastOperatorSpec(DataClassSerializable): additional_data: InputData = field(default_factory=InputData) test_data: TestData = field(default_factory=TestData) output_directory: OutputDirectory = field(default_factory=OutputDirectory) - report_file_name: str = None + report_filename: str = None report_title: str = None report_theme: str = None metrics_filename: str = None @@ -106,7 +106,7 @@ def __post_init__(self): """Adjusts the specification details.""" self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower() self.confidence_interval_width = self.confidence_interval_width or 0.80 - self.report_file_name = self.report_file_name or "report.html" + self.report_filename = self.report_filename or "report.html" self.preprocessing = ( self.preprocessing if self.preprocessing is not None else True ) diff --git a/ads/opctl/operator/lowcode/forecast/schema.yaml b/ads/opctl/operator/lowcode/forecast/schema.yaml index 6bf494e43..7d8a3af3f 100644 --- a/ads/opctl/operator/lowcode/forecast/schema.yaml +++ b/ads/opctl/operator/lowcode/forecast/schema.yaml @@ -168,7 +168,7 @@ spec: type: dict type: dict - report_file_name: + report_filename: required: false type: string default: report.html diff --git a/docs/source/user_guide/operators/forecasting_operator/examples.rst b/docs/source/user_guide/operators/forecasting_operator/examples.rst index 513a8f118..e76a76dfa 100644 --- a/docs/source/user_guide/operators/forecasting_operator/examples.rst +++ b/docs/source/user_guide/operators/forecasting_operator/examples.rst @@ -64,7 +64,7 @@ The yaml can also be maximally stated as follows: model: automlx model_kwargs: preprocessing: true - report_file_name: report.html + report_filename: report.html report_theme: light report_title: report tuning: diff --git a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py index 39735c935..b774d038a 100644 --- a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py +++ b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py @@ -110,7 +110,7 @@ def setUp(self): spec.forecast_filename = "forecast" spec.metrics_filename = "metrics" spec.test_metrics_filename = "test_metrics" - spec.report_file_name = "report" + spec.report_filename = "report" config = Mock(spec=ForecastOperatorConfig) config.spec = spec From dccc7793d095c1cf03ad9fae1224efd061e75385 Mon Sep 17 00:00:00 2001 From: govarsha Date: Mon, 30 Oct 2023 22:17:10 +0530 Subject: [PATCH 04/10] added changes to schema.yaml --- ads/opctl/operator/lowcode/forecast/schema.yaml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/ads/opctl/operator/lowcode/forecast/schema.yaml b/ads/opctl/operator/lowcode/forecast/schema.yaml index 7d8a3af3f..0c5879810 100644 --- a/ads/opctl/operator/lowcode/forecast/schema.yaml +++ b/ads/opctl/operator/lowcode/forecast/schema.yaml @@ -215,13 +215,27 @@ spec: meta: description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true" - explain: + generate_explanations: type: boolean required: false default: false meta: description: "Explainability, both local and global, can be disabled using this flag. Defaults to false." + generate_report: + type: boolean + required: false + default: true + meta: + description: "Report file generation can be enabled using this flag. Defaults to true." + + generate_metrics: + type: boolean + required: false + default: true + meta: + description: "Metrics files generation can be enabled using this flag. Defaults to true." + datetime_column: type: dict required: true From a9417fd7f95c95d63732102f51bf9b0a049b6fcf Mon Sep 17 00:00:00 2001 From: govarsha Date: Mon, 30 Oct 2023 22:19:56 +0530 Subject: [PATCH 05/10] added comments --- ads/opctl/operator/lowcode/forecast/operator_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ads/opctl/operator/lowcode/forecast/operator_config.py b/ads/opctl/operator/lowcode/forecast/operator_config.py index 033dab565..aa99d7cfa 100644 --- a/ads/opctl/operator/lowcode/forecast/operator_config.py +++ b/ads/opctl/operator/lowcode/forecast/operator_config.py @@ -110,12 +110,15 @@ def __post_init__(self): self.preprocessing = ( self.preprocessing if self.preprocessing is not None else True ) + # For Report Generation. When user doesn't specify defaults to True self.generate_report = ( self.generate_report if self.generate_report is not None else True ) + # For Metrics files Generation. When user doesn't specify defaults to True self.generate_metrics = ( self.generate_metrics if self.generate_metrics is not None else True ) + # For Explanations Generation. When user doesn't specify defaults to False self.generate_explanations = ( self.generate_explanations if self.generate_explanations is not None From 9a8ab34d4f6ac4b89b98fa017716d49fbd172779 Mon Sep 17 00:00:00 2001 From: govarsha Date: Tue, 31 Oct 2023 16:40:46 +0530 Subject: [PATCH 06/10] added train_metrics to base_model --- ads/opctl/operator/lowcode/forecast/model/arima.py | 1 - ads/opctl/operator/lowcode/forecast/model/automlx.py | 1 - ads/opctl/operator/lowcode/forecast/model/autots.py | 1 - ads/opctl/operator/lowcode/forecast/model/base_model.py | 6 +++--- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/model/arima.py b/ads/opctl/operator/lowcode/forecast/model/arima.py index 919fa6c96..0a8c37d0d 100644 --- a/ads/opctl/operator/lowcode/forecast/model/arima.py +++ b/ads/opctl/operator/lowcode/forecast/model/arima.py @@ -19,7 +19,6 @@ class ArimaOperatorModel(ForecastOperatorBaseModel): def __init__(self, config: ForecastOperatorConfig): super().__init__(config) - self.train_metrics = False self.forecast_col_name = "yhat" def _build_model(self) -> pd.DataFrame: diff --git a/ads/opctl/operator/lowcode/forecast/model/automlx.py b/ads/opctl/operator/lowcode/forecast/model/automlx.py index e33062d2b..8f4721ec6 100644 --- a/ads/opctl/operator/lowcode/forecast/model/automlx.py +++ b/ads/opctl/operator/lowcode/forecast/model/automlx.py @@ -28,7 +28,6 @@ def __init__(self, config: ForecastOperatorConfig): super().__init__(config) self.global_explanation = {} self.local_explanation = {} - self.train_metrics = False self.forecast_col_name = "yhat" @runtime_dependency( diff --git a/ads/opctl/operator/lowcode/forecast/model/autots.py b/ads/opctl/operator/lowcode/forecast/model/autots.py index 491231074..559a43ab8 100644 --- a/ads/opctl/operator/lowcode/forecast/model/autots.py +++ b/ads/opctl/operator/lowcode/forecast/model/autots.py @@ -25,7 +25,6 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel): def __init__(self, config: ForecastOperatorConfig): super().__init__(config) - self.train_metrics = False self.forecast_col_name = "yhat" @runtime_dependency( diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index 1c0d973f6..0657b2c8b 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -57,7 +57,7 @@ def __init__(self, config: ForecastOperatorConfig): self.target_columns = ( None # This will become [target__category1__category2 ...] ) - + self.train_metrics = False self.perform_tuning = self.spec.tuning != None def generate_report(self): @@ -408,11 +408,11 @@ def _save_report( with tempfile.TemporaryDirectory() as temp_dir: report_local_path = os.path.join(temp_dir, "___report.html") dp.save_report(report_sections, report_local_path) - + report_path = os.path.join(output_dir, self.spec.report_file_name) with open(report_local_path) as f1: with fsspec.open( - report_path + report_path, "w", **( default_signer() From db4b6f5a41e0a1dd3ea04f8f10348f02375364c5 Mon Sep 17 00:00:00 2001 From: govarsha Date: Tue, 31 Oct 2023 16:43:08 +0530 Subject: [PATCH 07/10] added forecast_col_name attribute to base_model --- .../operator/lowcode/forecast/model/arima.py | 4 ---- .../lowcode/forecast/model/automlx.py | 1 - .../operator/lowcode/forecast/model/autots.py | 4 ---- .../lowcode/forecast/model/base_model.py | 1 + .../lowcode/forecast/model/prophet.py | 23 ++++--------------- 5 files changed, 6 insertions(+), 27 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/model/arima.py b/ads/opctl/operator/lowcode/forecast/model/arima.py index 0a8c37d0d..9c42d3a22 100644 --- a/ads/opctl/operator/lowcode/forecast/model/arima.py +++ b/ads/opctl/operator/lowcode/forecast/model/arima.py @@ -17,10 +17,6 @@ class ArimaOperatorModel(ForecastOperatorBaseModel): """Class representing ARIMA operator model.""" - def __init__(self, config: ForecastOperatorConfig): - super().__init__(config) - self.forecast_col_name = "yhat" - def _build_model(self) -> pd.DataFrame: full_data_dict = self.full_data_dict diff --git a/ads/opctl/operator/lowcode/forecast/model/automlx.py b/ads/opctl/operator/lowcode/forecast/model/automlx.py index 8f4721ec6..1c6bb4ea7 100644 --- a/ads/opctl/operator/lowcode/forecast/model/automlx.py +++ b/ads/opctl/operator/lowcode/forecast/model/automlx.py @@ -28,7 +28,6 @@ def __init__(self, config: ForecastOperatorConfig): super().__init__(config) self.global_explanation = {} self.local_explanation = {} - self.forecast_col_name = "yhat" @runtime_dependency( module="automl", diff --git a/ads/opctl/operator/lowcode/forecast/model/autots.py b/ads/opctl/operator/lowcode/forecast/model/autots.py index 559a43ab8..239da6d1d 100644 --- a/ads/opctl/operator/lowcode/forecast/model/autots.py +++ b/ads/opctl/operator/lowcode/forecast/model/autots.py @@ -23,10 +23,6 @@ class AutoTSOperatorModel(ForecastOperatorBaseModel): """Class representing AutoTS operator model.""" - def __init__(self, config: ForecastOperatorConfig): - super().__init__(config) - self.forecast_col_name = "yhat" - @runtime_dependency( module="autots", err_msg="Please run `pip3 install autots` to install the required dependencies for autots.", diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index 0657b2c8b..e2d5eb106 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -58,6 +58,7 @@ def __init__(self, config: ForecastOperatorConfig): None # This will become [target__category1__category2 ...] ) self.train_metrics = False + self.forecast_col_name = "yhat" self.perform_tuning = self.spec.tuning != None def generate_report(self): diff --git a/ads/opctl/operator/lowcode/forecast/model/prophet.py b/ads/opctl/operator/lowcode/forecast/model/prophet.py index 5e7889c5b..0b6940448 100644 --- a/ads/opctl/operator/lowcode/forecast/model/prophet.py +++ b/ads/opctl/operator/lowcode/forecast/model/prophet.py @@ -35,7 +35,6 @@ class ProphetOperatorModel(ForecastOperatorBaseModel): def __init__(self, config: ForecastOperatorConfig): super().__init__(config) self.train_metrics = True - self.forecast_col_name = "yhat" def _build_model(self) -> pd.DataFrame: from prophet import Prophet @@ -110,9 +109,7 @@ def objective(trial): elif unit == "Y": unit = "D" interval = interval * 365.25 - horizon = _add_unit( - int(self.spec.horizon * interval), unit=unit - ) + horizon = _add_unit(int(self.spec.horizon * interval), unit=unit) initial = _add_unit((data_i.shape[0] * interval) // 2, unit=unit) period = _add_unit((data_i.shape[0] * interval) // 4, unit=unit) @@ -209,32 +206,22 @@ def objective(trial): output_i.iloc[ : -self.spec.horizon, output_i.columns.get_loc(f"fitted_value") - ] = ( - outputs[f"{col}_{cat}"]["yhat"] - .iloc[: -self.spec.horizon] - .values - ) + ] = (outputs[f"{col}_{cat}"]["yhat"].iloc[: -self.spec.horizon].values) output_i.iloc[ -self.spec.horizon :, output_i.columns.get_loc(f"forecast_value"), ] = ( - outputs[f"{col}_{cat}"]["yhat"] - .iloc[-self.spec.horizon :] - .values + outputs[f"{col}_{cat}"]["yhat"].iloc[-self.spec.horizon :].values ) output_i.iloc[ -self.spec.horizon :, output_i.columns.get_loc(yhat_upper_name) ] = ( - outputs[f"{col}_{cat}"]["yhat_upper"] - .iloc[-self.spec.horizon :] - .values + outputs[f"{col}_{cat}"]["yhat_upper"].iloc[-self.spec.horizon :].values ) output_i.iloc[ -self.spec.horizon :, output_i.columns.get_loc(yhat_lower_name) ] = ( - outputs[f"{col}_{cat}"]["yhat_lower"] - .iloc[-self.spec.horizon :] - .values + outputs[f"{col}_{cat}"]["yhat_lower"].iloc[-self.spec.horizon :].values ) output_col = pd.concat([output_col, output_i]) From 1c4949ad7c0608dd8ceedc536e35f275ee3b586f Mon Sep 17 00:00:00 2001 From: govarsha Date: Tue, 31 Oct 2023 17:03:54 +0530 Subject: [PATCH 08/10] small fixes to be consistent with recent changes --- ads/opctl/operator/lowcode/forecast/model/base_model.py | 2 +- .../with_extras/operator/forecast/test_model_autots.py | 8 +++----- .../operator/forecast/test_model_base_model.py | 4 +--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index e2d5eb106..c57665907 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -410,7 +410,7 @@ def _save_report( report_local_path = os.path.join(temp_dir, "___report.html") dp.save_report(report_sections, report_local_path) - report_path = os.path.join(output_dir, self.spec.report_file_name) + report_path = os.path.join(output_dir, self.spec.report_filename) with open(report_local_path) as f1: with fsspec.open( report_path, diff --git a/tests/unitary/with_extras/operator/forecast/test_model_autots.py b/tests/unitary/with_extras/operator/forecast/test_model_autots.py index 267d01106..b5f53ef2a 100644 --- a/tests/unitary/with_extras/operator/forecast/test_model_autots.py +++ b/tests/unitary/with_extras/operator/forecast/test_model_autots.py @@ -21,7 +21,6 @@ ForecastOperatorSpec, TestData, DateTimeColumn, - Horizon, OutputDirectory, ) from ads.opctl.operator.lowcode.forecast.const import SupportedMetrics @@ -36,8 +35,7 @@ def setUp(self): spec = Mock(spec=ForecastOperatorSpec) spec.datetime_column = Mock(spec=DateTimeColumn) spec.datetime_column.name = "last_day_of_week" - spec.horizon = Mock(spec=Horizon) - spec.horizon.periods = 3 + spec.horizon = 3 spec.tuning = None spec.model_kwargs = {} spec.confidence_interval_width = 0.7 @@ -58,7 +56,7 @@ def test_autots_parameter_passthrough(self, mock_concat, mock_autots): # When model_kwargs does not have anything, defaults should be sent as parameters. mock_autots.assert_called_once_with( - forecast_length=self.spec.horizon.periods, + forecast_length=self.spec.horizon, frequency="infer", prediction_interval=self.spec.confidence_interval_width, max_generations=AUTOTS_MAX_GENERATION, @@ -135,7 +133,7 @@ def test_autots_parameter_passthrough(self, mock_concat, mock_autots): # All parameters in model_kwargs should be passed to autots mock_autots.assert_called_once_with( - forecast_length=self.spec.horizon.periods, + forecast_length=self.spec.horizon, frequency=self.spec.model_kwargs.get("frequency"), prediction_interval=self.spec.confidence_interval_width, max_generations=self.spec.model_kwargs.get("max_generations"), diff --git a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py index b774d038a..968d24026 100644 --- a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py +++ b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py @@ -20,7 +20,6 @@ ForecastOperatorSpec, TestData, DateTimeColumn, - Horizon, OutputDirectory, ) from ads.opctl.operator.lowcode.forecast.const import SupportedMetrics @@ -102,8 +101,7 @@ def setUp(self): spec.datetime_column = Mock(spec=DateTimeColumn) spec.datetime_column.name = self.datetime_column_name spec.datetime_column.format = None - spec.horizon = Mock(spec=Horizon) - spec.horizon.periods = 3 + spec.horizon = 3 spec.tuning = None spec.output_directory = Mock(spec=OutputDirectory) spec.output_directory.url = "URL" From 8559ebf942ef7af5673865a19fba64ecba99bd95 Mon Sep 17 00:00:00 2001 From: govarsha Date: Tue, 31 Oct 2023 17:40:39 +0530 Subject: [PATCH 09/10] fixing merge conflicts --- ads/opctl/operator/lowcode/forecast/model/base_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index c57665907..c6c0c3910 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -427,6 +427,7 @@ def _save_report( utils._write_data( data=result_df, filename=os.path.join(output_dir, self.spec.forecast_filename), + storage_options=default_signer(), format="csv", ) @@ -436,6 +437,7 @@ def _save_report( data=metrics_df.rename_axis("metrics").reset_index(), filename=os.path.join(output_dir, self.spec.metrics_filename), format="csv", + storage_options=default_signer(), index=False, ) From baff6cd534cfe5ac03e852ab4e3fe185a75d9a0d Mon Sep 17 00:00:00 2001 From: govarsha Date: Tue, 31 Oct 2023 18:14:30 +0530 Subject: [PATCH 10/10] resolving merge conflicts --- .../lowcode/forecast/model/base_model.py | 29 +++++++++++----- ads/opctl/operator/lowcode/forecast/utils.py | 33 +++++++++++-------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index c57665907..9ab4d7b7e 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -252,8 +252,16 @@ def _load_data(self): ) self.original_user_data = raw_data.copy() date_column = self.spec.datetime_column.name - freq = pd.infer_freq(raw_data[date_column].drop_duplicates().tail(5)) - self.spec.freq = freq + try: + self.spec.freq = pd.infer_freq( + raw_data[date_column].drop_duplicates().tail(5) + ) + except TypeError as e: + logger.warn( + f"Error determining frequency: {e.args}. Setting Frequency to None" + ) + logger.debug(f"Full traceback: {e}") + self.spec.freq = None utils.evaluate_model_compatibility(raw_data, self.spec) data = Transformations(raw_data, self.spec).run() self.original_total_data = data @@ -403,7 +411,12 @@ def _save_report( output_dir ) ) - # datapane html report + + if ObjectStorageDetails.is_oci_path(output_dir): + storage_options = default_signer() + else: + storage_options = dict() + if self.spec.generate_report: # datapane html report with tempfile.TemporaryDirectory() as temp_dir: @@ -415,11 +428,7 @@ def _save_report( with fsspec.open( report_path, "w", - **( - default_signer() - if ObjectStorageDetails.is_oci_path(report_path) - else {} - ), + **storage_options, ) as f2: f2.write(f1.read()) @@ -428,6 +437,7 @@ def _save_report( data=result_df, filename=os.path.join(output_dir, self.spec.forecast_filename), format="csv", + storage_options=storage_options, ) # metrics csv report @@ -436,6 +446,7 @@ def _save_report( data=metrics_df.rename_axis("metrics").reset_index(), filename=os.path.join(output_dir, self.spec.metrics_filename), format="csv", + storage_options=storage_options, index=False, ) @@ -445,7 +456,7 @@ def _save_report( data=test_metrics_df.rename_axis("metrics").reset_index(), filename=os.path.join(output_dir, self.spec.test_metrics_filename), format="csv", - storage_options=default_signer(), + storage_options=storage_options, index=False, ) diff --git a/ads/opctl/operator/lowcode/forecast/utils.py b/ads/opctl/operator/lowcode/forecast/utils.py index d062061fa..d4fed189e 100644 --- a/ads/opctl/operator/lowcode/forecast/utils.py +++ b/ads/opctl/operator/lowcode/forecast/utils.py @@ -97,8 +97,8 @@ def _build_metrics_per_horizon( ] ) - for y_true, y_pred in zip( - actuals_df.itertuples(index=False), forecasts_df.itertuples(index=False) + for i, (y_true, y_pred) in enumerate( + zip(actuals_df.itertuples(index=False), forecasts_df.itertuples(index=False)) ): y_true, y_pred = np.array(y_true), np.array(y_pred) @@ -122,7 +122,10 @@ def _build_metrics_per_horizon( SupportedMetrics.MEDIAN_WMAPE: np.median(wmapes), } - metrics_df = metrics_df.append(metrics_row, ignore_index=True) + metrics_df = pd.concat( + [metrics_df, pd.DataFrame(metrics_row, index=[data["ds"][i]])], + ignore_index=True, + ) metrics_df.set_index(data["ds"], inplace=True) @@ -483,7 +486,9 @@ def select_auto_model(columns: List[str]) -> str: return SupportedModels.AutoMLX -def evaluate_model_compatibility(data: pd.DataFrame, dataset_info: ForecastOperatorSpec): +def evaluate_model_compatibility( + data: pd.DataFrame, dataset_info: ForecastOperatorSpec +): """ Function checks if the data is compatible with the model selected @@ -498,14 +503,16 @@ def evaluate_model_compatibility(data: pd.DataFrame, dataset_info: ForecastOpera None """ - date_column = dataset_info.datetime_column.name - freq = pd.infer_freq(data[date_column].drop_duplicates().tail(5)) - freq_in_secs = to_timedelta(freq) / to_timedelta("sec") - if freq_in_secs < 3600 and dataset_info.model == SupportedModels.AutoMLX: - message = "{} requires data with a frequency of at least one hour. Please try using a different model," \ - " or select the 'auto' option.".format( - SupportedModels.AutoMLX, freq) - raise Exception(message) + if dataset_info.model == SupportedModels.AutoMLX: + date_column = dataset_info.datetime_column.name + freq = pd.infer_freq(data[date_column].drop_duplicates().tail(5)) + freq_in_secs = to_timedelta(freq) / to_timedelta("sec") + if freq_in_secs < 3600: + message = ( + "{} requires data with a frequency of at least one hour. Please try using a different model," + " or select the 'auto' option.".format(SupportedModels.AutoMLX, freq) + ) + raise Exception(message) def to_timedelta(freq: str): @@ -522,7 +529,7 @@ def to_timedelta(freq: str): timedelta """ # Add '1' in case freq doesn't have any digit - if not bool(re.search(r'\d', freq)): + if not bool(re.search(r"\d", freq)): freq = f"1{freq}" # Convert to datetime.timedelta return pd.to_timedelta(freq)