diff --git a/ads/opctl/operator/lowcode/forecast/model/arima.py b/ads/opctl/operator/lowcode/forecast/model/arima.py index 86d65614e..9c42d3a22 100644 --- a/ads/opctl/operator/lowcode/forecast/model/arima.py +++ b/ads/opctl/operator/lowcode/forecast/model/arima.py @@ -11,6 +11,7 @@ from .. import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig class ArimaOperatorModel(ForecastOperatorBaseModel): @@ -149,8 +150,6 @@ def _generate_report(self): "it predicts future values based on past values." ) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = False ds_column_series = self.data[self.spec.datetime_column.name] ds_forecast_col = self.outputs[0].index ci_col_names = ["yhat_lower", "yhat_upper"] @@ -158,8 +157,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/automlx.py b/ads/opctl/operator/lowcode/forecast/model/automlx.py index 1064b8c9e..1c6bb4ea7 100644 --- a/ads/opctl/operator/lowcode/forecast/model/automlx.py +++ b/ads/opctl/operator/lowcode/forecast/model/automlx.py @@ -217,7 +217,7 @@ def _generate_report(self): all_sections = [selected_models_text, selected_models_section] - if self.spec.explain: + if self.spec.generate_explanations: # If the key is present, call the "explain_model" method self.explain_model() @@ -263,8 +263,6 @@ def _generate_report(self): "high-quality features in your dataset, which are then provided for further processing." ) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = False ds_column_series = self.data[self.spec.datetime_column.name] ds_forecast_col = self.outputs[0]["ds"] ci_col_names = ["yhat_lower", "yhat_upper"] @@ -272,8 +270,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/autots.py b/ads/opctl/operator/lowcode/forecast/model/autots.py index a3a0edbf4..239da6d1d 100644 --- a/ads/opctl/operator/lowcode/forecast/model/autots.py +++ b/ads/opctl/operator/lowcode/forecast/model/autots.py @@ -12,6 +12,7 @@ from .. import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig from ads.common.decorator.runtime_dependency import runtime_dependency @@ -261,8 +262,6 @@ def _generate_report(self) -> tuple: ) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = False ds_column_series = pd.to_datetime(self.data[self.spec.datetime_column.name]) ds_forecast_col = self.outputs[0].index @@ -271,8 +270,6 @@ def _generate_report(self) -> tuple: return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index 73fafb57c..c7e9c8dad 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -57,7 +57,8 @@ def __init__(self, config: ForecastOperatorConfig): self.target_columns = ( None # This will become [target__category1__category2 ...] 
) - + self.train_metrics = False + self.forecast_col_name = "yhat" self.perform_tuning = self.spec.tuning != None def generate_report(self): @@ -70,167 +71,176 @@ def generate_report(self): result_df = self._build_model() elapsed_time = time.time() - start_time - # build the report - ( - model_description, - other_sections, - forecast_col_name, - train_metrics, - ds_column_series, - ds_forecast_col, - ci_col_names, - ) = self._generate_report() - + # Generate metrics + summary_metrics = None + test_data = None + self.eval_metrics = None + + if self.spec.generate_report or self.spec.generate_metrics: + if self.train_metrics: + self.eval_metrics = utils.evaluate_metrics( + self.target_columns, + self.data, + self.outputs, + target_col=self.forecast_col_name, + ) + if self.spec.test_data: + ( + self.test_eval_metrics, + summary_metrics, + test_data, + ) = self._test_evaluate_metrics( + target_columns=self.target_columns, + test_filename=self.spec.test_data.url, + outputs=self.outputs, + target_col=self.forecast_col_name, + elapsed_time=elapsed_time, + ) report_sections = [] - title_text = dp.Text("# Forecast Report") - - md_columns = " * ".join([f"{x} \n" for x in self.target_columns]) - first_10_rows_blocks = [ - dp.DataTable( - df.head(10).rename({col: self.spec.target_column}, axis=1), - caption="Start", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - - last_10_rows_blocks = [ - dp.DataTable( - df.tail(10).rename({col: self.spec.target_column}, axis=1), - caption="End", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - - data_summary_blocks = [ - dp.DataTable( - df.rename({col: self.spec.target_column}, axis=1).describe(), - caption="Summary Statistics", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - summary = dp.Blocks( - dp.Select( - blocks=[ - dp.Group( - dp.Text(f"You selected the **`{self.spec.model}`** model."), - model_description, - dp.Text( - "Based on your dataset, you could have also selected " - f"any of the models: `{'`, `'.join(SupportedModels.keys())}`." - ), + + if self.spec.generate_report: + # build the report + ( + model_description, + other_sections, + ds_column_series, + ds_forecast_col, + ci_col_names, + ) = self._generate_report() + + title_text = dp.Text("# Forecast Report") + + md_columns = " * ".join([f"{x} \n" for x in self.target_columns]) + first_10_rows_blocks = [ + dp.DataTable( + df.head(10).rename({col: self.spec.target_column}, axis=1), + caption="Start", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + + last_10_rows_blocks = [ + dp.DataTable( + df.tail(10).rename({col: self.spec.target_column}, axis=1), + caption="End", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + + data_summary_blocks = [ + dp.DataTable( + df.rename({col: self.spec.target_column}, axis=1).describe(), + caption="Summary Statistics", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + summary = dp.Blocks( + dp.Select( + blocks=[ dp.Group( - dp.BigNumber( - heading="Analysis was completed in ", - value=utils.human_time_friendly(elapsed_time), + dp.Text(f"You selected the **`{self.spec.model}`** model."), + model_description, + dp.Text( + "Based on your dataset, you could have also selected " + f"any of the models: `{'`, `'.join(SupportedModels.keys())}`." 
), - dp.BigNumber( - heading="Starting time index", - value=ds_column_series.min().strftime( - "%B %d, %Y" - ), # "%r" # TODO: Figure out a smarter way to format + dp.Group( + dp.BigNumber( + heading="Analysis was completed in ", + value=utils.human_time_friendly(elapsed_time), + ), + dp.BigNumber( + heading="Starting time index", + value=ds_column_series.min().strftime( + "%B %d, %Y" + ), # "%r" # TODO: Figure out a smarter way to format + ), + dp.BigNumber( + heading="Ending time index", + value=ds_column_series.max().strftime( + "%B %d, %Y" + ), # "%r" # TODO: Figure out a smarter way to format + ), + dp.BigNumber( + heading="Num series", value=len(self.target_columns) + ), + columns=4, ), - dp.BigNumber( - heading="Ending time index", - value=ds_column_series.max().strftime( - "%B %d, %Y" - ), # "%r" # TODO: Figure out a smarter way to format - ), - dp.BigNumber( - heading="Num series", value=len(self.target_columns) - ), - columns=4, + dp.Text("### First 10 Rows of Data"), + dp.Select(blocks=first_10_rows_blocks) + if len(first_10_rows_blocks) > 1 + else first_10_rows_blocks[0], + dp.Text("----"), + dp.Text("### Last 10 Rows of Data"), + dp.Select(blocks=last_10_rows_blocks) + if len(last_10_rows_blocks) > 1 + else last_10_rows_blocks[0], + dp.Text("### Data Summary Statistics"), + dp.Select(blocks=data_summary_blocks) + if len(data_summary_blocks) > 1 + else data_summary_blocks[0], + label="Summary", ), - dp.Text("### First 10 Rows of Data"), - dp.Select(blocks=first_10_rows_blocks) - if len(first_10_rows_blocks) > 1 - else first_10_rows_blocks[0], - dp.Text("----"), - dp.Text("### Last 10 Rows of Data"), - dp.Select(blocks=last_10_rows_blocks) - if len(last_10_rows_blocks) > 1 - else last_10_rows_blocks[0], - dp.Text("### Data Summary Statistics"), - dp.Select(blocks=data_summary_blocks) - if len(data_summary_blocks) > 1 - else data_summary_blocks[0], - label="Summary", - ), - dp.Text( - "The following report compares a variety of metrics and plots " - f"for your target columns: \n {md_columns}.\n", - label="Target Columns", - ), - ] - ), - ) - - train_metric_sections = [] - if train_metrics: - self.eval_metrics = utils.evaluate_metrics( - self.target_columns, - self.data, - self.outputs, - target_col=forecast_col_name, + dp.Text( + "The following report compares a variety of metrics and plots " + f"for your target columns: \n {md_columns}.\n", + label="Target Columns", + ), + ] + ), ) - sec6_text = dp.Text(f"## Historical Data Evaluation Metrics") - sec6 = dp.DataTable(self.eval_metrics) - train_metric_sections = [sec6_text, sec6] - test_eval_metrics = [] - test_data = None - if self.spec.test_data: - ( - self.test_eval_metrics, - summary_metrics, - test_data, - ) = self._test_evaluate_metrics( - target_columns=self.target_columns, - test_filename=self.spec.test_data.url, - outputs=self.outputs, - target_col=forecast_col_name, - elapsed_time=elapsed_time, - ) - sec7_text = dp.Text(f"## Holdout Data Evaluation Metrics") - sec7 = dp.DataTable(self.test_eval_metrics) + train_metric_sections = [] + if self.train_metrics: + sec6_text = dp.Text(f"## Historical Data Evaluation Metrics") + sec6 = dp.DataTable(self.eval_metrics) + train_metric_sections = [sec6_text, sec6] - sec8_text = dp.Text(f"## Holdout Data Summary Metrics") - sec8 = dp.DataTable(summary_metrics) + test_eval_metrics = [] + if self.spec.test_data: + sec7_text = dp.Text(f"## Holdout Data Evaluation Metrics") + sec7 = dp.DataTable(self.test_eval_metrics) - test_eval_metrics = [sec7_text, sec7, sec8_text,
sec8] + sec8_text = dp.Text(f"## Holdout Data Summary Metrics") + sec8 = dp.DataTable(summary_metrics) - forecast_text = dp.Text(f"## Forecasted Data Overlaying Historical") - forecast_sec = utils.get_forecast_plots( - self.data, - self.outputs, - self.target_columns, - test_data=test_data, - forecast_col_name=forecast_col_name, - ds_col=ds_column_series, - ds_forecast_col=ds_forecast_col, - ci_col_names=ci_col_names, - ci_interval_width=self.spec.confidence_interval_width, - ) - forecast_plots = [forecast_text, forecast_sec] - - yaml_appendix_title = dp.Text(f"## Reference: YAML File") - yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml") - report_sections = ( - [title_text, summary] - + forecast_plots - + other_sections - + test_eval_metrics - + train_metric_sections - + [yaml_appendix_title, yaml_appendix] - ) + test_eval_metrics = [sec7_text, sec7, sec8_text, sec8] + + forecast_text = dp.Text(f"## Forecasted Data Overlaying Historical") + forecast_sec = utils.get_forecast_plots( + self.data, + self.outputs, + self.target_columns, + test_data=test_data, + forecast_col_name=self.forecast_col_name, + ds_col=ds_column_series, + ds_forecast_col=ds_forecast_col, + ci_col_names=ci_col_names, + ci_interval_width=self.spec.confidence_interval_width, + ) + forecast_plots = [forecast_text, forecast_sec] + + yaml_appendix_title = dp.Text(f"## Reference: YAML File") + yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml") + report_sections = ( + [title_text, summary] + + forecast_plots + + other_sections + + test_eval_metrics + + train_metric_sections + + [yaml_appendix_title, yaml_appendix] + ) # save the report and result CSV self._save_report( report_sections=report_sections, result_df=result_df, - metrics_df=self.test_eval_metrics, + metrics_df=self.eval_metrics, + test_metrics_df=self.test_eval_metrics, ) def _load_data(self): @@ -381,7 +391,11 @@ def _test_evaluate_metrics( return total_metrics, summary_metrics, data def _save_report( - self, report_sections: Tuple, result_df: pd.DataFrame, metrics_df: pd.DataFrame + self, + report_sections: Tuple, + result_df: pd.DataFrame, + metrics_df: pd.DataFrame, + test_metrics_df: pd.DataFrame, ): """Saves resulting reports to the given folder.""" import datapane as dp @@ -395,24 +409,27 @@ def _save_report( output_dir ) ) - # datapane html report + if ObjectStorageDetails.is_oci_path(output_dir): storage_options = default_signer() else: storage_options = dict() - - with tempfile.TemporaryDirectory() as temp_dir: - report_local_path = os.path.join(temp_dir, "___report.html") - dp.save_report(report_sections, report_local_path) - - report_path = os.path.join(output_dir, self.spec.report_file_name) - with open(report_local_path) as f1: - with fsspec.open( - report_path, - "w", - **storage_options, - ) as f2: - f2.write(f1.read()) + + # datapane html report + if self.spec.generate_report: + # datapane html report + with tempfile.TemporaryDirectory() as temp_dir: + report_local_path = os.path.join(temp_dir, "___report.html") + dp.save_report(report_sections, report_local_path) + + report_path = os.path.join(output_dir, self.spec.report_filename) + with open(report_local_path) as f1: + with fsspec.open( + report_path, + "w", + **storage_options, + ) as f2: + f2.write(f1.read()) # forecast csv report utils._write_data( @@ -423,7 +440,7 @@ def _save_report( ) # metrics csv report - if metrics_df is not None: + if self.spec.generate_metrics and metrics_df is not None: utils._write_data( 
data=metrics_df.rename_axis("metrics").reset_index(), filename=os.path.join(output_dir, self.spec.metrics_filename), @@ -432,8 +449,18 @@ def _save_report( index=False, ) + # test_metrics csv report + if self.spec.generate_metrics and test_metrics_df is not None: + utils._write_data( + data=test_metrics_df.rename_axis("metrics").reset_index(), + filename=os.path.join(output_dir, self.spec.test_metrics_filename), + format="csv", + storage_options=storage_options, + index=False, + ) + logger.warn( - f"The report has been successfully " + f"The outputs have been successfully " f"generated and placed to the: {output_dir}." ) diff --git a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py index 347e97a2d..484663954 100644 --- a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +++ b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py @@ -25,6 +25,7 @@ from ..const import DEFAULT_TRIALS from .. import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig def _get_np_metrics_dict(selected_metric): @@ -63,6 +64,11 @@ def _fit_model(data, params, additional_regressors, select_metric): class NeuralProphetOperatorModel(ForecastOperatorBaseModel): """Class representing NeuralProphet operator model.""" + def __init__(self, config: ForecastOperatorConfig): + super().__init__(config) + self.train_metrics = True + self.forecast_col_name = "yhat1" + def _build_model(self) -> pd.DataFrame: from neuralprophet import NeuralProphet @@ -327,8 +333,6 @@ def _generate_report(self): "Facebook Prophet and AR-Net." ) other_sections = all_sections - forecast_col_name = "yhat1" - train_metrics = True ds_column_series = self.data["ds"] ds_forecast_col = self.outputs[0]["ds"] ci_col_names = None @@ -336,8 +340,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/model/prophet.py b/ads/opctl/operator/lowcode/forecast/model/prophet.py index 26e86418d..435e2fa95 100644 --- a/ads/opctl/operator/lowcode/forecast/model/prophet.py +++ b/ads/opctl/operator/lowcode/forecast/model/prophet.py @@ -14,6 +14,7 @@ from ..const import DEFAULT_TRIALS, PROPHET_INTERNAL_DATE_COL from .. 
import utils from .base_model import ForecastOperatorBaseModel +from ..operator_config import ForecastOperatorConfig def _add_unit(num, unit): @@ -35,6 +36,7 @@ class ProphetOperatorModel(ForecastOperatorBaseModel): def __init__(self, config: ForecastOperatorConfig): super().__init__(config) + self.train_metrics = True self.global_explanation = {} self.local_explanation = {} @@ -111,9 +113,7 @@ def objective(trial): elif unit == "Y": unit = "D" interval = interval * 365.25 - horizon = _add_unit( - int(self.spec.horizon * interval), unit=unit - ) + horizon = _add_unit(int(self.spec.horizon * interval), unit=unit) initial = _add_unit((data_i.shape[0] * interval) // 2, unit=unit) period = _add_unit((data_i.shape[0] * interval) // 4, unit=unit) @@ -210,32 +210,22 @@ def objective(trial): output_i.iloc[ : -self.spec.horizon, output_i.columns.get_loc(f"fitted_value") - ] = ( - outputs[f"{col}_{cat}"]["yhat"] - .iloc[: -self.spec.horizon] - .values - ) + ] = (outputs[f"{col}_{cat}"]["yhat"].iloc[: -self.spec.horizon].values) output_i.iloc[ -self.spec.horizon :, output_i.columns.get_loc(f"forecast_value"), ] = ( - outputs[f"{col}_{cat}"]["yhat"] - .iloc[-self.spec.horizon :] - .values + outputs[f"{col}_{cat}"]["yhat"].iloc[-self.spec.horizon :].values ) output_i.iloc[ -self.spec.horizon :, output_i.columns.get_loc(yhat_upper_name) ] = ( - outputs[f"{col}_{cat}"]["yhat_upper"] - .iloc[-self.spec.horizon :] - .values + outputs[f"{col}_{cat}"]["yhat_upper"].iloc[-self.spec.horizon :].values ) output_i.iloc[ -self.spec.horizon :, output_i.columns.get_loc(yhat_lower_name) ] = ( - outputs[f"{col}_{cat}"]["yhat_lower"] - .iloc[-self.spec.horizon :] - .values + outputs[f"{col}_{cat}"]["yhat_lower"].iloc[-self.spec.horizon :].values ) output_col = pd.concat([output_col, output_i]) @@ -354,8 +344,6 @@ def _generate_report(self): "data and shifts in the trend, and typically handles outliers well." 
) other_sections = all_sections - forecast_col_name = "yhat" - train_metrics = True ds_column_series = self.data["ds"] ds_forecast_col = self.outputs[0]["ds"] ci_col_names = ["yhat_lower", "yhat_upper"] @@ -363,8 +351,6 @@ def _generate_report(self): return ( model_description, other_sections, - forecast_col_name, - train_metrics, ds_column_series, ds_forecast_col, ci_col_names, diff --git a/ads/opctl/operator/lowcode/forecast/operator_config.py b/ads/opctl/operator/lowcode/forecast/operator_config.py index 4ac526e5e..5f39af33e 100644 --- a/ads/opctl/operator/lowcode/forecast/operator_config.py +++ b/ads/opctl/operator/lowcode/forecast/operator_config.py @@ -73,18 +73,21 @@ class ForecastOperatorSpec(DataClassSerializable): additional_data: InputData = field(default_factory=InputData) test_data: TestData = field(default_factory=TestData) output_directory: OutputDirectory = field(default_factory=OutputDirectory) - report_file_name: str = None + report_filename: str = None report_title: str = None report_theme: str = None metrics_filename: str = None + test_metrics_filename: str = None forecast_filename: str = None target_column: str = None preprocessing: bool = None datetime_column: DateTimeColumn = field(default_factory=DateTimeColumn) target_category_columns: List[str] = field(default_factory=list) + generate_report: bool = None + generate_metrics: bool = None + generate_explanations: bool = None horizon: int = None freq: str = None - explain: bool = None model: str = None model_kwargs: Dict = field(default_factory=dict) confidence_interval_width: float = None @@ -95,13 +98,27 @@ def __post_init__(self): """Adjusts the specification details.""" self.metric = (self.metric or "").lower() or SupportedMetrics.SMAPE.lower() self.confidence_interval_width = self.confidence_interval_width or 0.80 - self.report_file_name = self.report_file_name or "report.html" + self.report_filename = self.report_filename or "report.html" self.preprocessing = ( self.preprocessing if self.preprocessing is not None else True ) - self.explain = self.explain if self.explain is not None else False + # For Report Generation. When user doesn't specify defaults to True + self.generate_report = ( + self.generate_report if self.generate_report is not None else True + ) + # For Metrics files Generation. When user doesn't specify defaults to True + self.generate_metrics = ( + self.generate_metrics if self.generate_metrics is not None else True + ) + # For Explanations Generation. 
When the user doesn't specify, defaults to False + self.generate_explanations = ( + self.generate_explanations + if self.generate_explanations is not None + else False + ) self.report_theme = self.report_theme or "light" self.metrics_filename = self.metrics_filename or "metrics.csv" + self.test_metrics_filename = self.test_metrics_filename or "test_metrics.csv" self.forecast_filename = self.forecast_filename or "forecast.csv" self.target_column = self.target_column or "Sales" self.model_kwargs = self.model_kwargs or dict() diff --git a/ads/opctl/operator/lowcode/forecast/schema.yaml b/ads/opctl/operator/lowcode/forecast/schema.yaml index 9245b06c8..d80893424 100644 --- a/ads/opctl/operator/lowcode/forecast/schema.yaml +++ b/ads/opctl/operator/lowcode/forecast/schema.yaml @@ -168,7 +168,7 @@ spec: type: dict type: dict - report_file_name: + report_filename: required: false type: string default: report.html @@ -184,12 +184,24 @@ spec: allowed: - light - dark - report_metrics_name: + metrics_filename: required: false type: string - default: report.csv + default: metrics.csv meta: - description: "Placed into output_directory location. Defaults to report.csv" + description: "Placed into output_directory location. Defaults to metrics.csv" + test_metrics_filename: + required: false + type: string + default: test_metrics.csv + meta: + description: "Placed into output_directory location. Defaults to test_metrics.csv" + forecast_filename: + required: false + type: string + default: forecast.csv + meta: + description: "Placed into output_directory location. Defaults to forecast.csv" target_column: type: string @@ -203,13 +215,27 @@ spec: meta: description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true" - explain: + generate_explanations: type: boolean required: false default: false meta: description: "Explainability, both local and global, can be disabled using this flag. Defaults to false." + generate_report: + type: boolean + required: false + default: true + meta: + description: "Report file generation can be disabled using this flag. Defaults to true." + + generate_metrics: + type: boolean + required: false + default: true + meta: + description: "Metrics files generation can be disabled using this flag. Defaults to true."
+ datetime_column: type: dict required: true diff --git a/docs/source/user_guide/operators/forecasting_operator/examples.rst b/docs/source/user_guide/operators/forecasting_operator/examples.rst index f54e602b7..1b9ec3aea 100644 --- a/docs/source/user_guide/operators/forecasting_operator/examples.rst +++ b/docs/source/user_guide/operators/forecasting_operator/examples.rst @@ -63,7 +63,7 @@ The yaml can also be maximally stated as follows: model: automlx model_kwargs: preprocessing: true - report_file_name: report.html + report_filename: report.html report_theme: light report_title: report tuning: diff --git a/tests/unitary/with_extras/operator/forecast/test_model_autots.py b/tests/unitary/with_extras/operator/forecast/test_model_autots.py index 267d01106..b5f53ef2a 100644 --- a/tests/unitary/with_extras/operator/forecast/test_model_autots.py +++ b/tests/unitary/with_extras/operator/forecast/test_model_autots.py @@ -21,7 +21,6 @@ ForecastOperatorSpec, TestData, DateTimeColumn, - Horizon, OutputDirectory, ) from ads.opctl.operator.lowcode.forecast.const import SupportedMetrics @@ -36,8 +35,7 @@ def setUp(self): spec = Mock(spec=ForecastOperatorSpec) spec.datetime_column = Mock(spec=DateTimeColumn) spec.datetime_column.name = "last_day_of_week" - spec.horizon = Mock(spec=Horizon) - spec.horizon.periods = 3 + spec.horizon = 3 spec.tuning = None spec.model_kwargs = {} spec.confidence_interval_width = 0.7 @@ -58,7 +56,7 @@ def test_autots_parameter_passthrough(self, mock_concat, mock_autots): # When model_kwargs does not have anything, defaults should be sent as parameters. mock_autots.assert_called_once_with( - forecast_length=self.spec.horizon.periods, + forecast_length=self.spec.horizon, frequency="infer", prediction_interval=self.spec.confidence_interval_width, max_generations=AUTOTS_MAX_GENERATION, @@ -135,7 +133,7 @@ def test_autots_parameter_passthrough(self, mock_concat, mock_autots): # All parameters in model_kwargs should be passed to autots mock_autots.assert_called_once_with( - forecast_length=self.spec.horizon.periods, + forecast_length=self.spec.horizon, frequency=self.spec.model_kwargs.get("frequency"), prediction_interval=self.spec.confidence_interval_width, max_generations=self.spec.model_kwargs.get("max_generations"), diff --git a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py index 3d5b62941..968d24026 100644 --- a/tests/unitary/with_extras/operator/forecast/test_model_base_model.py +++ b/tests/unitary/with_extras/operator/forecast/test_model_base_model.py @@ -5,7 +5,212 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -class TestForecastOperatorBaseModel: +import unittest +from unittest.mock import patch, Mock +import pandas as pd +import datapane as dp +from ads.opctl.operator.common.utils import _build_image, _parse_input_args +from ads.opctl.operator.lowcode.forecast.model.prophet import ProphetOperatorModel +from ads.opctl.operator.lowcode.forecast.model.automlx import AutoMLXOperatorModel +from ads.opctl.operator.lowcode.forecast.model.base_model import ( + ForecastOperatorBaseModel, +) +from ads.opctl.operator.lowcode.forecast.operator_config import ( + ForecastOperatorConfig, + ForecastOperatorSpec, + TestData, + DateTimeColumn, + OutputDirectory, +) +from ads.opctl.operator.lowcode.forecast.const import SupportedMetrics + + +class TestForecastOperatorBaseModel(unittest.TestCase): """Tests the base class for the 
forecasting models""" pass + + def setUp(self): + self.target_columns = ["Sales_Product Group 107", "Sales_Product Group 108"] + self.target_category_columns = ["PPG_Code"] + self.test_filename = "test.csv" + self.full_data_dict = { + "Sales_Product Group 107": pd.DataFrame( + { + "ds": ["2020-10-31", "2020-11-07"], + "yhat": [1569.536030, 1568.052261], + } + ), + "Sales_Product Group 108": pd.DataFrame( + { + "ds": ["2020-10-31", "2020-11-07"], + "yhat": [1569.536030, 1568.052261], + } + ), + } + self.outputs = [ + pd.DataFrame( + { + "ds": [ + "2020-10-31", + "2020-11-07", + "2020-11-14", + "2020-11-21", + "2020-11-28", + ], + "yhat": [ + 1569.536030, + 1568.052261, + 1566.568493, + 1565.084725, + 1563.600957, + ], + } + ), + pd.DataFrame( + { + "ds": [ + "2020-10-31", + "2020-11-07", + "2020-11-14", + "2020-11-21", + "2020-11-28", + ], + "yhat": [ + 1284.534104, + 1269.692458, + 1254.850813, + 1240.009167, + 1225.167521, + ], + } + ), + ] + self.data = pd.DataFrame({"last_day_of_week": ["2020-10-31", "2020-11-07"]}) + self.target_col = "yhat" + self.datetime_column_name = "last_day_of_week" + self.original_target_column = "Sales" + self.eval_metrics = pd.DataFrame( + {"Sales_Product Group 107": [25.07]}, index=["sMAPE"] + ) + spec = Mock(spec=ForecastOperatorSpec) + spec.target_column = self.target_col + spec.target_category_columns = self.target_category_columns + spec.target_column = self.original_target_column + spec.test_data = Mock(spec=TestData) + spec.datetime_column = Mock(spec=DateTimeColumn) + spec.datetime_column.name = self.datetime_column_name + spec.datetime_column.format = None + spec.horizon = 3 + spec.tuning = None + spec.output_directory = Mock(spec=OutputDirectory) + spec.output_directory.url = "URL" + spec.forecast_filename = "forecast" + spec.metrics_filename = "metrics" + spec.test_metrics_filename = "test_metrics" + spec.report_filename = "report" + + config = Mock(spec=ForecastOperatorConfig) + config.spec = spec + + self.config = config + + @patch("datapane.save_report") + @patch("ads.opctl.operator.lowcode.forecast.utils.get_forecast_plots") + @patch("ads.opctl.operator.lowcode.forecast.utils.evaluate_metrics") + @patch("ads.opctl.operator.lowcode.forecast.utils._write_data") + @patch( + "ads.opctl.operator.lowcode.forecast.model.base_model.ForecastOperatorBaseModel._test_evaluate_metrics" + ) + @patch( + "ads.opctl.operator.lowcode.forecast.model.base_model.ForecastOperatorBaseModel._load_data" + ) + @patch( + "ads.opctl.operator.lowcode.forecast.model.prophet.ProphetOperatorModel._build_model" + ) + @patch( + "ads.opctl.operator.lowcode.forecast.model.prophet.ProphetOperatorModel._generate_report" + ) + @patch("ads.opctl.operator.lowcode.forecast.model.base_model.open") + @patch("fsspec.open") + def test_boolean_disable( + self, + mock_fsspec_open, + mock_open, + mock__generate_report, + mock__build_model, + mock__load_data, + mock__test_evaluate_metrics, + mock__write_data, + mock_evaluate_metrics, + mock_get_forecast_plots, + mock_save_report, + ): + mock__test_evaluate_metrics.return_value = (pd.DataFrame(), None, None) + mock__generate_report.return_value = ( + dp.Text("Description"), + [dp.Text("Other Sections")], + pd.to_datetime(self.data["last_day_of_week"]), + None, + None, + ) + mock__load_data.return_value = None + mock__build_model.return_value = pd.DataFrame() + mock_evaluate_metrics.return_value = self.eval_metrics + mock_get_forecast_plots = dp.Text("Random Text") + + self.config.spec.generate_metrics = True + self.config.spec.generate_report = 
False + + prophet = ProphetOperatorModel(self.config) + prophet.target_columns = self.target_columns + prophet.full_data_dict = self.full_data_dict + + prophet.generate_report() + + # Metrics are generated, Report is not generated + mock__test_evaluate_metrics.assert_called_once() + mock_evaluate_metrics.assert_called_once() + self.assertTrue(mock_save_report.call_count == 0) + self.assertTrue(mock__write_data.call_count == 3) + + mock__test_evaluate_metrics.reset_mock() + mock_evaluate_metrics.reset_mock() + mock__write_data.reset_mock() + mock_save_report.reset_mock() + + self.config.spec.generate_metrics = False + self.config.spec.generate_report = True + prophet.generate_report() + + # Metrics are generated to be included in report but not saved, Report is generated + mock__test_evaluate_metrics.assert_called_once() + mock_evaluate_metrics.assert_called_once() + self.assertTrue(mock_save_report.call_count == 1) + self.assertTrue(mock__write_data.call_count == 1) + + @patch( + "ads.opctl.operator.lowcode.forecast.model.automlx.AutoMLXOperatorModel.explain_model" + ) + def test_boolean_disable_explanations(self, mock_explain_model): + self.config.spec.generate_explanations = False + + automlx = AutoMLXOperatorModel(self.config) + automlx.outputs = self.outputs + automlx.full_data_dict = {} + automlx.data = self.data + automlx.local_explanation = {"dummy": pd.DataFrame({"pt1": [1, 2, 3]})} + automlx._generate_report() + + # Explanations are not generated + mock_explain_model.assert_not_called() + + self.config.spec.generate_explanations = True + automlx._generate_report() + + # Explanations are generated + mock_explain_model.assert_called_once() + + +if __name__ == "__main__": + unittest.main()
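
Usage note: the spec fields renamed and added by this change (report_filename, metrics_filename, test_metrics_filename, forecast_filename, generate_report, generate_metrics, generate_explanations) are all optional. A minimal sketch of how they might appear in a forecast operator YAML, assuming the spec layout from the docs example modified above; the values shown are illustrative, not part of this change:

    spec:
      model: prophet                            # any supported model
      horizon: 3
      report_filename: report.html              # renamed from report_file_name
      metrics_filename: metrics.csv
      test_metrics_filename: test_metrics.csv   # new; written only when test data is configured
      forecast_filename: forecast.csv
      generate_report: true                     # set to false to skip the datapane HTML report
      generate_metrics: true                    # set to false to skip writing the metrics CSV files
      generate_explanations: false              # renamed from explain; defaults to false

With generate_report: false and generate_metrics: true, only the forecast, metrics, and (when test data is configured) test metrics CSVs are written; with the flags reversed, metrics are still computed so they can appear in the report, but no metrics files are saved. This mirrors the behavior exercised in test_boolean_disable above.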