diff --git a/ads/opctl/operator/lowcode/forecast/model/arima.py b/ads/opctl/operator/lowcode/forecast/model/arima.py index ef68b4a88..2deeae274 100644 --- a/ads/opctl/operator/lowcode/forecast/model/arima.py +++ b/ads/opctl/operator/lowcode/forecast/model/arima.py @@ -115,6 +115,7 @@ def _build_model(self) -> pd.DataFrame: output_i[yhat_upper_name] = outputs[f"{col}_{cat}"]["yhat_upper"].values output_i[yhat_lower_name] = outputs[f"{col}_{cat}"]["yhat_lower"].values output_col = pd.concat([output_col, output_i]) + # output_col = output_col.sort_values(operator.ds_column).reset_index(drop=True) output_col = output_col.reset_index(drop=True) outputs_merged = pd.concat([outputs_merged, output_col], axis=1) @@ -136,12 +137,11 @@ def _generate_report(self): import datapane as dp sec5_text = dp.Text(f"## ARIMA Model Parameters") - sec5 = dp.Select( - blocks=[ - dp.HTML(m.summary().as_html(), label=self.target_columns[i]) - for i, m in enumerate(self.models) - ] - ) + blocks = [ + dp.HTML(m.summary().as_html(), label=self.target_columns[i]) + for i, m in enumerate(self.models) + ] + sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0] all_sections = [sec5_text, sec5] model_description = dp.Text( diff --git a/ads/opctl/operator/lowcode/forecast/model/autots.py b/ads/opctl/operator/lowcode/forecast/model/autots.py index aba5fb021..124e6b9f7 100644 --- a/ads/opctl/operator/lowcode/forecast/model/autots.py +++ b/ads/opctl/operator/lowcode/forecast/model/autots.py @@ -1,3 +1,9 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*-- + +# Copyright (c) 2023 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + import pandas as pd import numpy as np @@ -42,7 +48,9 @@ def _build_model(self) -> pd.DataFrame: no_negatives=False, constraint=None, ensemble=self.spec.model_kwargs.get("ensemble", "auto"), - initial_template=self.spec.model_kwargs.get("initial_template", "General+Random"), + initial_template=self.spec.model_kwargs.get( + "initial_template", "General+Random" + ), random_seed=2022, holiday_country=self.spec.model_kwargs.get("holiday_country", "US"), subset=None, @@ -52,13 +60,19 @@ def _build_model(self) -> pd.DataFrame: drop_data_older_than_periods=None, model_list=self.spec.model_kwargs.get("model_list", "multivariate"), transformer_list=self.spec.model_kwargs.get("transformer_list", "auto"), - transformer_max_depth=self.spec.model_kwargs.get("transformer_max_depth", 6), + transformer_max_depth=self.spec.model_kwargs.get( + "transformer_max_depth", 6 + ), models_mode=self.spec.model_kwargs.get("models_mode", "random"), num_validations=self.spec.model_kwargs.get("num_validations", "auto"), models_to_validate=self.spec.model_kwargs.get("models_to_validate", 0.15), max_per_model_class=None, - validation_method=self.spec.model_kwargs.get("validation_method", "backwards"), - min_allowed_train_percent=self.spec.model_kwargs.get("min_allowed_train_percent", 0.5), + validation_method=self.spec.model_kwargs.get( + "validation_method", "backwards" + ), + min_allowed_train_percent=self.spec.model_kwargs.get( + "min_allowed_train_percent", 0.5 + ), remove_leading_zeroes=False, prefill_na=None, introduce_na=None, @@ -163,6 +177,7 @@ def _build_model(self) -> pd.DataFrame: output_i[yhat_upper_name] = outputs[f"{col}_{cat}"]["yhat_upper"] output_i[yhat_lower_name] = outputs[f"{col}_{cat}"]["yhat_lower"] output_col = pd.concat([output_col, output_i]) + output_col = output_col.reset_index(drop=True) outputs_merged = pd.concat([outputs_merged, output_col], axis=1) @@ -205,19 +220,16 @@ def _generate_report(self) -> tuple: # Section 2: AutoTS Model Parameters sec2_text = dp.Text(f"## AutoTS Model Parameters") # TODO: ODSC-47612 Format the parameters better for display in report. - sec2 = dp.Select( - blocks=[ - dp.HTML( - pd.DataFrame( - [self.models.best_model_params["models"][x]["ModelParameters"]] - ).to_html(), - label=self.original_target_column + "_model_" +str(i), - ) - for i, x in enumerate( - list(self.models.best_model_params["models"].keys()) - ) - ] - ) + blocks = [ + dp.HTML( + pd.DataFrame( + [self.models.best_model_params["models"][x]["ModelParameters"]] + ).to_html(), + label=self.original_target_column + "_model_" + str(i), + ) + for i, x in enumerate(list(self.models.best_model_params["models"].keys())) + ] + sec2 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0] all_sections = [sec1_text, sec_1, sec2_text, sec2] # Model Description diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index 220963ace..b92b5c29c 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -83,6 +83,32 @@ def generate_report(self): title_text = dp.Text("# Forecast Report") md_columns = " * ".join([f"{x} \n" for x in self.target_columns]) + first_10_rows_blocks = [ + dp.DataTable( + df.head(10).rename({col: self.spec.target_column}, axis=1), + caption="Start", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + + last_10_rows_blocks = [ + dp.DataTable( + df.tail(10).rename({col: self.spec.target_column}, axis=1), + caption="End", + label=col, + ) + for col, df in self.full_data_dict.items() + ] + + data_summary_blocks = [ + dp.DataTable( + df.rename({col: self.spec.target_column}, axis=1).describe(), + caption="Summary Statistics", + label=col, + ) + for col, df in self.full_data_dict.items() + ] summary = dp.Blocks( dp.Select( blocks=[ @@ -116,45 +142,18 @@ def generate_report(self): columns=4, ), dp.Text("### First 10 Rows of Data"), - dp.Select( - blocks=[ - dp.DataTable( - df.head(10).rename( - {col: self.spec.target_column}, axis=1 - ), - caption="Start", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - ), + dp.Select(blocks=first_10_rows_blocks) + if len(first_10_rows_blocks) > 1 + else first_10_rows_blocks[0], dp.Text("----"), dp.Text("### Last 10 Rows of Data"), - dp.Select( - blocks=[ - dp.DataTable( - df.tail(10).rename( - {col: self.spec.target_column}, axis=1 - ), - caption="End", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - ), + dp.Select(blocks=last_10_rows_blocks) + if len(last_10_rows_blocks) > 1 + else last_10_rows_blocks[0], dp.Text("### Data Summary Statistics"), - dp.Select( - blocks=[ - dp.DataTable( - df.rename( - {col: self.spec.target_column}, axis=1 - ).describe(), - caption="Summary Statistics", - label=col, - ) - for col, df in self.full_data_dict.items() - ] - ), + dp.Select(blocks=data_summary_blocks) + if len(data_summary_blocks) > 1 + else data_summary_blocks[0], label="Summary", ), dp.Text( @@ -407,7 +406,7 @@ def _save_report( # metrics csv report utils._write_data( - data=metrics_df.rename_axis('metrics').reset_index(), + data=metrics_df.rename_axis("metrics").reset_index(), filename=os.path.join(output_dir, self.spec.metrics_filename), format="csv", storage_options=default_signer(), diff --git a/ads/opctl/operator/lowcode/forecast/utils.py b/ads/opctl/operator/lowcode/forecast/utils.py index d88499c4c..ad45af266 100644 --- a/ads/opctl/operator/lowcode/forecast/utils.py +++ b/ads/opctl/operator/lowcode/forecast/utils.py @@ -175,6 +175,7 @@ def _clean_data(data, target_column, datetime_column, target_category_columns=No df = pd.concat([df, data_cat_clean], axis=1) new_target_columns.append(f"{target_column}_{cat}") df = df.reset_index() + return df.fillna(0), new_target_columns raise ValueError( @@ -328,9 +329,8 @@ def evaluate_metrics(target_columns, data, outputs, target_col="yhat"): def _select_plot_list(fn, target_columns): import datapane as dp - return dp.Select( - blocks=[dp.Plot(fn(i, col), label=col) for i, col in enumerate(target_columns)] - ) + blocks = [dp.Plot(fn(i, col), label=col) for i, col in enumerate(target_columns)] + return dp.Select(blocks=blocks) if len(target_columns) > 1 else blocks[0] def _add_unit(num, unit):