Skip to content

Commit

Permalink
ODSC-47630/fix_datapane_failure_for_only_one_series (#351)
Browse files Browse the repository at this point in the history
  • Loading branch information
govarsha authored Sep 26, 2023
1 parent 5ce3453 commit e106bd9
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 63 deletions.
12 changes: 6 additions & 6 deletions ads/opctl/operator/lowcode/forecast/model/arima.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def _build_model(self) -> pd.DataFrame:
output_i[yhat_upper_name] = outputs[f"{col}_{cat}"]["yhat_upper"].values
output_i[yhat_lower_name] = outputs[f"{col}_{cat}"]["yhat_lower"].values
output_col = pd.concat([output_col, output_i])

# output_col = output_col.sort_values(operator.ds_column).reset_index(drop=True)
output_col = output_col.reset_index(drop=True)
outputs_merged = pd.concat([outputs_merged, output_col], axis=1)
Expand All @@ -136,12 +137,11 @@ def _generate_report(self):
import datapane as dp

sec5_text = dp.Text(f"## ARIMA Model Parameters")
sec5 = dp.Select(
blocks=[
dp.HTML(m.summary().as_html(), label=self.target_columns[i])
for i, m in enumerate(self.models)
]
)
blocks = [
dp.HTML(m.summary().as_html(), label=self.target_columns[i])
for i, m in enumerate(self.models)
]
sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
all_sections = [sec5_text, sec5]

model_description = dp.Text(
Expand Down
46 changes: 29 additions & 17 deletions ads/opctl/operator/lowcode/forecast/model/autots.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import pandas as pd
import numpy as np

Expand Down Expand Up @@ -42,7 +48,9 @@ def _build_model(self) -> pd.DataFrame:
no_negatives=False,
constraint=None,
ensemble=self.spec.model_kwargs.get("ensemble", "auto"),
initial_template=self.spec.model_kwargs.get("initial_template", "General+Random"),
initial_template=self.spec.model_kwargs.get(
"initial_template", "General+Random"
),
random_seed=2022,
holiday_country=self.spec.model_kwargs.get("holiday_country", "US"),
subset=None,
Expand All @@ -52,13 +60,19 @@ def _build_model(self) -> pd.DataFrame:
drop_data_older_than_periods=None,
model_list=self.spec.model_kwargs.get("model_list", "multivariate"),
transformer_list=self.spec.model_kwargs.get("transformer_list", "auto"),
transformer_max_depth=self.spec.model_kwargs.get("transformer_max_depth", 6),
transformer_max_depth=self.spec.model_kwargs.get(
"transformer_max_depth", 6
),
models_mode=self.spec.model_kwargs.get("models_mode", "random"),
num_validations=self.spec.model_kwargs.get("num_validations", "auto"),
models_to_validate=self.spec.model_kwargs.get("models_to_validate", 0.15),
max_per_model_class=None,
validation_method=self.spec.model_kwargs.get("validation_method", "backwards"),
min_allowed_train_percent=self.spec.model_kwargs.get("min_allowed_train_percent", 0.5),
validation_method=self.spec.model_kwargs.get(
"validation_method", "backwards"
),
min_allowed_train_percent=self.spec.model_kwargs.get(
"min_allowed_train_percent", 0.5
),
remove_leading_zeroes=False,
prefill_na=None,
introduce_na=None,
Expand Down Expand Up @@ -163,6 +177,7 @@ def _build_model(self) -> pd.DataFrame:
output_i[yhat_upper_name] = outputs[f"{col}_{cat}"]["yhat_upper"]
output_i[yhat_lower_name] = outputs[f"{col}_{cat}"]["yhat_lower"]
output_col = pd.concat([output_col, output_i])

output_col = output_col.reset_index(drop=True)
outputs_merged = pd.concat([outputs_merged, output_col], axis=1)

Expand Down Expand Up @@ -205,19 +220,16 @@ def _generate_report(self) -> tuple:
# Section 2: AutoTS Model Parameters
sec2_text = dp.Text(f"## AutoTS Model Parameters")
# TODO: ODSC-47612 Format the parameters better for display in report.
sec2 = dp.Select(
blocks=[
dp.HTML(
pd.DataFrame(
[self.models.best_model_params["models"][x]["ModelParameters"]]
).to_html(),
label=self.original_target_column + "_model_" +str(i),
)
for i, x in enumerate(
list(self.models.best_model_params["models"].keys())
)
]
)
blocks = [
dp.HTML(
pd.DataFrame(
[self.models.best_model_params["models"][x]["ModelParameters"]]
).to_html(),
label=self.original_target_column + "_model_" + str(i),
)
for i, x in enumerate(list(self.models.best_model_params["models"].keys()))
]
sec2 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
all_sections = [sec1_text, sec_1, sec2_text, sec2]

# Model Description
Expand Down
73 changes: 36 additions & 37 deletions ads/opctl/operator/lowcode/forecast/model/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,32 @@ def generate_report(self):
title_text = dp.Text("# Forecast Report")

md_columns = " * ".join([f"{x} \n" for x in self.target_columns])
first_10_rows_blocks = [
dp.DataTable(
df.head(10).rename({col: self.spec.target_column}, axis=1),
caption="Start",
label=col,
)
for col, df in self.full_data_dict.items()
]

last_10_rows_blocks = [
dp.DataTable(
df.tail(10).rename({col: self.spec.target_column}, axis=1),
caption="End",
label=col,
)
for col, df in self.full_data_dict.items()
]

data_summary_blocks = [
dp.DataTable(
df.rename({col: self.spec.target_column}, axis=1).describe(),
caption="Summary Statistics",
label=col,
)
for col, df in self.full_data_dict.items()
]
summary = dp.Blocks(
dp.Select(
blocks=[
Expand Down Expand Up @@ -116,45 +142,18 @@ def generate_report(self):
columns=4,
),
dp.Text("### First 10 Rows of Data"),
dp.Select(
blocks=[
dp.DataTable(
df.head(10).rename(
{col: self.spec.target_column}, axis=1
),
caption="Start",
label=col,
)
for col, df in self.full_data_dict.items()
]
),
dp.Select(blocks=first_10_rows_blocks)
if len(first_10_rows_blocks) > 1
else first_10_rows_blocks[0],
dp.Text("----"),
dp.Text("### Last 10 Rows of Data"),
dp.Select(
blocks=[
dp.DataTable(
df.tail(10).rename(
{col: self.spec.target_column}, axis=1
),
caption="End",
label=col,
)
for col, df in self.full_data_dict.items()
]
),
dp.Select(blocks=last_10_rows_blocks)
if len(last_10_rows_blocks) > 1
else last_10_rows_blocks[0],
dp.Text("### Data Summary Statistics"),
dp.Select(
blocks=[
dp.DataTable(
df.rename(
{col: self.spec.target_column}, axis=1
).describe(),
caption="Summary Statistics",
label=col,
)
for col, df in self.full_data_dict.items()
]
),
dp.Select(blocks=data_summary_blocks)
if len(data_summary_blocks) > 1
else data_summary_blocks[0],
label="Summary",
),
dp.Text(
Expand Down Expand Up @@ -407,7 +406,7 @@ def _save_report(

# metrics csv report
utils._write_data(
data=metrics_df.rename_axis('metrics').reset_index(),
data=metrics_df.rename_axis("metrics").reset_index(),
filename=os.path.join(output_dir, self.spec.metrics_filename),
format="csv",
storage_options=default_signer(),
Expand Down
6 changes: 3 additions & 3 deletions ads/opctl/operator/lowcode/forecast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ def _clean_data(data, target_column, datetime_column, target_category_columns=No
df = pd.concat([df, data_cat_clean], axis=1)
new_target_columns.append(f"{target_column}_{cat}")
df = df.reset_index()

return df.fillna(0), new_target_columns

raise ValueError(
Expand Down Expand Up @@ -328,9 +329,8 @@ def evaluate_metrics(target_columns, data, outputs, target_col="yhat"):
def _select_plot_list(fn, target_columns):
    """Build one datapane plot per target column, ready to embed in a report.

    Parameters
    ----------
    fn : callable
        Called as ``fn(i, col)`` for every position ``i`` and column name
        ``col`` in ``target_columns``; the result is handed to ``dp.Plot``.
    target_columns : iterable of str
        Column names. Each name becomes the ``label`` of its plot.

    Returns
    -------
    dp.Select or dp.Plot
        A ``dp.Select`` over all plots when two or more were built,
        otherwise the single ``dp.Plot`` on its own.

    Raises
    ------
    IndexError
        If ``target_columns`` yields no columns (there is no plot to return).
    """
    import datapane as dp

    blocks = [dp.Plot(fn(i, col), label=col) for i, col in enumerate(target_columns)]
    # Wrapping a lone block in dp.Select made the report fail for a single
    # series (ODSC-47630), so only use the selector when there is a choice.
    # Decide on len(blocks), the list actually indexed below, so the check
    # also works when target_columns is an iterable without __len__.
    if len(blocks) > 1:
        return dp.Select(blocks=blocks)
    return blocks[0]


def _add_unit(num, unit):
Expand Down

0 comments on commit e106bd9

Please sign in to comment.