Commit: Formatting changes

PGijsbers committed Dec 12, 2024
1 parent 3b7faea commit 9504820
Showing 5 changed files with 77 additions and 56 deletions.
1 change: 0 additions & 1 deletion amlb/benchmark.py
@@ -656,7 +656,6 @@ def handle_unfulfilled(message, on_auto="warn"):


class BenchmarkTask:

def __init__(self, benchmark: Benchmark, task_def, fold):
"""
17 changes: 11 additions & 6 deletions amlb/resources.py
@@ -212,7 +212,10 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None
return self._benchmark_definition(name, self.config, defaults)

def _benchmark_definition(
self, name: str, config_: Namespace, defaults: TaskConstraint | None = None
self,
name: str,
config_: Namespace,
defaults_for_task: TaskConstraint | None = None,
):
"""
:param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id.
@@ -222,8 +225,9 @@ def _benchmark_definition(
file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load(
name, config_.benchmarks.definition_dir
)
if defaults is not None:
defaults = Namespace(**dataclasses.asdict(defaults))
defaults = None
if defaults_for_task is not None:
defaults = Namespace(**dataclasses.asdict(defaults_for_task))
defaults = Namespace.merge(
defaults, file_defaults, Namespace(name="__defaults__")
)
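
For context on the rename above: `defaults_for_task` carries the per-task constraint values, which are converted to a Namespace and then merged with the defaults from the benchmark file. A minimal, self-contained sketch of that conversion, using a hypothetical constraint dataclass and plain dicts (merge precedence here is illustrative, not necessarily amlb's):

import dataclasses


@dataclasses.dataclass
class TaskConstraintSketch:  # hypothetical stand-in for amlb's TaskConstraint
    max_runtime_seconds: int = 3600
    cores: int = 8


constraint_defaults = dataclasses.asdict(TaskConstraintSketch())
file_defaults = {"max_runtime_seconds": 600}  # e.g. loaded from the benchmark YAML

# Plain-dict merge where later entries win; amlb does the equivalent step with
# Namespace.merge as shown above (its precedence may differ).
merged = {**constraint_defaults, **file_defaults, "name": "__defaults__"}
print(merged)  # {'max_runtime_seconds': 600, 'cores': 8, 'name': '__defaults__'}
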
@@ -261,7 +265,6 @@ def _add_task_defaults(task: Namespace, config_: Namespace):
if task["metric"] is None:
task["metric"] = None


if task["ec2_instance_type"] is None:
task["ec2_instance_type"] = Resources.lookup_ec2_instance_type(
config_, task.cores
@@ -311,8 +314,10 @@ def lookup_suitable_instance_size(cores_to_size: Namespace, cores: int) -> str:
if cores <= 0 or cores > max(supported_cores):
return cores_to_size.default

cores = next((c for c in sorted(supported_cores) if c >= cores), "default")
return cores_to_size[str(cores)]
best_match = next(
(str(c) for c in sorted(supported_cores) if c >= cores), "default"
)
return cores_to_size[best_match]

@staticmethod
def generate_task_identifier(task: Namespace) -> str | None:
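
To make the intent of the rewritten `lookup_suitable_instance_size` above concrete, here is a small standalone sketch with a hypothetical cores-to-size mapping (a plain dict standing in for the Namespace that amlb reads from its config):

def lookup_smallest_sufficient(cores_to_size: dict, cores: int) -> str:
    # Same idea as above: choose the smallest supported core count that still
    # covers the request, otherwise fall back to the "default" entry.
    supported_cores = [int(c) for c in cores_to_size if c != "default"]
    if cores <= 0 or cores > max(supported_cores):
        return cores_to_size["default"]
    best_match = next(
        (str(c) for c in sorted(supported_cores) if c >= cores), "default"
    )
    return cores_to_size[best_match]


sizes = {"4": "m5.xlarge", "8": "m5.2xlarge", "default": "m5.large"}  # made-up mapping
print(lookup_smallest_sufficient(sizes, 6))   # -> m5.2xlarge (8 is the smallest >= 6)
print(lookup_smallest_sufficient(sizes, 64))  # -> m5.large (more cores than any entry)
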
8 changes: 5 additions & 3 deletions frameworks/FEDOT/__init__.py
@@ -26,8 +26,10 @@ def run_fedot_tabular(dataset: Dataset, config: TaskConfig):
__file__, "exec.py", input_data=data, dataset=dataset, config=config
)


def run_fedot_timeseries(dataset: Dataset, config: TaskConfig):
from frameworks.shared.caller import run_in_venv

dataset = deepcopy(dataset)

data = dict(
@@ -43,6 +45,6 @@ def run_fedot_timeseries(dataset: Dataset, config: TaskConfig):
repeated_item_id=dataset.repeated_item_id,
)

return run_in_venv(__file__, "exec_ts.py",
input_data=data, dataset=dataset, config=config)
return run_in_venv(
__file__, "exec_ts.py", input_data=data, dataset=dataset, config=config
)
24 changes: 13 additions & 11 deletions frameworks/FEDOT/exec.py
@@ -13,11 +13,13 @@
def run(dataset, config):
log.info("\n**** FEDOT ****\n")

is_classification = config.type == 'classification'
is_classification = config.type == "classification"
scoring_metric = get_fedot_metrics(config)

training_params = {"preset": "best_quality", "n_jobs": config.cores}
training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')})
training_params.update(
{k: v for k, v in config.framework_params.items() if not k.startswith("_")}
)
n_jobs = training_params["n_jobs"]

log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \
@@ -62,15 +64,15 @@ def run(dataset, config):

def get_fedot_metrics(config):
metrics_mapping = dict(
acc='accuracy',
auc='roc_auc',
f1='f1',
logloss='neg_log_loss',
mae='mae',
mse='mse',
msle='msle',
r2='r2',
rmse='rmse',
acc="accuracy",
auc="roc_auc",
f1="f1",
logloss="neg_log_loss",
mae="mae",
mse="mse",
msle="msle",
r2="r2",
rmse="rmse",
)
scoring_metric = metrics_mapping.get(config.metric, None)

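As a side note on the reformatted `training_params.update(...)` call above: it keeps the existing convention that underscore-prefixed framework parameters stay amlb-internal, while everything else is forwarded to FEDOT. A small sketch of that split with made-up values:

framework_params = {
    "_save_artifacts": ["models"],  # amlb-internal: leading underscore, not forwarded
    "preset": "fast_train",         # forwarded, overrides the "best_quality" default
}

training_params = {"preset": "best_quality", "n_jobs": 8}
training_params.update(
    {k: v for k, v in framework_params.items() if not k.startswith("_")}
)
print(training_params)  # {'preset': 'fast_train', 'n_jobs': 8}
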
83 changes: 48 additions & 35 deletions frameworks/FEDOT/exec_ts.py
@@ -22,22 +22,28 @@ def run(dataset, config):
scoring_metric = get_fedot_metrics(config)

training_params = {"preset": "best_quality", "n_jobs": config.cores}
training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')})
training_params.update(
{k: v for k, v in config.framework_params.items() if not k.startswith("_")}
)
n_jobs = training_params["n_jobs"]

log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \
optimizing {scoring_metric}")

task = Task(
TaskTypesEnum.ts_forecasting,
TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps)
TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps),
)

train_df, test_df = load_timeseries_dataset(dataset)
id_column = dataset.id_column

max_runtime_minutes_per_ts = config.max_runtime_seconds / 60 / train_df[id_column].nunique()
log.info(f'Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series')
max_runtime_minutes_per_ts = (
config.max_runtime_seconds / 60 / train_df[id_column].nunique()
)
log.info(
f"Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series"
)

training_duration, predict_duration = 0, 0
models_count = 0
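
Since the overall benchmark budget is divided evenly across series, a quick worked example of the arithmetic above (numbers are made up): with a 3600-second budget and 12 unique item ids, each series gets 5 minutes, and the `max_pipeline_fit_time` cap set further down (one fifth of that) leaves 1 minute per candidate pipeline:

max_runtime_seconds = 3600  # hypothetical config.max_runtime_seconds
n_series = 12               # train_df[id_column].nunique() in the real run

max_runtime_minutes_per_ts = max_runtime_seconds / 60 / n_series
print(max_runtime_minutes_per_ts)      # 5.0 minutes per series
print(max_runtime_minutes_per_ts / 5)  # 1.0 minute per candidate pipeline
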
@@ -51,10 +57,12 @@ def run(dataset, config):
features=train_series,
target=train_series,
task=task,
data_type=DataTypesEnum.ts
data_type=DataTypesEnum.ts,
)

test_sub_df = test_df[test_df[id_column] == label].drop(columns=[id_column], axis=1)
test_sub_df = test_df[test_df[id_column] == label].drop(
columns=[id_column], axis=1
)
horizon = len(test_sub_df[dataset.target])

fedot = Fedot(
@@ -63,8 +71,9 @@ def run(dataset, config):
timeout=max_runtime_minutes_per_ts,
metric=scoring_metric,
seed=config.seed,
max_pipeline_fit_time=max_runtime_minutes_per_ts / 5, # fit at least 5 pipelines
**training_params
max_pipeline_fit_time=max_runtime_minutes_per_ts
/ 5, # fit at least 5 pipelines
**training_params,
)

with Timer() as training:
@@ -75,7 +84,7 @@ def run(dataset, config):
try:
prediction = fedot.forecast(train_input, horizon=horizon)
except Exception as e:
log.info(f'Pipeline crashed due to {e}. Using no-op forecasting')
log.info(f"Pipeline crashed due to {e}. Using no-op forecasting")
prediction = np.full(horizon, train_series[-1])

predict_duration += predict.duration
@@ -92,25 +101,27 @@ def run(dataset, config):
optional_columns[str(quantile)] = all_series_predictions

save_artifacts(fedot, config)
return result(output_file=config.output_predictions_file,
predictions=all_series_predictions,
truth=truth_only,
target_is_encoded=False,
models_count=models_count,
training_duration=training_duration,
predict_duration=predict_duration,
optional_columns=pd.DataFrame(optional_columns))
return result(
output_file=config.output_predictions_file,
predictions=all_series_predictions,
truth=truth_only,
target_is_encoded=False,
models_count=models_count,
training_duration=training_duration,
predict_duration=predict_duration,
optional_columns=pd.DataFrame(optional_columns),
)


def get_fedot_metrics(config):
metrics_mapping = dict(
mape='mape',
smape='smape',
mase='mase',
mse='mse',
rmse='rmse',
mae='mae',
r2='r2',
mape="mape",
smape="smape",
mase="mase",
mse="mse",
rmse="rmse",
mae="mae",
r2="r2",
)
scoring_metric = metrics_mapping.get(config.metric, None)

@@ -121,27 +132,29 @@ def get_fedot_metrics(config):


def save_artifacts(automl, config):

artifacts = config.framework_params.get('_save_artifacts', [])
if 'models' in artifacts:
artifacts = config.framework_params.get("_save_artifacts", [])
if "models" in artifacts:
try:
models_dir = output_subdir('models', config)
models_file = os.path.join(models_dir, 'model.json')
models_dir = output_subdir("models", config)
models_file = os.path.join(models_dir, "model.json")
automl.current_pipeline.save(models_file)
except Exception as e:
log.info(f"Error when saving 'models': {e}.", exc_info=True)

if 'info' in artifacts:
if "info" in artifacts:
try:
info_dir = output_subdir("info", config)
if automl.history:
automl.history.save(os.path.join(info_dir, 'history.json'))
automl.history.save(os.path.join(info_dir, "history.json"))
else:
log.info(f"There is no optimization history info to save.")
log.info("There is no optimization history info to save.")
except Exception as e:
log.info(f"Error when saving info about optimisation history: {e}.", exc_info=True)
log.info(
f"Error when saving info about optimisation history: {e}.",
exc_info=True,
)

if 'leaderboard' in artifacts:
if "leaderboard" in artifacts:
try:
leaderboard_dir = output_subdir("leaderboard", config)
if automl.history:
@@ -151,5 +164,5 @@ def save_artifacts(automl, config):
log.info(f"Error when saving 'leaderboard': {e}.", exc_info=True)
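
For completeness, `save_artifacts` is driven by the underscore-prefixed `_save_artifacts` framework parameter checked at the top of the function; a sketch of how the guards behave for an illustrative parameter value:

framework_params = {"_save_artifacts": ["models", "leaderboard"]}  # illustrative value

artifacts = framework_params.get("_save_artifacts", [])
for name in ("models", "info", "leaderboard"):
    # Mirrors the guards above: each artifact type is saved only when requested,
    # and any failure is logged instead of raised.
    if name in artifacts:
        print(f"would save {name!r}")
    else:
        print(f"skipping {name!r}")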


if __name__ == '__main__':
if __name__ == "__main__":
call_run(run)
