
ETSC models #161

Open
wants to merge 47 commits into base: main
Changes from all 47 commits:
d484164
pulled changes
leostre May 21, 2024
6ca5bcd
metrics started
leostre Jun 14, 2024
e69fcd4
metrics ended
leostre Jun 20, 2024
bc4064d
in basis teaser is completed, need some make-up and add cut ts support
leostre Jun 24, 2024
6500db7
teaser inherits sklearn's classifier mixin now
leostre Jun 26, 2024
4204d6a
class tree reconf. added proba_thresholding classifier (not registered)
leostre Jun 27, 2024
5ac6f70
class tree reconf. added proba_thresholding classifier (not registered)
leostre Jun 27, 2024
48e8328
both etc models are registered, available via api
leostre Jun 28, 2024
bd07645
Merge branch 'teaser' of https://github.com/aimclub/Fedot.Industrial …
leostre Jun 28, 2024
18a895a
ecec added
leostre Jul 2, 2024
0c10a07
economy_k added
leostre Jul 4, 2024
251bca6
mlstm init
leostre Jul 5, 2024
ace6626
mlstm registered
leostre Jul 5, 2024
42ba3f0
fitting w augmentation
leostre Jul 9, 2024
5d9182e
all work, but need eval
leostre Jul 11, 2024
41c329c
evth converged to one interface + refactored
leostre Jul 12, 2024
96c1009
slight fixes
leostre Jul 12, 2024
743c404
metrics started
leostre Jun 14, 2024
c3de115
metrics ended
leostre Jun 20, 2024
d4ee881
in basis teaser is completed, need some make-up and add cut ts support
leostre Jun 24, 2024
588846a
teaser inherits sklearn's classifier mixin now
leostre Jun 26, 2024
d939a34
class tree reconf. added proba_thresholding classifier (not registered)
leostre Jun 27, 2024
4d3e57d
both etc models are registered, available via api
leostre Jun 28, 2024
4da7c13
ecec added
leostre Jul 2, 2024
d11fa8d
economy_k added
leostre Jul 4, 2024
9f16244
mlstm init
leostre Jul 5, 2024
d118462
mlstm registered
leostre Jul 5, 2024
7a2c477
fitting w augmentation
leostre Jul 9, 2024
370eb28
all work, but need eval
leostre Jul 11, 2024
e51949b
evth converged to one interface + refactored
leostre Jul 12, 2024
926eb92
slight fixes
leostre Jul 12, 2024
49d0570
refactored train loop + microfixes
leostre Jul 15, 2024
2e2ad91
to pull req
leostre Jul 16, 2024
81b2a67
Merge branch 'teaser' of https://github.com/aimclub/Fedot.Industrial …
leostre Jul 16, 2024
fa80269
Automated autopep8 fixes
Jul 16, 2024
c2126ed
both etc models are registered, available via api
leostre Jun 28, 2024
143b7a2
fitting w augmentation
leostre Jul 9, 2024
ded4f22
all work, but need eval
leostre Jul 11, 2024
40afba9
evth converged to one interface + refactored
leostre Jul 12, 2024
d6ad8fd
slight fixes
leostre Jul 12, 2024
8fec47f
added tests and notebook
leostre Jul 22, 2024
bb2af69
Merge branch 'teaser' of https://github.com/aimclub/Fedot.Industrial …
leostre Jul 23, 2024
3d9022c
Automated autopep8 fixes
Jul 23, 2024
e68331c
Delete tests/unit/core/models/test_teaser.py due to its inclusion int…
leostre Jul 25, 2024
b50e8b1
Delete fedot_ind/core/metrics/interval_metrics.py due to the function…
leostre Jul 25, 2024
c1ab5da
Apply suggestions from code review
leostre Jul 25, 2024
916f899
changed bump up fedot
leostre Jul 26, 2024
2 changes: 1 addition & 1 deletion .github/workflows/poetry_unit_test.yml
@@ -39,7 +39,7 @@ jobs:
run: poetry install

- name: Bump up FEDOT to a stable revision (temporary)
run: poetry add git+https://github.com/aimclub/FEDOT.git@e0b4ee7
run: poetry add git+https://github.com/aimclub/FEDOT.git@master

- name: Run tests with pytest
run: poetry run pytest --cov=fedot_ind --cov-report xml:coverage.xml tests/unit

Large diffs are not rendered by default.

14 changes: 8 additions & 6 deletions fedot_ind/core/architecture/abstraction/decorators.py
@@ -11,9 +11,10 @@

def fedot_data_type(func):
def decorated_func(self, *args):
if not isinstance(args[0], InputData):
args[0] = DataConverter(data=args[0])
features = args[0].features
data, *rest_args = args
if not isinstance(data, InputData):
data = DataConverter(data=data)
features = data.features

if len(features.shape) < 4:
try:
@@ -22,7 +23,7 @@ def decorated_func(self, *args):
input_data_squeezed = np.squeeze(features)
else:
input_data_squeezed = features
return func(self, input_data_squeezed, args[1])
return func(self, input_data_squeezed, *rest_args)

return decorated_func

@@ -42,13 +43,14 @@ def decorated_func(self, *args):

def convert_to_3d_torch_array(func):
def decorated_func(self, *args):
init_data = args[0]
init_data, *args = args
data = DataConverter(data=init_data).convert_to_torch_format()
if isinstance(init_data, InputData):
init_data.features = data
else:
init_data = data
return func(self, init_data, *args[1:])
return func(self, init_data, *args)

return decorated_func


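For context on the refactor above: the decorator now unpacks the positional arguments as `data, *rest_args = args` and forwards the remainder unchanged instead of indexing `args[0]` and `args[1]` directly. Below is a minimal, self-contained sketch of that pattern; the `normalize_first_arg` and `DemoModel` names are illustrative and do not exist in the repository.

import numpy as np


def normalize_first_arg(func):
    """Sketch of the unpack-and-forward pattern used in the refactored decorators."""
    def decorated(self, *args):
        data, *rest_args = args                    # first positional argument is the data payload
        features = np.asarray(data, dtype=float)
        if features.ndim > 2:                      # squeeze singleton axes, as in the diff
            features = np.squeeze(features)
        return func(self, features, *rest_args)    # forward the remaining arguments unchanged
    return decorated


class DemoModel:
    @normalize_first_arg
    def fit(self, features, target=None):
        return features.shape, target


print(DemoModel().fit([[1.0, 2.0], [3.0, 4.0]], [0, 1]))   # -> ((2, 2), [0, 1])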
133 changes: 126 additions & 7 deletions fedot_ind/core/metrics/metrics_implementation.py
@@ -1,6 +1,7 @@
from typing import Optional
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fedot.core.data.data import InputData
@@ -221,6 +222,10 @@ def smape(a, f, _=None):
(np.abs(a) + np.abs(f)) * 100)


def rmse(y_true, y_pred):
return mean_squared_error(y_true, y_pred, squared=False)


def mape(A, F):
return mean_absolute_percentage_error(A, F)

@@ -232,9 +237,6 @@ def calculate_regression_metric(target,
**kwargs):
target = target.astype(float)

def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {'r2': r2_score,
'mse': mean_squared_error,
'rmse': rmse,
@@ -261,9 +263,6 @@ def calculate_forecasting_metric(target,
**kwargs):
target = target.astype(float)

def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {
'rmse': rmse,
'mae': mean_absolute_error,
@@ -347,8 +346,102 @@ def kl_divergence(solution: pd.DataFrame,
return np.average(solution.mean())


class AnomalyMetric(QualityMetric):
class ETSCPareto(QualityMetric, ParetoMetrics):
def __init__(self,
target,
predicted_labels,
predicted_probs=None,
weigths: tuple = None,
mode: str = 'robust',
reduce: bool = True,
metric_list: tuple = (
'f1', 'roc_auc', 'accuracy', 'logloss', 'precision'),
default_value: float = 0.0):
self.target = target.flatten()
self.predicted_labels = predicted_labels
self.predicted_probs = predicted_probs
self.metric_list = metric_list
self.default_value = default_value
self.weights = weigths
self.mode = mode
self.columns = ['robustness'] if self.mode == 'robust' else []
self.columns.extend(metric_list)
self.reduce = reduce

def metric(self) -> float:
if len(self.predicted_labels.shape) == 1:
self.predicted_labels = self.predicted_labels[None, ...]
self.predicted_probs = self.predicted_probs[None, ...]
print(f'''
target shape {self.target.shape}
prediction {self.predicted_labels.shape}
predicted_probs (scores) {self.predicted_probs.shape}
''')
n_metrics = len(self.metric_list) + (self.mode == 'robust')
n_est = self.predicted_labels.shape[0]
result = np.zeros((n_est, n_metrics))
print(result.shape)
if self.mode == 'robust':
mask = self.predicted_probs >= 0
print('mask', mask.shape)
if not mask.any():
return result
robustness = mask.sum(-1) / self.predicted_probs.shape[-1]
print('rob', robustness.shape)
result[:, 0] = robustness.flatten()
else:
mask = np.ones_like(self.predicted_probs, dtype=bool)

for est in range(n_est):
for i, metric in enumerate(self.metric_list, 1):
assert metric in CLASSIFICATION_METRIC_DICT, f'{metric} is not found in available metrics'
metric_value = CLASSIFICATION_METRIC_DICT[metric](self.target[mask[est]],
self.predicted_labels[est][mask[est]])
result[est, i] = metric_value

if self.weights is None:
if self.reduce:
self.weights = np.empty(n_metrics)
self.weights.fill(1 / len(self.weights))
else:
self.weights = np.eye(n_metrics)
else:
assert self.weights.shape[-1] == result.shape[-1], 'Metrics and weights size mismatch!'
self.weights /= self.weights.sum()

result = result @ self.weights.T
result[np.isnan(result)] = self.default_value
if not self.reduce:
return pd.DataFrame(result, columns=self.columns)
else:
return result

def plot_bicrit_metric(self, metrics, select=None, metrics_names=None):
if not metrics_names:
metrics_names = ('Robustness', 'Accuracy')
plt.figure(figsize=(10, 10))
assert metrics.shape[-1] == 2, 'only 2 metrics can be plotted'
for i, metric in enumerate(metrics):
selection = metric[select]
sizes = ((np.arange(selection.shape[0]) * 2)[::-1]) ** 1.5 + 10
plt.scatter(*(metric[select]).T,
s=sizes,
label=i)
plt.legend(loc="upper right", bbox_to_anchor=(1.5, 1))
plt.ylabel(metrics_names[1])
plt.xlabel(metrics_names[0])
plt.xlim((-0.05, 1.05))
plt.ylim((-0.05, 1.05))
plt.xticks(np.linspace(0, 1, 11))
plt.yticks(np.linspace(0, 1, 11))
plt.grid(True)

def select_pareto_front(self, metrics, maximize=True):
pareto_mask = self.pareto_metric_list(metrics, maximise=maximize)
return metrics[pareto_mask]
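A note on the selection above: `select_pareto_front` delegates the non-dominated filtering to `ParetoMetrics.pareto_metric_list`, which already exists in the repository. The stand-alone sketch below reimplements the same idea on made-up (robustness, accuracy) pairs purely for illustration; it is not the repository's implementation.

import numpy as np


def pareto_mask(points):
    """True where a point is not dominated by any other point (maximization)."""
    mask = np.ones(len(points), dtype=bool)
    for i, p in enumerate(points):
        dominates_p = np.all(points >= p, axis=1) & np.any(points > p, axis=1)
        mask[i] = not dominates_p.any()
    return mask


# columns: (robustness, accuracy) for four candidate estimators, numbers invented
metrics = np.array([[0.9, 0.70],
                    [0.6, 0.85],
                    [0.5, 0.60],   # dominated by the two rows above
                    [0.3, 0.90]])
print(metrics[pareto_mask(metrics)])   # rows 0, 1 and 3 survive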


class AnomalyMetric(QualityMetric):
def __init__(self,
target,
predicted_labels,
@@ -617,3 +710,29 @@ def calculate_detection_metric(
target=target,
predicted_labels=labels).metric()
return metric_dict


REGRESSION_METRIC_DICT = {'r2': r2_score,
'mse': mean_squared_error,
'rmse': rmse,
'mae': mean_absolute_error,
'msle': mean_squared_log_error,
'mape': mean_absolute_percentage_error,
'median_absolute_error': median_absolute_error,
'explained_variance_score': explained_variance_score,
'max_error': max_error,
'd2_absolute_error_score': d2_absolute_error_score}

CLASSIFICATION_METRIC_DICT = {'accuracy': accuracy_score,
'f1': f1_score,
'roc_auc': roc_auc_score,
'precision': precision_score,
'logloss': log_loss}

FORECASTING_METRICS_DICT = {
'rmse': rmse,
'mae': mean_absolute_error,
'median_absolute_error': median_absolute_error,
'smape': smape,
'mase': mase
}
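The three lookup tables above centralize the metric callables that the calculate_* helpers previously defined inline. A small usage sketch follows; the dictionary subset and the labels are reproduced or invented here only so the snippet runs on its own.

from sklearn.metrics import accuracy_score, f1_score

# subset of CLASSIFICATION_METRIC_DICT, copied so the example is self-contained
classification_metrics = {'accuracy': accuracy_score, 'f1': f1_score}

y_true = [0, 1, 1, 0, 1]   # invented labels
y_pred = [0, 1, 0, 0, 1]
for name, fn in classification_metrics.items():
    print(f'{name}: {fn(y_true, y_pred):.3f}')   # accuracy: 0.800, f1: 0.800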
Empty file.
165 changes: 165 additions & 0 deletions fedot_ind/core/models/early_tc/base_early_tc.py
@@ -0,0 +1,165 @@
from typing import Optional, List
from fedot.core.operations.operation_parameters import OperationParameters
from sklearn.preprocessing import StandardScaler
from sklearn.base import ClassifierMixin, BaseEstimator
from sktime.classification.dictionary_based import WEASEL
from fedot_ind.core.architecture.settings.computational import backend_methods as np


class EarlyTSClassifier(ClassifierMixin, BaseEstimator):
"""
Base class for Early Time Series Classification models
which implement prefix-wise predictions by training multiple slave estimators.

Args:
``interval_percentage (float in (1, 100])``: defines how many time steps lie between consecutive prediction points.
``consecutive_predictions (int)``: how many of the most recent estimators must assign the same class to an object.
``accuracy_importance (float in [0, 1])``: trade-off coefficient between earliness and accuracy.
``prediction_mode (str in ['last_available', 'best_by_metrics_mean', 'all'])``:
- if 'last_available', returns the prediction of the latest estimator allowed by the prefix length;
- if 'best_by_metrics_mean', returns the prediction of the estimator ranked best
by the weighted average of accuracy and earliness;
- if 'all', returns the predictions of all estimators.
``transform_score (bool)``: whether to scale scores to the [-1, 1] interval.
``min_ts_step (int)``: minimal difference between two subsequent prefix lengths.
"""

def __init__(self, params: Optional[OperationParameters] = {}):
super().__init__()
self.interval_percentage = params.get('interval_percentage', 10)
self.consecutive_predictions = params.get('consecutive_predictions', 1)
self.accuracy_importance = params.get('accuracy_importance', 1.)
self.min_ts_length = params.get('min_ts_step', 3)
self.random_state = params.get('random_state', None)

self.prediction_mode = params.get('prediction_mode', 'last_available')
self.transform_score = params.get('transform_score', True)
self.weasel_params = {}

def _init_model(self, X, y):
max_data_length = X.shape[-1]
self.prediction_idx = self._compute_prediction_points(max_data_length)
self.n_pred = len(self.prediction_idx)
self.slave_estimators = [
WEASEL(random_state=self.random_state, support_probabilities=True, **self.weasel_params)
for _ in range(self.n_pred)]
self.scalers = [StandardScaler() for _ in range(self.n_pred)]
self._chosen_estimator_idx = -1
self.classes_ = [np.unique(y)]
self._estimator_for_predict = [-1]

@property
def required_length(self):
if not hasattr(self, '_chosen_estimator_idx'):
return None
return self.prediction_idx[self._chosen_estimator_idx]

@property
def n_classes(self):
return len(self.classes_[0])

def fit(self, X, y=None):
assert y is not None, 'Pass y'
y = np.array(y).flatten()
self._init_model(X, y)
for i in range(self.n_pred):
self._fit_one_interval(X, y, i)

def _fit_one_interval(self, X, y, i):
X_part = X[..., :self.prediction_idx[i] + 1]
X_part = self.scalers[i].fit_transform(X_part)
probas = self.slave_estimators[i].fit_predict_proba(X_part, y)
return probas

def _predict_one_slave(self, X, i, offset=0):
X_part = X[..., max(0, offset - 1):self.prediction_idx[i] + 1]
X_part = self.scalers[i].transform(X_part)
probas = self.slave_estimators[i].predict_proba(X_part)
return probas, np.argmax(probas, axis=-1)

def _compute_prediction_points(self, n_idx):
interval_length = max(int(n_idx * self.interval_percentage / 100), self.min_ts_length)
prediction_idx = np.arange(n_idx - 1, -1, -interval_length)[::-1][1:]
self.earliness = 1 - prediction_idx / n_idx # /n_idx because else the last hm score is always 0
return prediction_idx

def _select_estimators(self, X, training=False):
offset = 0
if not training and self.prediction_mode == 'best_by_metrics_mean':
estimator_indices = [self._chosen_estimator_idx]
elif not training and self.prediction_mode == 'last_available':
last_idx, offset = self._get_applicable_index(X.shape[-1] - 1)
estimator_indices = [last_idx]
elif training or self.prediction_mode == 'all':
last_idx, offset = self._get_applicable_index(X.shape[-1] - 1)
estimator_indices = np.arange(last_idx + 1)
else:
raise ValueError('Unknown prediction mode')
return estimator_indices, offset

def _predict(self, X, training=True):
estimator_indices, offset = self._select_estimators(X, training)
if not training:
self._estimator_for_predict = estimator_indices
prediction = (np.stack(array_list) for array_list in zip(
*[self._predict_one_slave(X, i, offset) for i in estimator_indices] # check boundary
))
return prediction # see the output in _predict_one_slave

def _consecutive_count(self, predicted_labels: List[np.array]):
n = len(predicted_labels[0])
prediction_points = len(predicted_labels)
consecutive_labels = np.ones((prediction_points, n))
for i in range(1, prediction_points):
equal = predicted_labels[i - 1] == predicted_labels[i]
consecutive_labels[i, equal] = consecutive_labels[i - 1, equal] + 1
return consecutive_labels # prediction_points x n_instances

def predict_proba(self, *args):
"""
Args:
X (np.array): input features
Returns:
predictions as a numpy array of shape (2, n_selected_estimators, n_instances, n_classes)
where first subarray stands for probas, and second for scores
"""
predicted_probas, scores, *_ = args
if self.transform_score:
scores = self._transform_score(scores)
scores = np.tile(scores[..., None], (1, 1, self.n_classes))
prediction = np.stack([predicted_probas, scores], axis=0)
if prediction.shape[1] == 1:
prediction = prediction.squeeze(1)
return prediction

def predict(self, X):
"""
Args:
X (np.array): input features
Returns:
predictions as a numpy array of shape (2, n_selected_estimators, n_instances)
where first subarray stands for labels, and second for scores
"""
prediction = self.predict_proba(X)
labels = prediction[0:1].argmax(-1)
scores = prediction[1:2, ..., 0]
prediction = np.stack([labels, scores], 0)
if prediction.shape[1] == 1:
prediction = prediction.squeeze(1)
return prediction

def _score(self, X, y, accuracy_importance=None, training=True):
y = np.array(y).flatten()
accuracy_importance = accuracy_importance or self.accuracy_importance
predictions = self._predict(X, training)[0]
prediction_points = predictions.shape[0]
accuracies = (predictions == np.tile(y, (prediction_points, 1))).sum(axis=1) / len(y)
return (1 - accuracy_importance) * self.earliness[:prediction_points] + accuracy_importance * accuracies

def _get_applicable_index(self, last_available_idx):
idx = np.searchsorted(self.prediction_idx, last_available_idx, side='right')
if idx == 0:
raise RuntimeError('Too few points for prediction!')
idx -= 1
offset = last_available_idx - self.prediction_idx[idx]
return idx, offset
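To make the spacing logic of `_compute_prediction_points` concrete, here is a short numerical walk-through under assumed values (series length 100, the default `interval_percentage=10`, `min_ts_step=3`); the numbers are illustrative only.

import numpy as np

n_idx, interval_percentage, min_ts_length = 100, 10, 3            # assumed values
interval_length = max(int(n_idx * interval_percentage / 100), min_ts_length)
prediction_idx = np.arange(n_idx - 1, -1, -interval_length)[::-1][1:]
earliness = 1 - prediction_idx / n_idx   # dividing by n_idx keeps the last earliness above zero

print(prediction_idx)            # [19 29 39 49 59 69 79 89 99]
print(np.round(earliness, 2))    # [0.81 0.71 0.61 0.51 0.41 0.31 0.21 0.11 0.01]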