
ETSC models #161

Open
wants to merge 47 commits into base: main
Changes from all 47 commits:
d484164
pulled changes
leostre May 21, 2024
6ca5bcd
metrics started
leostre Jun 14, 2024
e69fcd4
metrics ended
leostre Jun 20, 2024
bc4064d
in basis teaser is completed, need some make-up and add cut ts support
leostre Jun 24, 2024
6500db7
teaser inherits sklearn's classifier mixin now
leostre Jun 26, 2024
4204d6a
class tree reconf. added proba_thresholding classifier (not registered)
leostre Jun 27, 2024
5ac6f70
class tree reconf. added proba_thresholding classifier (not registered)
leostre Jun 27, 2024
48e8328
both etc models are registered, available via api
leostre Jun 28, 2024
bd07645
Merge branch 'teaser' of https://github.com/aimclub/Fedot.Industrial …
leostre Jun 28, 2024
18a895a
ecec added
leostre Jul 2, 2024
0c10a07
economy_k added
leostre Jul 4, 2024
251bca6
mlstm init
leostre Jul 5, 2024
ace6626
mlstm registered
leostre Jul 5, 2024
42ba3f0
fitting w augmentation
leostre Jul 9, 2024
5d9182e
all work, but need eval
leostre Jul 11, 2024
41c329c
evth converged to one interface + refactored
leostre Jul 12, 2024
96c1009
slight fixes
leostre Jul 12, 2024
743c404
metrics started
leostre Jun 14, 2024
c3de115
metrics ended
leostre Jun 20, 2024
d4ee881
in basis teaser is completed, need some make-up and add cut ts support
leostre Jun 24, 2024
588846a
teaser inherits sklearn's classifier mixin now
leostre Jun 26, 2024
d939a34
class tree reconf. added proba_thresholding classifier (not registered)
leostre Jun 27, 2024
4d3e57d
both etc models are registered, available via api
leostre Jun 28, 2024
4da7c13
ecec added
leostre Jul 2, 2024
d11fa8d
economy_k added
leostre Jul 4, 2024
9f16244
mlstm init
leostre Jul 5, 2024
d118462
mlstm registered
leostre Jul 5, 2024
7a2c477
fitting w augmentation
leostre Jul 9, 2024
370eb28
all work, but need eval
leostre Jul 11, 2024
e51949b
evth converged to one interface + refactored
leostre Jul 12, 2024
926eb92
slight fixes
leostre Jul 12, 2024
49d0570
refactored train loop + microfixes
leostre Jul 15, 2024
2e2ad91
to pull req
leostre Jul 16, 2024
81b2a67
Merge branch 'teaser' of https://github.com/aimclub/Fedot.Industrial …
leostre Jul 16, 2024
fa80269
Automated autopep8 fixes
Jul 16, 2024
c2126ed
both etc models are registered, available via api
leostre Jun 28, 2024
143b7a2
fitting w augmentation
leostre Jul 9, 2024
ded4f22
all work, but need eval
leostre Jul 11, 2024
40afba9
evth converged to one interface + refactored
leostre Jul 12, 2024
d6ad8fd
slight fixes
leostre Jul 12, 2024
8fec47f
added tests and notebook
leostre Jul 22, 2024
bb2af69
Merge branch 'teaser' of https://github.com/aimclub/Fedot.Industrial …
leostre Jul 23, 2024
3d9022c
Automated autopep8 fixes
Jul 23, 2024
e68331c
Delete tests/unit/core/models/test_teaser.py due to its inclusion int…
leostre Jul 25, 2024
b50e8b1
Delete fedot_ind/core/metrics/interval_metrics.py due to the function…
leostre Jul 25, 2024
c1ab5da
Apply suggestions from code review
leostre Jul 25, 2024
916f899
changed bump up fedot
leostre Jul 26, 2024
2 changes: 1 addition & 1 deletion .github/workflows/poetry_unit_test.yml
@@ -39,7 +39,7 @@ jobs:
run: poetry install

- name: Bump up FEDOT to a stable revision (temporary)
run: poetry add git+https://github.com/aimclub/FEDOT.git@e0b4ee7
run: poetry add git+https://github.com/aimclub/FEDOT.git@master

- name: Run tests with pytest
run: poetry run pytest --cov=fedot_ind --cov-report xml:coverage.xml tests/unit

Large diffs are not rendered by default.

14 changes: 8 additions & 6 deletions fedot_ind/core/architecture/abstraction/decorators.py
@@ -11,9 +11,10 @@

def fedot_data_type(func):
def decorated_func(self, *args):
if not isinstance(args[0], InputData):
args[0] = DataConverter(data=args[0])
features = args[0].features
data, *rest_args = args
if not isinstance(data, InputData):
data = DataConverter(data=data)
features = data.features

if len(features.shape) < 4:
try:
@@ -22,7 +23,7 @@ def decorated_func(self, *args):
input_data_squeezed = np.squeeze(features)
else:
input_data_squeezed = features
return func(self, input_data_squeezed, args[1])
return func(self, input_data_squeezed, *rest_args)

return decorated_func

@@ -42,13 +43,14 @@ def decorated_func(self, *args):

def convert_to_3d_torch_array(func):
def decorated_func(self, *args):
init_data = args[0]
init_data, *args = args
data = DataConverter(data=init_data).convert_to_torch_format()
if isinstance(init_data, InputData):
init_data.features = data
else:
init_data = data
return func(self, init_data, *args[1:])
return func(self, init_data, *args)

return decorated_func


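For context on the refactor above: the decorator now unpacks the positional arguments as `data, *rest_args = args` and forwards the remainder unchanged instead of indexing `args[0]` and `args[1]` directly. Below is a minimal, self-contained sketch of that pattern; the `normalize_first_arg` and `DemoModel` names are illustrative and do not exist in the repository.

import numpy as np


def normalize_first_arg(func):
    """Sketch of the unpack-and-forward pattern used in the refactored decorators."""
    def decorated(self, *args):
        data, *rest_args = args                    # first positional argument is the data payload
        features = np.asarray(data, dtype=float)
        if features.ndim > 2:                      # squeeze singleton axes, as in the diff
            features = np.squeeze(features)
        return func(self, features, *rest_args)    # forward the remaining arguments unchanged
    return decorated


class DemoModel:
    @normalize_first_arg
    def fit(self, features, target=None):
        return features.shape, target


print(DemoModel().fit([[1.0, 2.0], [3.0, 4.0]], [0, 1]))   # -> ((2, 2), [0, 1])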
133 changes: 126 additions & 7 deletions fedot_ind/core/metrics/metrics_implementation.py
@@ -1,6 +1,7 @@
from typing import Optional
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fedot.core.data.data import InputData
@@ -221,6 +222,10 @@ def smape(a, f, _=None):
(np.abs(a) + np.abs(f)) * 100)


def rmse(y_true, y_pred):
return mean_squared_error(y_true, y_pred, squared=False)


def mape(A, F):
return mean_absolute_percentage_error(A, F)

@@ -232,9 +237,6 @@ def calculate_regression_metric(target,
**kwargs):
target = target.astype(float)

def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {'r2': r2_score,
'mse': mean_squared_error,
'rmse': rmse,
@@ -261,9 +263,6 @@ def calculate_forecasting_metric(target,
**kwargs):
target = target.astype(float)

def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {
'rmse': rmse,
'mae': mean_absolute_error,
@@ -347,8 +346,102 @@ def kl_divergence(solution: pd.DataFrame,
return np.average(solution.mean())


class AnomalyMetric(QualityMetric):
class ETSCPareto(QualityMetric, ParetoMetrics):
def __init__(self,
target,
predicted_labels,
predicted_probs=None,
weigths: tuple = None,
mode: str = 'robust',
reduce: bool = True,
metric_list: tuple = (
'f1', 'roc_auc', 'accuracy', 'logloss', 'precision'),
default_value: float = 0.0):
self.target = target.flatten()
self.predicted_labels = predicted_labels
self.predicted_probs = predicted_probs
self.metric_list = metric_list
self.default_value = default_value
self.weights = weigths
self.mode = mode
self.columns = ['robustness'] if self.mode == 'robust' else []
self.columns.extend(metric_list)
self.reduce = reduce

def metric(self) -> float:
if len(self.predicted_labels.shape) == 1:
self.predicted_labels = self.predicted_labels[None, ...]
self.predicted_probs = self.predicted_probs[None, ...]
print(f'''
target shape {self.target.shape}
prediction {self.predicted_labels.shape}
predicted_probs (scores) {self.predicted_probs.shape}
''')
n_metrics = len(self.metric_list) + (self.mode == 'robust')
n_est = self.predicted_labels.shape[0]
result = np.zeros((n_est, n_metrics))
print(result.shape)
if self.mode == 'robust':
mask = self.predicted_probs >= 0
print('mask', mask.shape)
if not mask.any():
return result
robustness = mask.sum(-1) / self.predicted_probs.shape[-1]
print('rob', robustness.shape)
result[:, 0] = robustness.flatten()
else:
mask = np.ones_like(self.predicted_probs, dtype=bool)

for est in range(n_est):
for i, metric in enumerate(self.metric_list, 1):
assert metric in CLASSIFICATION_METRIC_DICT, f'{metric} is not found in available metrics'
metric_value = CLASSIFICATION_METRIC_DICT[metric](self.target[mask[est]],
self.predicted_labels[est][mask[est]])
result[est, i] = metric_value

if self.weights is None:
if self.reduce:
self.weights = np.empty(n_metrics)
self.weights.fill(1 / len(self.weights))
else:
self.weights = np.eye(n_metrics)
else:
assert self.weights.shape[-1] == result.shape[-1], 'Metrics and weights size mismatch!'
self.weights /= self.weights.sum()

result = result @ self.weights.T
result[np.isnan(result)] = self.default_value
if not self.reduce:
return pd.DataFrame(result, columns=self.columns)
else:
return result

def plot_bicrit_metric(self, metrics, select=None, metrics_names=None):
if not metrics_names:
metrics_names = ('Robustness', 'Accuracy')
plt.figure(figsize=(10, 10))
assert metrics.shape[-1] == 2, 'only 2 metrics can be plotted'
for i, metric in enumerate(metrics):
selection = metric[select]
sizes = ((np.arange(selection.shape[0]) * 2)[::-1]) ** 1.5 + 10
plt.scatter(*(metric[select]).T,
s=sizes,
label=i)
plt.legend(loc="upper right", bbox_to_anchor=(1.5, 1))
plt.ylabel(metrics_names[1])
plt.xlabel(metrics_names[0])
plt.xlim((-0.05, 1.05))
plt.ylim((-0.05, 1.05))
plt.xticks(np.linspace(0, 1, 11))
plt.yticks(np.linspace(0, 1, 11))
plt.grid(True)

def select_pareto_front(self, metrics, maximize=True):
pareto_mask = self.pareto_metric_list(metrics, maximise=maximize)
return metrics[pareto_mask]
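A note on the selection above: `select_pareto_front` delegates the non-dominated filtering to `ParetoMetrics.pareto_metric_list`, which already exists in the repository. The stand-alone sketch below reimplements the same idea on made-up (robustness, accuracy) pairs purely for illustration; it is not the repository's implementation.

import numpy as np


def pareto_mask(points):
    """True where a point is not dominated by any other point (maximization)."""
    mask = np.ones(len(points), dtype=bool)
    for i, p in enumerate(points):
        dominates_p = np.all(points >= p, axis=1) & np.any(points > p, axis=1)
        mask[i] = not dominates_p.any()
    return mask


# columns: (robustness, accuracy) for four candidate estimators, numbers invented
metrics = np.array([[0.9, 0.70],
                    [0.6, 0.85],
                    [0.5, 0.60],   # dominated by the two rows above
                    [0.3, 0.90]])
print(metrics[pareto_mask(metrics)])   # rows 0, 1 and 3 survive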


class AnomalyMetric(QualityMetric):
def __init__(self,
target,
predicted_labels,
@@ -617,3 +710,29 @@ def calculate_detection_metric(
target=target,
predicted_labels=labels).metric()
return metric_dict


REGRESSION_METRIC_DICT = {'r2': r2_score,
'mse': mean_squared_error,
'rmse': rmse,
'mae': mean_absolute_error,
'msle': mean_squared_log_error,
'mape': mean_absolute_percentage_error,
'median_absolute_error': median_absolute_error,
'explained_variance_score': explained_variance_score,
'max_error': max_error,
'd2_absolute_error_score': d2_absolute_error_score}

CLASSIFICATION_METRIC_DICT = {'accuracy': accuracy_score,
'f1': f1_score,
'roc_auc': roc_auc_score,
'precision': precision_score,
'logloss': log_loss}

FORECASTING_METRICS_DICT = {
'rmse': rmse,
'mae': mean_absolute_error,
'median_absolute_error': median_absolute_error,
'smape': smape,
'mase': mase
}
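The three lookup tables above centralize the metric callables that the calculate_* helpers previously defined inline. A small usage sketch follows; the dictionary subset and the labels are reproduced or invented here only so the snippet runs on its own.

from sklearn.metrics import accuracy_score, f1_score

# subset of CLASSIFICATION_METRIC_DICT, copied so the example is self-contained
classification_metrics = {'accuracy': accuracy_score, 'f1': f1_score}

y_true = [0, 1, 1, 0, 1]   # invented labels
y_pred = [0, 1, 0, 0, 1]
for name, fn in classification_metrics.items():
    print(f'{name}: {fn(y_true, y_pred):.3f}')   # accuracy: 0.800, f1: 0.800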
Empty file.
165 changes: 165 additions & 0 deletions fedot_ind/core/models/early_tc/base_early_tc.py
@@ -0,0 +1,165 @@
from typing import Optional, List
from fedot.core.operations.operation_parameters import OperationParameters
from sklearn.preprocessing import StandardScaler
from sklearn.base import ClassifierMixin, BaseEstimator
from sktime.classification.dictionary_based import WEASEL
from fedot_ind.core.architecture.settings.computational import backend_methods as np


class EarlyTSClassifier(ClassifierMixin, BaseEstimator):
"""
Base class for Early Time Series Classification models
which implement prefix-wise predictions by training multiple slave estimators.

Args:
``interval_percentage (float in (1, 100])``: defines how many time steps lie between consecutive prediction points.
``consecutive_predictions (int)``: how many of the most recent estimators must assign the same class to an object.
``accuracy_importance (float in [0, 1])``: trade-off coefficient between earliness and accuracy.
``prediction_mode (str in ['last_available', 'best_by_metrics_mean', 'all'])``:
- if 'last_available', returns the prediction of the latest estimator allowed by the prefix length;
- if 'best_by_metrics_mean', returns the prediction of the estimator ranked best
by the weighted average of accuracy and earliness;
- if 'all', returns the predictions of all estimators.
``transform_score (bool)``: whether to scale scores to the [-1, 1] interval.
``min_ts_step (int)``: minimal difference between two subsequent prefix lengths.
"""

def __init__(self, params: Optional[OperationParameters] = {}):
super().__init__()
self.interval_percentage = params.get('interval_percentage', 10)
self.consecutive_predictions = params.get('consecutive_predictions', 1)
self.accuracy_importance = params.get('accuracy_importance', 1.)
self.min_ts_length = params.get('min_ts_step', 3)
self.random_state = params.get('random_state', None)

self.prediction_mode = params.get('prediction_mode', 'last_available')
self.transform_score = params.get('transform_score', True)
self.weasel_params = {}

def _init_model(self, X, y):
max_data_length = X.shape[-1]
self.prediction_idx = self._compute_prediction_points(max_data_length)
self.n_pred = len(self.prediction_idx)
self.slave_estimators = [
WEASEL(random_state=self.random_state, support_probabilities=True, **self.weasel_params)
for _ in range(self.n_pred)]
self.scalers = [StandardScaler() for _ in range(self.n_pred)]
self._chosen_estimator_idx = -1
self.classes_ = [np.unique(y)]
self._estimator_for_predict = [-1]

@property
def required_length(self):
if not hasattr(self, '_chosen_estimator_idx'):
return None
return self.prediction_idx[self._chosen_estimator_idx]

@property
def n_classes(self):
return len(self.classes_[0])

def fit(self, X, y=None):
assert y is not None, 'Pass y'
y = np.array(y).flatten()
self._init_model(X, y)
for i in range(self.n_pred):
self._fit_one_interval(X, y, i)

def _fit_one_interval(self, X, y, i):
X_part = X[..., :self.prediction_idx[i] + 1]
X_part = self.scalers[i].fit_transform(X_part)
probas = self.slave_estimators[i].fit_predict_proba(X_part, y)
return probas

def _predict_one_slave(self, X, i, offset=0):
X_part = X[..., max(0, offset - 1):self.prediction_idx[i] + 1]
X_part = self.scalers[i].transform(X_part)
probas = self.slave_estimators[i].predict_proba(X_part)
return probas, np.argmax(probas, axis=-1)

def _compute_prediction_points(self, n_idx):
interval_length = max(int(n_idx * self.interval_percentage / 100), self.min_ts_length)
prediction_idx = np.arange(n_idx - 1, -1, -interval_length)[::-1][1:]
self.earliness = 1 - prediction_idx / n_idx # /n_idx because else the last hm score is always 0
return prediction_idx

def _select_estimators(self, X, training=False):
offset = 0
if not training and self.prediction_mode == 'best_by_metrics_mean':
estimator_indices = [self._chosen_estimator_idx]
elif not training and self.prediction_mode == 'last_available':
last_idx, offset = self._get_applicable_index(X.shape[-1] - 1)
estimator_indices = [last_idx]
elif training or self.prediction_mode == 'all':
last_idx, offset = self._get_applicable_index(X.shape[-1] - 1)
estimator_indices = np.arange(last_idx + 1)
else:
raise ValueError('Unknown prediction mode')
return estimator_indices, offset

def _predict(self, X, training=True):
estimator_indices, offset = self._select_estimators(X, training)
if not training:
self._estimator_for_predict = estimator_indices
prediction = (np.stack(array_list) for array_list in zip(
*[self._predict_one_slave(X, i, offset) for i in estimator_indices] # check boundary
))
return prediction # see the output in _predict_one_slave

def _consecutive_count(self, predicted_labels: List[np.array]):
n = len(predicted_labels[0])
prediction_points = len(predicted_labels)
consecutive_labels = np.ones((prediction_points, n))
for i in range(1, prediction_points):
equal = predicted_labels[i - 1] == predicted_labels[i]
consecutive_labels[i, equal] = consecutive_labels[i - 1, equal] + 1
return consecutive_labels # prediction_points x n_instances

def predict_proba(self, *args):
"""
Args:
X (np.array): input features
Returns:
predictions as a numpy array of shape (2, n_selected_estimators, n_instances, n_classes)
where first subarray stands for probas, and second for scores
"""
predicted_probas, scores, *_ = args
if self.transform_score:
scores = self._transform_score(scores)
scores = np.tile(scores[..., None], (1, 1, self.n_classes))
prediction = np.stack([predicted_probas, scores], axis=0)
if prediction.shape[1] == 1:
prediction = prediction.squeeze(1)
return prediction

def predict(self, X):
"""
Args:
X (np.array): input features
Returns:
predictions as a numpy array of shape (2, n_selected_estimators, n_instances)
where first subarray stands for labels, and second for scores
"""
prediction = self.predict_proba(X)
labels = prediction[0:1].argmax(-1)
scores = prediction[1:2, ..., 0]
prediction = np.stack([labels, scores], 0)
if prediction.shape[1] == 1:
prediction = prediction.squeeze(1)
return prediction

def _score(self, X, y, accuracy_importance=None, training=True):
y = np.array(y).flatten()
accuracy_importance = accuracy_importance or self.accuracy_importance
predictions = self._predict(X, training)[0]
prediction_points = predictions.shape[0]
accuracies = (predictions == np.tile(y, (prediction_points, 1))).sum(axis=1) / len(y)
return (1 - accuracy_importance) * self.earliness[:prediction_points] + accuracy_importance * accuracies

def _get_applicable_index(self, last_available_idx):
idx = np.searchsorted(self.prediction_idx, last_available_idx, side='right')
if idx == 0:
raise RuntimeError('Too few points for prediction!')
idx -= 1
offset = last_available_idx - self.prediction_idx[idx]
return idx, offset
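To make the spacing logic of `_compute_prediction_points` concrete, here is a short numerical walk-through under assumed values (series length 100, the default `interval_percentage=10`, `min_ts_step=3`); the numbers are illustrative only.

import numpy as np

n_idx, interval_percentage, min_ts_length = 100, 10, 3            # assumed values
interval_length = max(int(n_idx * interval_percentage / 100), min_ts_length)
prediction_idx = np.arange(n_idx - 1, -1, -interval_length)[::-1][1:]
earliness = 1 - prediction_idx / n_idx   # dividing by n_idx keeps the last earliness above zero

print(prediction_idx)            # [19 29 39 49 59 69 79 89 99]
print(np.round(earliness, 2))    # [0.81 0.71 0.61 0.51 0.41 0.31 0.21 0.11 0.01]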