Skip to content

Commit

Permalink
Fast topological features (#1252)
Browse files Browse the repository at this point in the history
* add fast topo

* fix fast topo

* Automated autopep8 fixes

* pep8

* add to initial assumption

* make code more clear in `fit` method

* add `fast_topological_features` to docs

* fix alignment for pipeline builders in `TSForecastingAssumptions`

* add topo to `FedotBuilder` docs

* fix table in docs

* add params to fast_topo

* change params and add it to tuner search space

* fix integration tests

* add new param stride

* fix param

* dirty speedup

* delete ica from initial assumption due to instability

* fix documentation

* fix test

* delete fast_topo from assumption and delete fast_train tag

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
kasyanovse and github-actions[bot] committed Jan 27, 2024
1 parent 5e726e9 commit 5efd0fb
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a
`one_hot_encoding`,One-Hot Encoder, Feature encoding
`label_encoding`,Label Encoder, Feature encoding
`resample`,Imbalanced binary class transformation in classification, Data transformation
`topological_features`,Calculation of topological features, only for time series,Data transformation
`topological_features`,Calculation of topological features,Time series transformation
`fast_topological_features`,Fast calculation of part of topological features,Time series transformation


.. csv-table:: Feature transformation operations implementations
Expand Down Expand Up @@ -105,7 +106,8 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a
`one_hot_encoding`,`sklearn.preprocessing.OneHotEncoder`,
`label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree`
`resample`,`FEDOT model using sklearn.utils.resample`,
`topological_features`,FEDOT model,`ts`
`topological_features`,FEDOT model,`ts`,
`fast_topological_features`,FEDOT model,`ts`


Models used
Expand Down
3 changes: 2 additions & 1 deletion fedot/api/api_utils/assumptions/task_assumptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ class TSForecastingAssumptions(TaskAssumptions):
def builders(self):
return {
'lagged_ridge':
PipelineBuilder().add_sequence('lagged', 'ridge'),
PipelineBuilder()
.add_sequence('lagged', 'ridge'),
'topological':
PipelineBuilder()
.add_node('lagged')
Expand Down
2 changes: 2 additions & 0 deletions fedot/api/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ def setup_pipeline_structure(
- ``diff_filter`` -> Derivative Filter Transformation
- ``cut`` -> Cut Transformation
- ``exog_ts`` -> Exogenous Transformation
- ``topological_features`` -> Topological features
- ``fast_topological_features`` -> Fast implementation of topological features
max_depth: max depth of a pipeline. Defaults to ``6``.
Expand Down
6 changes: 5 additions & 1 deletion fedot/core/operations/evaluation/common_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \
ImputationImplementation, KernelPCAImplementation, NormalizationImplementation, PCAImplementation, \
PolyFeaturesImplementation, ScalingImplementation, FastICAImplementation
from fedot.core.operations.evaluation.operation_implementations.\
data_operations.topological.fast_topological_extractor import \
FastTopologicalFeaturesImplementation
from fedot.core.operations.evaluation.operation_implementations.data_operations.topological. \
topological_extractor import TopologicalFeaturesImplementation
from fedot.core.operations.operation_parameters import OperationParameters
Expand Down Expand Up @@ -47,7 +50,8 @@ class FedotPreprocessingStrategy(EvaluationStrategy):
'one_hot_encoding': OneHotEncodingImplementation,
'label_encoding': LabelEncodingImplementation,
'fast_ica': FastICAImplementation,
'topological_features': TopologicalFeaturesImplementation
'topological_features': TopologicalFeaturesImplementation,
'fast_topological_features': FastTopologicalFeaturesImplementation,
}

def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from itertools import chain
from typing import Optional

import numpy as np
from gph import ripser_parallel as ripser
from joblib import Parallel, delayed

from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \
DataOperationImplementation
from fedot.core.operations.operation_parameters import OperationParameters


class FastTopologicalFeaturesImplementation(DataOperationImplementation):
    """Fast extraction of a reduced set of topological features from table rows.

    Every pair of neighbouring rows is averaged into one series, the series is
    embedded with a sliding window, persistence diagrams are computed with
    ``ripser_parallel`` and, for each homology dimension, a fixed set of
    quantiles of the diagram column difference (second column minus first) is
    taken as features.

    Parameters read from ``params``:
        window_size_as_share: window length as a share of the number of columns
            seen in ``fit``.
        max_homology_dimension: passed to ripser as ``maxdim``.
        metric: passed to ripser as ``metric``.
        stride: column step applied to the averaged series in ``transform``.
        n_jobs: number of joblib workers used in ``transform``.
    """

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)
        # NOTE(review): ``params`` is dereferenced directly, so despite the
        # ``Optional`` default a ``None`` value raises here - presumably the
        # operation is always created with its default parameters; confirm.
        self.window_size_as_share = params.get('window_size_as_share')
        self.max_homology_dimension = params.get('max_homology_dimension')
        self.metric = params.get('metric')
        self.stride = params.get('stride')
        self.n_jobs = params.get('n_jobs')
        self.quantiles = (0.1, 0.25, 0.5, 0.75, 0.9)
        # Number of features produced per homology dimension.
        self._shape = len(self.quantiles)
        self._window_size = None

    def fit(self, input_data: InputData):
        """Derive the sliding-window length from the width of the training table.

        The window is ``window_size_as_share`` of the number of columns,
        raised to at least 2 and then capped at ``columns - 2``.

        Returns:
            self
        """
        self._window_size = int(input_data.features.shape[1] * self.window_size_as_share)
        self._window_size = max(self._window_size, 2)
        self._window_size = min(self._window_size, input_data.features.shape[1] - 2)
        return self

    def transform(self, input_data: InputData) -> np.ndarray:
        """Compute topological features for every row of ``input_data.features``.

        Rows are processed in pairs: each pair (or the single trailing row when
        the row count is odd) is averaged, strided by ``self.stride`` and turned
        into one feature vector, which is then assigned to both rows of the pair.

        Returns:
            A plain array with one feature row per input row (not an
            ``OutputData`` instance); NaN and infinite values are replaced by 0.
            NOTE(review): presumably the caller wraps the array into
            ``OutputData`` - confirm against the evaluation strategy.
        """
        features = input_data.features
        n_rows = features.shape[0]
        with Parallel(n_jobs=self.n_jobs, prefer='processes') as parallel:
            topological_features = parallel(
                delayed(self._extract_features)(np.mean(features[i:i + 2, ::self.stride], axis=0))
                for i in range(0, n_rows, 2))
        # ``range(0, n_rows, 2)`` yields ceil(n_rows / 2) vectors, so repeating
        # each one twice covers every row; the slice drops the single surplus
        # row that appears when ``n_rows`` is odd.
        result = np.repeat(np.array(topological_features), 2, axis=0)[:n_rows]
        np.nan_to_num(result, copy=False, nan=0, posinf=0, neginf=0)
        return result

    def _extract_features(self, x):
        """Return quantile features of the persistence diagrams of one series.

        Args:
            x: one-dimensional series (already strided by the caller).

        Returns:
            A vector of ``len(self.quantiles) * (max_homology_dimension + 1)``
            values; the slot of a homology dimension with an empty diagram
            stays zero.
        """
        # The window was derived in ``fit`` from the unstrided width, while ``x``
        # may be shorter because of ``stride``. Apply the same ``length - 2`` cap
        # to the actual series and keep the window positive, so the embedding
        # below is never empty.
        window = max(1, min(self._window_size, x.shape[0] - 2))
        x_sliced = np.array([x[i:window + i] for i in range(x.shape[0] - window + 1)])
        x_processed = ripser(x_sliced,
                             maxdim=self.max_homology_dimension,
                             coeff=2,
                             metric=self.metric,
                             n_threads=1,
                             collapse_edges=False)["dgms"]
        result = np.zeros(self._shape * (self.max_homology_dimension + 1))
        for i, xp in enumerate(x_processed):
            if xp.shape[0] > 0:
                # The difference is a temporary array, so overwriting it is safe.
                result[i * self._shape:(i + 1) * self._shape] = np.quantile(
                    xp[:, 1] - xp[:, 0], self.quantiles,
                    overwrite_input=True, method='hazen')
        return result
16 changes: 16 additions & 0 deletions fedot/core/pipelines/tuning/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,22 @@ def get_parameters_dict(self):
'sampling-scope': [0.9, 0.99],
'type': 'continuous'}
},
'fast_topological_features': {
'window_size_as_share': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.1, 0.9],
'type': 'continuous'
},
'max_homology_dimension': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [1, 5],
'type': 'discrete'
},
'metric': {
'hyperopt-dist': hp.choice,
'sampling-scope': [['euclidean', 'manhattan', 'cosine']],
'type': 'categorical'}
}
}

if self.custom_search_space is not None:
Expand Down
14 changes: 14 additions & 0 deletions fedot/core/repository/data/data_operation_repository.json
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,20 @@
"presets": [
"ts"
],
"input_type": "[DataTypesEnum.table]",
"output_type": "[DataTypesEnum.table]",
"tags": [
"non_applicable_for_ts",
"feature_space_transformation"
]
},
"fast_topological_features": {
"meta": "custom_ts_preprocessing",
"presets": [
"ts"
],
"input_type": "[DataTypesEnum.table]",
"output_type": "[DataTypesEnum.table]",
"tags": [
"non_applicable_for_ts",
"feature_space_transformation"
Expand Down
7 changes: 7 additions & 0 deletions fedot/core/repository/data/default_operation_params.json
Original file line number Diff line number Diff line change
Expand Up @@ -160,5 +160,12 @@
},
"topological_features": {
"n_jobs": -1
},
"fast_topological_features": {
"n_jobs": 1,
"window_size_as_share": 0.66,
"max_homology_dimension": 1,
"metric": "euclidean",
"stride": 1
}
}
14 changes: 9 additions & 5 deletions test/integration/models/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,16 @@ def get_data_for_testing(task_type, data_type, length=100, features_count=1,
return None

if task_type is TaskTypesEnum.ts_forecasting:
task = Task(task_type, TsForecastingParams(max(length // 10, 2)))
forecast_length = max(length // 10, 2)
task = Task(task_type, TsForecastingParams(forecast_length))
if data_type is DataTypesEnum.ts:
features = np.zeros(length) + value
else:
features = np.zeros((length, features_count)) + value
if data_type is DataTypesEnum.table:
target = np.zeros(length) + value
target = np.zeros((length, forecast_length)) + value
else:
target = features

else:
task = Task(task_type)
data_type = DataTypesEnum.table
Expand Down Expand Up @@ -156,11 +156,15 @@ def fit_time_for_operation(operation: OperationMetaInfo,
return perf_counter() - start_time

for task_type in operation.task_type:
for data_type in operation.input_types:
input_types = operation.input_types
if task_type is TaskTypesEnum.ts_forecasting:
if operation.input_types == [DataTypesEnum.table]:
input_types = [DataTypesEnum.ts]
for data_type in input_types:
perfomance_values = []
for length in data_lengths:
data = get_data_for_testing(task_type, data_type,
length=length, features_count=2,
length=length, features_count=10,
random=True)
if data is not None:
min_evaluated_time = min(fit_time_for_operation(operation, data) for _ in range(times))
Expand Down

0 comments on commit 5efd0fb

Please sign in to comment.