-
Notifications
You must be signed in to change notification settings - Fork 0
/
datasets.py
48 lines (33 loc) · 1.46 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pynever.datasets as pyn_datasets
import torch.utils.data as pyt_data
import pandas
class ComponentDegradationAD(pyn_datasets.Dataset, pyt_data.Dataset):
    """
    Dataset compatible with pynever and pytorch for the "One Year Industrial Component Degradation" dataset.

    This particular dataset is for the task of Anomaly Detection (i.e., to recognize when the blade is degraded).
    Moreover, it is developed for use with an autoencoder (i.e., the outputs correspond to the inputs), so every
    item is returned as a ``(sample, target)`` pair holding the same values.

    Parameters
    ----------
    filepath : str
        Path of the CSV file, read with ``pandas.read_csv``.
    columns_to_drop : list, optional
        Labels of the columns to remove after loading. When None, the columns
        'timestamp' and 'mode' are removed.
    is_training : bool, optional
        If True only the first ``new_cutoff`` rows are kept; otherwise all the rows are kept.
    new_cutoff : int, optional
        Number of leading rows assumed to represent the behaviour of a new blade (default: 200000).

    Attributes
    ----------
    df : pandas.DataFrame
        The rows exposed by the dataset, after the column removal and the optional training cutoff.

    """

    def __init__(self, filepath: str, columns_to_drop: list = None, is_training: bool = True,
                 new_cutoff: int = 200000):

        df = pandas.read_csv(filepath)

        # Fall back to the default columns only when the caller gave no list at all
        # (an explicit empty list means "drop nothing").
        labels = ['timestamp', 'mode'] if columns_to_drop is None else columns_to_drop
        df = df.drop(labels=labels, axis='columns')

        # The cutoff is arbitrary: we assume that the first new_cutoff samples represent
        # the behaviour of a new blade, and we train only on those.
        # The non-training split keeps all the rows, including the ones before the cutoff.
        self.df = df[0:new_cutoff] if is_training else df

    def __len__(self):
        """Return the number of rows exposed by the dataset."""
        return len(self.df)

    def get_features_ids(self):
        """Return the column labels of the features kept in the dataset."""
        return self.df.columns

    def __getitem__(self, index: int):
        """
        Return the row at position ``index`` as a ``(sample, target)`` pair.

        Raises
        ------
        IndexError
            If ``index`` is greater than or equal to the length of the dataset.

        """
        size = len(self)
        if index >= size:
            raise IndexError(f"index {index} is out of range for a dataset of {size} samples")

        # Single positional lookup; the target is a copy so that the two arrays are distinct objects.
        sample = self.df.iloc[index].values
        target = sample.copy()
        return sample, target