diff --git a/pyproject.toml b/pyproject.toml index 1ae5844..a069c9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "anomalytics" description = "The ultimate anomaly detection library." readme = "README.md" -version = "0.2.1" +version = "0.2.2" license = {file = "LICENSE"} requires-python = ">=3.10" authors = [ diff --git a/src/anomalytics/__init__.py b/src/anomalytics/__init__.py index 7a7394f..6eb968e 100644 --- a/src/anomalytics/__init__.py +++ b/src/anomalytics/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.2.1" +__version__ = "0.2.2" __all__ = [ "get_anomaly", diff --git a/src/anomalytics/models/abstract.py b/src/anomalytics/models/abstract.py index 6a0e97f..d921546 100644 --- a/src/anomalytics/models/abstract.py +++ b/src/anomalytics/models/abstract.py @@ -8,14 +8,16 @@ class Detector(metaclass=abc.ABCMeta): @abc.abstractmethod def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize the anomaly detection model with a specific statisticail method. ## Parameters ---------- - dataset : typing.Union[pandas.DataFrame, pandas.Series] + dataset : typing.Optional[typing.Union[pandas.DataFrame, pandas.Series]], default is None DataFame or Series objects to be analyzed. Index must be date-time and values must be numeric. @@ -24,6 +26,19 @@ def __init__( """ ... + @abc.abstractmethod + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + """ + Assign dataset to the `Detector` object, if it is not assigned during initialization. + + ## Parameters + ---------- + dataset : typing.Union[pandas.DataFrame, pandas.Series] + DataFame or Series objects to be analyzed. + Index must be date-time and values must be numeric. + """ + ... + @abc.abstractmethod def fit(self) -> None: """ diff --git a/src/anomalytics/models/autoencoder.py b/src/anomalytics/models/autoencoder.py index ecd174b..f072103 100644 --- a/src/anomalytics/models/autoencoder.py +++ b/src/anomalytics/models/autoencoder.py @@ -33,7 +33,9 @@ class AutoencoderDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize Autoencoder model for anomaly detection. @@ -57,6 +59,9 @@ def __init__( self.__eval = None # type: ignore self.__params = {} + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + raise NotImplementedError("Not yet implemented!") + def fit(self) -> None: raise NotImplementedError("Not yet implemented!") diff --git a/src/anomalytics/models/block_maxima.py b/src/anomalytics/models/block_maxima.py index 6ff01f4..f49ba02 100644 --- a/src/anomalytics/models/block_maxima.py +++ b/src/anomalytics/models/block_maxima.py @@ -33,7 +33,9 @@ class BlockMaximaDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize Block Maxima model for anomaly detection. @@ -57,6 +59,9 @@ def __init__( self.__eval = None # type: ignore self.__params = {} + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + raise NotImplementedError("Not yet implemented!") + def fit(self) -> None: raise NotImplementedError("Not yet implemented!") diff --git a/src/anomalytics/models/dbscan.py b/src/anomalytics/models/dbscan.py index b0ef6b9..21f1f1a 100644 --- a/src/anomalytics/models/dbscan.py +++ b/src/anomalytics/models/dbscan.py @@ -33,7 +33,9 @@ class DBSCANDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize DBSCAN model for anomaly detection. @@ -57,6 +59,9 @@ def __init__( self.__eval = None # type: ignore self.__params = {} + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + raise NotImplementedError("Not yet implemented!") + def fit(self) -> None: raise NotImplementedError("Not yet implemented!") diff --git a/src/anomalytics/models/detector.py b/src/anomalytics/models/detector.py index a5dabed..dcdbaf5 100644 --- a/src/anomalytics/models/detector.py +++ b/src/anomalytics/models/detector.py @@ -12,7 +12,7 @@ class FactoryDetector: def __init__( self, method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"], - dataset: typing.Union[pd.DataFrame, pd.Series], + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, anomaly_type: typing.Literal["high", "low"] = "high", ): self.method = method @@ -67,7 +67,7 @@ def __call__(self): def get_detector( method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"], - dataset: typing.Union[pd.DataFrame, pd.Series], + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, anomaly_type: typing.Literal["high", "low"] = "high", ): return FactoryDetector(method=method, dataset=dataset, anomaly_type=anomaly_type)() diff --git a/src/anomalytics/models/isoforest.py b/src/anomalytics/models/isoforest.py index b615581..de4e6d2 100644 --- a/src/anomalytics/models/isoforest.py +++ b/src/anomalytics/models/isoforest.py @@ -33,7 +33,9 @@ class IsoForestDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize Isolation Forest model for anomaly detection. @@ -57,6 +59,9 @@ def __init__( self.__eval = None # type: ignore self.__params = {} + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + raise NotImplementedError("Not yet implemented!") + def fit(self) -> None: raise NotImplementedError("Not yet implemented!") diff --git a/src/anomalytics/models/mad.py b/src/anomalytics/models/mad.py index ffe84d7..4c18702 100644 --- a/src/anomalytics/models/mad.py +++ b/src/anomalytics/models/mad.py @@ -33,7 +33,9 @@ class MADDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize Mean Absolute Deviation model for anomaly detection. @@ -57,6 +59,9 @@ def __init__( self.__eval = None # type: ignore self.__params = {} + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + raise NotImplementedError("Not yet implemented!") + def fit(self) -> None: raise NotImplementedError("Not yet implemented!") diff --git a/src/anomalytics/models/one_class_svm.py b/src/anomalytics/models/one_class_svm.py index b9419fa..311b0ae 100644 --- a/src/anomalytics/models/one_class_svm.py +++ b/src/anomalytics/models/one_class_svm.py @@ -33,7 +33,9 @@ class OneClassSVMDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize 1 Class SVM model for anomaly detection. @@ -57,6 +59,9 @@ def __init__( self.__eval = None # type: ignore self.__params = {} + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + raise NotImplementedError("Not yet implemented!") + def fit(self) -> None: raise NotImplementedError("Not yet implemented!") diff --git a/src/anomalytics/models/peaks_over_threshold.py b/src/anomalytics/models/peaks_over_threshold.py index 276c64b..9c30266 100644 --- a/src/anomalytics/models/peaks_over_threshold.py +++ b/src/anomalytics/models/peaks_over_threshold.py @@ -1,9 +1,7 @@ -import datetime import logging import typing import warnings -import numpy as np import pandas as pd from anomalytics.evals.kolmogorv_smirnov import ks_1sample @@ -135,7 +133,9 @@ class POTDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize POT model for anomaly detection. @@ -153,6 +153,26 @@ def __init__( if anomaly_type not in ["high", "low"]: raise ValueError(f"Invalid value! The `anomaly_type` argument must be 'high' or 'low'") + if dataset is not None: + self.__process_dataset(dataset=dataset) + else: + self.__datetime = None # type: ignore + self.__dataset = dataset + self.__time_window = None # type: ignore + + self.__anomaly_type = anomaly_type + + self.__exceedance_threshold = None # type: ignore + self.__exceedance = None # type: ignore + self.__anomaly_score = None # type: ignore + self.__anomaly_threshold = None # type: ignore + self.__detection = None # type: ignore + self.__eval = None # type: ignore + self.__params = {} + + logger.info("successfully initialized POT detection model") + + def __process_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: if not isinstance(dataset, pd.DataFrame) and not isinstance(dataset, pd.Series): raise TypeError("Invalid value! The `dataset` argument must be a Pandas DataFrame or Series") @@ -186,8 +206,6 @@ def __init__( ) from _error self.__datetime = None self.__dataset = dataset - - self.__anomaly_type = anomaly_type self.__time_window = set_time_window( total_rows=self.__dataset.shape[0], method="POT", @@ -196,15 +214,10 @@ def __init__( t1_pct=0.3, t2_pct=0.0, ) - self.__exceedance_threshold = None # type: ignore - self.__exceedance = None # type: ignore - self.__anomaly_score = None # type: ignore - self.__anomaly_threshold = None # type: ignore - self.__detection = None # type: ignore - self.__eval = None # type: ignore - self.__params = {} + print("The dataset is successfully processed!") - logger.info("successfully initialized POT detection model") + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + return self.__process_dataset(dataset=dataset) def reset_time_window( self, diff --git a/src/anomalytics/models/zscore.py b/src/anomalytics/models/zscore.py index b46943b..ae10431 100644 --- a/src/anomalytics/models/zscore.py +++ b/src/anomalytics/models/zscore.py @@ -33,7 +33,9 @@ class ZScoreDetector(Detector): __params: typing.Dict def __init__( - self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high" + self, + dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None, + anomaly_type: typing.Literal["high", "low"] = "high", ): """ Initialize Z-Score model for anomaly detection. @@ -57,6 +59,9 @@ def __init__( self.__eval = None # type: ignore self.__params = {} + def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None: + raise NotImplementedError("Not yet implemented!") + def fit(self) -> None: raise NotImplementedError("Not yet implemented!") diff --git a/tests/test_version.py b/tests/test_version.py index 181289a..9b58df8 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -2,4 +2,4 @@ def test_pkg_version(): - assert __version__ == "0.2.1" + assert __version__ == "0.2.2" diff --git a/tests/unit/detectors/test_pot_detector.py b/tests/unit/detectors/test_pot_detector.py index e4c31cc..038bb1b 100644 --- a/tests/unit/detectors/test_pot_detector.py +++ b/tests/unit/detectors/test_pot_detector.py @@ -16,16 +16,19 @@ class TestPOTDetector(unittest.TestCase): def setUp(self) -> None: super().setUp() - self.pot1_series_detector = atics.get_detector(method="POT", dataset=self.sample_1_ts) # type: ignore + self.pot1_series_detector = atics.get_detector(method="POT") # type: ignore + self.pot1_series_detector.assign_dataset(dataset=self.sample_1_ts) # type: ignore + self.pot2_series_detector = atics.get_detector(method="POT", dataset=self.sample_2_ts, anomaly_type="low") # type: ignore self.pot3_dataframe_detector = atics.get_detector(method="POT", dataset=self.sample_3_df) # type: ignore - self.pot4_dataframe_detector = atics.get_detector(method="POT", dataset=self.sample_4_df) # type: ignore + self.pot4_dataframe_detector = atics.get_detector(method="POT") # type: ignore + self.pot4_dataframe_detector.assign_dataset(dataset=self.sample_4_df) # type: ignore def test_instance_is_pot_detector_class_successful(self): - self.assertIsInstance(obj=self.pot1_series_detector, cls=POTDetector) + self.assertIsInstance(self.pot1_series_detector, POTDetector) def test_detector_string_method_successful(self): - self.assertEqual(first=str(self.pot1_series_detector), second=str(POTDetector(dataset=self.sample_1_ts))) # type: ignore + self.assertEqual(str(self.pot1_series_detector), str(POTDetector(dataset=self.sample_1_ts))) # type: ignore def test_reset_time_window_to_historical_successful(self): t0 = self.pot1_series_detector.t0 @@ -38,6 +41,27 @@ def test_reset_time_window_to_historical_successful(self): self.assertNotEqual(t1, self.pot1_series_detector.t1) self.assertNotEqual(t2, self.pot1_series_detector.t2) + def test_initialize_pot_detector_without_dataframe_dataset_successful(self): + pot_detector = atics.get_detector(method="POT") + self.assertIsInstance(pot_detector, cls=POTDetector) + + def test_assign_dataset_after_detector_initialization_successful(self): + pot_ts_detector = atics.get_detector(method="POT") + pot_ts_detector.assign_dataset(dataset=self.sample_2_ts) # type: ignore + + pot_df_detector = atics.get_detector(method="POT") + pot_df_detector.assign_dataset(dataset=self.sample_3_df) # type: ignore + + self.assertIsInstance(pot_ts_detector, POTDetector) + self.assertEqual(pot_ts_detector.t0, 34) + self.assertEqual(pot_ts_detector.t1, 15) + self.assertEqual(pot_ts_detector.t2, 1) + + self.assertIsInstance(pot_df_detector, POTDetector) + self.assertEqual(pot_df_detector.t0, 6) + self.assertEqual(pot_df_detector.t1, 3) + self.assertEqual(pot_df_detector.t2, 1) + def test_exceedance_thresholds_dataframe_for_high_anomaly_type_successful(self): expected_exceedance_thresholds = pd.DataFrame( data={