feat(detector): initialize without dataset (#40)

* feat(detector): initialize detector without dataset for later assignment
* build(version): bump version to v0.2.2

---------

Co-authored-by: N. L <nino@pleno.earth>
Aeternalis-Ingenium · Dec 21, 2023 · 8995865 · 8995865
1 parent 542a714
commit 8995865
Show file tree

Hide file tree

Showing 14 changed files with 118 additions and 31 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "anomalytics"
 description = "The ultimate anomaly detection library."
 readme = "README.md"
-version = "0.2.1"
+version = "0.2.2"
 license = {file = "LICENSE"}
 requires-python = ">=3.10"
 authors = [

diff --git a/src/anomalytics/__init__.py b/src/anomalytics/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 
 __all__ = [
     "get_anomaly",

diff --git a/src/anomalytics/models/abstract.py b/src/anomalytics/models/abstract.py
@@ -8,14 +8,16 @@
 class Detector(metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize the anomaly detection model with a specific statisticail method.
 
         ## Parameters
         ----------
-        dataset : typing.Union[pandas.DataFrame, pandas.Series]
+        dataset : typing.Optional[typing.Union[pandas.DataFrame, pandas.Series]], default is None
             DataFame or Series objects to be analyzed.
             Index must be date-time and values must be numeric.
 
@@ -24,6 +26,19 @@ def __init__(
         """
         ...
 
+    @abc.abstractmethod
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        """
+        Assign a dataset to the `Detector` object when none was provided at initialization.
+
+        ## Parameters
+        ----------
+        dataset : typing.Union[pandas.DataFrame, pandas.Series]
+            DataFrame or Series object to be analyzed.
+            Index must be date-time and values must be numeric.
+        """
+        ...
+
     @abc.abstractmethod
     def fit(self) -> None:
         """

diff --git a/src/anomalytics/models/autoencoder.py b/src/anomalytics/models/autoencoder.py
@@ -33,7 +33,9 @@ class AutoencoderDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize Autoencoder model for anomaly detection.
@@ -57,6 +59,9 @@ def __init__(
         self.__eval = None  # type: ignore
         self.__params = {}
 
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        raise NotImplementedError("Not yet implemented!")  # stub: late dataset assignment is unsupported here
+
     def fit(self) -> None:
         raise NotImplementedError("Not yet implemented!")
 

diff --git a/src/anomalytics/models/block_maxima.py b/src/anomalytics/models/block_maxima.py
@@ -33,7 +33,9 @@ class BlockMaximaDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize Block Maxima model for anomaly detection.
@@ -57,6 +59,9 @@ def __init__(
         self.__eval = None  # type: ignore
         self.__params = {}
 
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        raise NotImplementedError("Not yet implemented!")  # stub: late dataset assignment is unsupported here
+
     def fit(self) -> None:
         raise NotImplementedError("Not yet implemented!")
 

diff --git a/src/anomalytics/models/dbscan.py b/src/anomalytics/models/dbscan.py
@@ -33,7 +33,9 @@ class DBSCANDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize DBSCAN model for anomaly detection.
@@ -57,6 +59,9 @@ def __init__(
         self.__eval = None  # type: ignore
         self.__params = {}
 
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        raise NotImplementedError("Not yet implemented!")  # stub: late dataset assignment is unsupported here
+
     def fit(self) -> None:
         raise NotImplementedError("Not yet implemented!")
 

diff --git a/src/anomalytics/models/detector.py b/src/anomalytics/models/detector.py
@@ -12,7 +12,7 @@ class FactoryDetector:
     def __init__(
         self,
         method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
-        dataset: typing.Union[pd.DataFrame, pd.Series],
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
         anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         self.method = method
@@ -67,7 +67,7 @@ def __call__(self):
 
 def get_detector(
     method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
-    dataset: typing.Union[pd.DataFrame, pd.Series],
+    dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
     anomaly_type: typing.Literal["high", "low"] = "high",
 ):
     return FactoryDetector(method=method, dataset=dataset, anomaly_type=anomaly_type)()
diff --git a/src/anomalytics/models/isoforest.py b/src/anomalytics/models/isoforest.py
@@ -33,7 +33,9 @@ class IsoForestDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize Isolation Forest model for anomaly detection.
@@ -57,6 +59,9 @@ def __init__(
         self.__eval = None  # type: ignore
         self.__params = {}
 
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        raise NotImplementedError("Not yet implemented!")  # stub: late dataset assignment is unsupported here
+
     def fit(self) -> None:
         raise NotImplementedError("Not yet implemented!")
 

diff --git a/src/anomalytics/models/mad.py b/src/anomalytics/models/mad.py
@@ -33,7 +33,9 @@ class MADDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize Mean Absolute Deviation model for anomaly detection.
@@ -57,6 +59,9 @@ def __init__(
         self.__eval = None  # type: ignore
         self.__params = {}
 
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        raise NotImplementedError("Not yet implemented!")  # stub: late dataset assignment is unsupported here
+
     def fit(self) -> None:
         raise NotImplementedError("Not yet implemented!")
 

diff --git a/src/anomalytics/models/one_class_svm.py b/src/anomalytics/models/one_class_svm.py
@@ -33,7 +33,9 @@ class OneClassSVMDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize 1 Class SVM model for anomaly detection.
@@ -57,6 +59,9 @@ def __init__(
         self.__eval = None  # type: ignore
         self.__params = {}
 
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        raise NotImplementedError("Not yet implemented!")  # stub: late dataset assignment is unsupported here
+
     def fit(self) -> None:
         raise NotImplementedError("Not yet implemented!")
 

diff --git a/src/anomalytics/models/peaks_over_threshold.py b/src/anomalytics/models/peaks_over_threshold.py
@@ -1,9 +1,7 @@
-import datetime
 import logging
 import typing
 import warnings
 
-import numpy as np
 import pandas as pd
 
 from anomalytics.evals.kolmogorv_smirnov import ks_1sample
@@ -135,7 +133,9 @@ class POTDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize POT model for anomaly detection.
@@ -153,6 +153,26 @@ def __init__(
 
         if anomaly_type not in ["high", "low"]:
             raise ValueError(f"Invalid value! The `anomaly_type` argument must be 'high' or 'low'")
+        if dataset is not None:
+            self.__process_dataset(dataset=dataset)
+        else:
+            self.__datetime = None  # type: ignore
+            self.__dataset = dataset
+            self.__time_window = None  # type: ignore
+
+        self.__anomaly_type = anomaly_type
+
+        self.__exceedance_threshold = None  # type: ignore
+        self.__exceedance = None  # type: ignore
+        self.__anomaly_score = None  # type: ignore
+        self.__anomaly_threshold = None  # type: ignore
+        self.__detection = None  # type: ignore
+        self.__eval = None  # type: ignore
+        self.__params = {}
+
+        logger.info("successfully initialized POT detection model")
+
+    def __process_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
         if not isinstance(dataset, pd.DataFrame) and not isinstance(dataset, pd.Series):
             raise TypeError("Invalid value! The `dataset` argument must be a Pandas DataFrame or Series")
 
@@ -186,8 +206,6 @@ def __init__(
                     ) from _error
             self.__datetime = None
             self.__dataset = dataset
-
-        self.__anomaly_type = anomaly_type
         self.__time_window = set_time_window(
             total_rows=self.__dataset.shape[0],
             method="POT",
@@ -196,15 +214,10 @@ def __init__(
             t1_pct=0.3,
             t2_pct=0.0,
         )
-        self.__exceedance_threshold = None  # type: ignore
-        self.__exceedance = None  # type: ignore
-        self.__anomaly_score = None  # type: ignore
-        self.__anomaly_threshold = None  # type: ignore
-        self.__detection = None  # type: ignore
-        self.__eval = None  # type: ignore
-        self.__params = {}
+        logger.info("The dataset is successfully processed!")  # use the module logger, not stdout
 
-        logger.info("successfully initialized POT detection model")
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        return self.__process_dataset(dataset=dataset)  # type-checks the input and sets the time window
 
     def reset_time_window(
         self,

diff --git a/src/anomalytics/models/zscore.py b/src/anomalytics/models/zscore.py
@@ -33,7 +33,9 @@ class ZScoreDetector(Detector):
     __params: typing.Dict
 
     def __init__(
-        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+        self,
+        dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
+        anomaly_type: typing.Literal["high", "low"] = "high",
     ):
         """
         Initialize Z-Score model for anomaly detection.
@@ -57,6 +59,9 @@ def __init__(
         self.__eval = None  # type: ignore
         self.__params = {}
 
+    def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
+        raise NotImplementedError("Not yet implemented!")  # stub: late dataset assignment is unsupported here
+
     def fit(self) -> None:
         raise NotImplementedError("Not yet implemented!")
 

diff --git a/tests/test_version.py b/tests/test_version.py
@@ -2,4 +2,4 @@
 
 
 def test_pkg_version():
-    assert __version__ == "0.2.1"
+    assert __version__ == "0.2.2"
diff --git a/tests/unit/detectors/test_pot_detector.py b/tests/unit/detectors/test_pot_detector.py
@@ -16,16 +16,19 @@
 class TestPOTDetector(unittest.TestCase):
     def setUp(self) -> None:
         super().setUp()
-        self.pot1_series_detector = atics.get_detector(method="POT", dataset=self.sample_1_ts)  # type: ignore
+        self.pot1_series_detector = atics.get_detector(method="POT")  # type: ignore
+        self.pot1_series_detector.assign_dataset(dataset=self.sample_1_ts)  # type: ignore
+
         self.pot2_series_detector = atics.get_detector(method="POT", dataset=self.sample_2_ts, anomaly_type="low")  # type: ignore
         self.pot3_dataframe_detector = atics.get_detector(method="POT", dataset=self.sample_3_df)  # type: ignore
-        self.pot4_dataframe_detector = atics.get_detector(method="POT", dataset=self.sample_4_df)  # type: ignore
+        self.pot4_dataframe_detector = atics.get_detector(method="POT")  # type: ignore
+        self.pot4_dataframe_detector.assign_dataset(dataset=self.sample_4_df)  # type: ignore
 
     def test_instance_is_pot_detector_class_successful(self):
-        self.assertIsInstance(obj=self.pot1_series_detector, cls=POTDetector)
+        self.assertIsInstance(self.pot1_series_detector, POTDetector)
 
     def test_detector_string_method_successful(self):
-        self.assertEqual(first=str(self.pot1_series_detector), second=str(POTDetector(dataset=self.sample_1_ts)))  # type: ignore
+        self.assertEqual(str(self.pot1_series_detector), str(POTDetector(dataset=self.sample_1_ts)))  # type: ignore
 
     def test_reset_time_window_to_historical_successful(self):
         t0 = self.pot1_series_detector.t0
@@ -38,6 +41,27 @@ def test_reset_time_window_to_historical_successful(self):
         self.assertNotEqual(t1, self.pot1_series_detector.t1)
         self.assertNotEqual(t2, self.pot1_series_detector.t2)
 
+    def test_initialize_pot_detector_without_dataframe_dataset_successful(self):
+        pot_detector = atics.get_detector(method="POT")  # no dataset passed: construction alone must succeed
+        self.assertIsInstance(pot_detector, POTDetector)
+
+    def test_assign_dataset_after_detector_initialization_successful(self):
+        # Series sample attached after construction; t0 / t1 / t2 are expected to be 34 / 15 / 1.
+        series_detector = atics.get_detector(method="POT")
+        series_detector.assign_dataset(dataset=self.sample_2_ts)  # type: ignore
+        self.assertIsInstance(series_detector, POTDetector)
+        self.assertEqual(series_detector.t0, 34)
+        self.assertEqual(series_detector.t1, 15)
+        self.assertEqual(series_detector.t2, 1)
+
+        # DataFrame sample attached after construction; t0 / t1 / t2 are expected to be 6 / 3 / 1.
+        frame_detector = atics.get_detector(method="POT")
+        frame_detector.assign_dataset(dataset=self.sample_3_df)  # type: ignore
+        self.assertIsInstance(frame_detector, POTDetector)
+        self.assertEqual(frame_detector.t0, 6)
+        self.assertEqual(frame_detector.t1, 3)
+        self.assertEqual(frame_detector.t2, 1)
+
     def test_exceedance_thresholds_dataframe_for_high_anomaly_type_successful(self):
         expected_exceedance_thresholds = pd.DataFrame(
             data={