Skip to content

Commit

Permalink
feat(detector): initialize without dataset (#40)
Browse files Browse the repository at this point in the history
* feat(detector): initialize detector without dataset for later assignment

* build(version): bump version to v0.2.2

---------

Co-authored-by: N. L <nino@pleno.earth>
  • Loading branch information
Aeternalis-Ingenium and ninopleno authored Dec 21, 2023
1 parent 542a714 commit 8995865
Show file tree
Hide file tree
Showing 14 changed files with 118 additions and 31 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
name = "anomalytics"
description = "The ultimate anomaly detection library."
readme = "README.md"
version = "0.2.1"
version = "0.2.2"
license = {file = "LICENSE"}
requires-python = ">=3.10"
authors = [
Expand Down
2 changes: 1 addition & 1 deletion src/anomalytics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.2.1"
__version__ = "0.2.2"

__all__ = [
"get_anomaly",
Expand Down
19 changes: 17 additions & 2 deletions src/anomalytics/models/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
class Detector(metaclass=abc.ABCMeta):
@abc.abstractmethod
def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize the anomaly detection model with a specific statistical method.
## Parameters
----------
dataset : typing.Union[pandas.DataFrame, pandas.Series]
dataset : typing.Optional[typing.Union[pandas.DataFrame, pandas.Series]], default is None
DataFrame or Series objects to be analyzed.
Index must be date-time and values must be numeric.
Expand All @@ -24,6 +26,19 @@ def __init__(
"""
...

@abc.abstractmethod
def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign dataset to the `Detector` object, if it is not assigned during initialization.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        DataFrame or Series objects to be analyzed.
        Index must be date-time and values must be numeric.
    """
    ...

@abc.abstractmethod
def fit(self) -> None:
"""
Expand Down
7 changes: 6 additions & 1 deletion src/anomalytics/models/autoencoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class AutoencoderDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize Autoencoder model for anomaly detection.
Expand All @@ -57,6 +59,9 @@ def __init__(
self.__eval = None # type: ignore
self.__params = {}

def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign a dataset to the Autoencoder detector after initialization.

    Placeholder only: the body unconditionally raises `NotImplementedError`.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Dataset meant to be attached to the detector; currently unused.

    ## Raises
    ------
    NotImplementedError
        Always, because the assignment logic is not written yet.
    """
    raise NotImplementedError("Not yet implemented!")

def fit(self) -> None:
raise NotImplementedError("Not yet implemented!")

Expand Down
7 changes: 6 additions & 1 deletion src/anomalytics/models/block_maxima.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class BlockMaximaDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize Block Maxima model for anomaly detection.
Expand All @@ -57,6 +59,9 @@ def __init__(
self.__eval = None # type: ignore
self.__params = {}

def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign a dataset to the Block Maxima detector after initialization.

    Placeholder only: the body unconditionally raises `NotImplementedError`.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Dataset meant to be attached to the detector; currently unused.

    ## Raises
    ------
    NotImplementedError
        Always, because the assignment logic is not written yet.
    """
    raise NotImplementedError("Not yet implemented!")

def fit(self) -> None:
raise NotImplementedError("Not yet implemented!")

Expand Down
7 changes: 6 additions & 1 deletion src/anomalytics/models/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class DBSCANDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize DBSCAN model for anomaly detection.
Expand All @@ -57,6 +59,9 @@ def __init__(
self.__eval = None # type: ignore
self.__params = {}

def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign a dataset to the DBSCAN detector after initialization.

    Placeholder only: the body unconditionally raises `NotImplementedError`.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Dataset meant to be attached to the detector; currently unused.

    ## Raises
    ------
    NotImplementedError
        Always, because the assignment logic is not written yet.
    """
    raise NotImplementedError("Not yet implemented!")

def fit(self) -> None:
raise NotImplementedError("Not yet implemented!")

Expand Down
4 changes: 2 additions & 2 deletions src/anomalytics/models/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class FactoryDetector:
def __init__(
self,
method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
dataset: typing.Union[pd.DataFrame, pd.Series],
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
self.method = method
Expand Down Expand Up @@ -67,7 +67,7 @@ def __call__(self):

def get_detector(
method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
dataset: typing.Union[pd.DataFrame, pd.Series],
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
return FactoryDetector(method=method, dataset=dataset, anomaly_type=anomaly_type)()
7 changes: 6 additions & 1 deletion src/anomalytics/models/isoforest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class IsoForestDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize Isolation Forest model for anomaly detection.
Expand All @@ -57,6 +59,9 @@ def __init__(
self.__eval = None # type: ignore
self.__params = {}

def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign a dataset to the Isolation Forest detector after initialization.

    Placeholder only: the body unconditionally raises `NotImplementedError`.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Dataset meant to be attached to the detector; currently unused.

    ## Raises
    ------
    NotImplementedError
        Always, because the assignment logic is not written yet.
    """
    raise NotImplementedError("Not yet implemented!")

def fit(self) -> None:
raise NotImplementedError("Not yet implemented!")

Expand Down
7 changes: 6 additions & 1 deletion src/anomalytics/models/mad.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class MADDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize Mean Absolute Deviation model for anomaly detection.
Expand All @@ -57,6 +59,9 @@ def __init__(
self.__eval = None # type: ignore
self.__params = {}

def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign a dataset to the MAD detector after initialization.

    Placeholder only: the body unconditionally raises `NotImplementedError`.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Dataset meant to be attached to the detector; currently unused.

    ## Raises
    ------
    NotImplementedError
        Always, because the assignment logic is not written yet.
    """
    raise NotImplementedError("Not yet implemented!")

def fit(self) -> None:
raise NotImplementedError("Not yet implemented!")

Expand Down
7 changes: 6 additions & 1 deletion src/anomalytics/models/one_class_svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class OneClassSVMDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize 1 Class SVM model for anomaly detection.
Expand All @@ -57,6 +59,9 @@ def __init__(
self.__eval = None # type: ignore
self.__params = {}

def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign a dataset to the One-Class SVM detector after initialization.

    Placeholder only: the body unconditionally raises `NotImplementedError`.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Dataset meant to be attached to the detector; currently unused.

    ## Raises
    ------
    NotImplementedError
        Always, because the assignment logic is not written yet.
    """
    raise NotImplementedError("Not yet implemented!")

def fit(self) -> None:
raise NotImplementedError("Not yet implemented!")

Expand Down
39 changes: 26 additions & 13 deletions src/anomalytics/models/peaks_over_threshold.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import datetime
import logging
import typing
import warnings

import numpy as np
import pandas as pd

from anomalytics.evals.kolmogorv_smirnov import ks_1sample
Expand Down Expand Up @@ -135,7 +133,9 @@ class POTDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize POT model for anomaly detection.
Expand All @@ -153,6 +153,26 @@ def __init__(

if anomaly_type not in ["high", "low"]:
raise ValueError(f"Invalid value! The `anomaly_type` argument must be 'high' or 'low'")
if dataset is not None:
self.__process_dataset(dataset=dataset)
else:
self.__datetime = None # type: ignore
self.__dataset = dataset
self.__time_window = None # type: ignore

self.__anomaly_type = anomaly_type

self.__exceedance_threshold = None # type: ignore
self.__exceedance = None # type: ignore
self.__anomaly_score = None # type: ignore
self.__anomaly_threshold = None # type: ignore
self.__detection = None # type: ignore
self.__eval = None # type: ignore
self.__params = {}

logger.info("successfully initialized POT detection model")

def __process_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
if not isinstance(dataset, pd.DataFrame) and not isinstance(dataset, pd.Series):
raise TypeError("Invalid value! The `dataset` argument must be a Pandas DataFrame or Series")

Expand Down Expand Up @@ -186,8 +206,6 @@ def __init__(
) from _error
self.__datetime = None
self.__dataset = dataset

self.__anomaly_type = anomaly_type
self.__time_window = set_time_window(
total_rows=self.__dataset.shape[0],
method="POT",
Expand All @@ -196,15 +214,10 @@ def __init__(
t1_pct=0.3,
t2_pct=0.0,
)
self.__exceedance_threshold = None # type: ignore
self.__exceedance = None # type: ignore
self.__anomaly_score = None # type: ignore
self.__anomaly_threshold = None # type: ignore
self.__detection = None # type: ignore
self.__eval = None # type: ignore
self.__params = {}
print("The dataset is successfully processed!")

logger.info("successfully initialized POT detection model")
def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign dataset to the POT detector, if it is not assigned during initialization.

    Delegates entirely to the private `__process_dataset` helper. That helper
    raises `TypeError` when `dataset` is neither a DataFrame nor a Series,
    stores the dataset on the instance, and sets the time window through
    `set_time_window`.

    NOTE(review): part of the helper body is not visible here, so it may
    perform further validation and raise other exceptions - confirm.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        DataFrame or Series object to be analyzed.

    ## Raises
    ------
    TypeError
        If `dataset` is not a Pandas DataFrame or Series.
    """
    return self.__process_dataset(dataset=dataset)

def reset_time_window(
self,
Expand Down
7 changes: 6 additions & 1 deletion src/anomalytics/models/zscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class ZScoreDetector(Detector):
__params: typing.Dict

def __init__(
self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
self,
dataset: typing.Optional[typing.Union[pd.DataFrame, pd.Series]] = None,
anomaly_type: typing.Literal["high", "low"] = "high",
):
"""
Initialize Z-Score model for anomaly detection.
Expand All @@ -57,6 +59,9 @@ def __init__(
self.__eval = None # type: ignore
self.__params = {}

def assign_dataset(self, dataset: typing.Union[pd.DataFrame, pd.Series]) -> None:
    """
    Assign a dataset to the Z-Score detector after initialization.

    Placeholder only: the body unconditionally raises `NotImplementedError`.

    ## Parameters
    ----------
    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Dataset meant to be attached to the detector; currently unused.

    ## Raises
    ------
    NotImplementedError
        Always, because the assignment logic is not written yet.
    """
    raise NotImplementedError("Not yet implemented!")

def fit(self) -> None:
raise NotImplementedError("Not yet implemented!")

Expand Down
2 changes: 1 addition & 1 deletion tests/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


def test_pkg_version():
assert __version__ == "0.2.1"
assert __version__ == "0.2.2"
32 changes: 28 additions & 4 deletions tests/unit/detectors/test_pot_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,19 @@
class TestPOTDetector(unittest.TestCase):
def setUp(self) -> None:
super().setUp()
self.pot1_series_detector = atics.get_detector(method="POT", dataset=self.sample_1_ts) # type: ignore
self.pot1_series_detector = atics.get_detector(method="POT") # type: ignore
self.pot1_series_detector.assign_dataset(dataset=self.sample_1_ts) # type: ignore

self.pot2_series_detector = atics.get_detector(method="POT", dataset=self.sample_2_ts, anomaly_type="low") # type: ignore
self.pot3_dataframe_detector = atics.get_detector(method="POT", dataset=self.sample_3_df) # type: ignore
self.pot4_dataframe_detector = atics.get_detector(method="POT", dataset=self.sample_4_df) # type: ignore
self.pot4_dataframe_detector = atics.get_detector(method="POT") # type: ignore
self.pot4_dataframe_detector.assign_dataset(dataset=self.sample_4_df) # type: ignore

def test_instance_is_pot_detector_class_successful(self):
self.assertIsInstance(obj=self.pot1_series_detector, cls=POTDetector)
self.assertIsInstance(self.pot1_series_detector, POTDetector)

def test_detector_string_method_successful(self):
self.assertEqual(first=str(self.pot1_series_detector), second=str(POTDetector(dataset=self.sample_1_ts))) # type: ignore
self.assertEqual(str(self.pot1_series_detector), str(POTDetector(dataset=self.sample_1_ts))) # type: ignore

def test_reset_time_window_to_historical_successful(self):
t0 = self.pot1_series_detector.t0
Expand All @@ -38,6 +41,27 @@ def test_reset_time_window_to_historical_successful(self):
self.assertNotEqual(t1, self.pot1_series_detector.t1)
self.assertNotEqual(t2, self.pot1_series_detector.t2)

def test_initialize_pot_detector_without_dataframe_dataset_successful(self):
    """The factory builds a `POTDetector` even when no dataset is supplied."""
    detector_without_data = atics.get_detector(method="POT")
    self.assertIsInstance(detector_without_data, POTDetector)

def test_assign_dataset_after_detector_initialization_successful(self):
    """Datasets assigned after construction produce the expected POT time windows."""
    series_detector = atics.get_detector(method="POT")
    series_detector.assign_dataset(dataset=self.sample_2_ts)  # type: ignore

    frame_detector = atics.get_detector(method="POT")
    frame_detector.assign_dataset(dataset=self.sample_3_df)  # type: ignore

    # (detector, expected t0, expected t1, expected t2); the Series-backed
    # detector is verified first, then the DataFrame-backed one.
    expectations = (
        (series_detector, 34, 15, 1),
        (frame_detector, 6, 3, 1),
    )
    for detector, expected_t0, expected_t1, expected_t2 in expectations:
        self.assertIsInstance(detector, POTDetector)
        self.assertEqual(detector.t0, expected_t0)
        self.assertEqual(detector.t1, expected_t1)
        self.assertEqual(detector.t2, expected_t2)

def test_exceedance_thresholds_dataframe_for_high_anomaly_type_successful(self):
expected_exceedance_thresholds = pd.DataFrame(
data={
Expand Down

0 comments on commit 8995865

Please sign in to comment.