Skip to content

Commit

Permalink
Fix issues with LayeredModelConfig accessing underlying model attribu…
Browse files Browse the repository at this point in the history
…tes. (#104)

* Simplify BOCPD implementation.

* Remove overcomplicated _online_model check.

* Fix LayeredModel access to underlying model params

* Add docstring.

* Have AutoMLMixin inherit from LayeredModel.

* Add feature summary line for change point detect.

* Update version to 1.2.1.
  • Loading branch information
aadyotb committed Jun 9, 2022
1 parent 4505a47 commit ea67ed4
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 86 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ anomaly detection and/or forecasting.
| Multivariate Forecasting || | |||| | | | |
| Univariate Anomaly Detection ||||| | |||||
| Multivariate Anomaly Detection || ||| | ||| | | |
| Change Point Detection ||||| | | | || |
| AutoML | ✅ | | | ✅ | | | | | ✅ | | ✅
| Ensembles || | | | | || | | |
| Benchmarking || | | | || | | | |
Expand Down
70 changes: 22 additions & 48 deletions merlion/models/anomaly/change_point/bocpd.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import copy
from enum import Enum
import logging
from typing import Iterator, List, Tuple, Union
from typing import List, Tuple, Union
import warnings

import numpy as np
Expand All @@ -21,7 +21,6 @@
from scipy.stats import norm
from tqdm import tqdm

from merlion.models.automl.base import AutoMLMixIn, ModelBase
from merlion.models.anomaly.base import NoCalibrationDetectorConfig, DetectorBase
from merlion.models.anomaly.forecast_based.base import ForecastingDetectorBase
from merlion.models.forecast.base import ForecasterConfig
Expand Down Expand Up @@ -136,7 +135,7 @@ def change_kind(self, change_kind: Union[str, ChangeKind]):
self._change_kind = change_kind


class BOCPD(AutoMLMixIn, ForecastingDetectorBase):
class BOCPD(ForecastingDetectorBase):
"""
Bayesian online change point detection algorithm described by
`Adams & MacKay (2007) <https://arxiv.org/abs/0710.3742>`__.
Expand Down Expand Up @@ -216,50 +215,6 @@ def min_likelihood(self) -> float:
"""
return self.config.min_likelihood

@property
def model(self):
"""
:return: The model itself. Implemented to support the ``train()`` method of `AutoMLMixIn`.
The setter is equivalent to calling ``self.__setstate__(model.__getstate__())``.
"""
return self

@model.setter
def model(self, model):
self.__setstate__(model.__getstate__())

def generate_theta(self, train_data: TimeSeries) -> Iterator:
if self.change_kind is not ChangeKind.Auto:
return iter([self.change_kind])
else:
return iter(ck for ck in ChangeKind if ck is not ChangeKind.Auto)

def set_theta(self, model, theta, train_data: TimeSeries = None):
model.config.change_kind = theta

def evaluate_theta(
self, thetas: Iterator, train_data: TimeSeries, train_config=None, **kwargs
) -> Tuple[float, ModelBase, TimeSeries]:
# If not automatically detecting the change kind, train as normal
if self.change_kind is not ChangeKind.Auto:
train_scores = ForecastingDetectorBase.train(self, train_data, train_config=train_config, **kwargs)
log_likelihood = logsumexp([p.logp for p in self.posterior_beam]).item()
return log_likelihood, self, train_scores

# Otherwise, evaluate all thetas as options
candidates = []
for change_kind in thetas:
candidate = copy.deepcopy(self)
candidate.config.change_kind = change_kind
train_scores = candidate.train(train_data, train_config=train_config, **kwargs)
log_likelihood = logsumexp([p.logp for p in candidate.posterior_beam]).item()
candidates.append((log_likelihood, candidate, train_scores))
logger.info(f"Change kind {change_kind.name} has log likelihood {log_likelihood:.3f}.")

# Choose the model with the best log likelihood
i_best = np.argmax([candidate[0] for candidate in candidates])
return candidates[i_best]

def _create_posterior(self, logp: float) -> _PosteriorBeam:
posterior = self.change_kind.value()
return _PosteriorBeam(run_length=0, posterior=posterior, cp_prior=self.cp_prior, logp=logp)
Expand Down Expand Up @@ -460,7 +415,26 @@ def update(self, time_series: TimeSeries):
return self._get_anom_scores(time_stamps)

def _train(self, train_data: pd.DataFrame, train_config=None) -> pd.DataFrame:
return self.update(time_series=TimeSeries.from_pd(train_data)).to_pd()
# If not automatically detecting the change kind, train as normal
if self.change_kind is not ChangeKind.Auto:
return self.update(time_series=TimeSeries.from_pd(train_data)).to_pd()

# Otherwise, evaluate all change kinds as options
candidates = []
for change_kind in [ck for ck in ChangeKind if ck is not ChangeKind.Auto]:
candidate = copy.deepcopy(self)
candidate.config.change_kind = change_kind
train_scores = candidate._train(train_data, train_config=train_config)
log_likelihood = logsumexp([p.logp for p in candidate.posterior_beam]).item()
candidates.append((log_likelihood, candidate, train_scores))
logger.info(f"Change kind {change_kind.name} has log likelihood {log_likelihood:.3f}.")

# Choose the model with the best log likelihood
i_best = np.argmax([candidate[0] for candidate in candidates])
log_likelihood, best, train_scores = candidates[i_best]
self.__setstate__(best.__getstate__())
logger.info(f"Using change kind {self.change_kind.name} because it has the best log likelihood.")
return train_scores

def get_anomaly_score(self, time_series: TimeSeries, time_series_prev: TimeSeries = None) -> TimeSeries:
return DetectorBase.get_anomaly_score(self, time_series, time_series_prev)
Expand Down
16 changes: 8 additions & 8 deletions merlion/models/automl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@
from copy import deepcopy
from typing import Any, Iterator, Optional, Tuple

from merlion.models.base import ModelBase
from merlion.models.layers import ModelBase, LayeredModel
from merlion.utils import TimeSeries
from merlion.utils.misc import AutodocABCMeta


class AutoMLMixIn(ModelBase, metaclass=AutodocABCMeta):
class AutoMLMixIn(LayeredModel, metaclass=AutodocABCMeta):
"""
Base Interface for Implemented AutoML Layers
This abstract class contains all of the methods that Layers should implement. Ideally, these would be generated by
an existing mix-in.
Abstract base class which converts `LayeredModel` into an AutoML models.
"""

def train(self, train_data: TimeSeries, **kwargs):
train_data = self.train_pre_process(train_data)
def train_model(self, train_data: TimeSeries, **kwargs):
"""
Generates a set of candidate models and picks the best one.
:param train_data: the data to train on, after any pre-processing transforms have been applied.
"""
candidate_thetas = self.generate_theta(train_data)
theta, model, train_result = self.evaluate_theta(candidate_thetas, train_data, kwargs)
if model is not None:
Expand Down
4 changes: 2 additions & 2 deletions merlion/models/automl/seasonality.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from merlion.models.automl.base import AutoMLMixIn
from merlion.models.base import ModelBase
from merlion.models.layers import LayeredModelConfig, LayeredModel
from merlion.models.layers import LayeredModelConfig
from merlion.transform.resample import TemporalResample
from merlion.utils import TimeSeries, UnivariateTimeSeries, autosarima_utils
from merlion.utils.misc import AutodocABCMeta
Expand Down Expand Up @@ -117,7 +117,7 @@ def to_dict(self, _skipped_keys=None):
return config_dict


class SeasonalityLayer(AutoMLMixIn, LayeredModel, metaclass=AutodocABCMeta):
class SeasonalityLayer(AutoMLMixIn, metaclass=AutodocABCMeta):
"""
Seasonality Layer that uses AutoSARIMA-like methods to determine seasonality of your data. Can be used directly on
any model that implements `SeasonalityModel` class.
Expand Down
12 changes: 2 additions & 10 deletions merlion/models/forecast/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class ForecasterConfig(Config):

max_forecast_steps: Optional[int] = None
target_seq_index: Optional[int] = None
invert_transform: bool = False

def __init__(self, max_forecast_steps: int = None, target_seq_index: int = None, invert_transform=False, **kwargs):
"""
Expand Down Expand Up @@ -89,10 +90,6 @@ def invert_transform(self):
"""
return self.config.invert_transform

@property
def _online_model(self) -> bool:
return False

@property
def require_univariate(self) -> bool:
"""
Expand Down Expand Up @@ -234,17 +231,12 @@ def forecast(
if time_series_prev is None:
time_series_prev_df = None
else:
tf = to_pd_datetime(self.last_train_time + self.timedelta)
time_series_prev = self.transform(time_series_prev)
assert time_series_prev.dim == self.dim, (
f"time_series_prev has dimension of {time_series_prev.dim} that is different from "
f"training data dimension of {self.dim} for the model"
)
if self._online_model and to_pd_datetime(time_series_prev.tf) < tf:
time_series_prev = None
time_series_prev_df = None
else:
time_series_prev_df = time_series_prev.to_pd()
time_series_prev_df = time_series_prev.to_pd()

# Make the prediction
forecast, err = self._forecast(
Expand Down
7 changes: 3 additions & 4 deletions merlion/models/forecast/ets.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,6 @@ def seasonal(self):
def seasonal_periods(self):
return self.config.seasonal_periods

@property
def _online_model(self) -> bool:
return True

def set_seasonality(self, theta, train_data: UnivariateTimeSeries):
if theta > 1:
self.config.seasonal_periods = int(theta)
Expand Down Expand Up @@ -154,6 +150,9 @@ def _train(self, train_data: pd.DataFrame, train_config=None):
def _forecast(
self, time_stamps: Union[int, List[int]], time_series_prev: pd.DataFrame = None, return_prev=False
) -> Tuple[pd.DataFrame, pd.DataFrame]:
# Ignore time_series_prev if it comes after the training data
if time_series_prev is not None and time_series_prev.index[-1] < self.last_train_time + self.timedelta:
time_series_prev = None

# Basic forecasting without time_series_prev
if time_series_prev is None:
Expand Down
51 changes: 38 additions & 13 deletions merlion/models/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
`AutoML models <merlion.models.automl>`_.
"""
import copy
import inspect
import logging
from typing import Any, Dict, List, Union

Expand All @@ -20,11 +21,16 @@
from merlion.models.anomaly.base import DetectorBase, DetectorConfig
from merlion.models.forecast.base import ForecasterBase, ForecasterConfig
from merlion.models.anomaly.forecast_based.base import ForecastingDetectorBase
from merlion.transform.base import Identity
from merlion.transform.sequence import TransformSequence
from merlion.utils import TimeSeries
from merlion.utils.misc import AutodocABCMeta

logger = logging.getLogger(__name__)

_DETECTOR_MEMBERS = dict(inspect.getmembers(DetectorConfig)).keys()
_FORECASTER_MEMBERS = dict(inspect.getmembers(ForecasterConfig)).keys()


class LayeredModelConfig(Config):
"""
Expand All @@ -43,7 +49,13 @@ def __init__(self, model: Union[ModelBase, Dict], model_kwargs=None, **kwargs):
# Model-specific kwargs override kwargs when creating the model.
model_kwargs = {} if model_kwargs is None else model_kwargs
if isinstance(model, dict):
model.update({k: v for k, v in kwargs.items() if k not in model and k not in model_kwargs})
model.update(
{
k: v
for k, v in kwargs.items()
if k not in model and k not in model_kwargs and k not in _LAYERED_MEMBERS
}
)
model, extra_kwargs = ModelFactory.create(**{**model, **model_kwargs, "return_unused_kwargs": True})
kwargs.update(extra_kwargs)
self.model = model
Expand Down Expand Up @@ -82,7 +94,7 @@ def to_dict(self, _skipped_keys=None):
def from_dict(cls, config_dict: Dict[str, Any], return_unused_kwargs=False, dim=None, **kwargs):
config, kwargs = super().from_dict(config_dict=config_dict, return_unused_kwargs=True, dim=dim, **kwargs)
if config.model is None:
used = {k: v for k, v in kwargs.items() if hasattr(DetectorConfig, k) or hasattr(ForecasterConfig, k)}
used = {k: v for k, v in kwargs.items() if k in _DETECTOR_MEMBERS or k in _FORECASTER_MEMBERS}
config.model_kwargs.update(used)
kwargs = {k: v for k, v in kwargs.items() if k not in used}

Expand All @@ -106,8 +118,8 @@ def __getattr__(self, item):
if item in ["model", "_model", "base_model"]:
return super().__getattribute__(item)
base_model = self.base_model
is_detector_attr = isinstance(base_model, DetectorBase) and hasattr(DetectorConfig, item)
is_forecaster_attr = isinstance(base_model, ForecasterBase) and hasattr(ForecasterConfig, item)
is_detector_attr = isinstance(base_model, DetectorBase) and item in _DETECTOR_MEMBERS
is_forecaster_attr = isinstance(base_model, ForecasterBase) and item in _FORECASTER_MEMBERS
if is_detector_attr or is_forecaster_attr:
return getattr(base_model.config, item)
elif base_model is None and item in self.model_kwargs:
Expand All @@ -117,10 +129,9 @@ def __getattr__(self, item):
def __setattr__(self, key, value):
if hasattr(self, "_model"):
base_model = self.base_model
is_detector_attr = isinstance(base_model, DetectorBase) and hasattr(DetectorConfig, key)
is_forecaster_attr = isinstance(base_model, ForecasterBase) and hasattr(ForecasterConfig, key)
is_layered_attr = hasattr(LayeredModelConfig, key)
if not is_layered_attr and (is_detector_attr or is_forecaster_attr):
is_detector_attr = isinstance(base_model, DetectorBase) and key in _DETECTOR_MEMBERS
is_forecaster_attr = isinstance(base_model, ForecasterBase) and key in _FORECASTER_MEMBERS
if key not in _LAYERED_MEMBERS and (is_detector_attr or is_forecaster_attr):
return setattr(self.model.config, key, value)
return super().__setattr__(key, value)

Expand All @@ -133,6 +144,9 @@ def get_unused_kwargs(self, **kwargs):
return {k: v for k, v in kwargs.items() if k not in valid_keys}


_LAYERED_MEMBERS = dict(inspect.getmembers(LayeredModelConfig)).keys()


class LayeredModel(ModelBase, metaclass=AutodocABCMeta):
"""
Abstract class implementing a model which wraps around another internal model.
Expand All @@ -158,7 +172,9 @@ class LayeredModel(ModelBase, metaclass=AutodocABCMeta):
.. note::
For the time being, every layer of the model is allowed to have its own ``transform``.
For the time being, every layer of the model is allowed to have its own ``transform``. However, after the
model is trained, the entire transform will be composed as a single `TransformSequence` and will be owned by
the base model.
"""

config_class = LayeredModelConfig
Expand Down Expand Up @@ -272,9 +288,21 @@ def __getattr__(self, item):
return attr
return self.__getattribute__(item)

def train_model(self, train_data, *args, **kwargs):
"""
Trains the underlying model. May be overridden, e.g. for AutoML.
:param train_data: the data to train on, after any pre-processing transforms have been applied.
"""
return self.model.train(train_data, *args, **kwargs)

def train(self, train_data: TimeSeries, *args, **kwargs):
train_data = self.train_pre_process(train_data)
return self.model.train(train_data, *args, **kwargs)
ret = self.train_model(train_data, *args, **kwargs)
# Push the layered model transform to the owned model
self.model.transform = TransformSequence([self.transform, self.model.transform])
self.transform = Identity()
return ret


class LayeredDetector(LayeredModel, DetectorBase):
Expand All @@ -289,7 +317,6 @@ def _get_anomaly_score(self, time_series: pd.DataFrame, time_series_prev: pd.Dat
raise NotImplementedError("Layered model _get_anomaly_score() should not be called.")

def get_anomaly_score(self, time_series: TimeSeries, time_series_prev: TimeSeries = None) -> TimeSeries:
time_series, time_series_prev = self.transform_time_series(time_series, time_series_prev)
return self.model.get_anomaly_score(time_series, time_series_prev)


Expand All @@ -305,8 +332,6 @@ def _forecast(self, time_stamps: List[int], time_series_prev: TimeSeries = None,
raise NotImplementedError("Layered model _forecast() should not be called.")

def forecast(self, time_stamps, time_series_prev: TimeSeries = None, *args, **kwargs):
if time_series_prev is not None:
time_series_prev = self.transform(time_series_prev)
return self.model.forecast(time_stamps, time_series_prev, *args, **kwargs)


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def read_file(fname):

setup(
name="salesforce-merlion",
version="1.2.0",
version="1.2.1",
author=", ".join(read_file("AUTHORS.md").split("\n")),
author_email="abhatnagar@salesforce.com",
description="Merlion: A Machine Learning Framework for Time Series Intelligence",
Expand Down

0 comments on commit ea67ed4

Please sign in to comment.