Skip to content

Commit

Permalink
refactor docs
Browse files Browse the repository at this point in the history
  • Loading branch information
GishB committed Mar 22, 2024
1 parent 4a373a6 commit a5812f2
Showing 1 changed file with 67 additions and 43 deletions.
110 changes: 67 additions & 43 deletions optimization/WSSAlgorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class WindowSizeSelection:
type of WSS algorithm. It can be 'highest_autocorrelation', 'dominant_fourier_frequency',
'summary_statistics_subsequence' or 'multi_window_finder'.
By default, it is 'dominant_fourier_frequency'
Reference:
(c) "Window Size Selection in Unsupervised Time Series Analytics: A Review and Benchmark",
Arik Ermshaus, Patrick Schäfer, and Ulf Leser
Expand Down Expand Up @@ -53,10 +54,10 @@ def __init__(self,
self.window_max = len(time_series)

def autocorrelation(self):
"""
Main function for the highest_autocorrelation method
""" Main function for the highest_autocorrelation method
:return: a tuple of selected window size and list of scores for this method
Return:
a tuple of selected window size and list of scores for this method
"""
list_score = [self.high_ac_metric(self.time_series[i:] + self.time_series[:i], i) \
for i in range(self.window_min, self.window_max)]
Expand All @@ -65,11 +66,15 @@ def autocorrelation(self):

def high_ac_metric(self, copy_ts, i):
"""
Calculate metric value based on chosen chosen window size for the highest_autocorrelation method
Calculate metric value based on chosen window size for the highest_autocorrelation method
:param copy_ts: a list of lagged time series
:param i: temporary window size (or the lagged value)
:return: score for selected window size
Args:
copy_ts: a list of lagged time series
i: temporary window size (or the lagged value)
Return:
score for selected window size
"""
temp_len = len(copy_ts)
temp_coef = 1 / (temp_len - i)
Expand All @@ -80,11 +85,13 @@ def high_ac_metric(self, copy_ts, i):
return a_score

def local_max_search(self, score_list):
"""
Find global max value id for the highest_autocorrelation method.
""" Find global max value id for the highest_autocorrelation method.
:param score_list: a list of scores obtained
:return: a tuple of window_size_selected and list_score
Arg:
score_list: a list of scores obtained
Return:
a tuple of window_size_selected and list_score
"""
list_probably_peaks = find_peaks(score_list)[0][1:]
list_score_peaks = [score_list[i] for i in list_probably_peaks]
Expand All @@ -93,10 +100,10 @@ def local_max_search(self, score_list):
return window_size_selected, list_score_peaks

def dominant_fourier_frequency(self):
"""
Main function for the dominant_fourier_frequency
""" Main function for the dominant_fourier_frequency
:return: a tuple of window_size_selected and list_score
Return:
a tuple of window_size_selected and list_score
"""
list_score_k = []
for i in range(self.window_min, self.window_max):
Expand All @@ -107,11 +114,13 @@ def dominant_fourier_frequency(self):
return window_size_selected, list_score_k

def coeff_metrics(self, temp_size):
"""
Find score coefficient for the dominant_fourier_frequency
""" Find score coefficient for the dominant_fourier_frequency
:param temp_size: temporary selected window size
:return: a score metric distance
Arg:
temp_size: temporary selected window size
Return:
score metric distance
"""

length_n = len(self.time_series)
Expand All @@ -125,7 +134,8 @@ def summary_statistics_subsequence(self):
"""
Main function for the summary_statistics_subsequence
:return: selected window size and a list of score
Return:
selected window size and a list of score
"""
ts = (self.time_series - np.min(self.time_series)) / (np.max(self.time_series) - np.min(self.time_series))

Expand All @@ -136,14 +146,16 @@ def summary_statistics_subsequence(self):
return window_size_selected, list_score

def stats_diff(self, ts, window_size, stats_ts):
"""
Find difference between global statistic and statistic of subsequences with different window
""" Find difference between global statistic and statistic of subsequences with different window
for the summary_statistics_subsequence
:param ts: time series data
:param window_size: temporary selected window size
:param stats_ts: statistic over all ts for calculations
:return: non-normalized Euclidean distance between selected window size and general statistic for ts
Args:
ts: time series data
window_size: temporary selected window size
stats_ts: statistic over all ts for calculations
Return:
non-normalized Euclidean distance between selected window size and general statistic for ts
"""
stat_w = [[np.mean(ts[i:i + window_size]), np.std(ts[i:i + window_size]),
np.max(ts[i:i + window_size]) - np.min(ts[i:i + window_size])] for i in range(self.length_ts)]
Expand All @@ -153,24 +165,26 @@ def stats_diff(self, ts, window_size, stats_ts):
return np.mean(stat_diff)

def suss_score(self, ts, window_size, stats_ts):
"""
Find score coefficient for the the summary_statistics_subsequence
""" Find score coefficient for the summary_statistics_subsequence
Args:
ts: time series data
window_size: temporary selected window size
stats_ts: statistic over all ts for calculations
:param ts: time series data
:param window_size: temporary selected window size
:param stats_ts: statistic over all ts for calculations
:return: normalized Euclidean distance between selected window size and general statistic for ts
Return:
normalized Euclidean distance between selected window size and general statistic for ts
"""
s_min, s_max = self.stats_diff(ts, len(ts), stats_ts), self.stats_diff(ts, 1, stats_ts)
score = self.stats_diff(ts, window_size, stats_ts)
score_normalize = (score - s_min) / (s_max - s_min)
return 1 - score_normalize

def multi_window_finder(self):
"""
Main function for multi_window_finder method
""" Main function for multi_window_finder method
:return: selected window size and a list of scores for this method
Return:
selected window size and a list of scores for this method
"""
distance_scores = [self.mwf_metric(i) for i in range(self.window_min, self.window_max)]
minimum_id_list, id_max = self.top_local_minimum(distance_scores)
Expand All @@ -179,11 +193,13 @@ def multi_window_finder(self):
return window_size_selected, distance_scores

def mwf_metric(self, window_selected_temp):
"""
Find multi_window_finder method metric value for a chosen window size
""" Find multi_window_finder method metric value for a chosen window size
:param window_selected_temp: temporary window selected
:return: value which is the MWF distance metric
Arg:
window_selected_temp: temporary window selected
Return:
value which is the MWF distance metric
"""
coeff_temp = 1 / window_selected_temp
m_values = []
Expand All @@ -193,12 +209,15 @@ def mwf_metric(self, window_selected_temp):
distance_k = sum(np.log10(abs(m_values - np.mean(m_values))))
return distance_k

def top_local_minimum(self, distance_scores):
"""
Find a list of local minimum for multi_window_finder method
@staticmethod
def top_local_minimum(distance_scores):
""" Find a list of local minimum for multi_window_finder method
Arg:
distance_scores: list of distance scores from mwf_metric
:param distance_scores: list of distance scores from mwf_metric
:return: list of index where narray has minimum, max id for distance_scores list
Return:
list of indices of the local minima, and the index of the maximum value in distance_scores
"""
id_max = distance_scores.index(max(distance_scores))
score_temp = distance_scores[id_max:]
Expand All @@ -210,6 +229,11 @@ def top_local_minimum(self, distance_scores):
return id_local_minimum_list, id_max

def runner_wss(self):
""" Initial function to optimize hyperparameter.
Returns:
selected parameter and scores list
"""
if int(len(self.time_series)) <= self.window_min:
window_size_selected, list_score = int(len(self.time_series)), []
else:
Expand Down

0 comments on commit a5812f2

Please sign in to comment.