Skip to content

Commit

Permalink
add method to find the optimal fitting window (#115)
Browse files Browse the repository at this point in the history
* add method to find the optimal fitting window based on Demos & Sornette 2017 - Lagrange regularisation approach to compare nested data sets and determine objectively financial bubbles inceptions

* add sklearn to project deps
  • Loading branch information
Joshwani authored Dec 5, 2024
1 parent b8c34cf commit 3c5774a
Show file tree
Hide file tree
Showing 4 changed files with 303 additions and 2 deletions.
107 changes: 107 additions & 0 deletions lppls/lppls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
from datetime import datetime as date
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
import xarray as xr
from typing import Any, Dict, Optional
import warnings


class LPPLS(object):
Expand Down Expand Up @@ -634,3 +637,107 @@ def ordinal_to_date(self, ordinal):
return date.fromordinal(int(ordinal)).strftime("%Y-%m-%d")
except (ValueError, OutOfBoundsDatetime):
return str(pd.NaT)

def detect_bubble_start_time_via_lagrange(
    self,
    max_window_size: int,
    min_window_size: int,
    step_size: int = 1,
    max_searches: int = 25,
    n_params: int = 7,
) -> Optional[Dict[str, Any]]:
    """Pick the fitting window (bubble start) via Lagrange regularisation.

    Every candidate window ends at the last observation and starts
    ``window_size`` points earlier. For each window the LPPLS model is
    fitted with ``self.fit``, the sum of squared residuals (SSE) is
    normalised by ``window_size - n_params``, and a straight line is
    regressed through normalised SSE versus window size. The slope of
    that line is used to detrend the normalised SSE, and the window with
    the smallest detrended value is reported as optimal.

    Args:
        max_window_size: Largest window (number of observations) to try.
            Must not exceed the number of available observations.
        min_window_size: Smallest window to try (inclusive, >= 1).
        step_size: Decrement between consecutive window sizes (>= 1).
        max_searches: Forwarded to ``self.fit`` as its first argument.
        n_params: Number of fitted parameters subtracted from the window
            length when normalising the SSE. The default of 7 keeps the
            historical behaviour; the note that accompanied the original
            constant states that the reference method uses p = 8 (the 7
            LPPLS parameters plus the window start t1).

    Returns:
        ``None`` (after emitting a warning) when fewer than two windows
        could be fitted. Otherwise a dict with the optimal ``tau`` (first
        time value of the optimal window), ``optimal_window_size``, the
        fitted ``tc``, ``m``, ``w``, ``a``, ``b``, ``c1``, ``c2`` for that
        window, and the per-window lists ``window_sizes``, ``sse_list``,
        ``ssen_list``, ``lagrange_sse_list`` and ``start_times``. All
        per-window lists have the same length and share the same index.

    Raises:
        ValueError: If ``step_size`` or ``min_window_size`` is smaller
            than 1, or ``max_window_size`` exceeds the number of
            observations.
    """
    if step_size < 1:
        raise ValueError("step_size must be a positive integer.")
    if min_window_size < 1:
        raise ValueError("min_window_size must be a positive integer.")

    total_obs = len(self.observations[0])
    if max_window_size > total_obs:
        # A larger window would produce a negative start index, which numpy
        # interprets as "count from the end" and silently yields the wrong
        # (shorter) slice.
        raise ValueError(
            "max_window_size ({}) exceeds the number of observations ({}).".format(
                max_window_size, total_obs
            )
        )

    window_sizes = []
    start_times = []
    sse_list = []
    ssen_list = []
    lppls_params_list = []

    for window_size in range(max_window_size, min_window_size - 1, -step_size):
        dof = window_size - n_params
        if dof <= 0:
            # Window sizes only shrink from here on, so no later window can
            # have positive degrees of freedom either.
            break

        start_idx = total_obs - window_size
        obs_window = self.observations[:, start_idx:total_obs]

        try:
            tc, m, w, a, b, _, c1, c2, _, _ = self.fit(max_searches, obs=obs_window)
            if tc == 0.0:
                # NOTE(review): presumably fit() reports a failed search
                # with tc == 0.0 -- confirm against fit().
                continue
            y_hat = self.lppls(obs_window[0], tc, m, w, a, b, c1, c2)
        except Exception as exc:  # best effort: one bad window must not abort the scan
            warnings.warn(
                "LPPLS fit failed for window size {}: {}".format(window_size, exc),
                stacklevel=2,
            )
            continue

        residuals = obs_window[1] - y_hat
        sse = np.sum(residuals ** 2)

        # Record everything together so all per-window lists stay aligned.
        window_sizes.append(window_size)
        start_times.append(self.observations[0][start_idx])
        sse_list.append(sse)
        ssen_list.append(sse / dof)
        lppls_params_list.append(
            {"tc": tc, "m": m, "w": w, "a": a, "b": b, "c1": c1, "c2": c2}
        )

    if len(ssen_list) < 2:
        warnings.warn("Not enough data points to compute Lagrange regularization.")
        return None

    sizes_arr = np.asarray(window_sizes, dtype=float)
    ssen_arr = np.asarray(ssen_list, dtype=float)

    # Ordinary least-squares line through (window size, normalised SSE);
    # only the slope is needed for the regularisation term.
    slope = np.polyfit(sizes_arr, ssen_arr, 1)[0]

    # Lagrange-regularised SSE: normalised SSE with the linear trend in
    # window size removed.
    lagrange_sse_list = list(ssen_arr - slope * sizes_arr)

    best = int(np.argmin(lagrange_sse_list))
    optimal_params = lppls_params_list[best]

    return {
        "tau": start_times[best],
        "optimal_window_size": window_sizes[best],
        "tc": optimal_params["tc"],
        "m": optimal_params["m"],
        "w": optimal_params["w"],
        "a": optimal_params["a"],
        "b": optimal_params["b"],
        "c1": optimal_params["c1"],
        "c2": optimal_params["c2"],
        "window_sizes": window_sizes,
        "sse_list": sse_list,
        "ssen_list": ssen_list,
        "lagrange_sse_list": lagrange_sse_list,
        "start_times": start_times,
    }
193 changes: 193 additions & 0 deletions notebooks/lagrange_regularization.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ numpy>=1.17.0
pandas>=0.25.0
scipy>=1.3.0
tqdm>=4.62.3
xarray==0.19.0
xarray==0.19.0
scikit-learn>=1.2.2
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
long_description = fh.read()

setuptools.setup(name='lppls',
version='0.6.19',
version='0.6.20',
description='A Python module for fitting the LPPLS model to data.',
packages=['lppls'],
author='Josh Nielsen',
Expand Down

0 comments on commit 3c5774a

Please sign in to comment.