diff --git a/src/gglasso/helper/data_generation.py b/src/gglasso/helper/data_generation.py index cb52b5e..0c5786c 100644 --- a/src/gglasso/helper/data_generation.py +++ b/src/gglasso/helper/data_generation.py @@ -11,8 +11,7 @@ from .basic_linalg import trp - -def generate_precision_matrix(p=100, M=10, style = 'powerlaw', gamma = 2.8, prob = 0.1, scale = False, seed = None): +def generate_precision_matrix(p=100, M=10, style='powerlaw', gamma=2.8, prob=0.1, scale=False, seed=None): """ Generates a sparse precision matrix with associated covariance matrix from a random network. @@ -52,8 +51,8 @@ def generate_precision_matrix(p=100, M=10, style = 'powerlaw', gamma = 2.8, prob L = int(p/M) assert M*L == p - A = np.zeros((p,p)) - Sigma = np.zeros((p,p)) + A = np.zeros((p, p)) + Sigma = np.zeros((p, p)) if seed is not None: nxseed = seed @@ -66,9 +65,9 @@ def generate_precision_matrix(p=100, M=10, style = 'powerlaw', gamma = 2.8, prob nxseed = int(nxseed +m) if style == 'powerlaw': - G_m = nx.generators.random_graphs.random_powerlaw_tree(n=L, gamma=gamma, tries=max(5*p,1000), seed = nxseed) + G_m = nx.generators.random_graphs.random_powerlaw_tree(n=L, gamma=gamma, tries=max(5*p,1000), seed=nxseed) elif style == 'erdos': - G_m = nx.generators.random_graphs.erdos_renyi_graph(n=L , p=prob, seed=nxseed, directed=False) + G_m = nx.generators.random_graphs.erdos_renyi_graph(n=L, p=prob, seed=nxseed, directed=False) else: raise ValueError(f"{style} is not a valid choice for the network generation.") A_m = nx.to_numpy_array(G_m) @@ -79,15 +78,11 @@ def generate_precision_matrix(p=100, M=10, style = 'powerlaw', gamma = 2.8, prob else: rng = np.random.default_rng(np.random.randint(low=11111, high=99999)) - B1 = rng.uniform(low = .1, high = .4, size = (L,L)) - B2 = rng.choice(a = [-1,1], p=[.5, .5], size = (L,L)) + B1 = rng.uniform(low=.1, high=.4, size=(L,L)) + B2 = rng.choice(a=[-1,1], p=[.5, .5], size=(L,L)) A_m = A_m * (B1*B2) - # only use upper triangle and symmetrize - #A_m = np.triu(A_m) - #A_m = .5 * (A_m + A_m.T) - A[m*L:(m+1)*L, m*L:(m+1)*L] = A_m row_sum_od = 1.5 * abs(A).sum(axis = 1) + 1e-10 @@ -105,15 +100,12 @@ def generate_precision_matrix(p=100, M=10, style = 'powerlaw', gamma = 2.8, prob if D.min() < 1e-8: A += (0.1+abs(D.min())) * np.eye(p) - #D = np.linalg.eigvalsh(A) - #assert D.min() > 0, f"generated matrix A is not positive definite, min EV is {D.min()}" - - Ainv = np.linalg.pinv(A, hermitian = True) + Ainv = np.linalg.pinv(A, hermitian=True) # scale by inverse of diagonal and 0.6*1/sqrt(d_ii*d_jj) on off-diag if scale: d = np.diag(Ainv) - scale_mat = np.tile(np.sqrt(d),(Ainv.shape[0],1)) + scale_mat = np.tile(np.sqrt(d),(Ainv.shape[0], 1)) scale_mat = (1/0.6)*(scale_mat.T * scale_mat) np.fill_diagonal(scale_mat, d) @@ -144,7 +136,7 @@ def time_varying_power_network(p=100, K=10, M=10, scale = False, seed = None): assert M*L == p assert M >=3 - Sigma_0,_ = generate_precision_matrix(p = p, M = M, style = 'powerlaw', scale = scale, seed = seed) + Sigma_0,_ = generate_precision_matrix(p=p, M=M, style='powerlaw', scale=scale, seed=seed) for k in np.arange(K): Sigma_k = Sigma_0.copy() @@ -156,10 +148,10 @@ def time_varying_power_network(p=100, K=10, M=10, scale = False, seed = None): Sigma[k,:,:] = Sigma_k - Theta = np.linalg.pinv(Sigma, hermitian = True) + Theta = np.linalg.pinv(Sigma, hermitian=True) decay = np.exp(-.5 * np.arange(K)) - helper = np.ones((K,L,L)) * decay[:,None,None] + helper = np.ones((K, L, L)) * decay[:, None, None] for k in np.arange(K): np.fill_diagonal(helper[k,:,:], 1) @@ -169,7 +161,7 @@ def time_varying_power_network(p=100, K=10, M=10, scale = False, seed = None): return Sigma, Theta -def group_power_network(p=100, K=10, M=10, scale = False, seed = None): +def group_power_network(p=100, K=10, M=10, scale=False, seed=None): """ generates a power law network. In each single network one block disappears (randomly) p: dimension @@ -181,14 +173,14 @@ def group_power_network(p=100, K=10, M=10, scale = False, seed = None): L = int(p/M) assert M*L == p - Sigma_0,_ = generate_precision_matrix(p = p, M = M, style = 'powerlaw', scale = scale, seed = seed) + Sigma_0,_ = generate_precision_matrix(p=p, M=M, style='powerlaw', scale=scale, seed=seed) # contains the number of the block disappearing for each k=1,..,K if seed is not None: rng = np.random.default_rng(seed) else: rng = np.random.default_rng(np.random.randint(low=11111, high=99999)) - block = rng.integers(M, size = K) + block = rng.integers(M, size=K) for k in np.arange(K): Sigma_k = Sigma_0.copy() @@ -197,7 +189,7 @@ def group_power_network(p=100, K=10, M=10, scale = False, seed = None): Sigma[k,:,:] = Sigma_k - Theta = np.linalg.pinv(Sigma, hermitian = True) + Theta = np.linalg.pinv(Sigma, hermitian=True) Sigma, Theta = ensure_sparsity(Sigma, Theta) return Sigma, Theta @@ -209,12 +201,12 @@ def ensure_sparsity(Sigma, Theta): D = np.linalg.eigvalsh(Theta) assert D.min() > 0, "generated matrix Theta is not positive definite" - Sigma = np.linalg.pinv(Theta, hermitian = True) + Sigma = np.linalg.pinv(Theta, hermitian=True) return Sigma, Theta -def sample_covariance_matrix(Sigma, N, seed = None): +def sample_covariance_matrix(Sigma, N, seed=None): """ samples data for a given covariance matrix Sigma (with K layers) return: sample covariance matrix S @@ -226,7 +218,7 @@ def sample_covariance_matrix(Sigma, N, seed = None): (p,p) = Sigma.shape sample = rng.multivariate_normal(np.zeros(p), Sigma, N).T - S = np.cov(sample, bias = True) + S = np.cov(sample, bias=True) else: assert abs(Sigma - trp(Sigma)).max() <= 1e-10 @@ -239,7 +231,7 @@ def sample_covariance_matrix(Sigma, N, seed = None): S = np.zeros((K,p,p)) for k in np.arange(K): # normalize with N --> bias = True - S[k,:,:] = np.cov(sample[k,:,:], bias = True) + S[k,:,:] = np.cov(sample[k,:,:], bias=True) return S,sample diff --git a/src/gglasso/helper/experiment_helper.py b/src/gglasso/helper/experiment_helper.py index a6c79ae..ae84ec1 100644 --- a/src/gglasso/helper/experiment_helper.py +++ b/src/gglasso/helper/experiment_helper.py @@ -199,7 +199,6 @@ def plot_runtime(iA, iP, vecN, save = False): p1 = ax.plot(iA[j]['residual'], c = color_dict['ADMM'], label = 'ADMM residual') p2 = ax.plot(iP[j]['residual'], c = color_dict['PPDNA'], marker = 'o', markersize = 3, label = 'PPDNA residual') - #ax.tick_params(axis='both', which='major', labelsize=7) ax.set_yscale('log') ax.set_xscale('log') ax.set_ylim(1e-6,0.2) @@ -213,9 +212,6 @@ def plot_runtime(iA, iP, vecN, save = False): ax.vlines(iP[j]['iter_admm']-1, 0, 0.2, 'grey') ax.set_xlim(max(iP[j]['iter_admm'] - 5,1), ) - #ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter()) - #ax2.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter()) - if j in [0,2]: ax.set_ylabel('KKT residual') if j in [1,3]: @@ -377,8 +373,6 @@ def single_surface_plot(L1, L2, C, ax, name = 'eBIC'): ax.set_xlabel(r'$\lambda_1$', fontsize = 14) ax.set_ylabel(r'$\lambda_2$', fontsize = 14) - #ax.set_xlabel(r'$w_1$', fontsize = 14) - #ax.set_ylabel(r'$w_2$', fontsize = 14) ax.set_zlabel(name, fontsize = 14) ax.view_init(elev = 18, azim = 51) diff --git a/src/gglasso/helper/model_selection.py b/src/gglasso/helper/model_selection.py index fab2a98..b385332 100644 --- a/src/gglasso/helper/model_selection.py +++ b/src/gglasso/helper/model_selection.py @@ -3,6 +3,7 @@ """ import numpy as np +from typing import Optional, Union from .basic_linalg import Sdot from .utils import mean_sparsity, sparsity @@ -17,18 +18,23 @@ TAU_MIN = 1e-12 N_TAU = 20 -def lambda_parametrizer(l1 = 0.05, w2 = 0.5): +def lambda_parametrizer(l1: Union[float, np.ndarray]=0.05, + w2: Union[float, np.ndarray]=0.5 + ): """transforms given l1 and w2 into the respective l2""" a = 1/np.sqrt(2) l2 = (w2*l1)/(a*(1-w2)) return l2 -def map_l_to_w(l1, l2): +def map_l_to_w(l1: float, l2: float): w1 = l1 + (1/np.sqrt(2)) *l2 w2 = l2/(w1*np.sqrt(2)) return (w1,w2) -def lambda_grid(l1, l2 = None, w2 = None): +def lambda_grid(l1: np.ndarray, + l2: Optional[np.ndarray]=None, + w2: Optional[np.ndarray]=None, + ): """ l1, l2, w2: values for the grid either l2 or w2 has to be specified @@ -38,16 +44,33 @@ def lambda_grid(l1, l2 = None, w2 = None): assert np.all(l2 is not None) | np.all(w2 is not None), "Either a range of lambda2 or w2 values have to be specified" if np.all(l2 is not None): - L1, L2 = np.meshgrid(l1,l2) + L1, L2 = np.meshgrid(l1, l2) else: - l1grid, w2grid = np.meshgrid(l1,w2) + l1grid, w2grid = np.meshgrid(l1, w2) L2 = lambda_parametrizer(l1grid, w2grid) L1 = l1grid.copy() return L1.squeeze(), L2.squeeze() -def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', gamma = 0.3, \ - G = None, latent = False, mu_range = None, ix_mu = None, thresholding = False, tol = 1e-7, rtol = 1e-7, verbose = False): +def grid_search(solver: str, + S: Union[np.ndarray, dict], + N: np.ndarray, + p: Union[np.ndarray, int], + reg: str, + l1: np.ndarray, + l2: Optional[np.ndarray]=None, + w2: Optional[np.ndarray]=None, + method: str='eBIC', + gamma: float=0.3, + G: Optional[np.ndarray]=None, + latent: bool=False, + mu_range: Optional[np.ndarray]=None, + ix_mu: Optional[np.ndarray]=None, + thresholding: bool=False, + tol: float=1e-7, + rtol: float=1e-7, + verbose: bool=False + ): """ method for doing model selection for MGL problems using grid search and AIC/eBIC parameters to select: lambda1 (sparsity), lambda2 (group sparsity or total variation) @@ -111,11 +134,13 @@ def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', assert method in ['AIC', 'eBIC'] assert reg in ['FGL', 'GGL'] - if type(S) == dict: + if isinstance(S, dict): K = len(S.keys()) - elif type(S) == np.ndarray: + elif isinstance(S, np.ndarray): K = S.shape[0] - + else: + raise Exception("S must be specified either as array or dict.") + assert len(N) == K, f"N must be given as array, is given as {N}." if latent: @@ -128,7 +153,8 @@ def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', print(L1) print(L2) - grid1 = L1.shape[0]; grid2 = L2.shape[1] + grid1 = L1.shape[0] + grid2 = L2.shape[1] AIC = np.nan*np.zeros((grid1, grid2)) # use default gammas plus the one for eBIC selection @@ -138,11 +164,11 @@ def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', BIC = dict() for g in gammas: - BIC[g] = np.nan*np.zeros((grid1, grid2)) + BIC[g] = np.nan * np.zeros((grid1, grid2)) - SP = np.nan*np.zeros((grid1, grid2)) + SP = np.nan * np.zeros((grid1, grid2)) #SKIP = np.zeros((grid1, grid2), dtype = bool) - RANK = np.nan*np.zeros((K, grid1, grid2)) + RANK = np.nan * np.zeros((K, grid1, grid2)) if thresholding: TAU = np.zeros((K, grid1, grid2)) @@ -150,16 +176,22 @@ def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', TAU = None # solver kwargs - kwargs = {'reg': reg, 'S': S, 'tol': tol, 'rtol': rtol, 'verbose': False, 'measure': False} - if type(S) == dict: + kwargs = {'reg': reg, + 'S': S, + 'tol': tol, + 'rtol': rtol, + 'verbose': False, + 'measure': False + } + + if isinstance(S, dict): Omega_0 = id_dict(p) kwargs['G'] = G - elif type(S) == np.ndarray: - Omega_0 = id_array(K,p) + elif isinstance(S, np.ndarray): + Omega_0 = id_array(K, p) kwargs['Omega_0'] = Omega_0.copy() - curr_min = np.inf curr_best = None @@ -185,7 +217,6 @@ def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', this_mu = mu_range[ix_mu[:,g2]] kwargs['latent'] = True kwargs['mu1'] = this_mu.copy() - #print("MU values", kwargs['mu1']) # solve sol, info = solver(**kwargs) @@ -195,55 +226,70 @@ def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', # thresholding if thresholding: # store also the best solution without solution - _no_thr_this_score = ebic(S, sol['Theta'], N, gamma = gamma) + _no_thr_this_score = ebic(S, sol['Theta'], N, gamma=gamma) if _no_thr_this_score < _no_thr_curr_min: - _no_thr_best_params = {'lambda1': L1[g1,g2], 'lambda2':L2[g1,g2]} + _no_thr_best_params = {'lambda1': L1[g1,g2], + 'lambda2': L2[g1,g2] + } _no_thr_curr_min = _no_thr_this_score _no_thr_curr_best = sol.copy() # now tune threshold - sol['Theta'], opt_tau, _ = tune_multiple_threshold(sol['Theta'], S, N, tau_range = None,\ - method = method, gamma = gamma) - TAU[:,g1,g2] = opt_tau + sol['Theta'], opt_tau, _ = tune_multiple_threshold(sol['Theta'], + S, + N, + tau_range=None, + method=method, + gamma=gamma + ) + TAU[:, g1, g2] = opt_tau ######################################### # store diagnostics - AIC[g1,g2] = aic(S, sol['Theta'], N) + AIC[g1, g2] = aic(S, sol['Theta'], N) for g in gammas: - BIC[g][g1,g2] = ebic(S, sol['Theta'], N, gamma = g) + BIC[g][g1, g2] = ebic(S, sol['Theta'], N, gamma=g) if latent: - RANK[:,g1,g2] = [np.linalg.matrix_rank(sol['L'][k]) for k in np.arange(K)] + RANK[:, g1, g2] = [np.linalg.matrix_rank(sol['L'][k]) for k in np.arange(K)] - SP[g1,g2] = mean_sparsity(sol['Theta']) + SP[g1, g2] = mean_sparsity(sol['Theta']) # new best point found if method == 'eBIC': - if BIC[gamma][g1,g2] < curr_min: - curr_min = BIC[gamma][g1,g2] + if BIC[gamma][g1, g2] < curr_min: + curr_min = BIC[gamma][g1, g2] curr_best = sol.copy() elif method == 'AIC': - if AIC[g1,g2] < curr_min: - curr_min = AIC[g1,g2] + if AIC[g1, g2] < curr_min: + curr_min = AIC[g1, g2] curr_best = sol.copy() if verbose: - print(f"Grid point: (l1,l2): {(L1[g1,g2],L2[g1,g2])}, sparsity: {np.round(SP[g1,g2],3)}, best score: {np.round(curr_min,1)}") + print(f"Grid point: (l1,l2): {(L1[g1, g2],L2[g1, g2])}, sparsity: {np.round(SP[g1, g2],3)}, best score: {np.round(curr_min, 1)}") # get optimal lambda if method == 'AIC': AIC[AIC==-np.inf] = np.nan - ix= np.unravel_index(np.nanargmin(AIC), AIC.shape) + ix = np.unravel_index(np.nanargmin(AIC), AIC.shape) elif method == 'eBIC': for g in gammas: BIC[g][BIC[g]==-np.inf] = np.nan - ix= np.unravel_index(np.nanargmin(BIC[gamma]), BIC[gamma].shape) + ix = np.unravel_index(np.nanargmin(BIC[gamma]), BIC[gamma].shape) if verbose: - print(f"Best regularization parameters: (l1,l2): {(L1[ix],L2[ix])}") + print(f"Best regularization parameters: (l1,l2): {(L1[ix], L2[ix])}") - stats = {'BIC': BIC, 'AIC': AIC, 'SP': SP, 'RANK': RANK, 'TAU': TAU, 'L1': L1, 'L2': L2, \ - 'BEST': {'lambda1': L1[ix], 'lambda2': L2[ix]}, 'GAMMA': gammas} + stats = {'BIC': BIC, + 'AIC': AIC, + 'SP': SP, + 'RANK': RANK, + 'TAU': TAU, + 'L1': L1, + 'L2': L2, + 'BEST': {'lambda1': L1[ix], 'lambda2': L2[ix]}, + 'GAMMA': gammas + } if thresholding: stats['NO_THRESHOLDING_SOL'] = _no_thr_curr_best @@ -251,8 +297,19 @@ def grid_search(solver, S, N, p, reg, l1, l2 = None, w2 = None, method= 'eBIC', return stats, ix, curr_best -def K_single_grid(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent = False, mu_range = None,\ - thresholding = False, use_block = True, store_all = True, tol = 1e-7, rtol = 1e-7): +def K_single_grid(S: Union[np.ndarray, dict], + lambda_range: np.ndarray, + N: np.ndarray, + method: str='eBIC', + gamma: float=0.3, + latent: bool=False, + mu_range: Optional[np.ndarray]=None, + thresholding: bool=False, + use_block: bool=True, + store_all: bool=True, + tol: float=1e-7, + rtol: float=1e-7 + ): """ method for doing model selection for K single Graphical Lasso problems, using grid search and AIC/eBIC parameters to select: lambda1 (sparsity), mu1 (lowrank, if latent=True) @@ -303,11 +360,13 @@ def K_single_grid(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent = Fal """ assert method in ['AIC', 'eBIC'] - if type(S) == dict: + if isinstance(S, dict): K = len(S.keys()) - elif type(S) == np.ndarray: + elif isinstance(S, np.ndarray): K = S.shape[0] - + else: + raise Exception("S must be specified either as array or dict.") + assert len(N) == K, f"N must be given as array, is given as {N}." if latent: @@ -328,11 +387,11 @@ def K_single_grid(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent = Fal BIC = dict() for g in gammas: - BIC[g] = np.nan*np.zeros((K,_L,_M)) + BIC[g] = np.nan * np.zeros((K, _L, _M)) - AIC = np.nan*np.zeros((K,_L,_M)) - SP = np.nan*np.zeros((K,_L,_M)) - RANK = np.zeros((K,_L,_M)) + AIC = np.nan * np.zeros((K, _L, _M)) + SP = np.nan * np.zeros((K, _L, _M)) + RANK = np.zeros((K, _L, _M)) estimates = dict() lowrank = dict() @@ -347,14 +406,24 @@ def K_single_grid(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent = Fal for k in np.arange(K): print(f"------------Range search for instance {k}------------") - if type(S) == dict: + if isinstance(S, dict): S_k = S[k].copy() - elif type(S) == np.ndarray: + elif isinstance(S, np.ndarray): S_k = S[k,:,:].copy() - best, est_k, lr_k, stats_k = single_grid_search(S = S_k, lambda_range = lambda_range, N = N[k], method = method, gamma = gamma, \ - latent = latent, mu_range = mu_range, thresholding = thresholding,\ - use_block = use_block, store_all = store_all, tol = tol, rtol = rtol) + best, est_k, lr_k, stats_k = single_grid_search(S=S_k, + lambda_range=lambda_range, + N=N[k], + method=method, + gamma=gamma, + latent=latent, + mu_range=mu_range, + thresholding=thresholding, + use_block=use_block, + store_all=store_all, + tol=tol, + rtol=rtol + ) #store best individual estimator est_indv['Theta'][k] = best['Theta'].copy() @@ -367,33 +436,34 @@ def K_single_grid(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent = Fal for g in gammas: BIC[g][k,:,:] = stats_k['BIC'][g].copy() + AIC[k,:,:] = stats_k['AIC'].copy() SP[k,:,:] = stats_k['SP'].copy() RANK[k,:,:] = stats_k['RANK'].copy() ########################################### # get optimal low rank for each lambda - tmpSCORE = np.zeros((K,_L)) + tmpSCORE = np.zeros((K, _L)) tmpSCORE[:] = np.nan - ix_mu = np.zeros((K,_L), dtype = int) + ix_mu = np.zeros((K, _L), dtype=int) # for each lambda, get optimal mu for k in np.arange(K): for j in np.arange(_L): if method == 'AIC': - ix_mu[k,j] = np.nanargmin(AIC[k,j,:]) - tmpSCORE[k,j] = AIC[k,j,ix_mu[k,j]] + ix_mu[k, j] = np.nanargmin(AIC[k,j,:]) + tmpSCORE[k, j] = AIC[k, j, ix_mu[k,j]] elif method == 'eBIC': - ix_mu[k,j] = np.nanargmin(BIC[gamma][k,j,:]) - tmpSCORE[k,j] = BIC[gamma][k,j,ix_mu[k,j]] + ix_mu[k, j] = np.nanargmin(BIC[gamma][k,j,:]) + tmpSCORE[k, j] = BIC[gamma][k, j, ix_mu[k,j]] # get optimal lambda (uniform over k=1,..,K and individual) tmpSCORE[tmpSCORE==-np.inf] = np.nan ix_uniform = np.nanargmin(tmpSCORE.sum(axis=0)) - ix_indv = np.nanargmin(tmpSCORE, axis = 1) + ix_indv = np.nanargmin(tmpSCORE, axis=1) # stack in case of array - if type(S) == np.ndarray: + if isinstance(S, np.ndarray): est_indv['Theta'] = np.stack([e for e in est_indv['Theta'].values()]) if latent: est_indv['L'] = np.stack([e for e in est_indv['L'].values()]) @@ -407,28 +477,45 @@ def K_single_grid(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent = Fal est_uniform['L'] = dict() for k in np.arange(K): - est_uniform['Theta'][k] = estimates[k][ix_uniform, ix_mu[k,ix_uniform] , :,:] + est_uniform['Theta'][k] = estimates[k][ix_uniform, ix_mu[k, ix_uniform],:,:] if latent: - est_uniform['L'][k] = lowrank[k][ix_uniform, ix_mu[k,ix_uniform] , :,:] + est_uniform['L'][k] = lowrank[k][ix_uniform, ix_mu[k, ix_uniform],:,:] - if type(S) == np.ndarray: + if isinstance(S, np.ndarray): est_uniform['Theta'] = np.stack([e for e in est_uniform['Theta'].values()]) if latent: est_uniform['L'] = np.stack([e for e in est_uniform['L'].values()]) else: est_uniform = None - statistics = {'BIC': BIC[gamma], 'AIC': AIC, 'SP': SP, 'RANK': RANK, \ - 'LAMB': LAMB, 'MU': MU,\ - 'ix_uniform': ix_uniform, 'ix_indv': ix_indv, 'ix_mu': ix_mu} - - + statistics = {'BIC': BIC[gamma], + 'AIC': AIC, + 'SP': SP, + 'RANK': RANK, + 'LAMB': LAMB, + 'MU': MU, + 'ix_uniform': ix_uniform, + 'ix_indv': ix_indv, + 'ix_mu': ix_mu + } + return est_uniform, est_indv, statistics -def single_grid_search(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent = False, mu_range = None,\ - thresholding = False, use_block = True, store_all = True, tol = 1e-7, rtol = 1e-7, - lambda1_mask = None): +def single_grid_search(S: np.ndarray, + lambda_range: np.ndarray, + N: int, + method: str='eBIC', + gamma: float=0.3, + latent: bool=False, + mu_range: np.ndarray=None, + thresholding: bool=False, + use_block: bool=True, + store_all: bool=True, + tol: float=1e-7, + rtol: float=1e-7, + lambda1_mask: Optional[np.ndarray]=None + ): """ method for model selection for SGL problem, doing grid search and selection via eBIC or AIC @@ -497,24 +584,30 @@ def single_grid_search(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent for g in gammas: BIC[g] = np.nan*np.zeros((_L, _M)) - AIC = np.nan*np.zeros((_L, _M)) - SP = np.nan*np.zeros((_L, _M)) - RANK = np.zeros((_L,_M)) + AIC = np.nan * np.zeros((_L, _M)) + SP = np.nan * np.zeros((_L, _M)) + RANK = np.zeros((_L, _M)) if thresholding: TAU = np.zeros((_L, _M)) else: TAU = None - kwargs = {'S': S, 'Omega_0': np.eye(p), 'X_0': np.eye(p), 'tol': tol, 'rtol': rtol,\ - 'verbose': False, 'measure': False} + kwargs = {'S': S, + 'Omega_0': np.eye(p), + 'X_0': np.eye(p), + 'tol': tol, + 'rtol': rtol, + 'verbose': False, + 'measure': False + } if lambda1_mask is not None: kwargs['lambda1_mask'] = lambda1_mask if store_all: - estimates = np.zeros((_L,_M,p,p)) - lowrank = np.zeros((_L,_M,p,p)) + estimates = np.zeros((_L, _M, p, p)) + lowrank = np.zeros((_L, _M, p, p)) else: estimates = None lowrank = None @@ -542,17 +635,22 @@ def single_grid_search(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent if latent: if store_all: lowrank[j,m,:,:] = sol['L'].copy() - RANK[j,m] = np.linalg.matrix_rank(sol['L']) + RANK[j, m] = np.linalg.matrix_rank(sol['L']) # tune optimal threshold, changes sol['Theta'] if thresholding: - sol['Theta'], opt_tau, _ = tune_threshold(sol['Theta'], S, N,\ - tau_range = None, method = method, gamma = gamma) - TAU[j,m] = opt_tau + sol['Theta'], opt_tau, _ = tune_threshold(sol['Theta'], + S, + N, + tau_range=None, + method=method, + gamma=gamma + ) + TAU[j, m] = opt_tau - AIC[j,m] = aic_single(S, sol['Theta'], N) + AIC[j, m] = aic_single(S, sol['Theta'], N) for g in gammas: - BIC[g][j, m] = ebic_single(S, sol['Theta'], N, gamma = g, lambda1_mask = lambda1_mask) + BIC[g][j, m] = ebic_single(S, sol['Theta'], N, gamma=g, lambda1_mask=lambda1_mask) SP[j,m] = sparsity(sol['Theta']) @@ -561,12 +659,12 @@ def single_grid_search(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent # new best point found if method == 'eBIC': - if BIC[gamma][j,m] < curr_min: - curr_min = BIC[gamma][j,m] + if BIC[gamma][j, m] < curr_min: + curr_min = BIC[gamma][j, m] best_sol = sol.copy() elif method == 'AIC': - if AIC[j,m] < curr_min: - curr_min = AIC[j,m] + if AIC[j, m] < curr_min: + curr_min = AIC[j, m] best_sol = sol.copy() @@ -580,8 +678,16 @@ def single_grid_search(S, lambda_range, N, method = 'eBIC', gamma = 0.3, latent ix = np.unravel_index(np.nanargmin(BIC[gamma]), BIC[gamma].shape) - stats = {'BIC': BIC, 'AIC': AIC, 'SP': SP, 'RANK': RANK, 'LAMBDA': LAMB, 'MU': MU, 'TAU': TAU,\ - 'BEST': {'lambda1': LAMB[ix], 'mu1': MU[ix]}, 'GAMMA': gammas} + stats = {'BIC': BIC, + 'AIC': AIC, + 'SP': SP, + 'RANK': RANK, + 'LAMBDA': LAMB, + 'MU': MU, + 'TAU': TAU, + 'BEST': {'lambda1': LAMB[ix], 'mu1': MU[ix]}, + 'GAMMA': gammas + } return best_sol, estimates, lowrank, stats @@ -598,14 +704,14 @@ def thresholding(A, tau): return A*mask -def tune_threshold(Theta, S, N, tau_range = None, method = 'eBIC', gamma = 0.1): +def tune_threshold(Theta, S, N, tau_range=None, method='eBIC', gamma=0.1): """ Pick the best threshold for 2d-array according to eBIC or AIC. """ if tau_range is None: # diagonal is upper bound as this would make Theta indefinite. #tau_range = np.linspace(TAU_MIN, np.diag(Theta).min()*0.9, N_TAU) - tau_range = np.logspace(-12,-1,N_TAU) + tau_range = np.logspace(-12, -1, N_TAU) assert np.all(tau_range > 0) @@ -614,7 +720,7 @@ def tune_threshold(Theta, S, N, tau_range = None, method = 'eBIC', gamma = 0.1): for j in range(len(tau_range)): tau = tau_range[j] if method == 'eBIC': - E = ebic(S, thresholding(Theta, tau), N, gamma = gamma) + E = ebic(S, thresholding(Theta, tau), N, gamma=gamma) elif method == 'AIC': E = aic(S, thresholding(Theta, tau), N) scores[j] = E @@ -627,13 +733,13 @@ def tune_threshold(Theta, S, N, tau_range = None, method = 'eBIC', gamma = 0.1): t_Theta = thresholding(Theta, opt_tau) return t_Theta, opt_tau, scores -def tune_multiple_threshold(Theta, S, N, tau_range, method = 'eBIC', gamma = 0.1): +def tune_multiple_threshold(Theta, S, N, tau_range, method='eBIC', gamma=0.1): """ Pick the best threshold for 3d-array or dict according to eBIC or AIC. """ - if type(S) == dict: + if isinstance(S, dict): K = len(S.keys()) - elif type(S) == np.ndarray: + elif isinstance(S, np.ndarray): K = S.shape[0] t_Theta = Theta.copy() @@ -641,7 +747,13 @@ def tune_multiple_threshold(Theta, S, N, tau_range, method = 'eBIC', gamma = 0.1 tau = np.zeros(K) for k in np.arange(K): - Th_k, tau_k, scores_k = tune_threshold(Theta[k], S[k], N[k], tau_range, method, gamma) + Th_k, tau_k, scores_k = tune_threshold(Theta[k], + S[k], + N[k], + tau_range, + method, + gamma + ) score[k] = scores_k tau[k] = tau_k t_Theta[k] = Th_k @@ -659,9 +771,9 @@ def aic(S, Theta, N): AIC information criterion after Danaher et al. excludes the diagonal """ - if type(S) == dict: + if isinstance(S, dict): aic = aic_dict(S, Theta, N) - elif type(S) == np.ndarray: + elif isinstance(S, np.ndarray): if len(S.shape) == 3: aic = aic_array(S, Theta, N) else: @@ -691,7 +803,7 @@ def aic_array(S,Theta, N): if isinstance(N, SINGLE_FLOAT_INT_TYPES): N = np.ones(K) * N - + aic = 0 for k in np.arange(K): aic += aic_single(S[k,:,:], Theta[k,:,:], N[k]) @@ -710,13 +822,13 @@ def aic_single(S, Theta, N): ################################################################ -def ebic(S, Theta, N, gamma = 0.5): +def ebic(S, Theta, N, gamma=0.5): """ extended BIC after Drton et al. """ - if type(S) == dict: + if isinstance(S, dict): ebic = ebic_dict(S, Theta, N, gamma) - elif type(S) == np.ndarray: + elif isinstance(S, np.ndarray): if len(S.shape) == 3: ebic = ebic_array(S, Theta, N, gamma) else: @@ -728,6 +840,7 @@ def ebic(S, Theta, N, gamma = 0.5): def ebic_single(S, Theta, N, gamma, lambda1_mask=None): (p,p) = S.shape + assert isinstance(N, SINGLE_FLOAT_INT_TYPES) if lambda1_mask is not None: assert lambda1_mask.shape == S.shape diff --git a/src/gglasso/solver/admm_solver.py b/src/gglasso/solver/admm_solver.py index cbe89f5..6578ea2 100644 --- a/src/gglasso/solver/admm_solver.py +++ b/src/gglasso/solver/admm_solver.py @@ -5,15 +5,30 @@ import numpy as np import time import warnings +from typing import Optional, Union from ..helper.basic_linalg import trp, Gdot from .ggl_helper import prox_p, phiplus, prox_rank_norm, f, P_val - -def ADMM_MGL(S, lambda1, lambda2, reg , Omega_0 , \ - Theta_0 = np.array([]), X_0 = np.array([]), n_samples = None, \ - tol = 1e-5 , rtol = 1e-4, stopping_criterion = 'boyd', update_rho = True,\ - rho= 1., max_iter = 1000, verbose = False, measure = False, latent = False, mu1 = None): +def ADMM_MGL(S: np.ndarray, + lambda1: float, + lambda2: float, + reg: str, + Omega_0: np.ndarray, + Theta_0: np.ndarray=np.array([]), + X_0: np.ndarray=np.array([]), + n_samples: Optional[Union[int, np.ndarray]]=None, + tol: float=1e-5, + rtol: float=1e-4, + stopping_criterion: str='boyd', + update_rho: bool=True, + rho: float=1., + max_iter: int=1000, + verbose: bool=False, + measure: bool=False, + latent: bool=False, + mu1: Optional[Union[float, np.ndarray]]=None + ): """ This is an ADMM solver for the (Latent variable) Multiple Graphical Lasso problem (MGL). It jointly estimates K precision matrices of shape (p,p). @@ -117,7 +132,7 @@ def ADMM_MGL(S, lambda1, lambda2, reg , Omega_0 , \ # else --> n_samples should be array with sample sizes if n_samples == None: nk = np.ones((K,1,1)) - elif type(n_samples) == int: + elif isinstance(n_samples, int): nk = n_samples * np.ones((K,1,1)) else: assert len(nk) == K @@ -166,22 +181,32 @@ def ADMM_MGL(S, lambda1, lambda2, reg , Omega_0 , \ eigD, eigQ = np.linalg.eigh(W_t) for k in np.arange(K): - Omega_t[k,:,:] = phiplus(beta = nk[k,0,0]/rho, D = eigD[k,:], Q = eigQ[k,:,:]) + Omega_t[k,:,:] = phiplus(beta=nk[k,0,0]/rho, + D=eigD[k,:], + Q=eigQ[k,:,:] + ) # Theta Update - Theta_t = prox_p(Omega_t + L_t + X_t, (1/rho)*lambda1, (1/rho)*lambda2, reg) + Theta_t = prox_p(Omega_t + L_t + X_t, + (1/rho)*lambda1, + (1/rho)*lambda2, + reg + ) #L Update if latent: C_t = Theta_t - X_t - Omega_t eigD, eigQ = np.linalg.eigh(C_t) for k in np.arange(K): - L_t[k] = prox_rank_norm(C_t[k,:,:], mu1[k]/rho, D = eigD[k,:], Q = eigQ[k,:,:]) + L_t[k] = prox_rank_norm(C_t[k,:,:], + mu1[k]/rho, + D=eigD[k,:], + Q = eigQ[k,:,:] + ) # X Update X_t += Omega_t - Theta_t + L_t - if measure: end = time.time() runtime[iter_t] = end-start @@ -189,8 +214,14 @@ def ADMM_MGL(S, lambda1, lambda2, reg , Omega_0 , \ # Stopping condition if stopping_criterion == 'boyd': - r_t,s_t,e_pri,e_dual = ADMM_stopping_criterion(Omega_t, Omega_t_1, Theta_t, L_t, X_t,\ - S, rho, tol, rtol, latent) + r_t,s_t,e_pri,e_dual = ADMM_stopping_criterion(Omega_t, + Omega_t_1, + Theta_t, + L_t, + X_t, + S, + rho, tol, rtol, latent + ) # update rho if update_rho: @@ -215,7 +246,13 @@ def ADMM_MGL(S, lambda1, lambda2, reg , Omega_0 , \ break elif stopping_criterion == 'kkt': - eta_A = kkt_stopping_criterion(Omega_t, Theta_t, L_t, rho*X_t, S , lambda1, lambda2, nk, reg, latent, mu1) + eta_A = kkt_stopping_criterion(Omega_t, + Theta_t, + L_t, + rho*X_t, + S, + lambda1, lambda2, nk, reg, latent, mu1 + ) residual[iter_t] = eta_A if verbose: @@ -265,7 +302,11 @@ def ADMM_MGL(S, lambda1, lambda2, reg , Omega_0 , \ sol = {'Omega': Omega_t, 'Theta': Theta_t, 'L': L_t, 'X': X_t} if measure: - info = {'status': status , 'runtime': runtime[:iter_t+1], 'residual': residual[:iter_t+1], 'objective': objective[:iter_t+1]} + info = {'status': status , + 'runtime': runtime[:iter_t+1], + 'residual': residual[:iter_t+1], + 'objective': objective[:iter_t+1] + } else: info = {'status': status} @@ -289,7 +330,7 @@ def ADMM_stopping_criterion(Omega, Omega_t_1, Theta, L, X, S, rho, eps_abs, eps_ return r,s,e_pri,e_dual -def kkt_stopping_criterion(Omega, Theta, L, X, S , lambda1, lambda2, nk, reg, latent = False, mu1 = None): +def kkt_stopping_criterion(Omega, Theta, L, X, S, lambda1, lambda2, nk, reg, latent=False, mu1=None): assert Omega.shape == Theta.shape == S.shape assert S.shape[1] == S.shape[2] @@ -299,14 +340,17 @@ def kkt_stopping_criterion(Omega, Theta, L, X, S , lambda1, lambda2, nk, reg, la (K,p,p) = S.shape - term1 = np.linalg.norm(Theta - prox_p(Theta + X , l1 = lambda1, l2= lambda2, reg = reg)) / (1 + np.linalg.norm(Theta)) + term1 = np.linalg.norm(Theta - prox_p(Theta+X, + l1=lambda1, + l2=lambda2, + reg=reg)) / (1 + np.linalg.norm(Theta)) term2 = np.linalg.norm(Theta - Omega - L) / (1 + np.linalg.norm(Theta)) proxK = np.zeros((K,p,p)) eigD, eigQ = np.linalg.eigh(Omega - nk*S - X) for k in np.arange(K): - proxK[k,:,:] = phiplus(beta = nk[k,0,0], D = eigD[k,:], Q = eigQ[k,:,:]) + proxK[k,:,:] = phiplus(beta=nk[k,0,0], D=eigD[k,:], Q=eigQ[k,:,:]) term3 = np.linalg.norm(Omega - proxK) / (1 + np.linalg.norm(Omega)) @@ -314,7 +358,11 @@ def kkt_stopping_criterion(Omega, Theta, L, X, S , lambda1, lambda2, nk, reg, la proxL = np.zeros((K,p,p)) eigD, eigQ = np.linalg.eigh(L - X) for k in np.arange(K): - proxL[k,:,:] = prox_rank_norm(L[k,:,:] - X[k,:,:], beta = mu1[k], D = eigD[k,:], Q = eigQ[k,:,:]) + proxL[k,:,:] = prox_rank_norm(L[k,:,:] - X[k,:,:], + beta=mu1[k], + D=eigD[k,:], + Q=eigQ[k,:,:] + ) term4 = np.linalg.norm(L - proxL) / (1 + np.linalg.norm(L)) else: diff --git a/src/gglasso/solver/ggl_helper.py b/src/gglasso/solver/ggl_helper.py index 1569b37..55bfdad 100644 --- a/src/gglasso/solver/ggl_helper.py +++ b/src/gglasso/solver/ggl_helper.py @@ -3,10 +3,9 @@ """ import numpy as np -#from tick.prox import ProxTV from numba import njit -from ..helper.basic_linalg import trp,Gdot,Sdot +from ..helper.basic_linalg import trp, Gdot, Sdot from .fgl_helper import condat_method # functions specifically related to the GGL regularizer @@ -19,15 +18,15 @@ def prox_od_1norm(A, l): """ calculates the prox of the off-diagonal 1norm at a point A """ - (d1,d2) = A.shape + (d1, d2) = A.shape res = np.sign(A) * np.maximum(np.abs(A) - l, 0.) - for i in np.arange(np.minimum(d1,d2)): + for i in np.arange(np.minimum(d1, d2)): res[i,i] = A[i,i] return res -def prox_rank_norm(A, beta, D = np.array([]), Q = np.array([])): +def prox_rank_norm(A, beta, D=np.array([]), Q=np.array([])): if len(D) != A.shape[0]: D, Q = np.linalg.eigh(A) @@ -50,7 +49,7 @@ def prox_sum_Frob(X, M, l): \lambda \sum_{j\neq l} \|X^M_{jl}\|_F """ (pM, pM) = X.shape - assert pM%M == 0 + assert pM % M == 0 p = int(pM/M) Y = np.zeros((pM,pM)) @@ -58,11 +57,11 @@ def prox_sum_Frob(X, M, l): for i in np.arange(p): for j in np.arange(start=i, stop=p): if j == i: - Y[i*M:(i+1)*M,j*M:(j+1)*M] = X[i*M:(i+1)*M,j*M:(j+1)*M] + Y[i*M:(i+1)*M, j*M:(j+1)*M] = X[i*M:(i+1)*M, j*M:(j+1)*M] else: - B = prox_2norm(X[i*M:(i+1)*M,j*M:(j+1)*M], l) - Y[i*M:(i+1)*M,j*M:(j+1)*M] = B - Y[j*M:(j+1)*M,i*M:(i+1)*M] = B.T + B = prox_2norm(X[i*M:(i+1)*M, j*M:(j+1)*M], l) + Y[i*M:(i+1)*M, j*M:(j+1)*M] = B + Y[j*M:(j+1)*M, i*M:(i+1)*M] = B.T return Y @@ -78,7 +77,7 @@ def jacobian_projection(v, l): if a <= l: g = np.eye(K) else: - g = (l/a) * ( np.eye(K) - (1/a**2) * np.outer(v,v) ) + g = (l/a) * (np.eye(K) - (1/a**2) * np.outer(v,v)) return g @@ -96,7 +95,7 @@ def jacobian_1norm(v,l): return np.diag(d) @njit() -def jacobian_prox_phi_ggl(v , l1 , l2): +def jacobian_prox_phi_ggl(v, l1, l2): u = prox_1norm(v, l1) sig = jacobian_projection(u, l2) lam = jacobian_1norm(v, l1) @@ -106,14 +105,13 @@ def jacobian_prox_phi_ggl(v , l1 , l2): return M # functions specifically related to the FGL regularizer - @njit() def construct_B(K): dd = np.eye(K) - ld = - np.tri(K, k = -1) + np.tri(K, k = -2) + ld = - np.tri(K, k=-1) + np.tri(K, k=-2) - B = dd+ld - B = B[1:,:] + B = dd + ld + B = B[1:, :] # older numba versions modify B when applying pinv, hence copy tB = B.T.copy() # this is the left-inverse of B.T, is needed to reconstruct the dual solution z_lambda @@ -121,19 +119,20 @@ def construct_B(K): return B, invB -# also implemented in package tick, but our implementation is faster +# old version using tick package: +# from tick.prox import ProxTV +# a = ProxTV(l).call(np.ascontiguousarray(v)) + @njit() def prox_tv(v,l): - a = condat_method(v,l) - #a = ProxTV(l).call(np.ascontiguousarray(v)) + a = condat_method(v, l) return a @njit() def prox_phi_fgl(v, l1, l2): - res = prox_1norm(prox_tv(v,l2) , l1) + res = prox_1norm(prox_tv(v,l2), l1) return res - @njit() def jacobian_tv(v,l): K = len(v) @@ -147,14 +146,14 @@ def jacobian_tv(v,l): z_tmp[ind1] = 0. Sigma = np.diag(z_tmp) - P_hat = np.linalg.pinv(Sigma @ B@ B.T @ Sigma) + P_hat = np.linalg.pinv(Sigma @ B @ B.T @ Sigma) P = np.eye(K) - B.T @ P_hat @ B return P @njit() -def jacobian_prox_phi_fgl(v , l1 , l2): - x = prox_tv(v,l2) - P = jacobian_tv(v,l2) +def jacobian_prox_phi_fgl(v, l1, l2): + x = prox_tv(v, l2) + P = jacobian_tv(v, l2) Theta = jacobian_1norm(x, l1) return Theta @ P @@ -167,11 +166,11 @@ def P_val(X, l1, l2, reg): res = 0 for i in np.arange(p): # start at i+1 because P does NOT operate on diagonal - for j in np.arange(start = i + 1 , stop = p): + for j in np.arange(start=i+1 , stop=p): if reg == 'GGL': - res += l1 * np.linalg.norm(X[:,i,j] , 1) + l2 * np.linalg.norm(X[:,i,j] , 2) + res += l1 * np.linalg.norm(X[:,i,j], 1) + l2 * np.linalg.norm(X[:,i,j], 2) elif reg == 'FGL': - res += l1 * np.linalg.norm(X[:,i,j] , 1) + l2 * np.linalg.norm(X[1:,i,j] - X[:-1,i,j] , 1) + res += l1 * np.linalg.norm(X[:,i,j], 1) + l2 * np.linalg.norm(X[1:,i,j] - X[:-1,i,j], 1) # multiply by 2 as we only summed the upper triangular return 2 * res @@ -186,19 +185,18 @@ def prox_phi(v, l1, l2, reg): elif reg == 'FGL': res = prox_phi_fgl(v, l1, l2) return res - @njit() def prox_p(X, l1, l2, reg): #X is always symmetric and hence we only calculate upper diagonals assert np.abs(X - trp(X)).max() <= 1e-5, "input X is not symmetric" - assert np.minimum(l1,l2) > 0, "lambda 1 and lambda2 have to be positive" + assert np.minimum(l1, l2) > 0, "lambda 1 and lambda2 have to be positive" (K,p,p) = X.shape M = np.zeros((K,p,p)) for i in np.arange(p): - for j in np.arange(start = i, stop = p): + for j in np.arange(start=i, stop=p): if i == j: # factor 1/2 because we later add again M[:,i,j] = (1/2)*X[:,i,j] @@ -221,9 +219,9 @@ def jacobian_prox_phi(v , l1 , l2, reg): assert reg in ['GGL', 'FGL'] if reg == 'GGL': - res = jacobian_prox_phi_ggl(v , l1 , l2) + res = jacobian_prox_phi_ggl(v, l1, l2) elif reg == 'FGL': - res = jacobian_prox_phi_fgl(v , l1 , l2) + res = jacobian_prox_phi_fgl(v, l1, l2) return res @@ -241,21 +239,22 @@ def construct_jacobian_prox_p(X, l1 , l2, reg): W = np.zeros((K,K,p,p)) for i in np.arange(p): - for j in np.arange(start = i, stop = p): + for j in np.arange(start=i, stop=p): if i == j: W[:,:,i,j] = np.eye(K) else: - ij_entry = jacobian_prox_phi(X[:,i,j] , l1 , l2, reg) + ij_entry = jacobian_prox_phi(X[:,i,j], l1, l2, reg) W[:,:,i,j] = ij_entry W[:,:,j,i] = ij_entry return W -def eval_jacobian_prox_p(Y , W): +def eval_jacobian_prox_p(Y, W): # W is the result of construct_jacobian_prox_p (K,p,p) = Y.shape - assert W.shape == (K,K,p,p) + assert W.shape == (K, K, p, p) fun = np.einsum('lkij,kij->lij', W, Y) + # without einsum (but slow): # fun = np.zeros((K,p,p)) # for i in np.arange(p): # for j in np.arange(p): @@ -300,13 +299,12 @@ def phiplus(beta, D, Q): B : array of shape (p,p) proximal operator. """ - #B = Q @ np.diag(phip(D,beta)) @ Q.T - B = (Q * phip(D,beta))@Q.T + B = (Q * phip(D,beta)) @ Q.T return B @njit() def phiminus(beta, D, Q): - B = (Q * phim(D,beta))@Q.T + B = (Q * phim(D,beta)) @ Q.T return B #@njit() @@ -315,26 +313,25 @@ def moreau_h(beta, D, Q): D: array (p,p) Q: array (p,p) """ - pp = phiplus(beta, D, Q) pm = phiminus(beta, D, Q) - psi = - (beta * np.log (np.linalg.det(pp))) + (0.5 * np.linalg.norm(pm)**2 ) + psi = -(beta * np.log (np.linalg.det(pp))) + (0.5 * np.linalg.norm(pm)**2) return psi, pp, pm # tile is not numba supported, could be replaced by repeat+reshape -def construct_gamma(A, beta, D = np.array([]), Q = np.array([])): +def construct_gamma(A, beta, D=np.array([]), Q=np.array([])): (K,p,p) = A.shape Gamma = np.zeros((K,p,p)) if D.shape[0] != A.shape[0]: - raise KeyError + raise KeyError("Shapes don't match.") for k in np.arange(K): - phip_d = phip(D[k,:] , beta) + phip_d = phip(D[k,:], beta) - h1 = np.tile(np.sqrt(D[k,:]**2 + 4* beta), (p,1)) - h1 = h1 + h1.T + h1 = np.tile(np.sqrt(D[k,:]**2 + 4*beta), (p,1)) + h1 = h1 + h1.T h2 = np.tile(phip_d, (p,1)) h2 = h2 + h2.T @@ -357,12 +354,10 @@ def eval_jacobian_phiplus(B, Gamma, Q): return res # functions related to the proximal point algorithm - def Phi_t(Omega, Theta, S, Omega_t, Theta_t, sigma_t, lambda1, lambda2, reg): res = f(Omega, S) + P_val(Theta, lambda1, lambda2, reg) + 1/(2*sigma_t) * (np.linalg.norm(Omega - Omega_t)**2 + np.linalg.norm(Theta - Theta_t)**2) return res - def hessian_Y(D , Gamma, eigQ, W, sigma_t): """ this is the linear operator for the CG method @@ -371,18 +366,16 @@ def hessian_Y(D , Gamma, eigQ, W, sigma_t): """ tmp1 = eval_jacobian_phiplus(D, Gamma, eigQ) tmp2 = eval_jacobian_prox_p(D, W) - res = - sigma_t * (tmp1 + tmp2) return res - -def cg_ppdna(Gamma, eigQ, W, sigma_t, b, tol = 1e-6, max_iter = 20): +def cg_ppdna(Gamma, eigQ, W, sigma_t, b, tol=1e-6, max_iter=20): """ solves the linear system in the PPDNA subproblem - Gamma, eigQ,W, sigma_t are constructed beforehand - b: right-hand-side of linear system + Gamma, eigQ, W, sigma_t are constructed beforehand + b: right-hand-side of linear system tol: tolerance to CG method max_iter: max iterations of CG method """ @@ -390,7 +383,7 @@ def cg_ppdna(Gamma, eigQ, W, sigma_t, b, tol = 1e-6, max_iter = 20): dim = b.shape x = np.zeros(dim) r = b - hessian_Y(x, Gamma, eigQ, W, sigma_t) - r_norm = np.linalg.norm(r)**2 #Gdot(r,r) + r_norm = np.linalg.norm(r)**2 p = r.copy() j = 0 @@ -415,7 +408,7 @@ def cg_ppdna(Gamma, eigQ, W, sigma_t, b, tol = 1e-6, max_iter = 20): return x, status -def Y_t( X, Omega_t, Theta_t, S, lambda1, lambda2, sigma_t, reg): +def Y_t(X, Omega_t, Theta_t, S, lambda1, lambda2, sigma_t, reg): assert np.min(np.array([lambda1, lambda2, sigma_t])) > 0 , "at least one parameter is not positive" assert X.shape[1] == X.shape[2], "dimensions are not as expected" @@ -429,17 +422,16 @@ def Y_t( X, Omega_t, Theta_t, S, lambda1, lambda2, sigma_t, reg): grad1 = np.zeros((K,p,p)) term1 = 0 for k in np.arange(K): - Psi_h, proxh, _ = moreau_h(sigma_t, D = eigD[k,:], Q = eigQ[k,:,:]) + Psi_h, proxh, _ = moreau_h(sigma_t, D=eigD[k,:], Q=eigQ[k,:,:]) term1 += (1/sigma_t) * Psi_h grad1[k,:,:] = proxh - term2 = - 1/(2*sigma_t) * (np.linalg.norm(W_t)**2 + np.linalg.norm(V_t)**2) # (Gdot(W_t, W_t) + Gdot(V_t, V_t)) - term3 = 1/(2*sigma_t) * (np.linalg.norm(Omega_t)**2 + np.linalg.norm(Theta_t)**2) # (Gdot(Omega_t, Omega_t) + Gdot(Theta_t, Theta_t)) + term2 = - 1/(2*sigma_t) * (np.linalg.norm(W_t)**2 + np.linalg.norm(V_t)**2) + term3 = 1/(2*sigma_t) * (np.linalg.norm(Omega_t)**2 + np.linalg.norm(Theta_t)**2) - Psi_P , U = moreau_P(V_t, sigma_t * lambda1, sigma_t * lambda2, reg) + Psi_P, U = moreau_P(V_t, sigma_t*lambda1, sigma_t*lambda2, reg) term4 = (1/sigma_t) * Psi_P fun = term1 + term2 + term3 + term4 - #grad = grad1 - U return fun, grad1, U, (eigD, eigQ) diff --git a/src/gglasso/solver/ppdna_solver.py b/src/gglasso/solver/ppdna_solver.py index 9e961ca..cf1cf67 100644 --- a/src/gglasso/solver/ppdna_solver.py +++ b/src/gglasso/solver/ppdna_solver.py @@ -12,7 +12,7 @@ from .admm_solver import ADMM_MGL from ..helper.basic_linalg import trp, Gdot -def get_ppdna_params(ppdna_params = None): +def get_ppdna_params(ppdna_params=None): # initialize if empty if ppdna_params == None: @@ -36,10 +36,14 @@ def get_ppdna_params(ppdna_params = None): def get_ppa_sub_params_default(): - ppa_sub_params = { 'sigma_t' : 1e3, - 'eta' : 1e-1, 'tau' : .2, 'rho' : .5, 'mu' : 1e-4, - 'eps_t' : .95, 'delta_t' : .95} - + ppa_sub_params = {'sigma_t': 1e3, + 'eta': 1e-1, + 'tau': .2, + 'rho': .5, + 'mu': 1e-4, + 'eps_t': .95, + 'delta_t': .95 + } return ppa_sub_params def check_ppa_sub_params(ppa_sub_params): @@ -58,7 +62,7 @@ def check_ppa_sub_params(ppa_sub_params): return -def PPA_subproblem(Omega_t, Theta_t, X_t, S, reg, ppa_sub_params = None, verbose = False): +def PPA_subproblem(Omega_t, Theta_t, X_t, S, reg, ppa_sub_params=None, verbose=False): """ This is the dual based semismooth Newton method solver for the PPA subproblems Algorithm 1 in Zhang et al. @@ -98,29 +102,41 @@ def PPA_subproblem(Omega_t, Theta_t, X_t, S, reg, ppa_sub_params = None, verbose # fun evaluation and eig can be reused from Armijo in laster iter if sub_iter == 0: - funY_Xt, Omega_sol, Theta_sol, (eigD, eigQ) = Y_t( X_t, Omega_t, Theta_t, S, lambda1, lambda2, sigma_t, reg) + funY_Xt, Omega_sol, Theta_sol, (eigD, eigQ) = Y_t(X_t, + Omega_t, + Theta_t, + S, + lambda1, lambda2, sigma_t, reg + ) else: funY_Xt = Y_t_new gradY_Xt = Omega_sol - Theta_sol - #eigD, eigQ = np.linalg.eigh(W_t) - Gamma = construct_gamma(W_t, sigma_t, D = eigD, Q = eigQ) + Gamma = construct_gamma(W_t, sigma_t, D=eigD, Q=eigQ) W = construct_jacobian_prox_p( (1/sigma_t)*V_t, lambda1, lambda2, reg) # step 1: CG method cg_accur = min(eta, np.linalg.norm(gradY_Xt)**(1+tau)) - D, cg_status = cg_ppdna(Gamma, eigQ, W, sigma_t, -gradY_Xt, tol = cg_accur, max_iter = 10) + D, cg_status = cg_ppdna(Gamma, eigQ, W, sigma_t, -gradY_Xt, tol=cg_accur, max_iter=10) # step 2: line search alpha = 1. - Y_t_new, Omega_sol, Theta_sol, (eigD, eigQ) = Y_t(X_t + alpha*D, Omega_t, Theta_t, S, lambda1, lambda2,\ - sigma_t, reg) + Y_t_new, Omega_sol, Theta_sol, (eigD, eigQ) = Y_t(X_t + alpha*D, + Omega_t, + Theta_t, + S, + lambda1, lambda2, sigma_t, reg + ) - while Y_t_new < funY_Xt + mu * alpha * Gdot(gradY_Xt , D): + while Y_t_new < funY_Xt + mu*alpha*Gdot(gradY_Xt , D): alpha *= rho - Y_t_new, Omega_sol, Theta_sol, (eigD, eigQ) = Y_t(X_t + alpha*D, Omega_t, Theta_t, S, lambda1, lambda2,\ - sigma_t, reg) + Y_t_new, Omega_sol, Theta_sol, (eigD, eigQ) = Y_t(X_t + alpha*D, + Omega_t, + Theta_t, + S, + lambda1, lambda2, sigma_t, reg + ) # step 3: update variables and check stopping condition @@ -136,12 +152,23 @@ def PPA_subproblem(Omega_t, Theta_t, X_t, S, reg, ppa_sub_params = None, verbose if verbose and not(condA and condB): print("Subproblem could not be solve with the given accuracy! -- reached maximal iterations") - sub_info= {'niter' : sub_iter, 'armijo' : alpha} + sub_info= {'niter': sub_iter, 'armijo': alpha} return Omega_sol, Theta_sol, X_t, sub_info -def PPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = np.array([]), ppdna_params = None, eps_ppdna = 1e-5 , verbose = False, measure = False): +def PPDNA(S, + lambda1, + lambda2, + reg, + Omega_0, + Theta_0=np.array([]), + X_0=np.array([]), + ppdna_params=None, + eps_ppdna=1e-5, + verbose=False, + measure=False + ): """ Proximal Point algorithm for the Multiple Graphical Lasso problem (MGL). It solves @@ -236,7 +263,7 @@ def PPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = np.ar for iter_t in np.arange(max_iter): # check stopping criterion - eta_P = PPDNA_stopping_criterion(Omega_t, Theta_t, X_t, S , ppa_sub_params, reg) + eta_P = PPDNA_stopping_criterion(Omega_t, Theta_t, X_t, S, ppa_sub_params, reg) residual[iter_t] = eta_P if measure: @@ -246,8 +273,14 @@ def PPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = np.ar status = 'optimal' break - Omega_t, Theta_t, X_t, sub_info = PPA_subproblem(Omega_t, Theta_t, X_t, S, reg = reg, ppa_sub_params = ppa_sub_params,\ - verbose = False) + Omega_t, Theta_t, X_t, sub_info = PPA_subproblem(Omega_t, + Theta_t, + X_t, + S, + reg=reg, + ppa_sub_params=ppa_sub_params, + verbose=False + ) if measure: end = time.time() @@ -261,8 +294,7 @@ def PPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = np.ar ppa_sub_params['sigma_t'] = 1.3 * ppa_sub_params['sigma_t'] ppa_sub_params['eps_t'] = 0.95 * ppa_sub_params['eps_t'] ppa_sub_params['delta_t'] = 0.95 * ppa_sub_params['delta_t'] - - + if eta_P > eps_ppdna: status = 'max iterations reached' @@ -280,33 +312,40 @@ def PPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = np.ar sol = {'Omega': Omega_t, 'Theta': Theta_t, 'X': X_t} if measure: # last runtime irrelevant (as break) and first residual irrelevant - info = {'status': status , 'runtime': runtime[:iter_t], 'residual': residual[1:iter_t + 1], 'objective': objective[:iter_t]} + info = {'status': status , + 'runtime': runtime[:iter_t], + 'residual': residual[1:iter_t + 1], + 'objective': objective[:iter_t] + } else: info = {'status': status} return sol, info -def PPDNA_stopping_criterion(Omega, Theta, X, S , ppa_sub_params, reg): +def PPDNA_stopping_criterion(Omega, Theta, X, S, ppa_sub_params, reg): assert Omega.shape == Theta.shape == S.shape assert S.shape[1] == S.shape[2] (K,p,p) = S.shape - term1 = np.linalg.norm(Theta- prox_p(Theta + X , l1 = ppa_sub_params['lambda1'], l2= ppa_sub_params['lambda2'], reg = reg)) / (1 + np.linalg.norm(Theta)) + term1 = np.linalg.norm(Theta-prox_p(Theta+X, + l1=ppa_sub_params['lambda1'], + l2=ppa_sub_params['lambda2'], + reg=reg)) / (1+np.linalg.norm(Theta)) - term2 = np.linalg.norm(Theta - Omega) / (1 + np.linalg.norm(Theta)) + term2 = np.linalg.norm(Theta - Omega) / (1+np.linalg.norm(Theta)) proxK = np.zeros((K,p,p)) - eigD, eigQ = np.linalg.eigh(Omega-S-X) + eigD, eigQ = np.linalg.eigh(Omega - S - X) for k in np.arange(K): - proxK[k,:,:] = phiplus(beta = 1., D = eigD[k,:], Q = eigQ[k,:,:]) + proxK[k,:,:] = phiplus(beta=1., D=eigD[k,:], Q=eigQ[k,:,:]) - term3 = np.linalg.norm(Omega - proxK) / (1 + np.linalg.norm(Omega)) + term3 = np.linalg.norm(Omega - proxK) / (1+np.linalg.norm(Omega)) return max(term1, term2, term3) -def warmPPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = np.array([]), admm_params = None, ppdna_params = None, eps = 1e-5 , eps_admm = 1e-3, verbose = False, measure = False): +def warmPPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0=np.array([]), X_0=np.array([]), admm_params=None, ppdna_params=None, eps=1e-5 , eps_admm=1e-3, verbose=False, measure=False): """ function for solving a MGL problem with PPDNA but using ADMM as a warm start (i.e. solve to low accuracy with ADMM first) """ @@ -319,9 +358,20 @@ def warmPPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = n phase2 = True rho = 1. - sol1, info1 = ADMM_MGL(S, lambda1, lambda2, reg , Omega_0 , Theta_0, X_0, \ - tol = eps_admm, stopping_criterion = 'kkt', verbose = verbose, measure = measure,\ - rho = rho, update_rho = False) + sol1, info1 = ADMM_MGL(S, + lambda1, + lambda2, + reg, + Omega_0, + Theta_0, + X_0, + tol=eps_admm, + stopping_criterion='kkt', + verbose=verbose, + measure=measure, + rho=rho, + update_rho=False + ) assert info1['status'] == 'optimal' @@ -331,8 +381,18 @@ def warmPPDNA(S, lambda1, lambda2, reg, Omega_0, Theta_0 = np.array([]), X_0 = n X_0 = rho*sol1['X'] if phase2: - sol2, info2 = PPDNA(S, lambda1, lambda2, reg, Omega_0 = Omega_0, Theta_0 = Theta_0, X_0 = X_0,\ - ppdna_params = ppdna_params, eps_ppdna = eps_ppdna , verbose = verbose, measure = measure) + sol2, info2 = PPDNA(S, + lambda1, + lambda2, + reg, + Omega_0=Omega_0, + Theta_0=Theta_0, + X_0=X_0, + ppdna_params=ppdna_params, + eps_ppdna=eps_ppdna, + verbose=verbose, + measure=measure + ) # append the infos if measure: