-
Notifications
You must be signed in to change notification settings - Fork 67
/
r_pca.py
102 lines (80 loc) · 3.03 KB
/
r_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from __future__ import division, print_function
import numpy as np
try:
from pylab import plt
except ImportError:
print('Unable to import pylab. R_pca.plot_fit() will not work.')
try:
# Python 2: 'xrange' is the iterative version
range = xrange
except NameError:
# Python 3: 'range' is iterative - no need for 'xrange'
pass
class R_pca:
def __init__(self, D, mu=None, lmbda=None):
self.D = D
self.S = np.zeros(self.D.shape)
self.Y = np.zeros(self.D.shape)
if mu:
self.mu = mu
else:
self.mu = np.prod(self.D.shape) / (4 * np.linalg.norm(self.D, ord=1))
self.mu_inv = 1 / self.mu
if lmbda:
self.lmbda = lmbda
else:
self.lmbda = 1 / np.sqrt(np.max(self.D.shape))
@staticmethod
def frobenius_norm(M):
return np.linalg.norm(M, ord='fro')
@staticmethod
def shrink(M, tau):
return np.sign(M) * np.maximum((np.abs(M) - tau), np.zeros(M.shape))
def svd_threshold(self, M, tau):
U, S, V = np.linalg.svd(M, full_matrices=False)
return np.dot(U, np.dot(np.diag(self.shrink(S, tau)), V))
def fit(self, tol=None, max_iter=1000, iter_print=100):
iter = 0
err = np.Inf
Sk = self.S
Yk = self.Y
Lk = np.zeros(self.D.shape)
if tol:
_tol = tol
else:
_tol = 1E-7 * self.frobenius_norm(self.D)
#this loop implements the principal component pursuit (PCP) algorithm
#located in the table on page 29 of https://arxiv.org/pdf/0912.3599.pdf
while (err > _tol) and iter < max_iter:
Lk = self.svd_threshold(
self.D - Sk + self.mu_inv * Yk, self.mu_inv) #this line implements step 3
Sk = self.shrink(
self.D - Lk + (self.mu_inv * Yk), self.mu_inv * self.lmbda) #this line implements step 4
Yk = Yk + self.mu * (self.D - Lk - Sk) #this line implements step 5
err = self.frobenius_norm(self.D - Lk - Sk)
iter += 1
if (iter % iter_print) == 0 or iter == 1 or iter > max_iter or err <= _tol:
print('iteration: {0}, error: {1}'.format(iter, err))
self.L = Lk
self.S = Sk
return Lk, Sk
def plot_fit(self, size=None, tol=0.1, axis_on=True):
n, d = self.D.shape
if size:
nrows, ncols = size
else:
sq = np.ceil(np.sqrt(n))
nrows = int(sq)
ncols = int(sq)
ymin = np.nanmin(self.D)
ymax = np.nanmax(self.D)
print('ymin: {0}, ymax: {1}'.format(ymin, ymax))
numplots = np.min([n, nrows * ncols])
plt.figure()
for n in range(numplots):
plt.subplot(nrows, ncols, n + 1)
plt.ylim((ymin - tol, ymax + tol))
plt.plot(self.L[n, :] + self.S[n, :], 'r')
plt.plot(self.L[n, :], 'b')
if not axis_on:
plt.axis('off')