-
Notifications
You must be signed in to change notification settings - Fork 1
/
PCA.py
150 lines (115 loc) · 4.69 KB
/
PCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import numpy as np
class PCA:
    """
    Principal component analysis based on the SVD of the centered data.

    Args:
        k(int): Number of principal components to keep. If None, k is set to
            the number of input dimensions the first time ``fit`` is called.
        whiten(boolean): When True (False by default) the projected data are
            multiplied by the square root of (number of samples - 1) and
            divided by the singular values, which gives uncorrelated outputs
            with unit variance.

    Attributes:
        k(int): Number of principal components (see Args).
        whiten(boolean): Whether whitening is applied.
        x: Centered copy of the data passed to ``fit`` (2D array).
        mean: Per-dimension mean of the data passed to ``fit``.
        components: Array of the first k principal components, one per row.
        singular_values: First k singular values of the centered data. The
            variance along each component equals
            singular_values ** 2 / (number of samples - 1).

    Methods:
        PCA.fit: Fit the model with data.
        PCA.transform: Apply the PCA model to data.
        PCA.fit_transform: Fit and apply.
        PCA.inverse_transform: Transform data back to its original space.
        PCA.factor_loading: Return the factor loading of every principal
            component on every original dimension.
        PCA.explained_variance: Return the amount/ratio of variance explained
            by each of the selected components.
    """

    def __init__(self, k=None, whiten=False):
        """
        Constructor method. Only stores the settings; nothing is computed
        until ``fit`` is called.
        """
        self.k = k
        self.whiten = whiten
        self.x = None
        self.mean = None
        self.components = None
        self.singular_values = None

    def fit(self, x):
        """
        Fit the model with data.

        Args:
            x: array of data, one sample per row. A 1D array is treated as a
               single sample and a 0D value as a 1x1 array. The input is
               never modified.

        Raises:
            ValueError: if the data has fewer dimensions than k.
        """
        data = self.__to_2d(x)
        if self.k is not None and data.shape[1] < self.k:
            raise ValueError("Dimension should not be smaller than k.")

        # Work on a private floating-point copy: centering happens in place,
        # which would fail on integer/bool input and must not touch the
        # caller's array. Floating/complex dtypes are kept as they are.
        if np.issubdtype(data.dtype, np.inexact):
            data = data.copy()
        else:
            data = data.astype(float)

        self.mean = np.average(data, axis=0)
        data -= self.mean
        self.x = data
        if self.k is None:
            self.k = data.shape[1]

        # U is never used. With at least as many samples as dimensions the
        # reduced SVD yields the same S and V without building an
        # (n_samples x n_samples) U; otherwise the full form is kept so V
        # stays (n_features x n_features).
        n_samples, n_features = data.shape
        _, S, V = np.linalg.svd(data, full_matrices=n_samples < n_features)
        self.singular_values = S[:self.k]
        self.components = V[:self.k, :]
        self.__original_singular_values = S
        self.__original_components = V

    def transform(self, x):
        """
        Apply the PCA model to data.

        Args:
            x: array of data with the same number of dimensions as the data
               used in ``fit``.

        Return:
            array of data represented by the principal components.

        Raises:
            AttributeError: if the model has not been fitted.
            ValueError: if the dimension of x does not match the fitted data.
        """
        centered = self.__check_x(x) - self.mean
        projected = np.dot(centered, self.components.T)
        if self.whiten:
            # Rescale every component to unit variance.
            projected = (self.x.shape[0] - 1) ** 0.5 * projected / self.singular_values
        return projected

    def fit_transform(self, x):
        """
        Fit and apply.

        Args:
            x: array of data.

        Return:
            array of data represented by the principal components.
        """
        self.fit(x)
        return self.transform(x)

    def inverse_transform(self, x):
        """
        Transform data back to its original space.

        Args:
            x: array of data expressed in the principal components. It is
               not modified.

        Returns:
            data in original space.
        """
        x = np.asarray(x)
        if self.whiten:
            # Undo the whitening scale out of place so the caller's array is
            # left untouched.
            x = x * (self.singular_values / (self.x.shape[0] - 1) ** 0.5)
        return np.dot(x, self.components) + self.mean

    def factor_loading(self):
        """
        Return the factor loading of every principal component on every
        original dimension.

        Return:
            Array-like, shape(number_of_principal_components, number_of_original_dimensions)
        """
        fl = self.components * self.singular_values[:, np.newaxis]
        # Column-wise sum of squares of the centered data; equal to the
        # diagonal of x.T @ x without forming the whole matrix.
        return fl / np.sum(self.x ** 2, axis=0) ** 0.5

    def explained_variance(self, ratio=True):
        """
        Return the amount/ratio of variance explained by each of the selected components.

        Args:
            ratio: If True, return the ratio of variance. If False, return
                   the value of variance. Default True.

        Return:
            Array of variance (value/ratio).
        """
        var = self.singular_values ** 2
        if not ratio:
            return var / (self.x.shape[0] - 1)
        # The ratio is taken against all singular values, not only the k kept.
        return var / np.sum(self.__original_singular_values ** 2)

    @staticmethod
    def __to_2d(x):
        """
        Return x as a 2D array: 1D input becomes a single row, 0D input a
        1x1 array. Other inputs are returned as arrays unchanged.
        """
        x = np.asarray(x)
        if x.ndim == 1:
            x = np.reshape(x, [1, x.shape[0]])
        if x.ndim == 0:
            x = np.reshape(x, [1, 1])
        return x

    def __check_x(self, x):
        """
        Check if x is 2D array. If not, convert.
        Check if dimension matches the fitted data. If not, raise error.

        Return:
            x in 2D array.
        """
        if self.x is None:
            raise AttributeError("PCA model is not fitted yet; call fit() first.")
        x = self.__to_2d(x)
        if self.k is not None and x.shape[1] < self.k:
            raise ValueError("Dimension should not be smaller than k.")
        if self.x.shape[1] != x.shape[1]:
            raise ValueError("Dimension not match.")
        return x