-
Notifications
You must be signed in to change notification settings - Fork 0
/
pca.py
executable file
·103 lines (87 loc) · 3.5 KB
/
pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
def pca(X=np.array([]), no_dims=50):
    """
    Projects the N x D array X onto its no_dims leading principal directions.

    The directions are the eigenvectors of the second-moment matrix
    X^T X / N. X is NOT centered inside this function, so callers that want
    covariance-based PCA must subtract the per-feature mean beforehand.

    Inputs:
    - X: A matrix (or array-like) with shape N x D where N is the number of
         examples and D is the dimensionality of original data.
    - no_dims: A scalar indicating the output dimension of examples after
               performing PCA. It should be smaller than D; if it exceeds D,
               only the D available components are returned.

    Returns:
    - Y: A matrix of reduced size with shape N x no_dims where N is the number
         of examples and no_dims is the dimensionality of output examples.
    - M: A matrix of eigenvectors with shape D x no_dims where D is the
         dimensionality of the original data. Columns are ordered by
         decreasing eigenvalue.

    Raises:
    - ValueError: if X is not a 2-D array with at least one example.
    """
    X = np.asarray(X)
    if X.ndim != 2 or X.shape[0] == 0:
        # Without this check the division below yields NaNs (N == 0) or eigh
        # fails with an obscure error on a 0-d/1-d input.
        raise ValueError(
            'pca expects a non-empty 2-D array of shape N x D, got shape %s'
            % (X.shape,))

    num_examples = X.shape[0]
    second_moment = np.matmul(X.T, X) / num_examples

    # eigh returns eigenvalues in ascending order; only the eigenvectors are
    # needed, and their columns are reversed so the largest comes first.
    _, eigenvectors = np.linalg.eigh(second_moment)
    eigenvectors = np.flip(eigenvectors, axis=1)

    M = eigenvectors[:, 0:no_dims]
    Y = np.matmul(X, M)
    return Y, M
def decompress(Y = np.array([]), M = np.array([])):
    """
    Maps compressed examples back into the original D-dimensional space.

    Inputs:
    - Y: A matrix of reduced size with shape N x no_dims where N is the number
         of examples and no_dims is the dimensionality of the compressed
         examples.
    - M: A matrix of eigenvectors with shape D x no_dims where D is the
         dimensionality of the original data.

    Returns:
    - X_hat: Reconstructed matrix with shape N x D, computed as Y times the
             transpose of M.
    """
    # Each row of Y holds coefficients for the columns of M, so multiplying
    # by M transposed rebuilds the D-dimensional rows.
    back_projection = M.T
    return np.matmul(Y, back_projection)
def reconstruction_error(orig=np.array([]), decompressed=np.array([])):
    """
    Computes reconstruction error (pixel-wise mean squared error) for original
    image and reconstructed image.

    Inputs:
    - orig: An array (or array-like) holding the original flattened image,
            either as a flat length-D vector or with shape 1 x D.
    - decompressed: An array of the same shape, the decompressed version of
                    the image.

    Returns:
    - error: The sum of squared pixel differences divided by the number of
             pixels in orig. An empty input yields nan.
    """
    # Work in float64 so unsigned-integer pixel data cannot wrap around when
    # the reconstruction is larger than the original.
    original = np.asarray(orig, dtype=np.float64)
    restored = np.asarray(decompressed, dtype=np.float64)

    residual = original - restored
    # Divide by the total pixel count rather than shape[0], which would be 1
    # for a 1 x D input and turn the mean into a plain sum.
    return np.sum(np.multiply(residual, residual)) / original.size
def load_data(dataset='mnist_subset.json'):
    """
    Reads the MNIST data from a JSON file.

    The file must hold a mapping with 'train', 'valid' and 'test' entries;
    element 0 of each entry is taken as that split's example matrix. The
    three matrices are stacked row-wise in that order.

    Inputs:
    - dataset: Path to the JSON file.

    Returns:
    - mnist: A single array containing the rows of all three splits.
    """
    import json

    with open(dataset, 'r') as handle:
        splits = json.load(handle)

    # Index 0 of every split is used as the examples; any further elements
    # are ignored here.
    stacked_parts = []
    for split_name in ('train', 'valid', 'test'):
        stacked_parts.append(np.asarray(splits[split_name][0]))
    return np.vstack(tuple(stacked_parts))
if __name__ == '__main__':
    # Script entry point: compress the data set with PCA at several target
    # dimensionalities, reconstruct it, write every per-example
    # reconstruction error to 'pca_output.txt' (one value per line) and print
    # the summed error for each setting.
    mnist = load_data()
    compression_rates = [2, 10, 50, 100, 250, 500]

    # The per-feature mean and the centered data do not depend on the
    # compression rate, so compute them once instead of twice per iteration.
    feature_mean = np.mean(mnist, axis=0)
    centered_mnist = mnist - feature_mean

    with open('pca_output.txt', 'w') as f:
        for cr in compression_rates:
            Y, M = pca(centered_mnist, cr)
            decompressed_mnist = decompress(Y, M)
            # pca worked on centered data; add the mean back so the
            # reconstruction is comparable with the original examples.
            decompressed_mnist += feature_mean

            total_error = 0.
            for mi, di in zip(mnist, decompressed_mnist):
                error = reconstruction_error(mi, di)
                f.write(str(error))
                f.write('\n')
                total_error += error
            print('Total reconstruction error after compression with %d principal '\
                  'components is %f' % (cr, total_error))