-
Notifications
You must be signed in to change notification settings - Fork 0
/
compression_medal_09.py
69 lines (51 loc) · 1.86 KB
/
compression_medal_09.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
from sklearn.decomposition import PCA
def reduce_features(data, p):
    """
    Perform PCA dimensionality reduction on a nested-list data table.

    Args:
        data (list): nested list of numeric rows; the final row holds the
            target values and the preceding rows hold the features (this is
            what the slicing ``data[:-1]`` / ``data[-1]`` implements).
            NOTE(review): the original docstring describes an (m features +
            target) x n examples layout — confirm against the caller/file
            format.
        p (float): allowed drop in accuracy; components are kept until the
            cumulative explained-variance ratio reaches ``1 - p``.

    Returns:
        tuple: ``(v, X_reduced, y)`` where
            v (int): number of principal components kept (>= 1),
            X_reduced (np.ndarray): data projected onto those components,
            y: the target row, returned unchanged.
    """
    # Separate features and labels. The original used the redundant chained
    # slice data[:][:-1] (the leading [:] is a no-op copy); data[:-1] and
    # data[-1] are equivalent row slices.
    X = np.asarray(data[:-1], dtype=float)
    y = data[-1]

    # Standardize each column to zero mean / unit variance. Guard constant
    # columns: a zero std would otherwise divide by zero and poison PCA
    # with NaN/inf; leaving the divisor at 1 keeps those columns at 0.
    std = np.std(X, axis=0)
    std = np.where(std == 0, 1.0, std)
    X_normalized = (X - np.mean(X, axis=0)) / std

    # Fit a full PCA once to inspect the explained-variance spectrum.
    pca = PCA()
    pca.fit(X_normalized)

    # Smallest number of components whose cumulative explained variance
    # reaches 1 - p. np.argmax returns the first True *index*, so the
    # component *count* is index + 1 — the original omitted the +1, an
    # off-by-one that could even request 0 components.
    cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
    v = int(np.argmax(cumulative_variance >= (1 - p))) + 1

    # Cap v at 2/3 of the feature count, but keep at least one component so
    # PCA(n_components=v) stays valid even for very narrow tables.
    max_v = int(2 / 3 * X.shape[1])
    v = max(1, min(v, max_v))

    # Project the standardized data onto the selected components.
    pca = PCA(n_components=v)
    X_reduced = pca.fit_transform(X_normalized)
    return v, X_reduced, y
# --- Script entry: load the whitespace-separated dataset and run PCA ---
dataframe = []
with open("/static/data/00_559", mode='r') as data:
    # Iterate the file directly instead of materializing it with
    # readlines(); each row is converted to floats once, here.
    for line in data:
        dataframe.append(list(map(float, line.strip().split())))

# The first row holds: number of examples n, number of features m, and the
# accuracy-drop limit k. The rows are already floats, so the original
# list(map(float, ...)) re-conversion was redundant.
n, m, k = dataframe[0]

# The remaining rows form the data table. The original used the redundant
# chained slice dataframe[1:][:] (the trailing [:] is a no-op copy).
df = dataframe[1:]

# Apply the PCA dimensionality-reduction function to the table.
v, X_reduced, y = reduce_features(df, k)

"""
OPTIONAL: write the reduced feature matrix into the csv file
with(open('00_reduced_features',mode='w',newline=''))as f:
    f.write('\n')
    features_reduced.to_csv(f, sep=' ', index=False, header=False)
    for row in features_reduced.itertuples(index=False, name=None):
        print(*row)
"""