-
Notifications
You must be signed in to change notification settings - Fork 1
/
code.m
162 lines (142 loc) · 5.96 KB
/
code.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
% /////////////////////////////////////////////////////////////////////////
% // Code Written and Developed by : Gokkul Nath T S /////
% // Tested with Matlab 2014a (8.3.0.532) 64 Bit /////
% // External Libraries Used : LibSVM 3.20 /////
% // System Specification : Intel Core i5 8gb Ram /////
% /////////////////////////////////////////////////////////////////////////
%% Workspace reset
clc
% clearvars removes workspace variables only. The former `clear all` also
% flushed compiled functions and MEX links (e.g. the LIBSVM binaries),
% forcing them to be reloaded on every run.
clearvars
close all
%% Input Data and Folds
% Absolute, machine-specific locations of the two spreadsheets.
trainFile = 'C:\Users\gokkul nath\Desktop\Ramanathan sir SVM\Coimbatore Climate Prediction\Accumulated data Cross Validation Estimation\Bombay\accumulated till 2015.xlsx';
testFile  = 'C:\Users\gokkul nath\Desktop\Ramanathan sir SVM\Coimbatore Climate Prediction\Accumulated data Cross Validation Estimation\Bombay\2015e.xlsx';
traindata = xlsread(trainFile);
testdata  = xlsread(testFile);
% The first column of each sheet is used as the label, the remaining
% columns as the feature matrix.
labels    = traindata(:,1);
data      = traindata(:,2:end);
testlabel = testdata(:,1);
test      = testdata(:,2:end);
folds = 10;   % passed to svmtrain through the -v option
K = 4;        % Cluster Size (number of k-means clusters)
% Grid of exponents: the values actually tried are 2^C and 2^gamma.
% NOTE(review): the variable name `gamma` shadows MATLAB's gamma() function
% for the rest of the script.
[C,gamma] = meshgrid(13:2:17, -13:2:-9);
cv_acc = zeros(numel(C),1);   % one cross-validation score per grid point
%% Figure 1 : Conventional SVM : Cross Validation Module and Grid Search
% Every (C,gamma) grid point is scored with svmtrain's -v option; the value
% returned by that call is stored as the cross-validation score.
% Options: -s 0 (C-SVC), -t 2 (RBF kernel) per the LIBSVM option list.
% The numbers are printed with %.15g because the default %f keeps only six
% decimals, which turned 2^-13 = 0.0001220703125 into 0.000122.
for i = 1:numel(C)
    cvOptions = sprintf('-s 0 -t 2 -c %.15g -g %.15g -v %d', ...
        2^C(i), 2^gamma(i), folds);
    cv_acc(i) = svmtrain(labels, data, cvOptions);
end
% Pair (C,gamma) with the best score; max returns the first one on ties.
[~,idx] = max(cv_acc);
best_C = 2^C(idx);
best_gamma = 2^gamma(idx);
% Train the final model with the selected parameters; -b 1 requests
% probability estimates, which svmpredict is asked for below as well.
trainOptions = sprintf('-s 0 -t 2 -c %.15g -g %.15g -b 1', best_C, best_gamma);
model = svmtrain(labels, data, trainOptions);
% Run the SVM model on the test data
[predict_labeli, accuracyi, prob_valuesi] = svmpredict(testlabel, test, model, '-b 1 ');
% The three statements below are deliberately left without a semicolon so
% that the confusion matrix, the accuracy and the support-vector matrix
% size are echoed to the command window.
Ci = confusionmat(testlabel, predict_labeli)
accuracyi
size(model.SVs)
%% Figure 2 ,3 , 4, 5 : Clustering of Support Vectors: K Means Clustering
tic; % Start of the clustering timer; read back with toc after kmeans
% Extraction Of Support Vectors From Conventional Model
size1 = size(model.SVs);
numObservarations = size1(1);   % number of support vectors (rows)
dimensions = size1(2);          % number of feature columns
% Temp Variables
SVs = model.SVs;
sv_indices = model.sv_indices;
sv_coef = model.sv_coef;
% Data which is to be clustered: one row per support vector laid out as
% [coefficient, index, feature values...]. The statement is terminated
% with a semicolon so the whole matrix is no longer dumped to the console.
datamodel = [sv_coef sv_indices SVs];
%% cluster
opts = statset('MaxIter', 500, 'Display', 'iter');
[clustIDX, clusters, interClustSum, Dist] = kmeans(datamodel, K, 'options',opts, ...
    'distance','sqEuclidean', 'EmptyAction','singleton', 'replicates',5,'start','uniform');
% Stop the timer started above; previously tic was never paired with toc,
% so the clustering time was never available.
clusteringTime = toc;
fprintf('Clustering time: %.3f s\n', clusteringTime);
%% plot data+clusters
% 3-D scatter of the first three columns of datamodel, coloured by cluster
% id, with the cluster centroids drawn on top as larger markers.
% hold is switched on only AFTER the first scatter3: calling `hold on` on an
% empty figure creates axes in the default 2-D view, and the held axes are
% not switched to 3-D by later scatter3 calls, which hid the z axis.
figure
scatter3(datamodel(:,1), datamodel(:,2), datamodel(:,3), 50, clustIDX, 'filled')
hold on
scatter3(clusters(:,1), clusters(:,2), clusters(:,3), 200, (1:K)', 'filled')
hold off
view(3)   % make the 3-D view explicit
xlabel('x'), ylabel('y'), zlabel('z')
%% plot clusters quality
% Silhouette plot of the clustering; the mean silhouette value is kept as a
% single summary score.
figure
[silh,h] = silhouette(datamodel, clustIDX);
avrgScore = mean(silh);
%% Assign data to clusters
% Recompute, for every row of datamodel, the squared Euclidean distance to
% each cluster centroid: d2 = sum((x - c).^2). No square root is taken.
D = zeros(numObservarations, K); % init distances
for k = 1:K
    centroidRows = repmat(clusters(k,:), numObservarations, 1);
    D(:,k) = sum((datamodel - centroidRows).^2, 2);
end
% For every row, find the closest centroid
[minDists, clusterIndices] = min(D, [], 2);
% Compare the nearest-centroid assignment with the labels returned by
% kmeans. The count used to be computed and thrown away; it is now stored
% and reported so the check is actually visible.
numAgree = sum(clusterIndices == clustIDX);
fprintf('Nearest-centroid assignment agrees with kmeans for %d of %d rows\n', ...
    full(numAgree), numObservarations);
%% class separation
% Prefix every row of datamodel with its cluster id and order the rows by
% that id, so rows of the same cluster become contiguous.
final = [clustIDX datamodel];
sortedA = sortrows(final, 1);
% Third output of unique: consecutive group number of every sorted row.
[~, ~, uniqueIndex] = unique(sortedA(:,1));
% Count the rows of each group, then cut the sorted matrix into one cell
% per cluster along the row dimension.
rowsPerCluster = accumarray(uniqueIndex(:), 1);
cellA = mat2cell(sortedA, rowsPerCluster);
%% Change/Replacement of support vectors in model remove class
% Evaluate the model once per cluster, using only that cluster's support
% vectors. Column layout of cellA{i}:
%   1 = cluster id, 2 = sv_coef, 3 = sv_indices, 4:end = support vector.
% acc collects one row per cluster: [accuracy, cluster id, cluster size].
% It starts empty, so no placeholder zero row has to be deleted afterwards.
acc = zeros(0,3);
for i = 1:K
    class2 = full(cellA{i});
    model.sv_coef    = class2(:,2);
    model.sv_indices = class2(:,3);
    model.SVs        = sparse(class2(:,4:end));
    model.totalSV    = size(class2,1);
    % Number of support vectors with positive / negative coefficient as a
    % 2x1 count vector. The previous size(...) calls produced a 2x2 matrix.
    % NOTE(review): counting by coefficient sign assumes a two-class model
    % with a single sv_coef column - confirm.
    model.nSV = [nnz(model.sv_coef > 0); nnz(model.sv_coef < 0)];
    % Run the reduced SVM model on the test data
    [predict_label, accuracy, prob_values] = svmpredict(testlabel, test, model, '-b 1 ');
    % Echoed on purpose (no semicolon). Note that C, which held the grid of
    % cost exponents earlier in the script, is overwritten here.
    C = confusionmat(testlabel, predict_label)
    accuracy
    acc = [acc; accuracy(1) i size(class2,1)]; % Accuracy ClusterID Size of Cluster
end
%% 2 Terms Accumulation
% Evaluate the model with the support vectors of every unordered pair of
% clusters (i < j). The previous bounds (i = 1:K, j = 2:K, i ~= j) tested
% some pairs twice, e.g. (2,3) and (3,2), and never tested (2,1).
% Column layout of each cellA entry:
%   1 = cluster id, 2 = sv_coef, 3 = sv_indices, 4:end = support vector.
% acc2 collects one row per pair: [accuracy, i, j, number of SVs].
acc2 = zeros(0,4);
for i = 1:K
    for j = i+1:K
        % Stack the rows of both clusters into one full matrix
        terms2 = full([cellA{i}; cellA{j}]);
        model.sv_coef    = terms2(:,2);
        model.sv_indices = terms2(:,3);
        model.SVs        = sparse(terms2(:,4:end));
        model.totalSV    = size(terms2,1);
        % 2x1 count of positive / negative coefficients. The previous
        % size(...) calls produced a 2x2 matrix.
        % NOTE(review): assumes a two-class model with a single sv_coef
        % column - confirm.
        model.nSV = [nnz(model.sv_coef > 0); nnz(model.sv_coef < 0)];
        % Run the reduced SVM model on the test data
        [predict_label, accuracy, prob_values] = svmpredict(testlabel, test, model, '-b 1 ');
        % Echoed on purpose (no semicolon)
        C = confusionmat(testlabel, predict_label)
        accuracy
        i
        j
        acc2 = [acc2; accuracy(1) i j size(terms2,1)];
    end
end
%% Terms 3 Accumulation
% Evaluate the model with the support vectors of every unordered triple of
% clusters (i < j < k). The previous bounds (i = 1:K, j = 2:K, k = 3:K with
% pairwise inequality tests) evaluated the same triple several times in
% different orders.
% Column layout of each cellA entry:
%   1 = cluster id, 2 = sv_coef, 3 = sv_indices, 4:end = support vector.
% acc3 collects one row per triple: [accuracy, i, j, k, number of SVs].
acc3 = zeros(0,5);
for i = 1:K
    for j = i+1:K
        for k = j+1:K
            % Stack the rows of the three clusters into one full matrix
            terms3 = full([cellA{i}; cellA{j}; cellA{k}]);
            model.sv_coef    = terms3(:,2);
            model.sv_indices = terms3(:,3);
            model.SVs        = sparse(terms3(:,4:end));
            model.totalSV    = size(terms3,1);
            % 2x1 count of positive / negative coefficients. The previous
            % size(...) calls produced a 2x2 matrix.
            % NOTE(review): assumes a two-class model with a single sv_coef
            % column - confirm.
            model.nSV = [nnz(model.sv_coef > 0); nnz(model.sv_coef < 0)];
            % Run the reduced SVM model on the test data
            [predict_label, accuracy, prob_values] = svmpredict(testlabel, test, model, '-b 1 ');
            % Echoed on purpose (no semicolon)
            C = confusionmat(testlabel, predict_label)
            accuracy
            i
            j
            k
            acc3 = [acc3; accuracy(1) i j k size(terms3,1)];
        end
    end
end