-
Notifications
You must be signed in to change notification settings - Fork 1
/
VDBC.m
222 lines (203 loc) · 9.36 KB
/
VDBC.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
function [results] = VDBC(dataF, dataTNum, numCls, numDim, numFolds)
% Function VDBC for Voronoi Diagram Based Classifier. It is based on Chang
% W* algorithm:
% - Select a random instance;
% - Find its nearest neighbor
% + If they belong to the same class
% - Create a centroid between them.
% + Else
% - Current selected instance becomes a centroid.
% - Test unknown instances with built centroids
%% Pre-process
% Vector to store MAUC values
results(numFolds) = 0;
% Separating data into k folds
foldIdx = kfoldIndices(numCls, dataTNum, numFolds);
%% Train and Test
for k=1:numFolds
disp(strcat('ITERAÇÃO', num2str(k)));
centroids = [];
% Test Set
testIdx = foldIdx(k).indices;
testSet = dataF(testIdx, :); testTargets = dataTNum(testIdx);
% Train Set
trainIdx = [];
for i=1:numFolds
if i ~= k
trainIdx = [trainIdx; foldIdx(i).indices];
end
end
trainSet = dataF(trainIdx, :); trainTargets = dataTNum(trainIdx);
centroids = train(trainSet, trainTargets, numDim, centroids);
output = testing(testSet, centroids);
% size(unique(testTargets), 1) ---> sometimes not all classes are
% present on test set, so it is passed the size of present classes
results(k) = calculateMAUC(output, testTargets, size(unique(testTargets), 1));
end
end
function foldIdx = kfoldIndices(numCls, dataTNum, numFolds)
% Function for creating indices for the k folds
foldIdx(numFolds).indices = [];
clsInd(numCls).indices = [];
clsFoldInd(numCls).indices = [];
% Getting the indices of instances from each class and then the inside
% class indices for folds
for i=1:numCls
clsInd(i).indices = find(dataTNum == i);
clsFoldInd(i).indices = crossvalind('Kfold', size(clsInd(i).indices, 1), numFolds);
end
for i=1:numFolds
for j=1:numCls
foldIdx(i).indices = [foldIdx(i).indices; clsInd(j).indices(clsFoldInd(j).indices == i)];
end
end
end
function centroids = train(trainSet, trainTargets, numDim, centroids)
% Training function
% For each training instance
for i=1:size(trainSet, 1)
dist = distance(transpose(trainSet(i, :)), transpose(trainSet));
dist(i) = NaN; % distance to itself
if ~isempty(centroids)
% If there already exist at least one centroid
% The last column of a centroid holds its class, therefore it
% is not counted during distance calculation
dist2 = distance(transpose(trainSet(i, :)), transpose(centroids(:, 1:end-1)));
if min(dist) < min(dist2)
% If another instance is closer than any centroid
nearest = find(dist == min(dist));
if size(nearest,2) > 1
% If two or more instances are equidistant
possibleClasses = zeros(1, size(nearest,2));
for j=1:size(nearest,2)
possibleClasses(j) = trainTargets(nearest(j));
end
if all(possibleClasses == possibleClasses(1))
% If all belong to the same class, create a
% centroid among them
newCtr = zeros(1, numDim+1);
for d=1:numDim
newCtr(1, d) = mean(trainSet([i nearest], d));
end
newCtr(1, end) = possibleClasses(1);
centroids = [centroids; newCtr];
else
% If at least one of them belongs to different
% class, the current instance becomes a centroid
newCtr = zeros(1, numDim+1);
newCtr(1, 1:end-1) = trainSet(i, :);
newCtr(1, end) = trainTargets(i);
centroids = [centroids; newCtr];
end
else
% Only one nearest neighbor
neighborCls = trainTargets(nearest);
if trainTargets(i) == trainTargets(nearest)
% If they belong to the same class, a centroid is
% created between them
newCtr = zeros(1, numDim+1);
for d=1:numDim
newCtr(d) = mean(trainSet([i nearest], d));
end
newCtr(1, end) = trainTargets(i);
centroids = [centroids; newCtr];
else
% If they belong to different classes the current
% instance becomes a centroid
newCtr = zeros(1, numDim+1);
newCtr(1, 1:end-1) = trainSet(i, :);
newCtr(1, end) = trainTargets(i);
centroids = [centroids; newCtr];
end
end
else
% If a centroid is closer than any instance
nearCtrIdx = find(dist2 == min(dist2));
nearCtrCls = centroids(nearCtrIdx, end);
% If they belong to the same class nothing is done.
% Otherwise, current instance becomes a centroid
if trainTargets(i) ~= nearCtrCls
newCtr = zeros(1, numDim+1);
newCtr(1, 1:end-1) = trainSet(i, :);
newCtr(1, end) = trainTargets(i);
centroids = [centroids; newCtr];
end
end
else
% Centroids set is still empty
nearest = find(dist == min(dist));
if size(nearest,2) > 1
% If two or more instances are equidistant
possibleClasses = zeros(1, size(nearest,2));
for j=1:size(nearest,2)
possibleClasses(j) = trainTargets(nearest(j));
end
if all(possibleClasses == possibleClasses(1))
% If all belong to the same class, create a centroid
% among them
newCtr = zeros(1, numDim+1);
for d=1:numDim
newCtr(1, d) = mean(trainSet([i nearest], d));
end
newCtr(1, end) = possibleClasses(1);
centroids = [centroids; newCtr];
else
% If at least one of them belongs to different class,
% the current instance becomes a centroid
newCtr = zeros(1, numDim+1);
newCtr(1, 1:end-1) = trainSet(i, :);
newCtr(1, end) = trainTargets(i);
centroids = [centroids; newCtr];
end
else
% Only one nearest neighbor
neighborCls = trainTargets(nearest);
if trainTargets(i) == trainTargets(nearest)
% If they belong to the same class, a centroid is
% created between them
newCtr = zeros(1, numDim+1);
for d=1:numDim
newCtr(d) = mean(trainSet([i nearest], d));
end
newCtr(1, end) = trainTargets(i);
centroids = [centroids; newCtr];
else
% If they belong to different classes the current
% instance becomes a centroid
newCtr = zeros(1, numDim+1);
newCtr(1, 1:end-1) = trainSet(i, :);
newCtr(1, end) = trainTargets(i);
centroids = [centroids; newCtr];
end
end
end
end
end
function output = testing(testSet, centroids)
% Function for generating the output based on 1NN classification with
% prototypes
output = zeros(size(testSet, 1), 1);
for i=1:size(testSet, 1)
dist = distance(transpose(testSet(i, :)), transpose(centroids(:, 1:end-1)));
nearest = find(dist == min(dist));
if size(nearest, 2) > 1 % The size is related to the column due to the distance function
possibleClasses = centroids(nearest, end);
if all(possibleClasses == possibleClasses(1))
output(i) = possibleClasses(1);
else
% For while the class is decided randomly. TODO: decide it
% based on probabilities, giving more importante to minor
% classes
possibleClasses = sort(possibleClasses);
output(i) = possibleClasses(randi(size(possibleClasses, 1)));
end
else
output(i) = centroids(nearest, end);
end
end
end
function mauc = calculateMAUC(output, testTargets, nClass)
% Function for calculating MAUC performance
[mauc, ~] = colAUC(output, testTargets, 'ROC');
mauc = (2/(nClass*(nClass - 1)))*sum(mauc);
end