-
Notifications
You must be signed in to change notification settings - Fork 0
/
algorithm.cpp
213 lines (177 loc) · 6.71 KB
/
algorithm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
// Author: Kristi Daigh
// Project: MLEM2 Rule Induction
// Date: 11/20/2019
/** Source file for algorithm class.
@file algorithm.cpp
This class handles the execution
of the MLEM2 algorithm. */
#include "algorithm.hpp"
#include <algorithm>
#include <limits.h>
using namespace std;
Algorithm::Algorithm(std::size_t numAttributes)
: m_numAttributes(numAttributes){}
void Algorithm::generateAVBlocks(Dataset * data){
// FOR: Each attribute (column)
for(unsigned col = 0; col < m_numAttributes; col++){
string attr = data->getAttribute(col);
// IF: Attribute values are numeric
if(data->getValue(1, col)->isNumeric()){
float min = 0, max = 0;
list<float> cutpoints = data->discretize(col, min, max);
#if DEBUG==true
cout << "NUMERIC: Cutpoints for " << attr << ": ";
for(float c : cutpoints){
cout << c << ", ";
}
cout << endl;
#endif
// FOR: Each cutpoint, create (empty) attribute-value blocks
for (float c : cutpoints){
m_avBlocks.push_back(new AVNumeric(attr, col, min, c));
m_avBlocks.push_back(new AVNumeric(attr, col, c, max));
}
}
// ELSE: Attribute values are symbolic
else {
list<string> values = data->getPossibleValues(col);
#if DEBUG==true
cout << "SYMBOLIC: Possible values for " << attr << ": ";
for(string str : values){
cout << str << ", ";
}
cout << endl;
#endif
// FOR: Each value, create an (empty) attribute-value block
for (string v : values){
m_avBlocks.push_back(new AVSymbolic(attr, col, v));
}
}
} // END FOR
// Populate attribute-value blocks
// LOOP: For each attribute-value block
for(unsigned i = 0; i < m_avBlocks.size(); i++){
// LOOP: For each case (row), add matching values to the block
for(unsigned r = 1; r <= data->getNumCases(); r++){
int c = m_avBlocks[i]->getAttrCol();
m_avBlocks[i]->addOnMatch(data->getValue(r, c), r);
}
#if DEBUG == true
m_avBlocks[i]->print();
#endif
}
}
vector<Concept *> Algorithm::generateConcepts(Dataset * data){
vector <Concept *> concepts;
list<string> values = data->getPossibleValues(m_numAttributes);
for(string v : values){
concepts.push_back(new Concept(data->getDecision(), v));
}
// FOR: Each concept
for(unsigned i = 0; i < concepts.size(); i++){
// FOR: Each case (row), add matching values to the block
for(unsigned r = 1; r <= data->getNumCases(); r++){
int c = m_numAttributes;
string cellValue = data->getValue(r, c)->getStrValue();
if(concepts[i]->getValue() == cellValue){
concepts[i]->addCase(r);
}
}
}
return concepts;
}
void Algorithm::generateRuleset(ostream & file, Dataset * data){
// Generate program components
generateAVBlocks(data);
vector <Concept *> concepts = generateConcepts(data);
// FOR: Each concept, generate rules and print to stream
for(Concept * concept : concepts){
#if DEBUG == true
cout << concept->toString() << endl;
#endif
LocalCover rules = induceRules(concept);
file << rules.toString(m_avBlocks);
delete concept;
}
}
LocalCover Algorithm::induceRules(Concept * concept){
set<int> B = concept->getBlock();
set<int> G = B;
LocalCover lc(concept);
size_t numOrigBlocks = m_avBlocks.size();
// WHILE: G is non-empty
while(!G.empty()){
Rule rule;
map<int, set<int>> T_G;
#if DEBUG == true
printSet("G = ", G);
#endif
// FOR: All attribute-value blocks
for(unsigned i = 0; i < numOrigBlocks; i++){
set<int> intersectSet = setIntersection(m_avBlocks[i]->getBlock(), G);
// IF: Intersection is non-empty
if(!(intersectSet.empty())){
T_G[i] = intersectSet;
}
}
// Select conditions for rule
// WHILE: T is non-empty or T is not subsetEq to B
while( (rule.empty()) || !(subsetEq(rule.getBlock(m_avBlocks), B)) ){
int choicePos = getOptimalCondition(T_G);
rule.addCondition(choicePos);
T_G.erase(choicePos);
// Update goal set
G = setIntersection(m_avBlocks[choicePos]->getBlock(), G);
// FOR: Relevant attribute-value block intersections
for(auto const & [i, intersect] : T_G){
T_G[i] = setIntersection(m_avBlocks[i]->getBlock(), G);
}
} // END WHILE (INNER LOOP)
// Remove unnecessary conditions
// rule.mergeIntervals(m_avBlocks);
rule.dropConditions(m_avBlocks, B);
// Add to local covering
lc.addRule(new Rule(rule));
// Update goal set
G = setDifference(B, lc.getCoveredConditions(m_avBlocks));
} // END WHILE (OUTER LOOP)
// Remove unnecessary rules
lc.dropRules(m_avBlocks, B);
return lc;
}
int Algorithm::getOptimalCondition(map<int, set<int>> T_G){
std::list<int> maxSizePos, minCardPos;
size_t maxSize = 0, minCard = INT_MAX;
for(auto const & [i, intersect] : T_G){
// IF: Current size is larger than maxSize, clear list and add index
if(T_G[i].size() > maxSize){
maxSize = T_G[i].size();
maxSizePos.clear();
maxSizePos.push_back(i);
// IF: Current size is equal to maxSize, add index for consideration
} else if (T_G[i].size() == maxSize){
maxSizePos.push_back(i);
}
}
// IF: Unique largest size is found, return position
if(maxSizePos.size() == 1){
return maxSizePos.front();
}
// LOOP: For each set intersection in maxSizePos
for(int i : maxSizePos){
// IF: Set is non-empty
if(!(T_G[i].empty())){
// IF: Current cardinality is smaller than minCard, update minCard
if(m_avBlocks[i]->size() < minCard){
minCard = m_avBlocks[i]->size();
minCardPos.clear();
minCardPos.push_back(i);
// IF: Current size is equal to minCard, cardinality is not unique
} else if (m_avBlocks[i]->size() == minCard){
minCardPos.push_back(i);
}
}
}
// RETURN: Position with smallest cardinality or first occuring if tie
return minCardPos.front();
}