-
Notifications
You must be signed in to change notification settings - Fork 0
/
ArffParser.java
65 lines (57 loc) · 2.15 KB
/
ArffParser.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Scanner;
import java.io.File;
/**
 * Reads an ARFF file and stores its attribute declarations as {@link Feature}s and its data rows
 * as lists of doubles.
 *
 * <p>Every {@code @attribute} declaration is turned into a {@code CategoricalFeature}; other
 * attribute kinds are not distinguished by this parser. Set the file to read with
 * {@link #setInputFilename(String)} before calling {@link #parseDataFromArffFile()}.
 */
public class ArffParser {
    private static final String ATTRIBUTE_KEYWORD = "@attribute";
    private static final String DATA_KEYWORD = "@data";
    private static final String RELATION_KEYWORD = "@relation";
    private static final char COMMENT_MARKER = '%';

    private String inputFilename;
    private ArrayList<Feature> features = new ArrayList<Feature>();
    private ArrayList<ArrayList<Double>> dataInstances = new ArrayList<ArrayList<Double>>();

    /**
     * Converts one comma separated data row into a list of doubles and appends it to the stored
     * data instances.
     *
     * <p>The i-th value is translated by the i-th declared feature through
     * {@code getIndexOfCategoricalFeatureValue}. Values are passed on exactly as they appear
     * between the commas (no trimming).
     * NOTE(review): whether surrounding whitespace should be stripped depends on how
     * CategoricalFeature stores its options - confirm before changing.
     *
     * @param dataInstance the comma separated string containing the data instance's values
     * @throws IllegalArgumentException if the row has more values than declared attributes
     */
    private void addDataInstance(String dataInstance) {
        String[] data = dataInstance.split(",");
        if (data.length > features.size()) {
            // Fail with a readable message instead of an IndexOutOfBoundsException further down.
            throw new IllegalArgumentException("Data instance has " + data.length
                    + " values but only " + features.size() + " attributes were declared: "
                    + dataInstance);
        }
        ArrayList<Double> processedDataInstance = new ArrayList<Double>(data.length);
        for (int i = 0; i < data.length; i++) {
            CategoricalFeature feature = (CategoricalFeature) features.get(i);
            double processedValue = feature.getIndexOfCategoricalFeatureValue(data[i]);
            processedDataInstance.add(processedValue);
        }
        this.dataInstances.add(processedDataInstance);
    }

    /**
     * Reports whether {@code line} begins with {@code keyword}, ignoring case. ARFF header
     * keywords are case-insensitive, so {@code @ATTRIBUTE} and {@code @attribute} are equivalent.
     */
    private static boolean startsWithKeyword(String line, String keyword) {
        return line.regionMatches(true, 0, keyword, 0, keyword.length());
    }

    /**
     * Builds a feature from an {@code @attribute <name> <values>} declaration.
     *
     * <p>The line is split on runs of whitespace into at most three parts, so tabs or repeated
     * spaces between the tokens are tolerated and a value list that itself contains spaces
     * (for example <code>{a, b, c}</code>) is kept intact.
     *
     * @param line a trimmed line starting with the attribute keyword
     * @return the feature created from the declared name and value specification
     * @throws IllegalArgumentException if the name or the value specification is missing
     */
    private static Feature parseAttribute(String line) {
        String[] parts = line.split("\\s+", 3);
        if (parts.length < 3) {
            throw new IllegalArgumentException("Malformed @attribute declaration: " + line);
        }
        return new CategoricalFeature(parts[1].trim(), parts[2].trim());
    }

    /**
     * Parses data from the .arff file at inputFilename, storing the declared features and the
     * data instances.
     *
     * <p>The first line of the file is always skipped. After that, blank lines, lines starting
     * with {@code %}, and the {@code @data} / {@code @relation} lines are ignored; lines starting
     * with {@code @attribute} declare a feature; every other line is treated as a data row.
     *
     * <p>Any failure (missing file, malformed declaration, bad data row, empty file) is printed
     * to standard output and the JVM is terminated with a non-zero exit status, so this method
     * does not propagate exceptions to the caller despite its {@code throws} clause.
     */
    public void parseDataFromArffFile() throws Exception {
        // try-with-resources guarantees the underlying file handle is released on every path.
        try (Scanner in = new Scanner(new File(inputFilename), "UTF-8")) {
            // Unconditionally discard the first line, as the parser always has.
            in.nextLine();
            while (in.hasNextLine()) {
                String line = in.nextLine().trim();
                if (line.isEmpty() || line.charAt(0) == COMMENT_MARKER) {
                    continue;
                }
                if (startsWithKeyword(line, DATA_KEYWORD)
                        || startsWithKeyword(line, RELATION_KEYWORD)) {
                    continue;
                }
                if (startsWithKeyword(line, ATTRIBUTE_KEYWORD)) {
                    this.features.add(parseAttribute(line));
                } else {
                    this.addDataInstance(line);
                }
            }
        } catch (Exception e) {
            System.out.println("Issue reading in input file:" + e);
            // Exit with a failure status; 0 would tell the calling process that parsing succeeded.
            System.exit(1);
        }
    }

    /**
     * Sets the path of the .arff file to be read by {@link #parseDataFromArffFile()}.
     *
     * @param filename path of the input file
     */
    public void setInputFilename(String filename) {
        this.inputFilename = filename;
    }

    /**
     * Returns the parsed data rows. This is the parser's own list, not a copy.
     *
     * @return one list of doubles per data row, in file order
     */
    public ArrayList<ArrayList<Double>> getDataInstances() {
        return this.dataInstances;
    }

    /**
     * Returns the declared features. This is the parser's own list, not a copy.
     *
     * @return the features in declaration order
     */
    public ArrayList<Feature> getFeatures() {
        return this.features;
    }
}