-
Notifications
You must be signed in to change notification settings - Fork 0
/
ConcordanceCombiner.java
368 lines (185 loc) · 7.42 KB
/
ConcordanceCombiner.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Scanner;
import java.util.Collection;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Collections;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Pattern;
import static com.tutego.jrtf.Rtf.rtf;
import static com.tutego.jrtf.RtfHeader.font;
import static com.tutego.jrtf.RtfPara.*;
import static com.tutego.jrtf.RtfText.*;
import static com.tutego.jrtf.RtfUnit.CM;
import java.awt.Desktop;
import java.io.*;
import java.util.Date;
import com.tutego.jrtf.*;
import java.util.concurrent.TimeUnit;
class LocationTagExtended {
String word;
String paragraphCollection;
String filename;
LocationTagExtended(String w, String pC, String fn) {
word = w;
paragraphCollection = pC;
filename = fn;
}
String getWord() {
return word;
}
String getParagraphCollectionString() {
return paragraphCollection;
}
String getFilename() {
return filename;
}
public static Comparator<LocationTagExtended> WordComparator = new Comparator<LocationTagExtended>() {
public int compare(LocationTagExtended l1, LocationTagExtended l2) {
String l1Word = l1.getWord();
String l2Word = l2.getWord();
return l1Word.compareTo(l2Word);
}};
}
/**
 * Command-line tool that merges several concordance text files into a single
 * listing sorted by word.
 *
 * <p>The listing is first appended to {@code combinedOutput.txt} and that file
 * is then converted to {@code finalCombinedOutput.rtf} with the jrtf library.
 *
 * <p>Every input line is split on single spaces; the first token is used as the
 * word and the second as its paragraph collection. Lines with fewer than two
 * tokens are ignored. NOTE(review): the RTF stage only recognises word lines
 * containing ':' and entry lines containing '[', so the expected input is
 * presumably of the form {@code word: [1,2,3]} - confirm against the producer
 * of these files.
 */
class ConcordanceCombiner {
    // Not referenced anywhere in this class; kept because they are package-visible.
    static int paragraph = 0;
    static boolean incrementThisRound = false;

    /** Every entry read from the input files. */
    static ArrayList<LocationTagExtended> locationTags = new ArrayList<LocationTagExtended>();
    /** Lines of the combined plain-text listing, in output order. */
    static ArrayList<String> finalLines = new ArrayList<String>();
    /** The files named on the command line. */
    static ArrayList<File> filesGiven = new ArrayList<File>();
    /** Words whose header line has already been emitted into {@link #finalLines}. */
    static ArrayList<String> wordsAlreadyUsed = new ArrayList<String>();

    /**
     * Tells whether {@code str} can be parsed by {@link Integer#parseInt(String)}.
     *
     * @param str the text to test; {@code null} yields {@code false}
     * @return {@code true} if {@code str} parses as an {@code int}
     */
    public static boolean isNumeric(String str) {
        try {
            Integer.parseInt(str);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Reads every file named in {@code args}, sorts the collected entries by
     * word, writes the combined text listing and converts it to RTF.
     *
     * @param args paths of the concordance text files to combine
     * @throws IOException if the RTF output file cannot be written or closed
     */
    public static void main(String[] args) throws IOException {
        int fileArguments = args.length;
        System.out.println(fileArguments);

        for (String path : args) {
            File input = new File(path);
            filesGiven.add(input);
            readLocationTags(input);
        }

        System.out.println(locationTags.size());
        Collections.sort(locationTags, LocationTagExtended.WordComparator);

        buildFinalLines();
        writeCombinedText();

        System.out.println("starting to convert to a .rtf file");
        writeRtf();
    }

    /** Parses one input file and appends an entry to {@link #locationTags} for each usable line. */
    private static void readLocationTags(File input) {
        String sourceName = input.getName().replace(".txt", "");
        try (Scanner reader = new Scanner(input, "UTF-8")) {
            while (reader.hasNextLine()) {
                String[] wordsOnLine = reader.nextLine().split(" ");
                if (wordsOnLine.length > 1) {
                    locationTags.add(new LocationTagExtended(wordsOnLine[0], wordsOnLine[1], sourceName));
                }
            }
        } catch (FileNotFoundException error) {
            System.out.println("Error");
            error.printStackTrace();
        }
    }

    /**
     * Fills {@link #finalLines} from the (already sorted) {@link #locationTags}.
     *
     * <p>Each entry contributes a separator line, the word header (only the
     * first time that word is seen) and a "filename + paragraphs" line.
     * De-duplicating here, rather than removing lines from the finished list
     * afterwards, avoids both skipping elements after a removal and guessing
     * which lines are word headers from their content.
     */
    private static void buildFinalLines() {
        for (LocationTagExtended tag : locationTags) {
            finalLines.add(" ");
            String word = tag.getWord();
            if (!wordsAlreadyUsed.contains(word)) {
                wordsAlreadyUsed.add(word);
                finalLines.add(word);
            }
            finalLines.add(tag.getFilename() + tag.getParagraphCollectionString());
        }
    }

    /** Appends {@link #finalLines} to {@code combinedOutput.txt}, one CRLF-terminated line each. */
    private static void writeCombinedText() {
        // With nothing to write, the file is deliberately not created, so the
        // RTF stage reports the missing file as it always has.
        if (finalLines.isEmpty()) {
            return;
        }
        // NOTE(review): append mode is kept from the original, so output from
        // earlier runs stays in the file and is converted again - confirm
        // whether the file should be truncated instead.
        try (FileWriter writer = new FileWriter("combinedOutput.txt", true)) {
            for (String line : finalLines) {
                writer.write(line);
                writer.write("\r\n");
            }
        } catch (IOException error) {
            System.out.println("error");
            error.printStackTrace();
        }
    }

    /**
     * Converts {@code combinedOutput.txt} into {@code finalCombinedOutput.rtf}.
     *
     * @throws IOException if the RTF file cannot be written or closed
     */
    private static void writeRtf() throws IOException {
        ArrayList<RtfPara> rtfParagraphsCollection = new ArrayList<RtfPara>();
        File concordanceCombineOutput = new File("combinedOutput.txt");
        File finalOutput = new File("finalCombinedOutput.rtf");

        try (Scanner documentScanner = new Scanner(concordanceCombineOutput)) {
            while (documentScanner.hasNextLine()) {
                appendRtfParagraphs(documentScanner.nextLine(), rtfParagraphsCollection);
            }
        } catch (FileNotFoundException e) {
            System.out.println("An error occured");
            e.printStackTrace();
            return;
        }

        // The writer is closed here regardless of whether jrtf's out(...) also
        // closes it; closing a FileWriter twice has no effect.
        try (FileWriter rtfWriter = new FileWriter(finalOutput)) {
            rtf().section(rtfParagraphsCollection).out(rtfWriter);
        } catch (FileNotFoundException e) {
            System.out.println("An error occured");
            e.printStackTrace();
        }
    }

    /**
     * Turns one line of the text listing into zero, one or two RTF paragraphs.
     *
     * <p>A ':' after the first character makes the whole line a bold word
     * header. A '[' after the first character makes it an entry line whose
     * text before the bracket is bold. A line starting with ':' or '[' (index
     * 0) matches neither rule, and a line matching both yields both paragraphs.
     */
    private static void appendRtfParagraphs(String data, ArrayList<RtfPara> paragraphs) {
        int colonIndex = data.indexOf(":");
        int bracketIndex = data.indexOf("[");

        if (colonIndex > 0) {
            RtfTextPara wordPara = p(bold(data), text("\n"));
            paragraphs.add(wordPara);
        }

        if (bracketIndex > 0) {
            String firstPart = data.substring(0, bracketIndex);
            String lastPart = data.substring(bracketIndex);
            RtfTextPara entryPara = p(bold(firstPart), text(lastPart), text("\n"));
            paragraphs.add(entryPara);
        }
    }
}