Skip to content

Commit

Permalink
fix problem with double entries in enrichment fields
Browse files Browse the repository at this point in the history
  • Loading branch information
Orbiter committed Sep 8, 2018
1 parent c06e3d7 commit e36c442
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
1 change: 0 additions & 1 deletion src/org/loklak/harvester/TwitterScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
Expand Down
11 changes: 7 additions & 4 deletions src/org/loklak/objects/MessageEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -72,16 +73,18 @@ public double getClassifierProbability(Classifier.Context context) {

/**
 * Collects the distinct substrings of {@code text} captured by one group of a regex.
 *
 * <p>Every match of {@code p} in {@code text} contributes the content of capture
 * group {@code regexGroup}. Each distinct value is reported once, in the order of
 * its first occurrence, so repeated entries in the text do not produce duplicates
 * in the result.
 *
 * <p>{@code text} is not modified. An earlier revision looped over the matches
 * calling {@code text.replace(r, "")} and discarded the result; because strings are
 * immutable that loop had no effect and has been removed.
 *
 * @param text       the text to scan
 * @param p          the compiled pattern to search for
 * @param regexGroup index of the capture group to collect (0 is the whole match)
 * @return a new mutable list of the distinct captured values in first-seen order;
 *         empty when nothing matches
 * @throws IndexOutOfBoundsException if {@code p} has no group with index {@code regexGroup}
 */
public static List<String> extract(String text, Pattern p, int regexGroup) {
    Matcher m = p.matcher(text);
    // LinkedHashSet drops duplicates while keeping first-occurrence order.
    Set<String> dataSet = new LinkedHashSet<String>();

    while (m.find()) {
        String captured = m.group(regexGroup);
        // group() yields null when an optional group did not take part in the match;
        // such matches carry no value to report.
        if (captured != null) {
            dataSet.add(captured);
        }
    }

    return new ArrayList<>(dataSet);
}

public List<String> extractLinks(String text) {
Expand Down

0 comments on commit e36c442

Please sign in to comment.