From e36c442f16b2c9211abb0281dfda9a097eb6e07f Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sat, 8 Sep 2018 13:08:39 +0200 Subject: [PATCH] fix problem with double entries in enrichment fields --- src/org/loklak/harvester/TwitterScraper.java | 1 - src/org/loklak/objects/MessageEntry.java | 11 +++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/org/loklak/harvester/TwitterScraper.java b/src/org/loklak/harvester/TwitterScraper.java index ea3f814e6..68c583840 100644 --- a/src/org/loklak/harvester/TwitterScraper.java +++ b/src/org/loklak/harvester/TwitterScraper.java @@ -28,7 +28,6 @@ import java.net.MalformedURLException; import java.net.URL; -import java.net.URLDecoder; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; diff --git a/src/org/loklak/objects/MessageEntry.java b/src/org/loklak/objects/MessageEntry.java index c729fc7ca..69a4e5bb2 100644 --- a/src/org/loklak/objects/MessageEntry.java +++ b/src/org/loklak/objects/MessageEntry.java @@ -23,6 +23,7 @@ import java.net.URL; import java.util.ArrayList; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -72,16 +73,18 @@ public double getClassifierProbability(Classifier.Context context) { public static List extract(String text, Pattern p, int regexGroup) { Matcher m = p.matcher(text); - List dataList = new ArrayList(); + Set dataList = new LinkedHashSet(); while (m.find()) { dataList.add(m.group(regexGroup)); } for (String r: dataList) { - //text.replaceAll(r, ""); - text.replace(r, ""); + text.replace(r, ""); // replaceAll?? } - return dataList; + + List result = new ArrayList<>(dataList.size()); + result.addAll(dataList); + return result; } public List extractLinks(String text) {