diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java index 47fd6d4af..31b67b83d 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java @@ -5,12 +5,15 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; +import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.queryparser.flexible.aqp.config.AqpAdsabsQueryConfigHandler; import org.apache.lucene.queryparser.flexible.aqp.nodes.AqpAdsabsRegexQueryNode; +import org.apache.lucene.queryparser.flexible.aqp.nodes.SlowFuzzyQueryNode; +import org.apache.lucene.queryparser.flexible.aqp.parser.AqpStandardQueryConfigHandler; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler; import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; @@ -75,7 +78,7 @@ protected QueryNode postProcessNode(QueryNode node) if (value.indexOf('?') > -1) { qmarkPosition = value.indexOf('?'); } - + // if wildcard in the middle, we can't deal with it. 
return if (asteriskPosition > 0 && asteriskPosition+1 < value.length() || qmarkPosition > 0 && qmarkPosition+1 < value.length() || asteriskPosition > -1 && qmarkPosition > -1) @@ -126,12 +129,28 @@ else if(node instanceof FuzzyQueryNode) { if (tokens.length > 1) return node; // break, let the analyzer decide the fate + if (!tokens[0].equals(value)) { - return new FuzzyQueryNode(field, - tokens[0], - ((FuzzyQueryNode)node).getSimilarity(), - ((FuzzyQueryNode)node).getBegin(), - ((FuzzyQueryNode)node).getEnd()); + + QueryConfigHandler config = getQueryConfigHandler(); + Map args = config.get(AqpStandardQueryConfigHandler.ConfigurationKeys.NAMED_PARAMETER); + + + if (node.getClass().equals(SlowFuzzyQueryNode.class) + && args != null && "true".equals(args.getOrDefault("aqp.allow.slow.fuzzy", "true"))) { /* equals(), not ==: == compares String references, so a runtime-supplied "true" value never matched and the slow fuzzy node was silently downgraded */ + return new SlowFuzzyQueryNode(field, + tokens[0], + ((FuzzyQueryNode)node).getSimilarity(), + ((FuzzyQueryNode)node).getBegin(), + ((FuzzyQueryNode)node).getEnd()); + } + else { + return new FuzzyQueryNode(field, + tokens[0], + ((FuzzyQueryNode)node).getSimilarity(), + ((FuzzyQueryNode)node).getBegin(), + ((FuzzyQueryNode)node).getEnd()); + } } } } diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java index a3a98c667..6bc6ee981 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorUtils.java @@ -42,6 +42,7 @@ public static String normalizeAuthor(String a) { } public static String normalizeAuthor(String a, boolean keepApostrophe) { + boolean hasWildcards = a.indexOf('*') > -1 || a.indexOf('?') > -1; // \*\? 
should never be encountered here if (!keepApostrophe) a = n4.matcher(a).replaceAll("-"); a = n0.matcher(a).replaceAll(" "); @@ -53,7 +54,7 @@ public static String normalizeAuthor(String a, boolean keepApostrophe) { a = n2.matcher(a.trim()).replaceAll(" "); - if (!(a.contains(","))) // || a.contains(" ") + if (!hasWildcards && !(a.contains(","))) // || a.contains(" ") a = a + ","; // do this at the end, we want to see the space instead of '-' a = a.replace('-', ' '); diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/PythonicAuthorNormalizerFilter.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/PythonicAuthorNormalizerFilter.java index c9ac22860..c0c722663 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/PythonicAuthorNormalizerFilter.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/PythonicAuthorNormalizerFilter.java @@ -65,6 +65,12 @@ public boolean incrementToken() throws IOException { for (String individual: original.split(";")) { + // skip processing wildcards + if (individual.indexOf('*') > -1 || individual.indexOf('?') > -1) { + buffer.add(individual); + continue; + } + Map parsedName = jythonParser.parse_human_name(individual); if (parsedName != null) { diff --git a/contrib/adsabs/src/java/org/apache/solr/search/CitationLRUCache.java b/contrib/adsabs/src/java/org/apache/solr/search/CitationLRUCache.java index b696dcbdd..4753b6aa2 100644 --- a/contrib/adsabs/src/java/org/apache/solr/search/CitationLRUCache.java +++ b/contrib/adsabs/src/java/org/apache/solr/search/CitationLRUCache.java @@ -698,7 +698,7 @@ public void set(int docbase, int docid, Object value) { * Given the set of fields, we'll look inside them and retrieve (into memory) * all values */ - private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List fields, KVSetter setter) + private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List fields, final KVSetter setter) throws IOException { IndexSchema schema = 
searcher.getCore().getLatestSchema(); @@ -712,7 +712,7 @@ private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List fie liveDocs = leave.reader().getLiveDocs(); lr = leave.reader(); FieldInfos fInfo = lr.getFieldInfos(); - for (String field : fields) { + for (final String field : fields) { FieldInfo fi = fInfo.fieldInfo(field); diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java index 6491a1d61..553088ae7 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java @@ -25,7 +25,9 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.WildcardQuery; import org.apache.solr.common.params.CommonParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.QParser; @@ -374,6 +376,13 @@ public void xtestX() throws Exception { } public void testAuthorParsingUseCases() throws Exception { + + assertQueryEquals(req("q", "author:acco*"), "author:acco*", WildcardQuery.class); + assertQueryEquals(req("q", "author:Adamč*"), "author:adamč*", WildcardQuery.class); + + testAuthorQuery("Adamč*", + "adamč*", + "//*[@numFound='11']"); // multiple synonyms in the file are separated with semicolon testAuthorQuery("\"wyrzykowsky, l\"", @@ -392,14 +401,14 @@ public void testAuthorParsingUseCases() throws Exception { "//*[@numFound='1']"); // should not find anything, even though the names are there indexed next to each other - assertQ(req("q", "author:\"foo, * other, *\"", "debugQuery", "true"), + assertQ(req("q", "author:\"foo, * other, *\""), "//*[@numFound='0']" ); - 
assertQ(req("q", "author:\"foo, *\"", "debugQuery", "true"), + assertQ(req("q", "author:\"foo, *\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='600']" ); - assertQ(req("q", "author:\"other, *\"", "debugQuery", "true"), + assertQ(req("q", "author:\"other, *\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='600']" ); diff --git a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java index b3ec6d477..b76033e98 100644 --- a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java +++ b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java @@ -12,6 +12,7 @@ import org.apache.lucene.queries.CustomScoreQuery; import org.apache.lucene.queries.mlt.MoreLikeThisQuery; import org.apache.lucene.queryparser.flexible.aqp.TestAqpAdsabs; +import org.apache.lucene.sandbox.queries.SlowFuzzyQuery; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; @@ -586,14 +587,14 @@ public void testSpecialCases() throws Exception { // levenshtein automata only considers distances (and max is 2) assertQueryEquals(req("defType", "aqp", "q", "=author:\"Hoffmann, W.\"~0.8"), - "author:hoffmann, w~2", - FuzzyQuery.class); + "author:hoffmann, w~0.8", + SlowFuzzyQuery.class); assertQueryEquals(req("defType", "aqp", "q", "=author:\"Hoffmann, W.\"~3"), "author:hoffmann, w~2", FuzzyQuery.class); assertQueryEquals(req("defType", "aqp", "q", "=author:\"Hoffmann, W.\"~1"), - "author:hoffmann, w~1", - FuzzyQuery.class); + "author:hoffmann, w~1.0", + SlowFuzzyQuery.class); assertQueryEquals(req("defType", "aqp", "q", "author:\"Hoffmann, W.\"~2"), diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml index 9fc5a24a3..55a1cea3e 100644 --- 
a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml @@ -89,7 +89,6 @@ -