diff --git a/.classpath b/.classpath index e3e159951..5435b3939 100644 --- a/.classpath +++ b/.classpath @@ -74,7 +74,7 @@ - + diff --git a/.project b/.project index bd8f2e231..d181e26bd 100644 --- a/.project +++ b/.project @@ -3,7 +3,7 @@ apache-solr-48 - apache-solr-49 + apache-solr-61 diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java index dae114f4e..a235f994f 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java @@ -66,19 +66,53 @@ protected QueryNode postProcessNode(QueryNode node) if (node instanceof WildcardQueryNode) { field = ((WildcardQueryNode) node).getFieldAsString(); value = ((WildcardQueryNode) node).getTextAsString(); - if (value.indexOf('*') == 0 || value.indexOf('?') == 0) - return node; //let the analyzer decide the fate + + int asteriskPosition = -1; + int qmarkPosition = -1; + int origLen = value.length(); + + if (value.indexOf('*') > -1) { + asteriskPosition = value.indexOf('*'); + } + if (value.indexOf('?') > -1) { + qmarkPosition = value.indexOf('?'); + } + + if (asteriskPosition > 0 && asteriskPosition+1 < value.length() + || qmarkPosition > 0 && qmarkPosition+1 < value.length() + || asteriskPosition > -1 && qmarkPosition > -1) + return node; for (String suffix: new String[]{"_wildcard", ""}) { if (hasAnalyzer(field + suffix)) { tokens = analyze(field + suffix, value); - if (tokens.length > 1) + if (tokens.length != 1) return node; // break, let the analyzer decide the fate - if (!tokens[0].equals(value)) { + String newToken = tokens[0]; + if (newToken.length() < origLen) { + if (qmarkPosition > -1) { + if (qmarkPosition == 0) { + newToken = '?' + tokens[0]; + } + else { + newToken = tokens[0] + '?'; + } + } + else { + if (asteriskPosition == 0) { + newToken = '*' + tokens[0]; + } + else { + newToken = tokens[0] + '*'; + } + } + } + + if (!newToken.equals(value)) { return new WildcardQueryNode(field, - tokens[0], ((WildcardQueryNode)node).getBegin(), + newToken, ((WildcardQueryNode)node).getBegin(), ((WildcardQueryNode)node).getEnd()); } } diff --git a/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java b/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java index 9a1fab70b..87e8058cd 100644 --- a/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java +++ b/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java @@ -27,7 +27,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery.Builder; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermQuery; @@ -40,9 +39,12 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestRuleLimitSysouts; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; + +@TestRuleLimitSysouts.Limit(bytes = 600000) @SuppressCodecs({"Lucene3x", "SimpleText"}) public class BenchmarkAuthorSearch extends LuceneTestCase{ private IndexSearcher searcher; @@ -411,7 +413,8 @@ private List getIndexData(int[] randomIds) throws IOException { String original = doc.get("original").toString(); String[] parts = original.split("\\,? "); int howMany = TestUtil.nextInt(random(), 0, parts.length-1); // how many initials - data.add(new TestCase(original, parts, howMany)); + if (howMany > 1) + data.add(new TestCase(original, parts, howMany)); } return data; } @@ -434,8 +437,11 @@ private void verifySearch(int[] randomIds) throws IOException { bq.add(q, Occur.MUST); bq.add(new TermQuery(new Term("id", Integer.toString(randomIds[i]))), Occur.MUST); if (q != null) { - System.out.println(q.toString()); - int no = searcher.search(bq.build(), 1).totalHits; + //System.out.println(q.toString()); + Query query = bq.build(); + //System.out.println(query.toString()); + //System.out.println("q: " + searcher.search(q, 10).totalHits); + int no = searcher.search(query, 1).totalHits; if (no != 1) { System.out.println("Results differ: " + oq + " <<>> " + q + " [" + ho + " : " + no + "]"); if (store == true) { @@ -457,7 +463,7 @@ private void verifySearch(int[] randomIds) throws IOException { } - private Query[] buildQueries(String[] parts) throws UnsupportedEncodingException { + private Query[] buildQueries(String[] parts) throws IOException { if (parts.length - 1 < 3) return null; int howMany = TestUtil.nextInt(random(), 2, parts.length-1); // how many initials @@ -483,7 +489,8 @@ private Query getSpanQuery(String[] parts, int howMany, boolean truncate) throws clauses[0] = new SpanTermQuery(new Term("vectrfield", parts[0])); // surname for (int i = 0; i < howMany; i++) { if (truncate) { - clauses[i+1] = new SpanMultiTermQueryWrapper(new WildcardQuery(new Term("vectrfield", parts[i+1].substring(0, 1) + "*"))); + SpanMultiTermQueryWrapper q = new SpanMultiTermQueryWrapper(new WildcardQuery(new Term("vectrfield", parts[i+1].substring(0, 1) + "*"))); + clauses[i+1] = q; } else { clauses[i+1] = new SpanTermQuery(new Term("vectrfield", parts[i+1])); @@ -493,7 +500,7 @@ private Query getSpanQuery(String[] parts, int howMany, boolean truncate) throws return sq; } - private Query getPayloadQuery(String[] parts, int howMany, boolean truncate) throws UnsupportedEncodingException { + private Query getPayloadQuery(String[] parts, int howMany, boolean truncate) throws IOException { List payloads = new ArrayList(howMany+1); BytesRef pay = new BytesRef((Integer.toString(0)).getBytes("UTF-8")); payloads.add(pay); @@ -502,8 +509,10 @@ private Query getPayloadQuery(String[] parts, int howMany, boolean truncate) thr clauses[0] = new SpanTermQuery(new Term("vectrfield", parts[0])); // surname for (int i = 0; i < howMany; i++) { if (truncate) { - //clauses[i+1] = new SpanMultiTermQueryWrapper(new WildcardQuery(new Term("vectrfield", parts[i+1].substring(0, 1) + "*"))); - clauses[i+1] = new SpanMultiTermQueryWrapper(new PrefixQuery(new Term("vectrfield", parts[i+1].substring(0, 1)))); + SpanMultiTermQueryWrapper q = new SpanMultiTermQueryWrapper(new WildcardQuery(new Term("vectrfield", parts[i+1].substring(0, 1) + "*"))); + clauses[i+1] = (SpanQuery) q.rewrite(searcher.getIndexReader()); + + //clauses[i+1] = new SpanMultiTermQueryWrapper(new PrefixQuery(new Term("vectrfield", parts[i+1].substring(0, 1)))); } else { clauses[i+1] = new SpanTermQuery(new Term("vectrfield", parts[i+1]));