From 6dc759f7b0bdeb227f287b8134b47ac81b6b56a4 Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Fri, 30 Sep 2016 17:49:32 -0400 Subject: [PATCH] Latest changes; the *wildcard handling could have broken some code --- .classpath | 2 +- .../AqpAdsabsCarefulAnalyzerProcessor.java | 76 +++++++------ ...qpAdsabsFieldNodePreAnalysisProcessor.java | 4 +- .../analysis/DateNormalizerTokenFilter.java | 11 +- .../CitationsTransformerFactory.java | 2 +- .../conf/solrconfig-citations-transformer.xml | 1 - .../src/test/org/adsabs/TestAdsAllFields.java | 2 +- .../adsabs/lucene/BenchmarkAuthorSearch.java | 8 +- .../src/test/org/adsabs/solr/AdsConfig.java | 2 +- .../analysis/TestAdsabsTypeDateString.java | 5 +- .../TestAdsabsTypeFulltextParsing.java | 16 +-- .../TestAdsabsTypeNormalizedStringAscii.java | 52 ++++++--- .../TestAdsabsTypeNormalizedTextAscii.java | 2 +- .../analysis/TestDateNormalizerFilter.java | 11 +- .../solr/search/TestAqpAdsabsSolrSearch.java | 43 +++----- .../solr/search/TestSolrCitationQuery.java | 6 +- ...qpDebuggingQueryNodeProcessorPipeline.java | 2 +- .../adsabs/solr/collection1/conf/schema.xml | 102 +++++++++--------- 18 files changed, 196 insertions(+), 151 deletions(-) diff --git a/.classpath b/.classpath index 77f85ffb1..e3e159951 100644 --- a/.classpath +++ b/.classpath @@ -47,7 +47,7 @@ - + diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java index a39c35193..c0b1a3a06 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsCarefulAnalyzerProcessor.java @@ -64,41 +64,59 @@ protected QueryNode postProcessNode(QueryNode node) String value =null; String[] tokens; if (node instanceof WildcardQueryNode) { - field = ((WildcardQueryNode) node).getFieldAsString() + "_wildcard"; + field = ((WildcardQueryNode) node).getFieldAsString(); value = ((WildcardQueryNode) node).getTextAsString(); - if (hasAnalyzer(field)) { - tokens = analyze(field, value); - if (!tokens[0].equals(value)) { - return new WildcardQueryNode(((WildcardQueryNode) node).getFieldAsString(), - tokens[0], ((WildcardQueryNode)node).getBegin(), - ((WildcardQueryNode)node).getEnd()); + for (String suffix: new String[]{"_wildcard", ""}) { + if (hasAnalyzer(field + suffix)) { + tokens = analyze(field + suffix, "foo*bar"); + + if (tokens.length > 1 || value.indexOf('*') == 0 || value.indexOf('?') == 0) + return node; // break, let the analyzer decide the fate + + if (!tokens[0].equals(value)) { + return new WildcardQueryNode(field, + tokens[0], ((WildcardQueryNode)node).getBegin(), + ((WildcardQueryNode)node).getEnd()); + } } - } + } } else if(node instanceof FuzzyQueryNode) { - field = ((FuzzyQueryNode) node).getFieldAsString() + "_fuzzy"; + field = ((FuzzyQueryNode) node).getFieldAsString(); value = ((FuzzyQueryNode) node).getTextAsString(); - if (hasAnalyzer(field)) { - tokens = analyze(field, value); - if (!tokens[0].equals(value)) { - return new FuzzyQueryNode(field = ((FuzzyQueryNode) node).getFieldAsString(), - tokens[0], - ((FuzzyQueryNode)node).getSimilarity(), - ((FuzzyQueryNode)node).getBegin(), - ((FuzzyQueryNode)node).getEnd()); + for (String suffix: new String[]{"_fuzzy", ""}) { + if (hasAnalyzer(field+suffix)) { + tokens = analyze(field + suffix, value); + + if (tokens.length > 1) + return node; // break, let the analyzer decide the fate + + if (!tokens[0].equals(value)) { + return new FuzzyQueryNode(field, + tokens[0], + ((FuzzyQueryNode)node).getSimilarity(), + ((FuzzyQueryNode)node).getBegin(), + ((FuzzyQueryNode)node).getEnd()); + } } - } + } } else if(node instanceof AqpAdsabsRegexQueryNode) { - field = ((FieldQueryNode) node).getFieldAsString() + "_regex"; + field = ((FieldQueryNode) node).getFieldAsString(); value = ((FieldQueryNode) node).getText().toString(); - if (hasAnalyzer(field)) { - tokens = analyze(field, value); - if (!tokens[0].equals(value)) { - return new AqpAdsabsRegexQueryNode(((FieldQueryNode) node).getFieldAsString(), - tokens[0], ((FieldQueryNode)node).getBegin(), - ((FieldQueryNode)node).getEnd()); - } + for (String suffix: new String[]{"_regex", ""}) { + if (hasAnalyzer(field + suffix)) { + tokens = analyze(field + suffix, value); + + if (tokens.length > 1) + return node; // break, let the analyzer decide the fate + + if (!tokens[0].equals(value)) { + return new AqpAdsabsRegexQueryNode(field, + tokens[0], ((FieldQueryNode)node).getBegin(), + ((FieldQueryNode)node).getEnd()); + } + } } } @@ -154,12 +172,6 @@ private String[] analyze(CharSequence field, String value) throws QueryNodeExcep // pass } - - // for now let'd do this - if (out.size() > 1) { - throw new QueryNodeException(new MessageImpl("We are not expecting multiple tokens from analyzing: " + field + ":" + value)); - } - return out.toArray(new String[out.size()]); } diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java index 40598e77c..4f7dd04e4 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java @@ -43,7 +43,7 @@ public AqpAdsabsFieldNodePreAnalysisProcessor() { super(); dmp = new DateMathParser(DateMathParser.UTC); - sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); + sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); sdf.setTimeZone(TimeZone.getTimeZone("UTC")); } @@ -190,7 +190,7 @@ private String moveDate( String...moveBy) throws QueryNodeException { String[] dateParts = originalDate.split("-|/"); Date dateWithOffset = (Date) parsedDate.clone(); - + dmp.setNow(parsedDate); try { if (dateParts.length == 1) { // just a year assert moveBy.length >= 1; diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/DateNormalizerTokenFilter.java b/contrib/adsabs/src/java/org/apache/solr/analysis/DateNormalizerTokenFilter.java index 5f23a279b..8dfc78857 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/DateNormalizerTokenFilter.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/DateNormalizerTokenFilter.java @@ -5,6 +5,7 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; +import java.util.TimeZone; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -19,18 +20,20 @@ public final class DateNormalizerTokenFilter extends TokenFilter { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private DateMathParser dmp; private String offset; - private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.ROOT); + private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); + public DateNormalizerTokenFilter(TokenStream input, String incomingFormat, String offset) { super(input); + sdf.setTimeZone(TimeZone.getTimeZone("UTC")); this.offset = offset; String[] parts = incomingFormat.split("\\|"); format = new SimpleDateFormat[parts.length]; for (int i=0;i ${tests.luceneMatchVersion:LUCENE_CURRENT} - ${solr.data.dir:} diff --git a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java index d0be9581b..bfccf1dd5 100644 --- a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java +++ b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java @@ -814,7 +814,7 @@ public void test() throws Exception { /* * cite_read_boost */ - //dumpDoc(null, "recid", "read_count", "cite_read_boost"); + dumpDoc(null, "recid", "read_count", "cite_read_boost"); assertQ(req("q", "cite_read_boost:[0.0 TO 1.0]"), "//doc/int[@name='recid'][.='100']", "//doc/int[@name='recid'][.='101']", diff --git a/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java b/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java index bef3000e0..d8ec0c015 100644 --- a/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java +++ b/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java @@ -424,13 +424,17 @@ private void verifySearch(int[] randomIds) throws IOException { String original = doc.getField("original").stringValue(); String[] parts = original.split("\\,? "); Query[] queries = buildQueries(parts); + if (queries == null) + continue; TermQuery oq = new TermQuery(new Term("original", original)); int ho = searcher.search(oq, 1).totalHits; for (Query q: queries) { + if (q == null) continue; Builder bq = new BooleanQuery.Builder(); bq.add(q, Occur.MUST); bq.add(new TermQuery(new Term("id", Integer.toString(randomIds[i]))), Occur.MUST); if (q != null) { + System.out.println(q.toString()); int no = searcher.search(bq.build(), 1).totalHits; if (no != 1) { System.out.println("Results differ: " + oq + " <<>> " + q + " [" + ho + " : " + no + "]"); @@ -454,7 +458,9 @@ private void verifySearch(int[] randomIds) throws IOException { } private Query[] buildQueries(String[] parts) throws UnsupportedEncodingException { - int howMany = TestUtil.nextInt(random(), 0, parts.length-1); // how many initials + int howMany = TestUtil.nextInt(random(), 2, parts.length-1); // how many initials + if (howMany < 2) + return null; Query[] queries = new Query[9]; queries[1] = getRegexpQuery(parts, howMany, false); queries[2] = getWildcardQuery(parts, howMany, false); diff --git a/contrib/adsabs/src/test/org/adsabs/solr/AdsConfig.java b/contrib/adsabs/src/test/org/adsabs/solr/AdsConfig.java index f9bb429f7..9cc22c965 100644 --- a/contrib/adsabs/src/test/org/adsabs/solr/AdsConfig.java +++ b/contrib/adsabs/src/test/org/adsabs/solr/AdsConfig.java @@ -21,7 +21,7 @@ public static class F { public static String[] TYPE_NORMALIZED_TEXT_ASCII_FIELDS = new String[]{"pub", "keyword", "keyword_norm"}; public static String TYPE_NORMALIZED_STRING_ASCII = "bibcode"; - public static String[] TYPE_NORMALIZED_STRING_ASCII_FIELDS = new String[]{"bibcode", "volume", + public static String[] TYPE_NORMALIZED_STRING_ASCII_FIELDS = new String[]{"bibcode", "citation", "volume", "issue", "lang", "issn", "isbn", "property", "database", "data", "bibgroup", "vizier"}; public static String[] TYPE_DATE_FIELDS = new String[]{"date"}; diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java index fba2ea4b2..ec7171530 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java @@ -76,7 +76,6 @@ public void test() throws Exception { assertU(addDocs("date", "1977-01-01T00:30:00Z")); assertU(commit()); - assertQ(req("q", "*:*"), "//*[@numFound='16']"); @@ -100,8 +99,10 @@ public void test() throws Exception { "date:[1325376000000 TO 1325462399000]", LegacyNumericRangeQuery.class); // 1012-01-01T00:00:01 - 2012-12-31T23:59:59 + // NOTE: the date parsing is tricky (calendars were changed in 1582) + // so it actually produces 1011-12-26; but I think we can ignore it assertQueryEquals(req("q", "pubdate:[* TO 2012]", "defType", "aqp"), - "date:[-30231100799000 TO 1356998399000]", + "date:[-30231619199000 TO 1356998399000]", LegacyNumericRangeQuery.class); // 2012-01-01T00:00:00 - 3011-12-31T23:59:59 diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java index d33c1aa30..61629f035 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java @@ -371,9 +371,9 @@ public void testMultiTokens() throws Exception { // multi-pace is split by WDFF and expanded with a synonym assertQueryEquals(req("q", "title:\"bubble pace telescope multi-pace foobar\"", "defType", "aqp"), "title:\"bubble (pace syn::lunar) telescope multi (pace syn::lunar) foobar\"" + - " title:\"bubble (pace syn::lunar) telescope multipace ? foobar\"" + + " title:\"bubble (pace syn::lunar) telescope ? multipace foobar\"" + " title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multi (pace syn::lunar) foobar\"~2" + - " title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multipace ? foobar\"~3", + " title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? ? multipace foobar\"~3", BooleanQuery.class); assertQ(req("q", "title" + ":\"bubble pace telescope multi-pace foobar\""), "//*[@numFound='1']", "//doc/str[@name='id'][.='17']"); @@ -415,9 +415,9 @@ public void testMultiTokens() throws Exception { //dumpDoc(null, "title", "recid"); assertQueryEquals(req("q", "title:\"bubble pace telescope multi-foo\"", "defType", "aqp", "df", "title"), "title:\"bubble (pace syn::lunar) telescope multi foo\" " + - "title:\"bubble (pace syn::lunar) telescope multifoo\" " + + "title:\"bubble (pace syn::lunar) telescope ? multifoo\" " + "title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multi foo\"~2 " + - "title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multifoo\"~2", + "title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? ? multifoo\"~3", BooleanQuery.class); assertQ(req("q", "title:\"bubble pace telescope multi-foo\"", "defType", "aqp", "df", "title"), "//*[@numFound='2']", @@ -799,6 +799,7 @@ public void testOtherCases() throws Exception { "//doc/str[@name='id'][.='10']", "//doc/str[@name='id'][.='11']" ); + assertQ(req("q", "title" + ":*sky"), "//*[@numFound='4']", "//doc/str[@name='id'][.='10']", "//doc/str[@name='id'][.='11']", @@ -851,16 +852,15 @@ public void testOtherCases() throws Exception { "defType", "aqp"), "title:\"350 (mhz syn::mhz) (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma) ray (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" " + "title:\"350 (mhz syn::mhz) (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) ? (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" " - + "title:\"350mhz ? (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma) ray (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" " - + "title:\"350mhz ? (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) ? (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"~2", + + "title:\"350mhz (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma) ray (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" " + + "title:\"350mhz (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) ? (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"~2", BooleanQuery.class); - assertQueryEquals(req( "q", "title:\"A 350-MHz GBT Survey of 50 Faint Fermi γ-ray Sources for Radio Millisecond Pulsars\"", "defType", "aqp"), "title:\"350 (mhz syn::mhz) (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (ray gammaray syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" " - + "title:\"350mhz ? (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (ray gammaray syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"", + + "title:\"350mhz (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (ray gammaray syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"", BooleanQuery.class); //dumpDoc(null, "title"); diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedStringAscii.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedStringAscii.java index 94220aa64..49f19762b 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedStringAscii.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedStringAscii.java @@ -54,28 +54,56 @@ public static void beforeClass() throws Exception { public void test() throws Exception { - - assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "Bílá kobyla skočila přes čtyřista")); - assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "třicet-tři stříbrných střech")); - assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "A ještě TřistaTřicetTři stříbrných stovek")); - assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "one two three")); - assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "este-c'est que")); - assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "568")); - - assertU(commit()); - //dumpDoc(null, F.ID, F.TYPE_NORMALIZED_STRING_ASCII_FIELDS[0]); + String[] fs = new String[]{"bibcode", "identifier", "title"}; // single-val-string, multi-val-string, text + assertU(addDocs(fs, "Bílá kobyla skočila přes čtyřista")); + assertU(addDocs(fs, "třicet-tři stříbrných střech")); + assertU(addDocs(fs, "A ještě TřistaTřicetTři stříbrných stovek")); + assertU(addDocs(fs, "one two three")); + assertU(addDocs(fs, "este-c'est que")); + assertU(addDocs(fs, "568")); + + assertU(commit("waitSearcher", "true")); assertQ(req("q", "*:*"), "//*[@numFound='6']"); assertQueryEquals(req("q", "bibcode:Bílá", "qt", "aqp"), "bibcode:bila", TermQuery.class); assertQueryEquals(req("q", "bibcode:Bila-bila", "qt", "aqp"), "bibcode:bilabila", TermQuery.class); + assertQ(req("q", "bibcode:Bílá*"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='0']"); - assertQ(req("q", "bibcode:Bílá-kobyla*"), "//*[@numFound='1']", + assertQ(req("q", "identifier:Bílá*"), + "//*[@numFound='1']", + "//doc[1]/str[@name='id'][.='0']"); + assertQ(req("q", "title:Bílá*"), + "//*[@numFound='1']", + "//doc[1]/str[@name='id'][.='0']"); + + assertQ(req("q", "bibcode:kobyla"), + "//*[@numFound='0']"); + assertQ(req("q", "identifier:kobyla"), + "//*[@numFound='0']"); + assertQ(req("q", "title:kobyla"), + "//*[@numFound='1']", + "//doc[1]/str[@name='id'][.='0']"); + + + assertQ(req("q", "bibcode:Bílá-kobyla*"), + "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='0']"); - assertQ(req("q", "bibcode:kobyla"), "//*[@numFound='0']"); + assertQ(req("q", "identifier:Bílá-kobyla*"), + "//*[@numFound='1']", + "//doc[1]/str[@name='id'][.='0']"); + assertQ(req("q", "title:Bílá-kobyla*"), + "//*[@numFound='0']"); + + assertQ(req("q", "bibcode:Bílá-kobyla"), + "//*[@numFound='0']"); + assertQ(req("q", "identifier:Bílá-kobyla"), + "//*[@numFound='0']"); + assertQ(req("q", "title:Bílá-kobyla"), + "//*[@numFound='1']"); assertQ(req("q", "bibcode:\"one two three\""), "//*[@numFound='1']", diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedTextAscii.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedTextAscii.java index 152888a1a..aec42205a 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedTextAscii.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeNormalizedTextAscii.java @@ -69,7 +69,7 @@ public void test() throws Exception { assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "three-jets-four")); assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "five jets")); - assertU(commit()); + assertU(commit("waitSearcher", "true")); assertQ(req("q", "*:*"), "//*[@numFound='10']"); diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestDateNormalizerFilter.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestDateNormalizerFilter.java index 718f439fd..71e27006d 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestDateNormalizerFilter.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestDateNormalizerFilter.java @@ -12,13 +12,18 @@ public class TestDateNormalizerFilter extends BaseTokenStreamTestCase { public void test() throws Exception { HashMap config = new HashMap(); - config.put("format", "yyyy-MM-dd|yy-MM-dd|yy-MM"); + config.put("format", "yyyy-MM-dd|yy-MM-dd|yy-MM|yyyy"); DateNormalizerTokenFilterFactory factory = new DateNormalizerTokenFilterFactory(config); TokenStream stream; - stream = factory.create(whitespaceMockTokenizer(new StringReader("2014-12-00"))); + stream = factory.create(whitespaceMockTokenizer(new StringReader("2014 2014-00 2014-12 2014-12-01 2014-12-00"))); assertTokenStreamContents(stream, - new String[] {"2014-12-01"} + new String[] { + "2014-01-01T00:00:00Z", + "2014-01-01T00:00:00Z", + "2014-12-01T00:00:00Z", + "2014-12-01T00:30:00Z", + "2014-12-01T00:00:00Z"} ); } diff --git a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java index 9d958ab52..9f66f5724 100644 --- a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java +++ b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java @@ -193,14 +193,6 @@ public void testUnfieldedSearch() throws Exception { "aqp.unfielded.tokens.new.type", "simple", "qf", "title keyword", "q", "author:accomazzi, alberto property:refereed r s t"), -// "+(" -// + "((((author:accomazzi, author:accomazzi,*)) (keyword:alberto | title:alberto))~2) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)" -// + ") " -// + "+(" -// + "((property:refereed (keyword:r | title:r) (keyword:s | title:s) (keyword:t | title:t))~4) property:refereedrst" -// + ")", - - "+(" + "(+((author:accomazzi, author:accomazzi,*)) +(keyword:alberto | title:alberto)) " + "(((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)" @@ -217,8 +209,9 @@ public void testUnfieldedSearch() throws Exception { "aqp.unfielded.phrase.edismax.synonym.workaround", "true", "q", "author:accomazzi, alberto property:refereed r s t", "qf", "title keyword^0.5"), - "+(((((author:accomazzi, author:accomazzi,*)) (keyword:alberto^0.5 | title:alberto))~2) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)) +(((property:refereed (keyword:r^0.5 | title:r) (keyword:s^0.5 | title:s) (keyword:t^0.5 | title:t))~4) property:refereedrst)", - //"+((+((author:accomazzi, author:accomazzi,*)) +((keyword:alberto)^0.5 | title:alberto)) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)) +((+property:refereed +((keyword:r)^0.5 | title:r) +((keyword:s)^0.5 | title:s) +((keyword:t)^0.5 | title:t)) property:refereedrst)/" + "+((+((author:accomazzi, author:accomazzi,*)) +((keyword:alberto)^0.5 | title:alberto)) " + + "(((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)) " + +"+((+property:refereed +((keyword:r)^0.5 | title:r) +((keyword:s)^0.5 | title:s) +((keyword:t)^0.5 | title:t)) property:refereedrst)", BooleanQuery.class); @@ -232,20 +225,19 @@ public void testUnfieldedSearch() throws Exception { // 22/10/13 - I've introduced a new strategy that emits both the // original query string and the phrase query, this is a workaround // for edismax + // 30/09/16 - edismax is using default AND assertQueryEquals(req("defType", "aqp", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "q", "pink elephant"), - "(((((all:pink all:syn::pinkish))) (all:elephant))~2) all:\"(pink syn::pinkish) elephant\"", - //"(+(((all:pink all:syn::pinkish))) +(all:elephant)) all:"(pink syn::pinkish) elephant" + "(+(((all:pink all:syn::pinkish))) +(all:elephant)) all:\"(pink syn::pinkish) elephant\"", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "pink elephant", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "qf", "title keyword"), - "(((keyword:pink | ((title:pink title:syn::pinkish))) (keyword:elephant | title:elephant))~2) (keyword:\"pink elephant\" | title:\"(pink syn::pinkish) elephant\")", - //(+(keyword:pink | ((title:pink title:syn::pinkish))) +(keyword:elephant | title:elephant)) (keyword:"pink elephant" | title:"(pink syn::pinkish) elephant") + "(+(keyword:pink | ((title:pink title:syn::pinkish))) +(keyword:elephant | title:elephant)) (keyword:\"pink elephant\" | title:\"(pink syn::pinkish) elephant\")", BooleanQuery.class); // when combined, the ADS's default AND operator should be visible +foo @@ -253,8 +245,7 @@ public void testUnfieldedSearch() throws Exception { "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "qf", "title keyword"), - "+((((keyword:pink | ((title:pink title:syn::pinkish))) (keyword:elephant | title:elephant))~2) (keyword:\"pink elephant\" | title:\"(pink syn::pinkish) elephant\")) +title:foo", - //+((+(keyword:pink | ((title:pink title:syn::pinkish))) +(keyword:elephant | title:elephant)) (keyword:"pink elephant" | title:"(pink syn::pinkish) elephant")) +title:foo + "+((+(keyword:pink | ((title:pink title:syn::pinkish))) +(keyword:elephant | title:elephant)) (keyword:\"pink elephant\" | title:\"(pink syn::pinkish) elephant\")) +title:foo", BooleanQuery.class); @@ -267,8 +258,7 @@ public void testUnfieldedSearch() throws Exception { "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.phrase.edismax.synonym.workaround", "true", "qf", "title^0.9 keyword^0.7"), - "(((keyword:r^0.7 | title:r^0.9) (keyword:s^0.7 | title:s^0.9) (keyword:t^0.7 | title:t^0.9))~3) (keyword:\"r s t\"^0.7 | (((title:\"r s t\" (title:syn::r s t title:syn::acr::rst)))^0.9))", - //(+((keyword:r)^0.7 | (title:r)^0.9) +((keyword:s)^0.7 | (title:s)^0.9) +((keyword:t)^0.7 | (title:t)^0.9)) ((keyword:"r s t")^0.7 | ((title:"r s t" (title:syn::r s t title:syn::acr::rst)))^0.9) + "(+((keyword:r)^0.7 | (title:r)^0.9) +((keyword:s)^0.7 | (title:s)^0.9) +((keyword:t)^0.7 | (title:t)^0.9)) ((keyword:\"r s t\")^0.7 | ((title:\"r s t\" (title:syn::r s t title:syn::acr::rst)))^0.9)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", @@ -277,8 +267,7 @@ public void testUnfieldedSearch() throws Exception { "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.phrase.edismax.synonym.workaround", "true", "qf", "title^0.9 keyword^0.7"), - "(((keyword:x^0.7 | title:x^0.9) (keyword:r^0.7 | title:r^0.9) (keyword:s^0.7 | title:s^0.9) (keyword:t^0.7 | title:t^0.9) (keyword:y^0.7 | title:y^0.9))~5) (keyword:\"x r s t y\"^0.7 | (((title:\"x r s t y\" title:\"x (syn::r s t syn::acr::rst) ? ? y\"~2))^0.9))", - //(+((keyword:x)^0.7 | (title:x)^0.9) +((keyword:r)^0.7 | (title:r)^0.9) +((keyword:s)^0.7 | (title:s)^0.9) +((keyword:t)^0.7 | (title:t)^0.9) +((keyword:y)^0.7 | (title:y)^0.9)) ((keyword:"x r s t y")^0.7 | ((title:"? r s t" title:"x (syn::r s t syn::acr::rst) y"~2))^0.9) + "(+((keyword:x)^0.7 | (title:x)^0.9) +((keyword:r)^0.7 | (title:r)^0.9) +((keyword:s)^0.7 | (title:s)^0.9) +((keyword:t)^0.7 | (title:t)^0.9) +((keyword:y)^0.7 | (title:y)^0.9)) ((keyword:\"x r s t y\")^0.7 | ((title:\"x r s t y\" title:\"x (syn::r s t syn::acr::rst) ? ? y\"~2))^0.9)", BooleanQuery.class); @@ -293,13 +282,10 @@ public void testUnfieldedSearch() throws Exception { "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple" ), - "(" + - "(((+(author:accomazzi, author:accomazzi,*) +((((author:alberto, author:alberto,*))^2.3) | title:alberto)))^2.3)" + // author:accomazzi AND (author:alberto OR title:alberto) - " | (" + // OR - "(+title:accomazzi +((((author:alberto, author:alberto,*))^2.3) | title:alberto))" + // title:accomazzi AND (author:alberto OR title:alberto) - ")) " + // OR - "((((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))^2.3) | ((title:\"accomazzi alberto\" title:accomazzialberto)))", - //(((+(author:accomazzi, author:accomazzi,*) +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))^2.3 | ((+title:accomazzi +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))^2.3 | ((title:"accomazzi alberto" title:accomazzialberto))) + "(" + + "((+(author:accomazzi, author:accomazzi,*) +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))^2.3 " + + "| ((+title:accomazzi +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))) " + + "(((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))^2.3 | ((title:\"accomazzi alberto\" title:accomazzialberto)))", BooleanQuery.class); @@ -316,8 +302,7 @@ public void testUnfieldedSearch() throws Exception { "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple" ), - "(((+(author:accomazzi, author:accomazzi,*) +((((author:alberto, author:alberto,*))^2.3) | title:alberto)))~1) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)", - //(((+(author:accomazzi, author:accomazzi,*) +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))~1) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1) + "(((+(author:accomazzi, author:accomazzi,*) +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))~1) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)", BooleanQuery.class); diff --git a/contrib/adsabs/src/test/org/apache/solr/search/TestSolrCitationQuery.java b/contrib/adsabs/src/test/org/apache/solr/search/TestSolrCitationQuery.java index 0ed005e23..b21696a48 100644 --- a/contrib/adsabs/src/test/org/apache/solr/search/TestSolrCitationQuery.java +++ b/contrib/adsabs/src/test/org/apache/solr/search/TestSolrCitationQuery.java @@ -3,10 +3,11 @@ import org.junit.BeforeClass; import monty.solr.util.MontySolrAbstractTestCase; +import monty.solr.util.MontySolrQueryTestCase; import monty.solr.util.MontySolrSetup; -public class TestSolrCitationQuery extends MontySolrAbstractTestCase { +public class TestSolrCitationQuery extends MontySolrQueryTestCase { @BeforeClass @@ -59,9 +60,10 @@ public void testSearch() throws Exception { assertU(adoc("id", "5", "bibcode", "F", "citation", "C" )); - assertU(commit("waitSearcher", "true")); // very weird, it is not waiting + + assertQ(req("q", "*:*"), "//*[@numFound='6']" ); diff --git a/contrib/antlrqueryparser/src/java/org/apache/lucene/queryparser/flexible/aqp/util/AqpDebuggingQueryNodeProcessorPipeline.java b/contrib/antlrqueryparser/src/java/org/apache/lucene/queryparser/flexible/aqp/util/AqpDebuggingQueryNodeProcessorPipeline.java index a515ee1e6..5515be472 100644 --- a/contrib/antlrqueryparser/src/java/org/apache/lucene/queryparser/flexible/aqp/util/AqpDebuggingQueryNodeProcessorPipeline.java +++ b/contrib/antlrqueryparser/src/java/org/apache/lucene/queryparser/flexible/aqp/util/AqpDebuggingQueryNodeProcessorPipeline.java @@ -67,10 +67,10 @@ public QueryNode process(QueryNode queryTree) throws QueryNodeException { System.out.println(newMap); System.out.println(" -----------------"); } + System.out.println(newVal.equals(oldVal) ? (newMap.equals(oldMap) ? "" : newVal) : newVal); System.out.println("--------------------------------------------"); - System.out.println(newVal.equals(oldVal) ? "" : newVal); oldVal = newVal; oldMap = newMap; diff --git a/contrib/examples/adsabs/solr/collection1/conf/schema.xml b/contrib/examples/adsabs/solr/collection1/conf/schema.xml index 0785f3eae..769aed105 100644 --- a/contrib/examples/adsabs/solr/collection1/conf/schema.xml +++ b/contrib/examples/adsabs/solr/collection1/conf/schema.xml @@ -496,7 +496,7 @@ @@ -650,11 +650,11 @@ positionIncrementGap="0"> - + - + @@ -678,28 +678,30 @@ - - - - - - - - - - + + + + + + + + - - - - - - - - - + + + + + + + + + + @@ -947,7 +949,7 @@ required="true" omitNorms="true" omitTermFreqAndPositions="true"/> - @@ -999,7 +1001,7 @@ multiValued="true" omitNorms="true" omitTermFreqAndPositions="true"/> - - - + + multiValued="true" omitNorms="true"/> @@ -1434,6 +1436,8 @@ --> + - - - - - - - - - + + + + + + + + + @@ -1511,11 +1515,11 @@ - - - + + +