From e6a3b38015b735fc92fdbca6d4398f39bab33620 Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Fri, 18 Sep 2020 18:59:20 -0400 Subject: [PATCH] Updated list of default fields --- .../aqp/builders/AqpAdsabsSubQueryProvider.java | 2 ++ .../solr/analysis/DiagnoseFilterFactory.java | 9 +++++++-- .../queryparser/flexible/aqp/TestAqpAdsabs.java | 2 ++ .../search/TestSecondOrderQueryTypesAds.java | 10 ++++++---- .../solr/analysis/TestAdsabsTypeDateString.java | 1 + .../analysis/TestAdsabsTypeFulltextParsing.java | 16 +++++++++++++--- .../author/TestAdsabsTypeAuthorParsing.java | 2 ++ .../solr/search/TestAqpAdsabsSolrSearch.java | 15 +++++++++++++++ .../server/solr/collection1/conf/schema.xml | 4 ++-- .../server/solr/collection1/conf/solrconfig.xml | 2 +- 10 files changed, 51 insertions(+), 12 deletions(-) diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java index ff7a9d7ce..696a59f2e 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java @@ -945,8 +945,10 @@ public Query parse() throws SyntaxError { parsers.put("edismax_combined_aqp", new AqpSubqueryParserFull() { // will decide whether new aqp() parse is needed public Query parse(FunctionQParser fp) throws SyntaxError { final String original = fp.getString(); + System.out.println("edismax fed: " + original); QParser eqp = fp.subQuery(original, "adismax"); Query q = eqp.getQuery(); + System.out.println("edismax produced: " + q); return simplify(q); } protected Query swimDeep(DisjunctionMaxQuery query) throws SyntaxError { diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/DiagnoseFilterFactory.java b/contrib/adsabs/src/java/org/apache/solr/analysis/DiagnoseFilterFactory.java index 1062c6d72..8dffb1428 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/DiagnoseFilterFactory.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/DiagnoseFilterFactory.java @@ -8,6 +8,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.util.TokenFilterFactory; @@ -49,6 +50,7 @@ final class DiagnoseFilter extends TokenFilter { private final CharTermAttribute termAtt; private final OffsetAttribute offsetAtt; private final String msg; + private final PositionLengthAttribute posLen; public DiagnoseFilter(TokenStream input, String msg) { super(input); @@ -57,6 +59,7 @@ public DiagnoseFilter(TokenStream input, String msg) { typeAtt = addAttribute(TypeAttribute.class); termAtt = addAttribute(CharTermAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); + posLen = addAttribute(PositionLengthAttribute.class); } /* @@ -70,8 +73,10 @@ public boolean incrementToken() throws IOException { if (!input.incrementToken()) return false; System.out.println("stage:" + (msg != null ? msg : "null") + - " term=" + termAtt.toString() + " pos=" - + posIncrAtt.getPositionIncrement() + " type=" + typeAtt.type() + " term=" + termAtt.toString() + " posInc=" + + posIncrAtt.getPositionIncrement() + + " posLen=" + posLen.getPositionLength() + + " type=" + typeAtt.type() + " offsetStart=" + offsetAtt.startOffset() + " offsetEnd=" + offsetAtt.endOffset()); diff --git a/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java b/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java index aa3e91599..fa686f94e 100644 --- a/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java +++ b/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java @@ -359,8 +359,10 @@ public void testWildCards() throws Exception { } public void testEscaped() throws Exception { + setDebug(true); WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(); assertQueryEquals("\\(1\\+1\\)\\:2", wsa, "(1+1):2", TermQuery.class); + assertQueryEquals("th*is", wsa, "th*is", WildcardQuery.class); assertQueryEquals("th\\*is", wsa, "th*is", TermQuery.class); assertQueryEquals("a\\\\\\\\+b", wsa, "a\\\\+b", TermQuery.class); assertQueryEquals("a\\u0062c", wsa, "abc", TermQuery.class); diff --git a/contrib/adsabs/src/test/org/apache/lucene/search/TestSecondOrderQueryTypesAds.java b/contrib/adsabs/src/test/org/apache/lucene/search/TestSecondOrderQueryTypesAds.java index a5ed72917..a430c6178 100644 --- a/contrib/adsabs/src/test/org/apache/lucene/search/TestSecondOrderQueryTypesAds.java +++ b/contrib/adsabs/src/test/org/apache/lucene/search/TestSecondOrderQueryTypesAds.java @@ -140,7 +140,9 @@ public void testADSOperators() throws Exception { LuceneCacheWrapper boostTwo = LuceneCacheWrapper.getFloatCache( "boost_2", UninvertingReader.Type.SORTED_SET_FLOAT, tempReq.getSearcher().getSlowAtomicReader()); - + System.out.println(Float.toString(boostConstant.getFloat(0))); + System.out.println(Float.toString(boostOne.getFloat(0))); + System.out.println(Float.toString(boostTwo.getFloat(0))); // expecting 4 results with various order, simply based on the boost factor testQ2("id:1", new SecondOrderCollectorOperatorExpertsCiting(referencesWrapper, boostConstant), @@ -366,11 +368,11 @@ public int compare(Object o1, Object o2) { arrExpected[i] = u; i++; } - //System.out.println("expected:" + Arrays.toString(arrExpected)); + System.out.println("expected:" + Arrays.toString(arrExpected)); ; - //System.out.println("results:" + Arrays.toString(resultIds)); + System.out.println("results:" + Arrays.toString(resultIds)); ; - //System.out.println(results); + System.out.println(results); assertArrayEquals(arrExpected, resultIds); } finally { r.close(); diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java index 10bed1ee5..1ecc3ead0 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeDateString.java @@ -104,6 +104,7 @@ public void test() throws Exception { "date:[75317598000 TO 75317598000]", LegacyNumericRangeQuery.class); + setDebug(true); // 2012-01-01T00:00:00 - 2012-02-01T00:00:00 (excl) assertQueryEquals(req("q", "pubdate:2012-01", "defType", "aqp"), "date:[1325376000000 TO 1328054400000}", diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java index ebf13c634..5027919b5 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java @@ -177,7 +177,8 @@ public static String getSchemaFile() { "radio, radios, nonradio, radioed, radiobereich, adio, miniradio, radido => radio\n" + "pulsars, pulsar, psr, pulser, psrs, pulsare, pulsares, pulars, pulsary, puslsar, interpulsars, pusar, nonpulsar, psro, rontgenpulsare, pulsarlike, pulsarpsr => pulsars\n" + "millisecond, milliseconds, submillisecond, millisec, milliseconde, millesecond, millisekunden, milliseond, millisecnd => millisecond\n" + - "fermi, fermilab => fermi\n" + "fermi, fermilab => fermi\n", + "space => universe\n" }); File multiTokenSynonymsFile = createTempFile(new String[]{ @@ -195,9 +196,10 @@ public static String getSchemaFile() { // this is from ads synonyms "ADS,aitken\0double\0stars\n" + "ADS,astrophysics\0data\0system\n" + - "ADS,anti\0de\0sitter\0space,antidesitter\0spacetime\n" + + "ADS,anti\0de\0sitter\0space,antidesitter\0spacetime,antidesitter\0space\n" + "ADS,astrophysics\0data\0system\n" + - "VLBA,very\0long\0baseline\0array\n", + "VLBA,very\0long\0baseline\0array\n" + + "space,universe" // and this is how it would be if it was one line //"ADS,aitken\0double\0stars,astrophysics\0data\0system,anti\0de\0sitter\0space,antidesitter\0spacetime\n" @@ -277,6 +279,14 @@ public void setUp() throws Exception { assertU(adoc("id", "602", "bibcode", "xxxxxxxxxx602", "title", "Very Long Baseline Array (VLBA) is a ten-antennaaaah")); + assertU(adoc("id", "603", "bibcode", "xxxxxxxxxx603", + "title", "THE HUBBLE constant: a summary of the hubble space telescope program")); + assertU(adoc("id", "604", "bibcode", "xxxxxxxxxx604", + "title", "MIT and antidesitter space-time")); + assertU(adoc("id", "605", "bibcode", "xxxxxxxxxx604", + "title", "MIT and anti de sitter space-time")); + assertU(adoc("id", "606", "bibcode", "xxxxxxxxxx604", + "title", "Massachusets Institute of Technology and antidesitter space-time")); assertU(commit()); } diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java index 4dbd8a1e9..d9ee9628f 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAdsabsTypeAuthorParsing.java @@ -386,6 +386,7 @@ public void testAuthorParsingUseCases() throws Exception { assertQueryEquals(req("q", "author:\"^acco*\""), "spanPosRange(SpanMultiTermQueryWrapper(author:acco*), 0, 1)", SpanPositionRangeQuery.class); assertQueryEquals(req("q", "author:acco*"), "author:acco*", WildcardQuery.class); + setDebug(true); assertQueryEquals(req("q", "author:Adamč*"), "author:adamč*", WildcardQuery.class); testAuthorQuery("Adamč*", @@ -554,6 +555,7 @@ public void testAuthorParsingUseCases() throws Exception { // 'xxx' will be removed from the author (at least in the modified version) + setDebug(true); assertQueryEquals(req("defType", "aqp", "q", "author:\"accomazzi, alberto, xxx.\""), "author:accomazzi, alberto, xxx | author:accomazzi, alberto, xxx * | author:accomazzi, alberto | author:accomazzi, alberto * | author:accomazzi, a xxx | author:accomazzi, a xxx * | author:accomazzi, alberto, x | author:accomazzi, alberto, x * | author:accomazzi, a x | author:accomazzi, a x * | author:accomazzi, alberto, | author:accomazzi, alberto, * | author:accomazzi, a | author:accomazzi, a * | author:accomazzi,", DisjunctionMaxQuery.class); diff --git a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java index a35e55322..8ccbeabd9 100644 --- a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java +++ b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java @@ -187,6 +187,21 @@ public void testUnfieldedSearch() throws Exception { "qf", "author^2.3 title abstract^0.4"), "((Synonym(abstract:acr::muller abstract:acr::müller))^0.4 | ((author:müller, | author:müller,* | author:mueller, | author:mueller,* | author:muller, | author:muller,*))^2.3 | Synonym(title:acr::muller title:acr::müller))", DisjunctionMaxQuery.class); + setDebug(true); + assertQueryEquals(req("defType", "edismax", + "aqp.unfielded.tokens.strategy", "multiply", + "aqp.unfielded.tokens.new.type", "simple", + "qf", "title keyword", + "q", "property:refereed r s t"), + "", + BooleanQuery.class); + assertQueryEquals(req("defType", "aqp", + "aqp.unfielded.tokens.strategy", "multiply", + "aqp.unfielded.tokens.new.type", "simple", + "qf", "title keyword", + "q", "property:refereed r s t"), + "", + BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "\"forman, c\"", "qf", "author^2.3 title abstract^0.4"), "((abstract:\"forman c\")^0.4 | ((author:forman, c | author:forman, christine | author:jones, c | author:jones, christine | author:forman, c* | author:forman,))^2.3 | title:\"forman c\")", diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml index 714e2b2a1..94dabc861 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/schema.xml @@ -478,7 +478,7 @@ + incomingType="SYNONYM" addPrefix="syn::" posIncrement="0" range="0,2147483647" /> + diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml index 1a23e126a..fbfb13354 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml @@ -307,7 +307,7 @@ json 10 - id recid author title abstract page pub + title abstract author bibcode identifier volume page bibstem doctype pubdate pub pub_raw citation_count read_count esources