Fixed #issues/173 - failing proximity search due to a constant search; Made author facet searches case insensitive
romanchyla committed Dec 1, 2021
1 parent a8b1e85 commit 78efe39
Showing 11 changed files with 117 additions and 86 deletions.
@@ -975,11 +975,11 @@ public Query parse() throws SyntaxError {
parsers.put("edismax_combined_aqp", new AqpSubqueryParserFull() { // will decide whether new aqp() parse is needed
public Query parse(FunctionQParser fp) throws SyntaxError {
final String original = fp.getString();
//System.out.println("edismax fed: " + original);
QParser eqp = fp.subQuery(original, "adismax");
Query q = eqp.getQuery();
//System.out.println("edismax produced: " + q);
return simplify(q);
//System.out.println("edismax fed: " + original);
QParser eqp = fp.subQuery(original, "adismax");
Query q = eqp.getQuery();
//System.out.println("edismax produced: " + q);
return simplify(q);
}
protected Query swimDeep(DisjunctionMaxQuery query) throws SyntaxError {
List<Query> parts = query.getDisjuncts();
@@ -1019,7 +1019,7 @@ private String toBeAnalyzedAgain(TermQuery q) {
}
private Query reAnalyze(String field, String value, Float boost) throws SyntaxError {
QParser fParser = getParser();
System.out.println(field+ ":"+fParser.getString() + "|value=" + value);
//System.out.println(field+ ":"+fParser.getString() + "|value=" + value);
QParser aqp = fParser.subQuery(field+ ":"+fParser.getString(), "aqp");
Query q = aqp.getQuery();
if (boost != null && boost != 1.0f) {
@@ -1069,8 +1069,8 @@ private Query reAnalyze(String field, String value, Float boost) throws SyntaxEr
QParser aqp = fParser.subQuery(field+ ":"+fParser.getString(), "aqp");
Query q = aqp.getQuery();
if (boost != null && boost != 1.0f) {
q = new BoostQuery(q, boost);
}
q = new BoostQuery(q, boost);
}
return q;
}
});
@@ -10,8 +10,6 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.jython.JythonObjectFactory;
import org.jython.monty.interfaces.JythonNameParser;

86 changes: 44 additions & 42 deletions contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java
@@ -431,24 +431,24 @@ public void test() throws Exception {
"aqp.constant_scoring", "author^13 title^12",
"aqp.classic_scoring.modifier", "0.48",
"fl", "recid,score"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']",
"//doc/float[@name='score'][.='13.0']" // 13.00 * (cite_read_boost + aqp.classic_scoring.modifier)
);
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']",
"//doc/float[@name='score'][.='13.0']" // 13.00 * (cite_read_boost + aqp.classic_scoring.modifier)
);
assertQ(req("q", "author:\"Einstein, A\" AND author:\"Anders\"",
"aqp.constant_scoring", "author^13",
"aqp.classic_scoring.modifier", "0.48",
"fl", "recid,score"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']",
"//doc/float[@name='score'][.='26.0']");
"fl", "recid,score"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']",
"//doc/float[@name='score'][.='26.0']");
assertQ(req("q", "author:\"Einstein, A\" OR author:\"Anders\"",
"aqp.constant_scoring", "author^13",
"aqp.classic_scoring.modifier", "0.48",
"fl", "recid,score"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']",
"//doc/float[@name='score'][.='26.0']");
"aqp.constant_scoring", "author^13",
"aqp.classic_scoring.modifier", "0.48",
"fl", "recid,score"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']",
"//doc/float[@name='score'][.='26.0']");

assert h.query(req("q", "author:\"Einstein, A\"", "fl", "author_norm", "indent", "false"))
.contains("<arr name=\"author_norm\">" +
@@ -494,11 +494,13 @@ public void test() throws Exception {


/*
* author facets
* author facets - should be case insensitive
*/

assertQ(req("q", "author_facet_hier:\"0/Anders, J M\""), "//*[@numFound='1']");
assertQ(req("q", "author_facet_hier:\"0/anders, j m\""), "//*[@numFound='1']");
assertQ(req("q", "author_facet_hier:\"1/Anders, J M/Anders, John Michael\""), "//*[@numFound='1']");
assertQ(req("q", "author_facet_hier:\"1/Anders, J M/ANDERS, john michael\""), "//*[@numFound='1']");
assertQ(req("q", "author_facet_hier:\"1/Einstein, A\""), "//*[@numFound='0']");


@@ -656,33 +658,33 @@ public void test() throws Exception {


/*
* orcid, added 30/12/14; they must correspond to the author array
* - updated 13/11/15 - orcid field is now a virtual one; and we have
* orcid_pub,_user,_other
*/
assertQ(req("q", "orcid_pub:1111-2222-3333-4444"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);
assertQ(req("q", "orcid_pub:1111*"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);
assert h.query(req("q", "recid:100", "indent", "false", "fl", "orcid_pub"))
.contains("<arr name=\"orcid_pub\">" +
"<str>1111-2222-3333-4444</str>" +
"<str>-</str>" +
"<str>0000-0002-4110-3511</str></arr>"
);
// this is only present in orcid_other
assertQ(req("q", "orcid:1111-2222-3333-5555"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);
assertQ(req("q", "orcid_other:1111-2222-3333-5555"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);
* orcid, added 30/12/14; they must correspond to the author array
* - updated 13/11/15 - orcid field is now a virtual one; and we have
* orcid_pub,_user,_other
*/
assertQ(req("q", "orcid_pub:1111-2222-3333-4444"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);
assertQ(req("q", "orcid_pub:1111*"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);
assert h.query(req("q", "recid:100", "indent", "false", "fl", "orcid_pub"))
.contains("<arr name=\"orcid_pub\">" +
"<str>1111-2222-3333-4444</str>" +
"<str>-</str>" +
"<str>0000-0002-4110-3511</str></arr>"
);
// this is only present in orcid_other
assertQ(req("q", "orcid:1111-2222-3333-5555"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);
assertQ(req("q", "orcid_other:1111-2222-3333-5555"),
"//doc/int[@name='recid'][.='100']",
"//*[@numFound='1']"
);



@@ -490,9 +490,9 @@ public void testBasics() throws Exception{
assertQueryEquals("-m:(a b NEAR c d AND e)", null, "+m:a +spanNear([m:b, m:c], 5, true) +(+m:d +m:e)"); //? should we allow - at the beginning?

assertQueryEquals("m:(a b NEAR2 c)", null, "+m:a +spanNear([m:b, m:c], 2, true)");
assertQueryEquals("m:(a b NEAR3 c d AND e)", null, "+m:a +spanNear([m:b, m:c], 3, true) +(+m:d +m:e)");
assertQueryEquals("-m:(a b NEAR4 c d AND e)", null, "+m:a +spanNear([m:b, m:c], 4, true) +(+m:d +m:e)");
assertQueryNodeException("m:(a b NEAR7 c)"); // by default, only range 1-5 is allowed (in configuration)
assertQueryEquals("m:(a b NEAR3 c d AND e)", null, "+m:a +spanNear([m:b, m:c], 3, true) +(+m:d +m:e)");
assertQueryEquals("-m:(a b NEAR4 c d AND e)", null, "+m:a +spanNear([m:b, m:c], 4, true) +(+m:d +m:e)");
assertQueryNodeException("m:(a b NEAR7 c)"); // by default, only range 1-5 is allowed (in configuration)



@@ -32,7 +32,7 @@
@SuppressWarnings({"rawtypes", "unchecked"})
public class TestCitationsSearch extends MontySolrAbstractTestCase {

private boolean debug = true;
private boolean debug = false;
private SolrQueryRequest tempReq;

@BeforeClass
@@ -267,7 +267,8 @@ public void test() throws Exception {
"defType", "aqp"),
"indexstamp:[1349049600000 TO 1638316800000]",
null);
assertQ(req("q", "indexstamp:[\"2012-10-01T00:00:00.000\" TO \"2021-12-01T00:00:00.000Z\"]", "indent", "true"),

assertQ(req("q", "indexstamp:[\"2012-10-01T00:00:00.000\" TO \"2121-12-01T00:00:00.000Z\"]", "indent", "true"),
"//*[@numFound='141']"
);

@@ -138,7 +138,22 @@ public void tearDown() throws Exception {


public void testUnfieldedSearch() throws Exception {


// NEAR on unfielded search -- will generate error when results have mixed fields
assertQueryParseException(req("defType", "aqp",
"q", "foo NEAR2 bar",
"qf", "bibcode^5 title^10",
"aqp.unfielded.tokens.strategy", "disjuncts",
"aqp.unfielded.tokens.new.type", "simple",
"aqp.constant_scoring", "bibcode^6"));

assertQueryParseException(req("defType", "aqp",
"q", "foo NEAR2 bar NEAR2 title:baz",
"qf", "bibcode^5 title^10",
"aqp.unfielded.tokens.strategy", "disjuncts",
"aqp.unfielded.tokens.new.type", "simple",
"aqp.constant_scoring", "bibcode^6"));

// when we generate the phrase search, ignore acronyms
assertQueryEquals(req("defType", "aqp",
"q", "FOO BAR BAZ",
@@ -1063,10 +1078,10 @@ public void testSearch() throws Exception {

// #375
assertQueryEquals(req("defType", "aqp", "q", "author:\"Civano, F\" -author_facet_hier:(\"Civano, Fa\" OR \"Civano, Da\")"),
"+(author:civano, f | author:civano, f* | author:civano,) -(author_facet_hier:Civano, Fa author_facet_hier:Civano, Da)",
"+(author:civano, f | author:civano, f* | author:civano,) -(author_facet_hier:civano, fa author_facet_hier:civano, da)",
BooleanQuery.class);
assertQueryEquals(req("defType", "aqp", "q", "author:\"Civano, F\" +author_facet_hier:(\"Civano, Fa\" OR \"Civano, Da\")"),
"+(author:civano, f | author:civano, f* | author:civano,) +(author_facet_hier:Civano, Fa author_facet_hier:Civano, Da)",
"+(author:civano, f | author:civano, f* | author:civano,) +(author_facet_hier:civano, fa author_facet_hier:civano, da)",
BooleanQuery.class);
assertQueryEquals(req("defType", "aqp", "q", "title:xxx -title:(foo OR bar)"),
"+title:xxx -(title:foo title:bar)",
@@ -16,6 +16,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
@@ -53,6 +54,8 @@ public SpanQuery getSpanQuery(SpanConverterContainer container)
return wrapBoost((SpanQuery) q, boost);
} else if (q instanceof TermQuery) {
return wrapBoost(new SpanTermQuery(((TermQuery) q).getTerm()), boost);
} else if (q instanceof ConstantScoreQuery) {
return getSpanQuery(new SpanConverterContainer(((ConstantScoreQuery) q).getQuery(), 1, true, 0.0f));
} else if (q instanceof WildcardQuery) {
return wrapBoost(new SpanMultiTermQueryWrapper<WildcardQuery>((WildcardQuery) q), boost);
} else if (q instanceof PrefixQuery) {
@@ -62,16 +65,16 @@ public SpanQuery getSpanQuery(SpanConverterContainer container)
} else if (q instanceof BooleanQuery) {
return wrapBoost(convertBooleanToSpan(container), boost);
} else if (q instanceof RegexpQuery) {
return wrapBoost(new SpanMultiTermQueryWrapper<RegexpQuery>((RegexpQuery) q), boost);
return wrapBoost(new SpanMultiTermQueryWrapper<RegexpQuery>((RegexpQuery) q), boost);
} else if (q instanceof DisjunctionMaxQuery) {
return wrapBoost(convertDisjunctionQuery(container), boost);
return wrapBoost(convertDisjunctionQuery(container), boost);
} else if (q instanceof BoostQuery) {
return wrapBoost(getSpanQuery(new SpanConverterContainer(((BoostQuery) q).getQuery(), 1, true)),
return wrapBoost(getSpanQuery(new SpanConverterContainer(((BoostQuery) q).getQuery(), 1, true)),
((BoostQuery) q).getBoost());
} else if (q instanceof MatchNoDocsQuery) {
return new EmptySpanQuery(container.query);
return new EmptySpanQuery(container.query);
} else if (q instanceof SynonymQuery) {
return wrapBoost(convertSynonymToSpan(container), boost);
return wrapBoost(convertSynonymToSpan(container), boost);
} else {

SpanQuery wrapped = wrapNonConvertible(container);
@@ -113,6 +116,7 @@ private SpanQuery wrapBoost(SpanQuery q, float boost) {
return new SpanBoostQuery(q, boost);
return q;
}

public SpanQuery wrapNonConvertible(SpanConverterContainer container) {
if (wrapNonConvertible) {
return doWrapping(container);
@@ -150,6 +154,7 @@ protected SpanQuery convertBooleanToSpan(SpanConverterContainer container)

List<BooleanClause> clauses = q.clauses();
SpanQuery[] spanClauses = new SpanQuery[clauses.size()];
String field = null;
Occur o = null;
int i = 0;
for (BooleanClause c : clauses) {
@@ -164,21 +169,30 @@ protected SpanQuery convertBooleanToSpan(SpanConverterContainer container)
Query sq = c.getQuery();
SpanQuery result = getSpanQuery(new SpanConverterContainer(sq, 1, true));
spanClauses[i] = result;

i++;
}

if (o.equals(Occur.MUST)) {
return new SpanNearQuery(spanClauses, container.slop,
container.inOrder);
} else if (o.equals(Occur.SHOULD)) {
return new SpanOrQuery(spanClauses);
} else if (o.equals(Occur.MUST_NOT)) {
SpanQuery[] exclude = new SpanQuery[spanClauses.length - 1];
for (int j = 1; j < spanClauses.length; j++) {
exclude[j - 1] = spanClauses[j];
}
return new SpanNotQuery(spanClauses[0], new SpanOrQuery(exclude));

try {
if (o.equals(Occur.MUST)) {
return new SpanNearQuery(spanClauses, container.slop,
container.inOrder);
} else if (o.equals(Occur.SHOULD)) {
return new SpanOrQuery(spanClauses);
} else if (o.equals(Occur.MUST_NOT)) {
SpanQuery[] exclude = new SpanQuery[spanClauses.length - 1];
for (int j = 1; j < spanClauses.length; j++) {
exclude[j - 1] = spanClauses[j];
}
return new SpanNotQuery(spanClauses[0], new SpanOrQuery(exclude));
}
}
catch (IllegalArgumentException exc) {
throw new QueryNodeException(new MessageImpl(
QueryParserMessages.LUCENE_QUERY_CONVERSION_ERROR, q.toString(),
"Proximity searches must be executed against the same field; please specify the field explicitly"));
}


throw new QueryNodeException(new MessageImpl(
QueryParserMessages.LUCENE_QUERY_CONVERSION_ERROR, q.toString(),
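Editorial note (not part of the commit): the proximity failure in issue 173 stems from Lucene's span machinery, which refuses to combine clauses that sit on different fields. The hunks above unwrap ConstantScoreQuery before span conversion and catch the resulting IllegalArgumentException, rethrowing it as a QueryNodeException with a readable message. Below is a minimal sketch of the underlying Lucene behaviour, assuming the Lucene 7/8 package layout used elsewhere in this diff; the class and field names are illustrative, not from the repository.

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class MixedFieldSpanSketch {
  public static void main(String[] args) {
    // Two NEAR clauses that landed on different fields, e.g. an unfielded
    // "foo NEAR2 bar" expanded against qf "bibcode^5 title^10".
    SpanQuery[] clauses = new SpanQuery[] {
        new SpanTermQuery(new Term("title", "foo")),
        new SpanTermQuery(new Term("bibcode", "bar"))
    };
    try {
      // SpanNearQuery requires all clauses to share one field; mixing fields
      // throws IllegalArgumentException. The new try/catch in SpanConverter
      // surfaces this as a QueryNodeException (a parse error) instead of an
      // unhandled server-side exception.
      new SpanNearQuery(clauses, 2, true);
    } catch (IllegalArgumentException expected) {
      System.out.println("rejected: " + expected.getMessage());
    }
  }
}
```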
@@ -1246,18 +1246,18 @@
type="normalized_text_ascii_notokenization" indexed="true"
stored="true" multiValued="true" omitNorms="true" />

<field name="author_facet" type="string" indexed="true"
<field name="author_facet" type="normalized_string" indexed="true"
stored="${storeAll:false}" multiValued="true" omitNorms="true"
omitTermFreqAndPositions="true" />

<field name="author_facet_hier" type="string" indexed="true"
<field name="author_facet_hier" type="normalized_string" indexed="true"
stored="${storeAll:false}" multiValued="true" omitNorms="true"
omitTermFreqAndPositions="true" docValues="true" />

<field name="author_count" type="int" indexed="true"
stored="true" omitNorms="true" omitTermFreqAndPositions="true" />

<field name="first_author_facet_hier" type="string"
<field name="first_author_facet_hier" type="normalized_string"
indexed="true" stored="${storeAll:false}" multiValued="true"
omitNorms="true" omitTermFreqAndPositions="true" docValues="true" />

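Editorial note (not part of the commit): switching the facet fields from string to normalized_string is what lets the lowercase facet queries in TestAdsAllFields above match stored values such as "0/Anders, J M". The normalized_string fieldType itself is not shown in this diff; the sketch below assumes it amounts to a keyword tokenizer plus a lowercase filter, the usual recipe for case-insensitive exact matching in Lucene/Solr.

```java
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class NormalizedStringSketch {
  public static void main(String[] args) throws IOException {
    // Assumed analysis chain for "normalized_string": keep the whole value
    // as one token, lowercased, so matching ignores case.
    Analyzer normalized = CustomAnalyzer.builder()
        .withTokenizer("keyword")
        .addTokenFilter("lowercase")
        .build();

    for (String input : new String[] {"0/Anders, J M", "0/anders, j m"}) {
      try (TokenStream ts = normalized.tokenStream("author_facet_hier", input)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          // Both inputs yield the same indexed term: "0/anders, j m"
          System.out.println(input + " -> " + term.toString());
        }
        ts.end();
      }
    }
  }
}
```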
@@ -329,6 +329,7 @@
Modified qf:
old: first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2
new: first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8
-->
<str name="qf">first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8</str>

@@ -361,7 +362,7 @@
<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">10</int>
<str name="qf">first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2</str>
<str name="qf">first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8</str>
<str name="defType">aqp</str>
<str name="aqp.unfielded.tokens.strategy">disjuncts</str>
<str name="aqp.unfielded.tokens.new.type">simple</str>
@@ -418,7 +419,7 @@
Make sure these defaults are set also in other public
query handlers (e.g. tvrh - used by the word cloud)
-->
<str name="qf">first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2</str>
<str name="qf">first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8</str>
<str name="defType">aqp</str>
<str name="aqp.unfielded.tokens.strategy">disjuncts</str>
<str name="aqp.unfielded.tokens.new.type">simple</str>
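Editorial note (not part of the commit): the qf changes above lower and rebalance the default field boosts in several request handlers; any client that does not send its own qf picks these up. A hedged SolrJ sketch of overriding them per request follows; the query text is a placeholder and the field list simply mirrors the new defaults.

```java
import org.apache.solr.client.solrj.SolrQuery;

public class QfOverrideSketch {
  public static void main(String[] args) {
    // Per-request override of the handler defaults set in solrconfig.xml.
    SolrQuery q = new SolrQuery("accretion disk");
    q.set("defType", "aqp"); // the ADS query parser configured above
    q.set("qf", "first_author^0.9 author^0.85 year^0.8 title^0.8 "
        + "abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8");
    q.set("fl", "recid,score");
    System.out.println(q); // prints the encoded parameter string
  }
}
```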
@@ -57,7 +57,7 @@ public void testUpdates() throws Exception {

assertU(commit("waitSearcher", "true"));

// the first search is not auto-warmed (the code seems
// the first search is not auto-warmed (the code seems
// that seems like a SOLR bug (I checked the SolrIndexSearcher
// code and it is right; so i created own function for
// warming warm_cache()
Expand All @@ -71,9 +71,9 @@ public void testUpdates() throws Exception {
"q","bibcode:b*",
"fq","{!bitset compression=none}");
List<ContentStream> streams = new ArrayList<ContentStream>(1);
ContentStreamBase cs = new ContentStreamBase.StringStream("bibcode\nb2\nx5");
cs.setContentType("big-query/csv");
streams.add(cs);
ContentStreamBase cs = new ContentStreamBase.StringStream("bibcode\nb2\nx5");
cs.setContentType("big-query/csv");
streams.add(cs);
req.setContentStreams(streams);

assertQ(req
