From 6a7e3852b87c35f5782286f2858eace37da0cf5a Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Fri, 16 Apr 2021 17:32:50 -0400 Subject: [PATCH] Ignore case when we are expanding search into a phrase search --- .../aqp/processors/AqpDEFOPUnfieldedTokens.java | 15 ++++++++++++++- .../org/apache/solr/search/AqpAdsabsQParser.java | 1 + .../solr/search/TestAqpAdsabsSolrSearch.java | 11 ++++++++++- contrib/examples/adsabs/bin/solr.in.sh | 2 +- .../server/solr/collection1/conf/solrconfig.xml | 6 +++--- 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpDEFOPUnfieldedTokens.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpDEFOPUnfieldedTokens.java index 6e6d5fb99..fff65efc6 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpDEFOPUnfieldedTokens.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpDEFOPUnfieldedTokens.java @@ -360,8 +360,14 @@ private void removeField(QueryNode n) { } + private QueryNode createReplacementNode(List newGroup, String tt) throws CloneNotSupportedException, QueryNodeException { String newValue = getConcatenatedValue(newGroup); + + if (!newValue.equals(newValue.toLowerCase()) && newValue.equals(newValue.toUpperCase()) && newGroup.size() > getMaxAllowedAcronym()) { + newValue = newValue.toLowerCase(); + } + String field = ""; if (newGroup.get(0).getField() != null && newGroup.get(0).getField().length() > 0) field = newGroup.get(0).getField() + ":"; @@ -411,7 +417,14 @@ private QueryNode createReplacementNode(List newGroup, String tt) thro - /* + private int getMaxAllowedAcronym() { + Object obj = _getConfigVal("aqp.unfielded.max.uppercase.tokens"); + if (obj == null) + return 1024; //effectively unlimited + return Integer.parseInt(obj.toString()); + } + + /* * Ufff....this is necessary, because the QueryNodeImpl is NOT * resetting the parent. 
sooooo stupid.... */ diff --git a/contrib/adsabs/src/java/org/apache/solr/search/AqpAdsabsQParser.java b/contrib/adsabs/src/java/org/apache/solr/search/AqpAdsabsQParser.java index f026b5f06..3eabe9834 100644 --- a/contrib/adsabs/src/java/org/apache/solr/search/AqpAdsabsQParser.java +++ b/contrib/adsabs/src/java/org/apache/solr/search/AqpAdsabsQParser.java @@ -349,6 +349,7 @@ public Query parse() throws SyntaxError { // QueryConfigHandler config = qParser.getQueryConfigHandler(); // return qParser.parse(getString() + config.get(AqpAdsabsQueryConfigHandler.ConfigurationKeys.DUMMY_VALUE), null); //} + return qParser.parse(getString(), null); } catch (QueryNodeException e) { throw new SyntaxError(e); diff --git a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java index 916692ffd..639b8547e 100644 --- a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java +++ b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java @@ -137,7 +137,16 @@ public void tearDown() throws Exception { public void testUnfieldedSearch() throws Exception { - + + // when we generate the phrase search, ignore acronyms + assertQueryEquals(req("defType", "aqp", + "q", "FOO BAR BAZ", + "aqp.unfielded.tokens.strategy", "disjuncts", + "aqp.unfielded.tokens.new.type", "simple", + "aqp.unfielded.max.uppercase.tokens", "2", + "qf", "title"), + "(((title:foo) (title:bar) (title:baz)) | title:\"foo bar baz\")", + DisjunctionMaxQuery.class); // have constant scoring work even for unfielded searches assertQueryEquals(req("defType", "aqp", "q", "foo bar", diff --git a/contrib/examples/adsabs/bin/solr.in.sh b/contrib/examples/adsabs/bin/solr.in.sh index 37e9700e1..53f6a6fbf 100644 --- a/contrib/examples/adsabs/bin/solr.in.sh +++ b/contrib/examples/adsabs/bin/solr.in.sh @@ -73,7 +73,7 @@ SOLR_OPTS="${SOLR_OPTS} -Xss256k \ 
-Dmontysolr.load.citation_cache=${MONTYSOLR_LOAD_CITATION_CACHE:-true} \ -Dmontysolr.autoCommit.maxDocs=${MONTYSOLR_MAX_DOCS:-40000} \ -Dmontysolr.autoCommit.maxTime=${MONTYSOLR_MAX_TIME:-1800000} \ --Dmontysolr.autoSoftCommit.maxTime=${MONTYSOLR_SOFT_MAX_TIME:-900000} \ +-Dmontysolr.autoSoftCommit.maxTime=${MONTYSOLR_SOFT_MAX_TIME:-1} \ -Dmontysolr.coordinate=${MONTYSOLR_COORDINATE:-true} \ -Dmontysolr.stagger.maxDelay=${MONTYSOLR_STAGGER_MAX_DELAY:-2700} \ -Dmontysolr.stagger.numInstances=${MONTYSOLR_STAGGER_NUM_INSTANCES:-3} \ diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml index fdfdedeaa..89cd26c19 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml @@ -342,7 +342,7 @@ first_author^14 author^13 year^10 bibstem^10 SYNONYM aff_raw,aff_id,institution - + 2 AND @@ -372,7 +372,7 @@ first_author^14 author^13 year^10 bibstem^10 SYNONYM aff_raw,aff_id,institution - + 2 AND unfielded_search @@ -426,7 +426,7 @@ edismax_combined_aqp true true - + 2 AND unfielded_search