Skip to content

Commit

Permalink
Latest changes; the *wildcard handling could have broken some code
Browse files Browse the repository at this point in the history
  • Loading branch information
romanchyla committed Sep 30, 2016
1 parent 10f5629 commit 6dc759f
Show file tree
Hide file tree
Showing 18 changed files with 196 additions and 151 deletions.
2 changes: 1 addition & 1 deletion .classpath
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-highlighter-6.1.0-SNAPSHOT-javadoc.jar"/>
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-highlighter-6.1.0-SNAPSHOT.jar"/>
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-join-6.1.0-SNAPSHOT-javadoc.jar"/>
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-join-6.1.0-SNAPSHOT.jar"/>
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-join-6.1.0-SNAPSHOT.jar" sourcepath="/apache-solr-61"/>
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-memory-6.1.0-SNAPSHOT-javadoc.jar"/>
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-memory-6.1.0-SNAPSHOT.jar"/>
<classpathentry kind="lib" path="build/solrjars-extracted/lucene/lucene-misc-6.1.0-SNAPSHOT-javadoc.jar"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,41 +64,59 @@ protected QueryNode postProcessNode(QueryNode node)
String value =null;
String[] tokens;
if (node instanceof WildcardQueryNode) {
field = ((WildcardQueryNode) node).getFieldAsString() + "_wildcard";
field = ((WildcardQueryNode) node).getFieldAsString();
value = ((WildcardQueryNode) node).getTextAsString();
if (hasAnalyzer(field)) {
tokens = analyze(field, value);
if (!tokens[0].equals(value)) {
return new WildcardQueryNode(((WildcardQueryNode) node).getFieldAsString(),
tokens[0], ((WildcardQueryNode)node).getBegin(),
((WildcardQueryNode)node).getEnd());
for (String suffix: new String[]{"_wildcard", ""}) {
if (hasAnalyzer(field + suffix)) {
tokens = analyze(field + suffix, "foo*bar");

if (tokens.length > 1 || value.indexOf('*') == 0 || value.indexOf('?') == 0)
return node; // break, let the analyzer decide the fate

if (!tokens[0].equals(value)) {
return new WildcardQueryNode(field,
tokens[0], ((WildcardQueryNode)node).getBegin(),
((WildcardQueryNode)node).getEnd());
}
}
}
}
}
else if(node instanceof FuzzyQueryNode) {
field = ((FuzzyQueryNode) node).getFieldAsString() + "_fuzzy";
field = ((FuzzyQueryNode) node).getFieldAsString();
value = ((FuzzyQueryNode) node).getTextAsString();
if (hasAnalyzer(field)) {
tokens = analyze(field, value);
if (!tokens[0].equals(value)) {
return new FuzzyQueryNode(field = ((FuzzyQueryNode) node).getFieldAsString(),
tokens[0],
((FuzzyQueryNode)node).getSimilarity(),
((FuzzyQueryNode)node).getBegin(),
((FuzzyQueryNode)node).getEnd());
for (String suffix: new String[]{"_fuzzy", ""}) {
if (hasAnalyzer(field+suffix)) {
tokens = analyze(field + suffix, value);

if (tokens.length > 1)
return node; // break, let the analyzer decide the fate

if (!tokens[0].equals(value)) {
return new FuzzyQueryNode(field,
tokens[0],
((FuzzyQueryNode)node).getSimilarity(),
((FuzzyQueryNode)node).getBegin(),
((FuzzyQueryNode)node).getEnd());
}
}
}
}
}
else if(node instanceof AqpAdsabsRegexQueryNode) {
field = ((FieldQueryNode) node).getFieldAsString() + "_regex";
field = ((FieldQueryNode) node).getFieldAsString();
value = ((FieldQueryNode) node).getText().toString();
if (hasAnalyzer(field)) {
tokens = analyze(field, value);
if (!tokens[0].equals(value)) {
return new AqpAdsabsRegexQueryNode(((FieldQueryNode) node).getFieldAsString(),
tokens[0], ((FieldQueryNode)node).getBegin(),
((FieldQueryNode)node).getEnd());
}
for (String suffix: new String[]{"_regex", ""}) {
if (hasAnalyzer(field + suffix)) {
tokens = analyze(field + suffix, value);

if (tokens.length > 1)
return node; // break, let the analyzer decide the fate

if (!tokens[0].equals(value)) {
return new AqpAdsabsRegexQueryNode(field,
tokens[0], ((FieldQueryNode)node).getBegin(),
((FieldQueryNode)node).getEnd());
}
}
}
}

Expand Down Expand Up @@ -154,12 +172,6 @@ private String[] analyze(CharSequence field, String value) throws QueryNodeExcep
// pass
}


// for now let's do this
if (out.size() > 1) {
throw new QueryNodeException(new MessageImpl("We are not expecting multiple tokens from analyzing: " + field + ":" + value));
}

return out.toArray(new String[out.size()]);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public AqpAdsabsFieldNodePreAnalysisProcessor() {
super();
dmp = new DateMathParser(DateMathParser.UTC);

sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);
sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
}

Expand Down Expand Up @@ -190,7 +190,7 @@ private String moveDate(
String...moveBy) throws QueryNodeException {
String[] dateParts = originalDate.split("-|/");
Date dateWithOffset = (Date) parsedDate.clone();

dmp.setNow(parsedDate);
try {
if (dateParts.length == 1) { // just a year
assert moveBy.length >= 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
Expand All @@ -19,18 +20,20 @@ public final class DateNormalizerTokenFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private DateMathParser dmp;
private String offset;
private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.ROOT);
private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);


public DateNormalizerTokenFilter(TokenStream input, String incomingFormat, String offset) {
super(input);
sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
this.offset = offset;
String[] parts = incomingFormat.split("\\|");
format = new SimpleDateFormat[parts.length];
for (int i=0;i<parts.length;i++) {
format[i] = new SimpleDateFormat(parts[i], Locale.ROOT);
format[i].setTimeZone(DateMathParser.UTC);
format[i] = new SimpleDateFormat(parts[i], Locale.US);
format[i].setTimeZone(TimeZone.getTimeZone("UTC"));
}
dmp = new DateMathParser(DateMathParser.UTC);
dmp = new DateMathParser(TimeZone.getTimeZone("UTC"));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public DocTransformer create(String field, SolrParams params, SolrQueryRequest r
BinaryDocValues idMapping = null;
if (params.getBool("resolve", false)) {
try {
idMapping = req.getSearcher().getLeafReader().getBinaryDocValues(resolutionField);
idMapping = req.getSearcher().getLeafReader().getSortedDocValues(resolutionField);
} catch (IOException e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot get data for resolving field: " + resolutionField, e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<dataDir>${solr.data.dir:}</dataDir>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
Expand Down
2 changes: 1 addition & 1 deletion contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ public void test() throws Exception {
/*
* cite_read_boost
*/
//dumpDoc(null, "recid", "read_count", "cite_read_boost");
dumpDoc(null, "recid", "read_count", "cite_read_boost");
assertQ(req("q", "cite_read_boost:[0.0 TO 1.0]"),
"//doc/int[@name='recid'][.='100']",
"//doc/int[@name='recid'][.='101']",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -424,13 +424,17 @@ private void verifySearch(int[] randomIds) throws IOException {
String original = doc.getField("original").stringValue();
String[] parts = original.split("\\,? ");
Query[] queries = buildQueries(parts);
if (queries == null)
continue;
TermQuery oq = new TermQuery(new Term("original", original));
int ho = searcher.search(oq, 1).totalHits;
for (Query q: queries) {
if (q == null) continue;
Builder bq = new BooleanQuery.Builder();
bq.add(q, Occur.MUST);
bq.add(new TermQuery(new Term("id", Integer.toString(randomIds[i]))), Occur.MUST);
if (q != null) {
System.out.println(q.toString());
int no = searcher.search(bq.build(), 1).totalHits;
if (no != 1) {
System.out.println("Results differ: " + oq + " <<>> " + q + " [" + ho + " : " + no + "]");
Expand All @@ -454,7 +458,9 @@ private void verifySearch(int[] randomIds) throws IOException {
}

private Query[] buildQueries(String[] parts) throws UnsupportedEncodingException {
int howMany = TestUtil.nextInt(random(), 0, parts.length-1); // how many initials
int howMany = TestUtil.nextInt(random(), 2, parts.length-1); // how many initials
if (howMany < 2)
return null;
Query[] queries = new Query[9];
queries[1] = getRegexpQuery(parts, howMany, false);
queries[2] = getWildcardQuery(parts, howMany, false);
Expand Down
2 changes: 1 addition & 1 deletion contrib/adsabs/src/test/org/adsabs/solr/AdsConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public static class F {
public static String[] TYPE_NORMALIZED_TEXT_ASCII_FIELDS = new String[]{"pub", "keyword", "keyword_norm"};

public static String TYPE_NORMALIZED_STRING_ASCII = "bibcode";
public static String[] TYPE_NORMALIZED_STRING_ASCII_FIELDS = new String[]{"bibcode", "volume",
public static String[] TYPE_NORMALIZED_STRING_ASCII_FIELDS = new String[]{"bibcode", "citation", "volume",
"issue", "lang", "issn", "isbn", "property", "database", "data", "bibgroup", "vizier"};

public static String[] TYPE_DATE_FIELDS = new String[]{"date"};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ public void test() throws Exception {
assertU(addDocs("date", "1977-01-01T00:30:00Z"));

assertU(commit());

assertQ(req("q", "*:*"), "//*[@numFound='16']");


Expand All @@ -100,8 +99,10 @@ public void test() throws Exception {
"date:[1325376000000 TO 1325462399000]",
LegacyNumericRangeQuery.class);
// 1012-01-01T00:00:01 - 2012-12-31T23:59:59
// NOTE: the date parsing is tricky (calendars were changed in 1582)
// so it actually produces 1011-12-26; but I think we can ignore it
assertQueryEquals(req("q", "pubdate:[* TO 2012]", "defType", "aqp"),
"date:[-30231100799000 TO 1356998399000]",
"date:[-30231619199000 TO 1356998399000]",
LegacyNumericRangeQuery.class);

// 2012-01-01T00:00:00 - 3011-12-31T23:59:59
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,9 @@ public void testMultiTokens() throws Exception {
// multi-pace is split by WDFF and expanded with a synonym
assertQueryEquals(req("q", "title:\"bubble pace telescope multi-pace foobar\"", "defType", "aqp"),
"title:\"bubble (pace syn::lunar) telescope multi (pace syn::lunar) foobar\"" +
" title:\"bubble (pace syn::lunar) telescope multipace ? foobar\"" +
" title:\"bubble (pace syn::lunar) telescope ? multipace foobar\"" +
" title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multi (pace syn::lunar) foobar\"~2" +
" title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multipace ? foobar\"~3",
" title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? ? multipace foobar\"~3",
BooleanQuery.class);
assertQ(req("q", "title" + ":\"bubble pace telescope multi-pace foobar\""), "//*[@numFound='1']",
"//doc/str[@name='id'][.='17']");
Expand Down Expand Up @@ -415,9 +415,9 @@ public void testMultiTokens() throws Exception {
//dumpDoc(null, "title", "recid");
assertQueryEquals(req("q", "title:\"bubble pace telescope multi-foo\"", "defType", "aqp", "df", "title"),
"title:\"bubble (pace syn::lunar) telescope multi foo\" " +
"title:\"bubble (pace syn::lunar) telescope multifoo\" " +
"title:\"bubble (pace syn::lunar) telescope ? multifoo\" " +
"title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multi foo\"~2 " +
"title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? multifoo\"~2",
"title:\"(syn::bubble pace telescope syn::acr::bpt) ? ? ? multifoo\"~3",
BooleanQuery.class);
assertQ(req("q", "title:\"bubble pace telescope multi-foo\"", "defType", "aqp", "df", "title"),
"//*[@numFound='2']",
Expand Down Expand Up @@ -799,6 +799,7 @@ public void testOtherCases() throws Exception {
"//doc/str[@name='id'][.='10']",
"//doc/str[@name='id'][.='11']"
);

assertQ(req("q", "title" + ":*sky"), "//*[@numFound='4']",
"//doc/str[@name='id'][.='10']",
"//doc/str[@name='id'][.='11']",
Expand Down Expand Up @@ -851,16 +852,15 @@ public void testOtherCases() throws Exception {
"defType", "aqp"),
"title:\"350 (mhz syn::mhz) (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma) ray (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" "
+ "title:\"350 (mhz syn::mhz) (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) ? (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" "
+ "title:\"350mhz ? (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma) ray (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" "
+ "title:\"350mhz ? (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) ? (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"~2",
+ "title:\"350mhz (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma) ray (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" "
+ "title:\"350mhz (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) ? (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"~2",
BooleanQuery.class);


assertQueryEquals(req(
"q", "title:\"A 350-MHz GBT Survey of 50 Faint Fermi γ-ray Sources for Radio Millisecond Pulsars\"",
"defType", "aqp"),
"title:\"350 (mhz syn::mhz) (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (ray gammaray syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\" "
+ "title:\"350mhz ? (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (ray gammaray syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"",
+ "title:\"350mhz (acr::gbt syn::acr::gbt syn::green bank telescope) (survey syn::survey) 50 (faint syn::faint) (fermi syn::fermi) (gamma syn::gamma syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (ray gammaray syn::gamma ray syn::gammaray syn::gamma rays syn::gammarays) (sources syn::source) (radio syn::radio) (millisecond syn::millisecond) (pulsars syn::pulsars)\"",
BooleanQuery.class);

//dumpDoc(null, "title");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,28 +54,56 @@ public static void beforeClass() throws Exception {


public void test() throws Exception {

assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "Bílá kobyla skočila přes čtyřista"));
assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "třicet-tři stříbrných střech"));
assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "A ještě TřistaTřicetTři stříbrných stovek"));
assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "one two three"));
assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "este-c'est que"));
assertU(addDocs(F.TYPE_NORMALIZED_STRING_ASCII_FIELDS, "568"));

assertU(commit());

//dumpDoc(null, F.ID, F.TYPE_NORMALIZED_STRING_ASCII_FIELDS[0]);
String[] fs = new String[]{"bibcode", "identifier", "title"}; // single-val-string, multi-val-string, text
assertU(addDocs(fs, "Bílá kobyla skočila přes čtyřista"));
assertU(addDocs(fs, "třicet-tři stříbrných střech"));
assertU(addDocs(fs, "A ještě TřistaTřicetTři stříbrných stovek"));
assertU(addDocs(fs, "one two three"));
assertU(addDocs(fs, "este-c'est que"));
assertU(addDocs(fs, "568"));

assertU(commit("waitSearcher", "true"));

assertQ(req("q", "*:*"), "//*[@numFound='6']");

assertQueryEquals(req("q", "bibcode:Bílá", "qt", "aqp"), "bibcode:bila", TermQuery.class);
assertQueryEquals(req("q", "bibcode:Bila-bila", "qt", "aqp"), "bibcode:bilabila", TermQuery.class);

assertQ(req("q", "bibcode:Bílá*"),
"//*[@numFound='1']",
"//doc[1]/str[@name='id'][.='0']");
assertQ(req("q", "bibcode:Bílá-kobyla*"), "//*[@numFound='1']",
assertQ(req("q", "identifier:Bílá*"),
"//*[@numFound='1']",
"//doc[1]/str[@name='id'][.='0']");
assertQ(req("q", "title:Bílá*"),
"//*[@numFound='1']",
"//doc[1]/str[@name='id'][.='0']");

assertQ(req("q", "bibcode:kobyla"),
"//*[@numFound='0']");
assertQ(req("q", "identifier:kobyla"),
"//*[@numFound='0']");
assertQ(req("q", "title:kobyla"),
"//*[@numFound='1']",
"//doc[1]/str[@name='id'][.='0']");


assertQ(req("q", "bibcode:Bílá-kobyla*"),
"//*[@numFound='1']",
"//doc[1]/str[@name='id'][.='0']");
assertQ(req("q", "bibcode:kobyla"), "//*[@numFound='0']");
assertQ(req("q", "identifier:Bílá-kobyla*"),
"//*[@numFound='1']",
"//doc[1]/str[@name='id'][.='0']");
assertQ(req("q", "title:Bílá-kobyla*"),
"//*[@numFound='0']");

assertQ(req("q", "bibcode:Bílá-kobyla"),
"//*[@numFound='0']");
assertQ(req("q", "identifier:Bílá-kobyla"),
"//*[@numFound='0']");
assertQ(req("q", "title:Bílá-kobyla"),
"//*[@numFound='1']");

assertQ(req("q", "bibcode:\"one two three\""),
"//*[@numFound='1']",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public void test() throws Exception {
assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "three-jets-four"));
assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "five jets"));

assertU(commit());
assertU(commit("waitSearcher", "true"));

assertQ(req("q", "*:*"), "//*[@numFound='10']");

Expand Down
Loading

0 comments on commit 6dc759f

Please sign in to comment.