From c06fbad2eb25fa1aa432c384fabb30d6a9d395e8 Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Tue, 13 May 2014 20:59:41 -0400 Subject: [PATCH] Almost converted the multi-synonyms class --- .../synonym/NewSemicolonSynonymParser.java | 10 ++-- .../synonym/NewSolrSynonymParser.java | 10 ++-- .../synonym/NewSynonymFilterFactory.java | 56 ++++++++++++------- .../synonym/NewWordnetSynonymParser.java | 4 +- .../AqpAdsabsExpandAuthorSearchProcessor.java | 34 ++++++----- .../PersistingMapTokenFilterFactory.java | 4 -- .../author/AuthorCollectorFactory.java | 8 ++- .../author/AuthorCollectorFilter.java | 18 +++--- .../solr/search/AqpFunctionQParser.java | 18 +++--- .../solr/search/BitSetQParserPlugin.java | 26 ++++++--- .../function/PositionSearchFunction.java | 2 +- .../search/function/PositionSearchParser.java | 3 +- .../org/apache/solr/update/InvenioDoctor.java | 15 ++--- .../perf/CreatePerformanceQueriesHandler.java | 7 ++- .../solr/util/MontySolrQueryTestCase.java | 52 +++++++---------- .../adsabs/TestAdsabsIndexingSearching.java | 35 ++++++------ .../adsabs/lucene/BenchmarkAuthorSearch.java | 16 +++--- .../analysis/core/TestAcronymFilter.java | 29 +++++----- .../synonym/TestNewMultiWordSynonyms.java | 54 +++++++++--------- .../flexible/aqp/TestAqpAdsabs.java | 35 ++++-------- .../TestAdsabsTypeAffiliationText.java | 28 ++++++---- .../author/TestAuthorCollectorFactory.java | 21 ++++--- .../adsabs/solr/collection1/conf/schema.xml | 16 +++--- 23 files changed, 257 insertions(+), 244 deletions(-) diff --git a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSemicolonSynonymParser.java b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSemicolonSynonymParser.java index d295c06a3..059694f36 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSemicolonSynonymParser.java +++ b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSemicolonSynonymParser.java @@ -12,12 +12,10 @@ public class NewSemicolonSynonymParser extends NewSynonymFilterFactory.SynonymParser { private final boolean expand; - private final Analyzer analyzer; public NewSemicolonSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) { - super(dedup); + super(dedup, analyzer); this.expand = expand; - this.analyzer = analyzer; } public void add(Reader in) throws IOException, ParseException { @@ -52,19 +50,19 @@ private void addInternal(BufferedReader in) throws IOException { String inputStrings[] = split(sides[0], ";"); inputs = new CharsRef[inputStrings.length]; for (int i = 0; i < inputs.length; i++) { - inputs[i] = analyze(analyzer, inputStrings[i].trim(), new CharsRef()); + inputs[i] = analyze(inputStrings[i].trim(), new CharsRef()); } String outputStrings[] = split(sides[1], ";"); outputs = new CharsRef[outputStrings.length]; for (int i = 0; i < outputs.length; i++) { - outputs[i] = analyze(analyzer, outputStrings[i].trim(), new CharsRef()); + outputs[i] = analyze(outputStrings[i].trim(), new CharsRef()); } } else { String inputStrings[] = split(line, ";"); inputs = new CharsRef[inputStrings.length]; for (int i = 0; i < inputs.length; i++) { - inputs[i] = analyze(analyzer, inputStrings[i].trim(), new CharsRef()); + inputs[i] = analyze(inputStrings[i].trim(), new CharsRef()); } if (expand) { outputs = inputs; diff --git a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSolrSynonymParser.java b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSolrSynonymParser.java index 53a938148..cbad1bae9 100644 --- 
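
Both synonym parsers above follow the same migration: the Analyzer moves into the Lucene 4.x `SynonymMap.Parser` base class via `super(dedup, analyzer)`, so the protected `analyze(text, reuse)` helper no longer takes the analyzer explicitly. A minimal sketch of the same pattern for a hypothetical pipe-delimited format (not part of this patch):

```java
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;

public class PipeSynonymParser extends SynonymMap.Parser {

  public PipeSynonymParser(boolean dedup, Analyzer analyzer) {
    super(dedup, analyzer); // the base class now owns the analyzer
  }

  @Override
  public void parse(Reader in) throws IOException, ParseException {
    BufferedReader br = new BufferedReader(in);
    String line;
    while ((line = br.readLine()) != null) {
      String[] sides = line.split("\\|");   // "input|output", illustrative format
      if (sides.length != 2) continue;
      // analyze() uses the analyzer handed to the constructor
      add(analyze(sides[0].trim(), new CharsRef()),
          analyze(sides[1].trim(), new CharsRef()), true);
    }
  }
}
```
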
a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSolrSynonymParser.java +++ b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSolrSynonymParser.java @@ -56,12 +56,10 @@ */ public class NewSolrSynonymParser extends NewSynonymFilterFactory.SynonymParser { private final boolean expand; - private final Analyzer analyzer; public NewSolrSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) { - super(dedup); + super(dedup, analyzer); this.expand = expand; - this.analyzer = analyzer; } public void add(Reader in) throws IOException, ParseException { @@ -96,19 +94,19 @@ private void addInternal(BufferedReader in) throws IOException { String inputStrings[] = split(sides[0], ","); inputs = new CharsRef[inputStrings.length]; for (int i = 0; i < inputs.length; i++) { - inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef()); + inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRef()); } String outputStrings[] = split(sides[1], ","); outputs = new CharsRef[outputStrings.length]; for (int i = 0; i < outputs.length; i++) { - outputs[i] = analyze(analyzer, unescape(outputStrings[i]).trim(), new CharsRef()); + outputs[i] = analyze(unescape(outputStrings[i]).trim(), new CharsRef()); } } else { String inputStrings[] = split(line, ","); inputs = new CharsRef[inputStrings.length]; for (int i = 0; i < inputs.length; i++) { - inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef()); + inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRef()); } if (expand) { outputs = inputs; diff --git a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSynonymFilterFactory.java b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSynonymFilterFactory.java index 28d9c7ea4..fa54a9cd1 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSynonymFilterFactory.java +++ b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewSynonymFilterFactory.java @@ -24,11 +24,13 @@ import java.io.InputStreamReader; import java.io.LineNumberReader; import java.io.Reader; +import java.lang.reflect.InvocationTargetException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import java.text.ParseException; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -95,15 +97,18 @@ public void inform(ResourceLoader loader) throws IOException { } - public static class SynonymParser extends SynonymMap.Builder { + public static class SynonymParser extends SynonymMap.Parser { - public SynonymParser(boolean dedup) { - super(dedup); + public SynonymParser(boolean dedup, Analyzer analyzer) { + super(dedup, analyzer); } public void add(Reader in) throws IOException, ParseException { throw new IllegalAccessError("You must override this method"); } + + @Override + public void parse(Reader in) throws IOException, ParseException {} } @@ -111,7 +116,7 @@ public static class SynonymBuilderFactory extends TokenizerFactory implements Re protected Map args; - protected SynonymBuilderFactory(Map args) { + public SynonymBuilderFactory(Map args) { super(args); this.args = args; } @@ -188,10 +193,15 @@ protected SynonymParser getParser(Analyzer analyzer) { // (there are no tests for this functionality) private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException { - TokenizerFactory tokFactory = 
TokenizerFactory.forName(cname, this.args); - tokFactory.setExplicitLuceneMatchVersion(true); - if (tokFactory instanceof ResourceLoaderAware) { - ((ResourceLoaderAware) tokFactory).inform(loader); + Class clazz = loader.findClass(cname, TokenizerFactory.class); + TokenizerFactory tokFactory; + try { + tokFactory = clazz.getConstructor(Map.class).newInstance(new HashMap()); + if (tokFactory instanceof ResourceLoaderAware) { + ((ResourceLoaderAware) tokFactory).inform(loader); + } + } catch (Exception e) { + throw new RuntimeException(e); } return tokFactory; } @@ -203,14 +213,20 @@ public void inform(ResourceLoader loader) throws IOException { } - private SynonymBuilderFactory loadBuilderFactory(ResourceLoader loader, String cname) throws IOException { - TokenizerFactory builderFactory = TokenizerFactory.forName(cname, args); - builderFactory.setExplicitLuceneMatchVersion(true); - if (builderFactory instanceof ResourceLoaderAware) { - ((ResourceLoaderAware) builderFactory).inform(loader); - } - return (SynonymBuilderFactory) builderFactory; - } + + //(there are no tests for this functionality) + private SynonymBuilderFactory loadBuilderFactory(ResourceLoader loader, String cname) throws IOException { + Class clazz = loader.findClass(cname, SynonymBuilderFactory.class); + try { + SynonymBuilderFactory tokFactory = clazz.getConstructor(Map.class).newInstance(args); + if (tokFactory instanceof ResourceLoaderAware) { + ((ResourceLoaderAware) tokFactory).inform(loader); + } + return tokFactory; + } catch (Exception e) { + throw new RuntimeException(e); + } + } /* @@ -231,7 +247,7 @@ private SynonymBuilderFactory loadBuilderFactory(ResourceLoader loader, String c * 2: telescope */ public static class AlwaysIncludeOriginal extends SynonymBuilderFactory { - protected AlwaysIncludeOriginal(Map args) { + public AlwaysIncludeOriginal(Map args) { super(args); } @@ -286,7 +302,7 @@ public void add(CharsRef input, CharsRef output, boolean includeOrig) { * 4: was */ public static class MultiTokenReplaceNulls extends SynonymBuilderFactory { - protected MultiTokenReplaceNulls(Map args) { + public MultiTokenReplaceNulls(Map args) { super(args); } @@ -313,7 +329,7 @@ public void add(CharsRef input, CharsRef output, boolean includeOrig) { */ public static class BestEffortSearchLowercase extends SynonymBuilderFactory { private Map args; - protected BestEffortSearchLowercase(Map args) { + public BestEffortSearchLowercase(Map args) { super(args); this.args = args; } @@ -355,7 +371,7 @@ private CharsRef lowercase(CharsRef chars) { */ public static class BestEffortIgnoreCaseSelectively extends SynonymBuilderFactory { private Map args; - protected BestEffortIgnoreCaseSelectively(Map args) { + public BestEffortIgnoreCaseSelectively(Map args) { super(args); this.args = args; } diff --git a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewWordnetSynonymParser.java b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewWordnetSynonymParser.java index 65545319d..962c3e1f2 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewWordnetSynonymParser.java +++ b/contrib/adsabs/src/java/org/apache/lucene/analysis/synonym/NewWordnetSynonymParser.java @@ -37,7 +37,7 @@ public class NewWordnetSynonymParser extends NewSynonymFilterFactory.SynonymPars private final Analyzer analyzer; public NewWordnetSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) { - super(dedup); + super(dedup, analyzer); this.expand = expand; this.analyzer = analyzer; } @@ -89,7 +89,7 @@ private CharsRef 
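
Both loaders above now resolve the class through the `ResourceLoader` and invoke its `Map` constructor reflectively, which is also why the nested builder factories below change their constructors from `protected` to `public`: `Class.getConstructor()` only finds public constructors. A condensed sketch of the shared pattern (the helper class is hypothetical):

```java
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;

final class FactoryLoader {
  static <T> T load(ResourceLoader loader, String cname, Class<T> expectedType,
                    Map<String, String> args) throws IOException {
    Class<? extends T> clazz = loader.findClass(cname, expectedType);
    try {
      // requires a public (Map) constructor on the target factory
      T factory = clazz.getConstructor(Map.class)
          .newInstance(args != null ? args : new HashMap<String, String>());
      if (factory instanceof ResourceLoaderAware) {
        ((ResourceLoaderAware) factory).inform(loader);
      }
      return factory;
    } catch (Exception e) {
      throw new RuntimeException("Cannot instantiate " + cname, e);
    }
  }
}
```
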
parseSynonym(String line, CharsRef reuse) throws IOException { int end = line.lastIndexOf('\''); String text = line.substring(start, end).replace("''", "'"); - return analyze(analyzer, text, reuse); + return analyze(text, reuse); } private void addInternal(CharsRef synset[], int size) { diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsExpandAuthorSearchProcessor.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsExpandAuthorSearchProcessor.java index 241c270de..3365bcd66 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsExpandAuthorSearchProcessor.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsExpandAuthorSearchProcessor.java @@ -12,6 +12,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; +import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.KeywordTokenizerFactory; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.TokenFilterFactory; @@ -32,9 +33,11 @@ import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode; +import org.apache.solr.analysis.author.AuthorNormalizeFilter; import org.apache.solr.analysis.author.AuthorNormalizeFilterFactory; import org.apache.solr.analysis.author.AuthorUtils; import org.apache.solr.analysis.author.PythonicAuthorNormalizeFilterFactory; +import org.apache.solr.analysis.author.PythonicAuthorNormalizerFilter; /** * Looks at the QueryNode(s) and if they are author searches, @@ -270,26 +273,20 @@ private boolean isLongForm(String name) { * chain, you should always review also this method */ - private TokenStreamComponents tsc = null; - private ReusableStringReader reader = null; + Analyzer authorNameAnalyzer = new Analyzer() { + @Override + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer source = new KeywordTokenizer(reader); + TokenStream filter = new PythonicAuthorNormalizerFilter(source); + filter = new AuthorNormalizeFilter(filter); + return new TokenStreamComponents(source, filter); + } + }; + private List normalizeAuthorName(String input) throws QueryNodeException { - if (reader == null) { // well, nice try, but it will be always created new... 
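
The hand-rolled `TokenStreamComponents` caching being deleted here never actually reused anything, as its own comment admits; the anonymous `Analyzer` added above is the 4.x way to get reuse, since `Analyzer` manages per-thread components internally. A generic sketch of the pattern (the `LowerCaseFilter` stand-in and version constant are illustrative, not from this patch):

```java
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.util.Version;

final class ReusableAnalyzerSketch {
  static final Analyzer NORMALIZER = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new KeywordTokenizer(reader);  // whole input as a single token
      TokenStream sink = new LowerCaseFilter(Version.LUCENE_47, source); // assumed version
      return new TokenStreamComponents(source, sink);   // Analyzer reuses these per thread
    }
  };
}
```
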
- TokenFilterFactory[] filters = new TokenFilterFactory[2]; - TokenizerFactory tokenizer = new KeywordTokenizerFactory(new HashMap()); - filters[1] = new AuthorNormalizeFilterFactory(new HashMap()); - filters[0] = new PythonicAuthorNormalizeFilterFactory(new HashMap()); - reader = new ReusableStringReader(); - Tokenizer tk = tokenizer.create( reader ); - TokenStream ts = tk; - for (TokenFilterFactory filter : filters) { - ts = filter.create(ts); - } - tsc = new TokenStreamComponents(tk, ts); - } - - TokenStream ts = tsc.getTokenStream(); - reader.setValue(input); + try { + TokenStream ts = authorNameAnalyzer.tokenStream("foo", input); ts.reset(); List out = new ArrayList(); CharTermAttribute termAtt; @@ -297,6 +294,7 @@ private List normalizeAuthorName(String input) throws QueryNodeException termAtt = ts.getAttribute(CharTermAttribute.class); out.add(termAtt.toString()); } + ts.close(); return out; } catch (IOException e) { throw new QueryNodeException(new MessageImpl("Error parsing: " + input, e)); diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/PersistingMapTokenFilterFactory.java b/contrib/adsabs/src/java/org/apache/solr/analysis/PersistingMapTokenFilterFactory.java index 25745e6f6..3d084b5c4 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/PersistingMapTokenFilterFactory.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/PersistingMapTokenFilterFactory.java @@ -36,10 +36,6 @@ public PersistingMapTokenFilterFactory(Map args) { if (args.containsKey("syntax")) { this.syntax = args.remove("syntax"); } - - if (!args.isEmpty()) { - throw new IllegalArgumentException("Unknown parameter(s): " + args); - } } public void inform(ResourceLoader loader) { diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFactory.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFactory.java index a23316a90..680d3af4d 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFactory.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFactory.java @@ -26,18 +26,20 @@ public class AuthorCollectorFactory extends PersistingMapTokenFilterFactory { public AuthorCollectorFactory(Map args) { super(args); if (args.containsKey("tokenTypes")) { - tokenTypes = StrUtils.splitSmart(args.get("tokenTypes"), ",", false); + tokenTypes = StrUtils.splitSmart(args.remove("tokenTypes"), ",", false); } else { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The tokenType parameter missing"); } emitTokens = false; if (args.containsKey("emitTokens")) { - if (((String) args.get("emitTokens")).equals("true")) { + if (((String) args.remove("emitTokens")).equals("true")) { emitTokens = true; } } - + if (!args.isEmpty()) { + throw new IllegalArgumentException("Unknown parameter(s): " + args); + } } /* (non-Javadoc) diff --git a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFilter.java b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFilter.java index 27d1f476c..d06625b35 100644 --- a/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFilter.java +++ b/contrib/adsabs/src/java/org/apache/solr/analysis/author/AuthorCollectorFilter.java @@ -38,7 +38,6 @@ public final class AuthorCollectorFilter extends TokenFilter { private Set tokenBuffer; private Set tokenTypes; private String authorInput; - private int resetCounter; public AuthorCollectorFilter(TokenStream input, WriteableSynonymMap synMap) { super(input); @@ 
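
The rewritten `normalizeAuthorName()` above follows the 4.x consumer contract for token streams, though the full contract also calls `end()` before `close()`, which the hunk skips. A sketch of the complete sequence (the field name is arbitrary, as in the patch):

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class AnalyzeUtil {
  static List<String> tokens(Analyzer analyzer, String input) throws IOException {
    List<String> out = new ArrayList<String>();
    TokenStream ts = analyzer.tokenStream("foo", input);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    try {
      ts.reset();              // mandatory before the first incrementToken()
      while (ts.incrementToken()) {
        out.add(termAtt.toString());
      }
      ts.end();                // records end-of-stream state (final offset)
    } finally {
      ts.close();              // releases the per-thread reusable components
    }
    return out;
  }
}
```
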
-47,7 +46,6 @@ public AuthorCollectorFilter(TokenStream input, WriteableSynonymMap synMap) { tokenBuffer = new LinkedHashSet(); tokenTypes = new HashSet(); this.synMap = synMap; - resetCounter = 0; } @@ -56,7 +54,6 @@ public AuthorCollectorFilter(TokenStream input, WriteableSynonymMap synMap) { */ @Override public boolean incrementToken() throws IOException { - resetCounter = 0; if (!input.incrementToken()) { return false; @@ -103,11 +100,18 @@ private void addTokensToSynMap() { @Override public void reset() throws IOException { super.reset(); + } + + @Override + public void end() throws IOException { + super.end(); addTokensToSynMap(); - resetCounter++; - if (resetCounter > 2) { - synMap.persist(); - } + } + + @Override + public void close() throws IOException { + synMap.persist(); + super.close(); } public void setEmitTokens(boolean b) { diff --git a/contrib/adsabs/src/java/org/apache/solr/search/AqpFunctionQParser.java b/contrib/adsabs/src/java/org/apache/solr/search/AqpFunctionQParser.java index 0e78e9bd2..385dd9bd6 100644 --- a/contrib/adsabs/src/java/org/apache/solr/search/AqpFunctionQParser.java +++ b/contrib/adsabs/src/java/org/apache/solr/search/AqpFunctionQParser.java @@ -63,7 +63,7 @@ protected String consumeAsString() { @Override protected ValueSource parseValueSource(boolean doConsumeDelimiter) - throws ParseException { + throws SyntaxError { // check if there is a query already built inside our node OriginalInput node = consume(); @@ -76,7 +76,7 @@ protected ValueSource parseValueSource(boolean doConsumeDelimiter) else if (input.substring(0,1).equals("$")) { String val = getParam(input); if (val == null) { - throw new ParseException("Missing param " + input + " while parsing function '" + val + "'"); + throw new SyntaxError("Missing param " + input + " while parsing function '" + val + "'"); } QParser subParser = subQuery(val, "func"); @@ -130,7 +130,7 @@ public QueryNode getQueryNode() { } - public String parseId() throws ParseException { + public String parseId() throws SyntaxError { return consumeAsString(); } @@ -139,21 +139,21 @@ public int parseInt() { return Integer.valueOf(consumeAsString()); } - public Float parseFloat() throws ParseException { + public Float parseFloat() throws SyntaxError { String str = consumeAsString(); - if (argWasQuoted()) throw new ParseException("Expected float instead of quoted string:" + str); + if (argWasQuoted()) throw new SyntaxError("Expected float instead of quoted string:" + str); float value = Float.parseFloat(str); return value; } - public double parseDouble() throws ParseException { + public double parseDouble() throws SyntaxError { String str = consumeAsString(); - if (argWasQuoted()) throw new ParseException("Expected double instead of quoted string:" + str); + if (argWasQuoted()) throw new SyntaxError("Expected double instead of quoted string:" + str); double value = Double.parseDouble(str); return value; } - public List parseValueSourceList() throws ParseException { + public List parseValueSourceList() throws SyntaxError { List sources = new ArrayList(3); while (canConsume()) { sources.add(parseValueSource(true)); @@ -161,7 +161,7 @@ public List parseValueSourceList() throws ParseException { return sources; } - public Query parseNestedQuery() throws ParseException { + public Query parseNestedQuery() throws SyntaxError { OriginalInput node = consume(); QParser parser = subQuery(node.value, null); // use the default parser return parser.getQuery(); diff --git 
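
The lifecycle change above is worth spelling out: per-stream work belongs in `end()` (the stream has been fully consumed) and one-time teardown in `close()`, instead of the old trick of counting `reset()` calls. A stripped-down sketch of the same shape (the flush/persist bodies are placeholders):

```java
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class CollectingFilterSketch extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final Set<String> buffer = new LinkedHashSet<String>();

  public CollectingFilterSketch(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    buffer.add(termAtt.toString());  // side effect per token
    return true;
  }

  @Override
  public void end() throws IOException {
    super.end();
    flush(buffer);                   // stream exhausted: safe to act on what was collected
  }

  @Override
  public void close() throws IOException {
    persist();                       // the filter will not be used again
    super.close();
  }

  private void flush(Set<String> tokens) { /* placeholder: move into a shared map */ }
  private void persist()                 { /* placeholder: write the map to disk */ }
}
```
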
a/contrib/adsabs/src/java/org/apache/solr/search/BitSetQParserPlugin.java b/contrib/adsabs/src/java/org/apache/solr/search/BitSetQParserPlugin.java index b69b058d4..cabd9b411 100644 --- a/contrib/adsabs/src/java/org/apache/solr/search/BitSetQParserPlugin.java +++ b/contrib/adsabs/src/java/org/apache/solr/search/BitSetQParserPlugin.java @@ -22,6 +22,7 @@ import org.apache.lucene.search.BitSetQuery; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.Ints; import org.apache.lucene.search.Query; import org.apache.lucene.search.SolrCacheWrapper; import org.apache.solr.common.SolrException; @@ -300,19 +301,30 @@ else if (c.isAssignableFrom(TrieIntField.class) || c.isAssignableFrom(IntField.c throw new SolrException(ErrorCode.BAD_REQUEST, "You make me sad - this field: " + fieldName + " is not indexed as integer :("); } - int[] cache; + Ints cache; try { cache = FieldCache.DEFAULT.getInts(reader, fieldName, false); } catch (IOException e) { throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot get a cache for field: " + fieldName + "\n" + e.getMessage()); } - int i = 0; // lucene docid - for (int docValue: cache) { - if (docValue < bits.length() && docValue > 0 && bits.get(docValue)) { - translatedBitSet.set(i); - } - i++; + + /* + int i = 0; // lucene docid + for (int docValue: cache) { + if (docValue < bits.length() && docValue > 0 && bits.get(docValue)) { + translatedBitSet.set(i); + } + i++; + } + */ + + int maxDoc = reader.maxDoc(); + for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i+1)) { + if (i >= maxDoc) break; + translatedBitSet.set(cache.get(i)); } + bits = translatedBitSet; } } diff --git a/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchFunction.java b/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchFunction.java index b64914074..89ffd9ff3 100644 --- a/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchFunction.java +++ b/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchFunction.java @@ -93,7 +93,7 @@ public void createWeight(Map context, IndexSearcher searcher) TreeSet extractedTerms = new TreeSet(); subQuery.extractTerms(extractedTerms); for (Term term : extractedTerms) { - termContexts.put(term, TermContext.build(ctx, term, true)); + termContexts.put(term, TermContext.build(ctx, term)); } List leaves = ctx.leaves(); diff --git a/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchParser.java b/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchParser.java index 63f347b42..e4ffcd8df 100644 --- a/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchParser.java +++ b/contrib/adsabs/src/java/org/apache/solr/search/function/PositionSearchParser.java @@ -2,6 +2,7 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; import org.apache.solr.search.ValueSourceParser; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queryparser.classic.ParseException; @@ -9,7 +10,7 @@ public class PositionSearchParser extends ValueSourceParser { @Override - public ValueSource parse(FunctionQParser fp) throws ParseException { + public ValueSource parse(FunctionQParser fp) throws SyntaxError { String field = fp.parseArg(); String author = fp.parseArg(); int start = fp.parseInt(); diff --git a/contrib/adsabs/src/java/org/apache/solr/update/InvenioDoctor.java
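
One hazard in the replacement loop above: `nextSetBit()` can return positions beyond the reader's docid range, and `FieldCache.Ints.get()` is only defined for docids 0 to maxDoc-1, hence the bound check. The idiom in isolation (`java.util.BitSet` stands in for the patch's bitset type):

```java
import java.util.BitSet;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.FieldCache;

final class BitSetTranslator {
  /** Maps each set docid through the per-document integer values. */
  static BitSet translate(BitSet bits, FieldCache.Ints cache, AtomicReader reader) {
    int maxDoc = reader.maxDoc();
    BitSet translated = new BitSet();
    // nextSetBit() visits set bits in ascending order; guard the docid range,
    // since cache.get() is only defined for 0 .. maxDoc-1
    for (int i = bits.nextSetBit(0); i >= 0 && i < maxDoc; i = bits.nextSetBit(i + 1)) {
      translated.set(cache.get(i));
    }
    return translated;
  }
}
```
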
b/contrib/adsabs/src/java/org/apache/solr/update/InvenioDoctor.java index 43271696a..a06dca5ec 100644 --- a/contrib/adsabs/src/java/org/apache/solr/update/InvenioDoctor.java +++ b/contrib/adsabs/src/java/org/apache/solr/update/InvenioDoctor.java @@ -33,6 +33,7 @@ import java.util.Set; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.Ints; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.ModifiableSolrParams; @@ -656,8 +657,8 @@ private BitSet[] discoverMissingRecords(BitSet present, BitSet missing, BitSet t // setting maxRecs to very large value means the worker cannot be stopped in time int maxRecs = Math.min(params.getInt("max_records", 100000), 1000000); - - int[] existingRecs = FieldCache.DEFAULT.getInts(req.getSearcher().getAtomicReader(), field, false); + int maxLen = req.getSearcher().maxDoc(); + Ints existingRecs = FieldCache.DEFAULT.getInts(req.getSearcher().getAtomicReader(), field, false); Map idToLuceneId; if (tmpMap.containsKey(existingRecs.hashCode())) { @@ -665,15 +666,15 @@ private BitSet[] discoverMissingRecords(BitSet present, BitSet missing, BitSet t } else { tmpMap.clear(); - idToLuceneId = new HashMap(existingRecs.length); - for (int i=0;i(maxLen); + for (int i=0;i seen) { return out.toString(); } - private void resolveIfNecessary(String fieldName, Set seen) throws ParseException, IOException { + private void resolveIfNecessary(String fieldName, Set seen) throws SyntaxError, IOException { HashSet toRemove = new HashSet(); SolrIndexSearcher searcher = req.getSearcher(); @@ -372,7 +373,7 @@ private void resolveIfNecessary(String fieldName, Set seen) thro try { q.resolveFound(searcher, parser); } - catch (ParseException e1) { + catch (SyntaxError e1) { log.info("Removing invalid query: " + q.query); log.info(e1.getMessage()); toRemove.add(q); diff --git a/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java b/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java index 123fded68..50a8bfd96 100644 --- a/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java +++ b/contrib/adsabs/src/test/monty/solr/util/MontySolrQueryTestCase.java @@ -14,51 +14,39 @@ import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.flexible.aqp.AqpTestAbstractCase; import org.apache.lucene.search.Query; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.QParser; import org.apache.solr.search.QueryParsing; +import org.apache.solr.search.SyntaxError; import org.getopt.luke.DocReconstructor; import org.getopt.luke.DocReconstructor.Reconstructed; import org.getopt.luke.GrowableStringArray; +import org.junit.BeforeClass; public class MontySolrQueryTestCase extends MontySolrAbstractTestCase { - protected AqpTestAbstractCase tp = null; + protected static AqpTestAbstractCase tp = new AqpTestAbstractCase() { + @Override + public void setUp() throws Exception { + super.setUp(); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + } + }; + private int idValue = 0; - - @Override - public String getSchemaFile() { - throw new IllegalAccessError("You must override this method"); - } - - @Override - public String getSolrConfigFile() { - throw new IllegalAccessError("You must override this method"); - } + @Override 
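
Because `FieldCache.DEFAULT.getInts()` now returns an opaque `Ints` holder rather than an `int[]`, the iteration bound has to come from the reader, which is what the `maxLen` variable above provides. A self-contained sketch of the kind of map the hunk builds (the field name and exact loop body are assumptions, since part of the hunk is garbled in this copy):

```java
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.FieldCache;

final class RecidMapper {
  /** Builds an external-id -> lucene-docid map. */
  static Map<Integer, Integer> build(AtomicReader reader) throws IOException {
    FieldCache.Ints recids = FieldCache.DEFAULT.getInts(reader, "recid", false);
    int maxDoc = reader.maxDoc();                  // Ints itself exposes no length
    Map<Integer, Integer> idToDocid = new HashMap<Integer, Integer>(maxDoc);
    for (int docid = 0; docid < maxDoc; docid++) {
      idToDocid.put(recids.get(docid), docid);     // get() returns 0 for missing values
    }
    return idToDocid;
  }
}
```
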
public void setUp() throws Exception { super.setUp(); - - final MontySolrQueryTestCase that = this; - - tp = new AqpTestAbstractCase() { - - @Override - public void setUp() throws Exception { - super.setUp(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - } - - - }; tp.setUp(); } @@ -72,7 +60,7 @@ public void tearDown() throws Exception { } - public QParser getParser(SolrQueryRequest req) throws ParseException, InstantiationException, IllegalAccessException, SecurityException, IllegalArgumentException, NoSuchMethodException, InvocationTargetException { + public QParser getParser(SolrQueryRequest req) throws SyntaxError, InstantiationException, IllegalAccessException, SecurityException, IllegalArgumentException, NoSuchMethodException, InvocationTargetException { SolrParams params = req.getParams(); String query = params.get(CommonParams.Q); String defType = params.get(QueryParsing.DEFTYPE); @@ -99,7 +87,7 @@ public QParser getParser(SolrQueryRequest req) throws ParseException, Instantiat } - public SolrQueryRequest req(String... q) { + public static SolrQueryRequest req(String... q) { boolean clean = true; for (String x: q) { if (q.equals("debugQuery")) { @@ -116,7 +104,7 @@ public SolrQueryRequest req(String... q) { nq[i++] = tp.debugParser ? "true" : "false"; q = nq; } - return super.req(q); + return SolrTestCaseJ4.req(q); } public Query assertQueryEquals(SolrQueryRequest req, String expected, Class clazz) @@ -143,7 +131,7 @@ public Query assertQueryEquals(SolrQueryRequest req, String expected, Class c public void assertQueryParseException(SolrQueryRequest req) throws Exception { try { getParser(req).parse(); - } catch (ParseException expected) { + } catch (SyntaxError expected) { return; } tp.debugFail("ParseException expected, not thrown"); diff --git a/contrib/adsabs/src/test/org/adsabs/TestAdsabsIndexingSearching.java b/contrib/adsabs/src/test/org/adsabs/TestAdsabsIndexingSearching.java index 217c88a4f..cf5f2c4fd 100644 --- a/contrib/adsabs/src/test/org/adsabs/TestAdsabsIndexingSearching.java +++ b/contrib/adsabs/src/test/org/adsabs/TestAdsabsIndexingSearching.java @@ -27,6 +27,7 @@ import org.apache.solr.schema.TrieIntField; import org.apache.solr.servlet.DirectSolrConnection; import org.adsabs.solr.AdsConfig.F; +import org.junit.BeforeClass; /** @@ -42,21 +43,23 @@ **/ public class TestAdsabsIndexingSearching extends MontySolrQueryTestCase { - public String getSchemaFile() { - - makeResourcesVisible(this.solrConfig.getResourceLoader(), - new String[] {MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/solr/collection1/conf", - MontySolrSetup.getSolrHome() + "/example/solr/collection1/conf" - }); - - return MontySolrSetup.getMontySolrHome() - + "/contrib/examples/adsabs/solr/collection1/conf/schema.xml"; - } - - public String getSolrConfigFile() { - return MontySolrSetup.getMontySolrHome() - + "/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml"; - } + @BeforeClass + public static void beforeClass() throws Exception { + System.setProperty("solr.allow.unsafe.resourceloading", "true"); + schemaString = MontySolrSetup.getMontySolrHome() + + "/contrib/examples/adsabs/solr/collection1/conf/schema.xml"; + + configString = MontySolrSetup.getMontySolrHome() + + "/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml"; + + /*makeResourcesVisible(???, + new String[] {MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/solr/collection1/conf", + MontySolrSetup.getSolrHome() + "/example/solr/collection1/conf" + });*/ + + 
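
Everywhere the classic `ParseException` used to appear, these hunks substitute Solr's `SyntaxError`, the exception Solr 4.x query parsers throw for malformed input. The typical call-site shape (the query string and error handling are illustrative):

```java
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError;

final class ParseSketch {
  static Query parseOrBadRequest(String qstr, SolrQueryRequest req) {
    try {
      QParser parser = QParser.getParser(qstr, "lucene", req);
      return parser.getQuery();               // throws SyntaxError, not ParseException
    } catch (SyntaxError e) {
      throw new SolrException(ErrorCode.BAD_REQUEST, e);
    }
  }
}
```
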
initCore(configString, schemaString); + } + public void test() throws Exception { @@ -64,7 +67,7 @@ public void test() throws Exception { EmbeddedSolrServer embedded = getEmbeddedServer(); // checking the schema - IndexSchema schema = h.getCore().getSchema(); + IndexSchema schema = h.getCore().getLatestSchema(); SchemaField field = schema.getField("id"); diff --git a/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java b/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java index 40456aaf0..461906c05 100644 --- a/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java +++ b/contrib/adsabs/src/test/org/adsabs/lucene/BenchmarkAuthorSearch.java @@ -40,7 +40,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.junit.BeforeClass; @@ -77,7 +77,7 @@ public void setUp() throws Exception { dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MultiFieldAnalyzer()) - .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000))); + .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000))); Document doc = new Document(); FieldType customType = new FieldType(store ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED); @@ -125,7 +125,7 @@ public void setUp() throws Exception { //surname do { - surname = _TestUtil.randomSimpleString(random()).toLowerCase().replace(",", "").trim(); + surname = TestUtil.randomSimpleString(random()).toLowerCase().replace(",", "").trim(); } while (surname.length() == 0); name.append(surname); @@ -135,9 +135,9 @@ public void setUp() throws Exception { n0.setStringValue(surname); //#initials - int noi = _TestUtil.nextInt(random(), 0, 4); + int noi = TestUtil.nextInt(random(), 0, 4); for (int j = 0; j < noi; j++) { - String namePart = names[_TestUtil.nextInt(random(), 0, names.length-1)]; + String namePart = names[TestUtil.nextInt(random(), 0, names.length-1)]; name.append(namePart); name.append(" "); wild.append(namePart); @@ -419,7 +419,7 @@ private List getIndexData(int[] randomIds) throws IOException { Document doc = reader.document(docs.scoreDocs[0].doc); String original = doc.get("original").toString(); String[] parts = original.split("\\,? 
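
The test classes migrate from per-instance `getSchemaFile()`/`getSolrConfigFile()` overrides to SolrTestCaseJ4-style static initialization, and from `getSchema()` to `getLatestSchema()`. The skeleton of the new setup (class name and paths are placeholders):

```java
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.IndexSchema;
import org.junit.BeforeClass;

public class SomeAdsabsTest extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    // allow loading schema/config from outside the core's conf/ directory
    System.setProperty("solr.allow.unsafe.resourceloading", "true");
    initCore("/path/to/solrconfig.xml", "/path/to/schema.xml"); // placeholder paths
  }

  public void testSchema() {
    IndexSchema schema = h.getCore().getLatestSchema(); // getSchema() is gone in 4.x
    assertNotNull(schema.getField("id"));
  }
}
```
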
"); - int howMany = _TestUtil.nextInt(random(), 0, parts.length-1); // how many initials + int howMany = TestUtil.nextInt(random(), 0, parts.length-1); // how many initials data.add(new TestCase(original, parts, howMany)); } return data; @@ -463,7 +463,7 @@ private void verifySearch(int[] randomIds) throws IOException { } private Query[] buildQueries(String[] parts) throws UnsupportedEncodingException { - int howMany = _TestUtil.nextInt(random(), 0, parts.length-1); // how many initials + int howMany = TestUtil.nextInt(random(), 0, parts.length-1); // how many initials Query[] queries = new Query[9]; queries[0] = getRegexQuery(parts, howMany, false); queries[1] = getRegexpQuery(parts, howMany, false); @@ -595,7 +595,7 @@ private String getRegexQueryString(String[] parts, int howMany, boolean truncate private int[] getRandomIds(int i) { int[] randomIds = new int[Math.min(numDocs, i)]; for (int j = 0; j < randomIds.length; j++) { - randomIds[j] = _TestUtil.nextInt(random(), 0, numDocs-1); + randomIds[j] = TestUtil.nextInt(random(), 0, numDocs-1); } return randomIds; } diff --git a/contrib/adsabs/src/test/org/apache/lucene/analysis/core/TestAcronymFilter.java b/contrib/adsabs/src/test/org/apache/lucene/analysis/core/TestAcronymFilter.java index 47c32ec3a..c435857bb 100644 --- a/contrib/adsabs/src/test/org/apache/lucene/analysis/core/TestAcronymFilter.java +++ b/contrib/adsabs/src/test/org/apache/lucene/analysis/core/TestAcronymFilter.java @@ -7,18 +7,19 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.util.Version; import org.apache.solr.analysis.AcronymTokenFilterFactory; public class TestAcronymFilter extends BaseTokenStreamTestCase { public void testReplace() throws Exception { - AcronymTokenFilterFactory factory = new AcronymTokenFilterFactory(); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(new HashMap() {{ - put("emitBoth", "false"); - put("prefix", "acr::"); - put("setType", "ACRONYM"); - }}); + AcronymTokenFilterFactory factory = new AcronymTokenFilterFactory(new HashMap() {{ + put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + put("emitBoth", "false"); + put("prefix", "acr::"); + put("setType", "ACRONYM"); + }}); + factory.setExplicitLuceneMatchVersion(true); TokenStream stream = factory.create(new MockTokenizer(new StringReader("mit MIT"), MockTokenizer.WHITESPACE, false)); assertTokenStreamContents(stream, @@ -36,13 +37,13 @@ public void testReplace() throws Exception { public void testAdd() throws Exception { - AcronymTokenFilterFactory factory = new AcronymTokenFilterFactory(); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(new HashMap() {{ - put("emitBoth", "true"); - put("prefix", "acr::"); - put("setType", "ACRONYM"); - }}); + AcronymTokenFilterFactory factory = new AcronymTokenFilterFactory(new HashMap() {{ + put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + put("emitBoth", "true"); + put("prefix", "acr::"); + put("setType", "ACRONYM"); + }}); + factory.setExplicitLuceneMatchVersion(true); TokenStream stream = factory.create(new MockTokenizer(new StringReader("M MIT"), MockTokenizer.WHITESPACE, false)); assertTokenStreamContents(stream, diff --git a/contrib/adsabs/src/test/org/apache/lucene/analysis/synonym/TestNewMultiWordSynonyms.java b/contrib/adsabs/src/test/org/apache/lucene/analysis/synonym/TestNewMultiWordSynonyms.java index 5b9f7c84a..6b9d15486 100644 
--- a/contrib/adsabs/src/test/org/apache/lucene/analysis/synonym/TestNewMultiWordSynonyms.java +++ b/contrib/adsabs/src/test/org/apache/lucene/analysis/synonym/TestNewMultiWordSynonyms.java @@ -65,12 +65,11 @@ private StringMockResourceLoader getSolrSingleSyn() { public void testSingleWordSolrSynonyms() throws IOException { - SynonymFilterFactory factory = new SynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); args.put("tokenizerFactory", KeywordTokenizerFactory.class.getCanonicalName().toString()); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(getSolrSingleSyn()); TokenStream ts = factory.create(new MockTokenizer(new StringReader("žščřdťň, á"), MockTokenizer.KEYWORD, false)); assertTokenStreamContents(ts, new String[] { "žščřdťň, á", "zscrdtn, a" }, @@ -82,13 +81,12 @@ public void testSingleWordSolrSynonyms() throws IOException { } public void testSingleWordSemicolonSynonyms() throws IOException { - NewSynonymFilterFactory factory = new NewSynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); args.put("format", "semicolon"); args.put("tokenizerFactory", KeywordTokenizerFactory.class.getCanonicalName().toString()); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(getSemicolonSingleSyn()); TokenStream ts = factory.create(new MockTokenizer(new StringReader("žščřdťň, á"), MockTokenizer.KEYWORD, false)); assertTokenStreamContents(ts, new String[] { "žščřdťň, á", "zscrdtn, a" }, @@ -117,6 +115,10 @@ public void testSingleWordSemicolonSynonyms() throws IOException { * 2: telescope */ public static class TestParserReplaceNullsInclOrig extends NewSynonymFilterFactory.SynonymBuilderFactory { + public TestParserReplaceNullsInclOrig(Map args) { + super(args); + } + protected SynonymParser getParser(Analyzer analyzer) { return new NewSolrSynonymParser(true, true, analyzer) { @Override @@ -131,11 +133,10 @@ public void add(CharsRef input, CharsRef output, boolean includeOrig) { * @since solr 1.4 */ public void testMultiWordSynonyms() throws IOException { - SynonymFilterFactory factory = new SynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(new StringMockResourceLoader("a b c,d")); TokenStream ts = factory.create(new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false)); // This fails because ["e","e"] is the value of the token stream @@ -147,14 +148,13 @@ public void testMultiWordSynonyms() throws IOException { public void testMultiWordSynonymsReplaceNullsCustomInclOrigAnalyzer() throws IOException { - NewSynonymFilterFactory factory = new NewSynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); args.put("tokenizerFactory", "org.apache.lucene.analysis.core.KeywordTokenizerFactory"); args.put("builderFactory", NewSynonymFilterFactory.BestEffortSearchLowercase.class.getName()); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + 
args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(getSyn()); @@ -195,15 +195,14 @@ public void testMultiWordSynonymsReplaceNullsCustomInclOrigAnalyzer() throws IOE public void testMultiWordSynonymsReplaceNullsInclOrig() throws IOException { - NewSynonymFilterFactory factory = new NewSynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); args.put("ignoreCase", "true"); args.put("tokenizerFactory", "org.apache.lucene.analysis.core.KeywordTokenizerFactory"); args.put("builderFactory", TestParserReplaceNullsInclOrig.class.getName()); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(getSyn()); @@ -245,15 +244,14 @@ public void testMultiWordSynonymsReplaceNullsInclOrig() throws IOException { public void testMultiWordSynonymsNullReplaced() throws IOException { - NewSynonymFilterFactory factory = new NewSynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); args.put("ignoreCase", "false"); args.put("tokenizerFactory", "org.apache.lucene.analysis.core.KeywordTokenizerFactory"); args.put("builderFactory", NewSynonymFilterFactory.MultiTokenReplaceNulls.class.getName()); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(getSyn()); @@ -296,13 +294,12 @@ public void testMultiWordSynonymsNullReplaced() throws IOException { public void testMultiWordSynonymsDefault() throws IOException { - NewSynonymFilterFactory factory = new NewSynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); args.put("tokenizerFactory", "org.apache.lucene.analysis.core.KeywordTokenizerFactory"); - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(getSyn()); @@ -347,15 +344,13 @@ public void testMultiWordSynonymsDefault() throws IOException { */ public void testMultiWordSynonymsInclOrig() throws IOException { - NewSynonymFilterFactory factory = new NewSynonymFilterFactory(); Map args = new HashMap(); args.put("synonyms", "synonyms.txt"); args.put("ignoreCase", "true"); args.put("tokenizerFactory", "org.apache.lucene.analysis.core.KeywordTokenizerFactory"); args.put("builderFactory", NewSynonymFilterFactory.AlwaysIncludeOriginal.class.getName()); - - factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); - factory.init(args); + args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString()); + NewSynonymFilterFactory factory = new NewSynonymFilterFactory(args); factory.inform(getSyn()); @@ -414,4 +409,13 @@ public T newInstance(String cname, Class expectedType) { public InputStream openResource(String resource) throws IOException { return new ByteArrayInputStream(text.getBytes("UTF-8")); } + + @Override + public Class findClass(String cname, Class expectedType) { + try { + return Class.forName(cname, true, Thread.currentThread().getContextClassLoader()).asSubclass(expectedType); + } catch (Exception e) { + throw new RuntimeException("Cannot load class: " + cname, e); + } + } } diff --git 
a/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java b/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java index f170b2b5f..a76b434fb 100644 --- a/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java +++ b/contrib/adsabs/src/test/org/apache/lucene/queryparser/flexible/aqp/TestAqpAdsabs.java @@ -1,18 +1,13 @@ package org.apache.lucene.queryparser.flexible.aqp; -import java.io.IOException; import java.io.Reader; import java.util.Map.Entry; import java.util.regex.Pattern; -import monty.solr.util.MontySolrSetup; - -import org.apache.commons.lang.StringEscapeUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.pattern.PatternTokenizer; -import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queryparser.flexible.aqp.AqpAdsabsQueryParser; import org.apache.lucene.queryparser.flexible.aqp.AqpQueryParser; import org.apache.lucene.queryparser.flexible.aqp.AqpTestAbstractCase; @@ -28,9 +23,6 @@ import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.Version; -import org.junit.AfterClass; -import org.junit.BeforeClass; public class TestAqpAdsabs extends AqpTestAbstractCase { @@ -61,19 +53,14 @@ public AqpQueryParser getParser() throws Exception { } public void testAnalyzers() throws Exception { - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); Analyzer pa = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { PatternTokenizer filter; - try { - filter = new PatternTokenizer(reader, Pattern.compile("\\|"), -1); - return new TokenStreamComponents(filter); - } catch (IOException e) { - e.printStackTrace(); - } - return null; + filter = new PatternTokenizer(reader, Pattern.compile("\\|"), -1); + return new TokenStreamComponents(filter); } }; @@ -90,7 +77,7 @@ public void testAuthorField() throws Exception { // note: nothing too much exciting here - the real tests must be done with the // ADS author query, and for that we will need solr unittests - so for now, just basic stuff - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); assertQueryEquals("author:\"A Einstein\"", null, "author:\"a einstein\"", PhraseQuery.class); // probably, this should construct a different query (a phrase perhaps) @@ -118,7 +105,7 @@ public void testAcronyms() throws Exception { * @throws Exception */ public void testIdentifiers() throws Exception { - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); Query q = null; assertQueryEquals("arXiv:1012.5859", wsa, "arxiv:1012.5859"); assertQueryEquals("xfield:10.1086/345794", wsa, "xfield:10.1086/345794"); @@ -145,7 +132,7 @@ public void testIdentifiers() throws Exception { * @throws Exception */ public void testDateRanges() throws Exception { - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); assertQueryEquals("intitle:\"QSO\" 1995-2000", null, "+intitle:qso +date:[1995 
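
Two mechanical changes run through this test class: `Version.LUCENE_CURRENT` becomes the test framework's randomized `TEST_VERSION_CURRENT`, and `PatternTokenizer`'s constructor no longer declares `IOException`, so the try/catch wrapper disappears. The resulting analyzer, shown in isolation:

```java
import java.io.Reader;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;

final class PatternAnalyzerSketch {
  static final Analyzer PIPE_SPLITTER = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      // 4.x PatternTokenizer no longer throws IOException from its constructor
      return new TokenStreamComponents(
          new PatternTokenizer(reader, Pattern.compile("\\|"), -1));
    }
  };
}
```
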
TO 2000]"); @@ -180,7 +167,7 @@ public void testDateRanges() throws Exception { */ public void testRanges() throws Exception { - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); assertQueryEquals("[20020101 TO 20030101]", null, "[20020101 TO 20030101]"); assertQueryEquals("[20020101 TO 20030101]^0.5", null, "[20020101 TO 20030101]^0.5"); @@ -217,7 +204,7 @@ public void testRanges() throws Exception { public void testModifiers() throws Exception { - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); assertQueryEquals("jakarta^4 apache", null, "+jakarta^4.0 +apache"); assertQueryEquals("\"jakarta apache\"^4 \"Apache Lucene\"", null, "+\"jakarta apache\"^4.0 +\"apache lucene\""); @@ -380,7 +367,7 @@ public void testWildCards() throws Exception { * @throws Exception */ public void testEscaped() throws Exception { - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); assertQueryEquals("\\(1\\+1\\)\\:2", wsa, "(1+1):2", TermQuery.class); assertQueryEquals("th\\*is", wsa, "th*is", TermQuery.class); assertQueryEquals("a\\\\\\\\+b", wsa, "a\\\\+b", TermQuery.class); @@ -397,7 +384,7 @@ public void testEscaped() throws Exception { */ public void testBasics() throws Exception{ - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); KeywordAnalyzer kwa = new KeywordAnalyzer(); assertQueryEquals("keyword:\"planets and satellites\"", wsa, "keyword:\"planets and satellites\"", PhraseQuery.class); @@ -568,7 +555,7 @@ public void _testMultiToken() throws Exception{ public void testRegex() throws Exception{ - WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); + WhitespaceAnalyzer wsa = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); assertQueryEquals("/foo$/", wsa, "/foo$/", RegexpQuery.class); assertQueryEquals("keyword:/foo$/", wsa, "keyword:/foo$/", RegexpQuery.class); assertQueryEquals("keyword:/^foo$/", wsa, "keyword:/^foo$/", RegexpQuery.class); diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java index f050c242e..a2ece19b7 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeAffiliationText.java @@ -22,6 +22,7 @@ import monty.solr.util.MontySolrSetup; import org.apache.lucene.search.PhraseQuery; +import org.junit.BeforeClass; /** * Test for the affiliation_text type @@ -29,23 +30,26 @@ */ public class TestAdsabsTypeAffiliationText extends MontySolrQueryTestCase { - - @Override - public String getSchemaFile() { + @BeforeClass + public static void beforeClass() throws Exception { + System.setProperty("solr.allow.unsafe.resourceloading", "true"); + + /* makeResourcesVisible(this.solrConfig.getResourceLoader(), new String[] {MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/solr/collection1/conf", MontySolrSetup.getSolrHome() + "/example/solr/collection1/conf" }); - return MontySolrSetup.getMontySolrHome() - + "/contrib/examples/adsabs/solr/collection1/conf/schema.xml"; - - } - - public String getSolrConfigFile() { - return MontySolrSetup.getMontySolrHome() - + 
"/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml"; + */ + + schemaString = MontySolrSetup.getMontySolrHome() + + "/contrib/examples/adsabs/solr/collection1/conf/schema.xml"; + + configString = MontySolrSetup.getMontySolrHome() + + "/contrib/examples/adsabs/solr/collection1/conf/solrconfig.xml"; + + initCore(configString, schemaString); } - + public void test() throws Exception { diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorCollectorFactory.java b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorCollectorFactory.java index 7418ca9e7..e772a741d 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorCollectorFactory.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/author/TestAuthorCollectorFactory.java @@ -23,7 +23,7 @@ public class TestAuthorCollectorFactory extends BaseTokenStreamTestCase { public void testCollector() throws IOException, InterruptedException { - AuthorCollectorFactory factory = new AuthorCollectorFactory(); + File tmpFile = File.createTempFile("variants", ".tmp"); Map args = new HashMap(); @@ -31,12 +31,12 @@ public void testCollector() throws IOException, InterruptedException { args.put("tokenTypes", AuthorUtils.AUTHOR_TRANSLITERATED); args.put("emitTokens", "false"); + AuthorCollectorFactory factory = new AuthorCollectorFactory(args); factory.setExplicitLuceneMatchVersion(true); - factory.init(args); factory.inform(new ClasspathResourceLoader(getClass())); - AuthorNormalizeFilterFactory normFactory = new AuthorNormalizeFilterFactory(); - AuthorTransliterationFactory transliteratorFactory = new AuthorTransliterationFactory(); + AuthorNormalizeFilterFactory normFactory = new AuthorNormalizeFilterFactory(new HashMap()); + AuthorTransliterationFactory transliteratorFactory = new AuthorTransliterationFactory(new HashMap()); //create the synonym writer for the test MÜLLER, BILL TokenStream stream = new PatternTokenizer(new StringReader("MÜLLER, BILL"), Pattern.compile(";"), -1); @@ -46,7 +46,7 @@ public void testCollector() throws IOException, InterruptedException { while (ts.incrementToken() != false) { //pass } - ts.reset(); + ts.end(); WriteableSynonymMap synMap = factory.getSynonymMap(); assertTrue(synMap.containsKey("MULLER, BILL")); @@ -85,7 +85,7 @@ public void testCollector() throws IOException, InterruptedException { while (ts.incrementToken() != false) { //pass } - ts.reset(); + ts.end(); assertFalse(synMap.containsKey("MÜLLER, BILL")); assertFalse(synMap.containsKey("MÜller, Bill")); @@ -101,7 +101,8 @@ public void testCollector() throws IOException, InterruptedException { while (ts.incrementToken() != false) { //pass } - + ts.end(); + // now test the map is correctly written to disk synMap.persist(); checkOutput(tmpFile, @@ -137,10 +138,8 @@ public void testCollector() throws IOException, InterruptedException { System.gc(); */ - // trick the filter into persisting itself - ts.reset(); - ts.reset(); - ts.reset(); + // persist the map + ts.close(); checkOutput(tmpFile, "MULLER\\,\\ BILL=>MÜLLER\\,\\ BILL", diff --git a/contrib/examples/adsabs/solr/collection1/conf/schema.xml b/contrib/examples/adsabs/solr/collection1/conf/schema.xml index 4107d59b8..d546a8fcc 100644 --- a/contrib/examples/adsabs/solr/collection1/conf/schema.xml +++ b/contrib/examples/adsabs/solr/collection1/conf/schema.xml @@ -92,7 +92,7 @@ + tokenTypes="AUTHOR_QUERY_VARIANT" emitTokens="false" /> @@ -113,7 +113,7 @@ ignoreCase="true" expand="true" tokenizerFactory="solr.KeywordTokenizerFactory" /> + 
tokenTypes="AUTHOR_QUERY_VARIANT" emitTokens="false" /> + tokenTypes="AUTHOR_QUERY_VARIANT" emitTokens="false" /> + tokenTypes="AUTHOR_QUERY_VARIANT" emitTokens="false" /> @@ -255,7 +255,7 @@ + emitTokens="false" /> @@ -278,7 +278,7 @@ + tokenTypes="AUTHOR_QUERY_VARIANT" emitTokens="false" /> @@ -299,7 +299,7 @@ ignoreCase="true" expand="true" tokenizerFactory="solr.KeywordTokenizerFactory" /> + tokenTypes="AUTHOR_QUERY_VARIANT" emitTokens="false" /> + emitTokens="false" />