From e984b65b4792e6698387e7aa3f577e5b62356322 Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Mon, 15 Jan 2024 20:36:16 -0500 Subject: [PATCH 01/10] Add new dependencies --- montysolr/build.gradle.kts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/montysolr/build.gradle.kts b/montysolr/build.gradle.kts index e27ee67a4..751b90934 100644 --- a/montysolr/build.gradle.kts +++ b/montysolr/build.gradle.kts @@ -1,6 +1,7 @@ plugins { java antlr + kotlin("jvm") version "1.9.22" } repositories { @@ -23,10 +24,16 @@ dependencies { implementation("com.anyascii:anyascii:0.3.2") implementation("org.python:jython-standalone:2.7.3") + implementation("me.lemire.integercompression:JavaFastPFOR:0.1.12") + implementation("it.unimi.dsi:fastutil-core:8.5.12") + testImplementation("junit:junit:4.13.2") testImplementation("org.antlr:stringtemplate:3.2.1") testImplementation("org.apache.solr:solr-test-framework:7.7.3") testImplementation("org.apache.lucene:lucene-test-framework:7.7.3") + + testImplementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.8.0-RC2") + testImplementation(kotlin("stdlib-jdk8")) } java { From fed04ec6aaa2a351148a184aaa44e926264fa574 Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Mon, 15 Jan 2024 20:39:01 -0500 Subject: [PATCH 02/10] Remove dead code and code comments --- .../apache/solr/search/CitationLRUCache.java | 38 +------------------ 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java index c92cd4134..7aa1ea409 100644 --- a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java +++ b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java @@ -472,37 +472,11 @@ private void warmIncrementally(SolrIndexSearcher searcher, SolrCache old) // collect ids of documents that need to be reloaded/regenerated during this // warmup run - // System.out.println("searcher: " + searcher.toString()); - // System.out.println("maxDoc: " + searcher.getIndexReader().maxDoc()); FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc()); - - // System.out.println("version=" + searcher.getIndexReader().getVersion()); - // try { - // System.out.println("commit=" + searcher.getIndexReader().getIndexCommit()); - // } catch (IOException e2) { - // TODO Auto-generated catch block - // e2.printStackTrace(); - // } - - // for (IndexReaderContext c : searcher.getTopReaderContext().children()) { - // //System.out.println("context=" + c.reader().getCombinedCoreAndDeletesKey()); - // } - - // for (IndexReaderContext l : searcher.getIndexReader().leaves()) { - // //System.out.println(l); - // } - Bits liveDocs = searcher.getSlowAtomicReader().getLiveDocs(); - // System.out.println(liveDocs == null ? "liveDocs=" + null : "liveDocs=" + - // liveDocs.length()); - // System.out.println("numDeletes=" + - // searcher.getAtomicReader().numDeletedDocs()); if (liveDocs == null) { // everything is new, this could be fresh index or merged/optimized index too - // searcher.getAtomicReader().getContext().children().size() - - // other.map.clear(); // force regeneration toRefresh.set(0, toRefresh.length()); // Build the mapping from indexed values into lucene ids @@ -516,17 +490,7 @@ public void set(int docbase, int docid, Object value) { } }); - } else if (liveDocs != null) { - - Integer luceneId; - for (V v : other.relationships.values()) { - luceneId = ((Integer) v); - if (luceneId <= liveDocs.length() && !liveDocs.get(luceneId)) { // doc was either deleted or updated - // System.out.println("Found deleted: " + luceneId); - // retrieve all citations/references for this luceneId and mark these docs to be - // refreshed - } - } + } else { for (int i = 0; i < toRefresh.length(); i++) { if (liveDocs.get(i)) { From 01626a18cf21582565e37d424d38e9204d3a47d0 Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Mon, 15 Jan 2024 21:36:41 -0500 Subject: [PATCH 03/10] Resolve flaky test --- .../batch/BatchProviderDumpCitationCache.java | 69 +++++++++++-------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java b/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java index bc1d859ae..acaf0b337 100644 --- a/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java +++ b/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java @@ -7,6 +7,8 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.CitationCache; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.RefCounted; import java.io.BufferedWriter; import java.io.File; @@ -50,39 +52,52 @@ public void run(SolrQueryRequest req, BatchHandlerRequestQueue queue) throws Exc if (!returnDocids) { - SortedDocValues uniqueValueCache = req.getSearcher().getSlowAtomicReader().getSortedDocValues(uniqueField); - int paperid = 0; - while (it.hasNext()) { - int[][] data = it.next(); - int[] references = data[0]; - //TODO:rca - have a feeling this has become too convoluted - // and there must be a better way to un-invert; especially - // with docvalues - if (references != null && references.length > 0) { - if (uniqueValueCache.advanceExact(paperid)) { - ret = uniqueValueCache.binaryValue(); - out.write(ret.utf8ToString()); - out.write("\t"); - first = true; - for (int luceneDocId : references) { - if (luceneDocId == -1) - continue; - - uniqueValueCache.advanceExact(luceneDocId); + RefCounted searcherRef = req.getCore().getRealtimeSearcher(); + try { + SortedDocValues uniqueValueCache = searcherRef.get() + .getSlowAtomicReader().getSortedDocValues(uniqueField); + + int paperid = 0; + while (it.hasNext()) { + int[][] data = it.next(); + int[] references = data[0]; + //TODO:rca - have a feeling this has become too convoluted + // and there must be a better way to un-invert; especially + // with docvalues + if (references != null && references.length > 0) { + if (uniqueValueCache.advanceExact(paperid)) { ret = uniqueValueCache.binaryValue(); - - if (ret.length > 0) { - if (!first) { - out.write("\t"); + out.write(ret.utf8ToString()); + out.write("\t"); + first = true; + + // It's not possible to reset the SortedDocValues iterator, so create a new one and + // seek to the appropriate point for each referenced document. + SortedDocValues referenceValueCache = searcherRef.get() + .getSlowAtomicReader().getSortedDocValues(uniqueField); + for (int luceneDocId : references) { + if (luceneDocId == -1) + continue; + + if (referenceValueCache.advanceExact(luceneDocId)) { + ret = referenceValueCache.binaryValue(); + + if (ret.length > 0) { + if (!first) { + out.write("\t"); + } + out.write(ret.utf8ToString()); + first = false; + } } - out.write(ret.utf8ToString()); - first = false; } + out.write("\n"); } - out.write("\n"); } + paperid++; } - paperid++; + } finally { + searcherRef.decref(); } } else { int paperid = 0; From 4101a092526f0023386de78fde65193a7cb5afac Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Mon, 15 Jan 2024 21:36:58 -0500 Subject: [PATCH 04/10] Remove dead code --- .../apache/solr/search/CitationLRUCache.java | 30 ++----------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java index 7aa1ea409..3c68d69a3 100644 --- a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java +++ b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java @@ -639,32 +639,9 @@ private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List fie continue; } - SchemaField fSchema = schema.getField(field); DocValuesType fType = fi.getDocValuesType(); - Map mapping = new HashMap(); - final LeafReader unReader; - - if (fType.equals(DocValuesType.NONE)) { - Class c = fType.getClass(); - if (c.isAssignableFrom(TextField.class) || c.isAssignableFrom(StrField.class)) { - if (fSchema.multiValued()) { - mapping.put(field, Type.SORTED); - } else { - mapping.put(field, Type.BINARY); - } - } else if (c.isAssignableFrom(TrieIntField.class)) { - if (fSchema.multiValued()) { - mapping.put(field, Type.SORTED_SET_INTEGER); - } else { - mapping.put(field, Type.INTEGER_POINT); - } - } else { - continue; - } - unReader = UninvertingReader.wrap(lr, mapping::get); - } else { - unReader = lr; - } + final LeafReader unReader = lr; + switch (fType) { case NUMERIC: @@ -699,12 +676,9 @@ public void process(int docBase, int docId) throws IOException { case SORTED_SET: transformer = new Transformer() { final SortedSetDocValues dv = unReader.getSortedSetDocValues(field); - final int errs = 0; @Override public void process(int docBase, int docId) throws IOException { - if (errs > 5) - return; if (dv.advanceExact(docId)) { for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) { final BytesRef value = dv.lookupOrd(ord); From 69e822bec31239e385f95093ee0ddfba71ff9567 Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Mon, 15 Jan 2024 21:54:08 -0500 Subject: [PATCH 05/10] Replace custom int arraylist with library version --- .../apache/solr/search/CitationLRUCache.java | 108 +++--------------- 1 file changed, 19 insertions(+), 89 deletions(-) diff --git a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java index 3c68d69a3..3312eb6d1 100644 --- a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java +++ b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java @@ -17,6 +17,7 @@ package org.apache.solr.search; +import it.unimi.dsi.fastutil.ints.IntArrayList; import org.apache.commons.lang.NotImplementedException; import org.apache.lucene.index.*; import org.apache.lucene.util.Bits; @@ -799,77 +800,6 @@ public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, } } - /** - * Efficient resizable auto-expanding list holding int elements; - * implemented with arrays. - */ - private static final class ArrayIntList { - - private int[] elements; - private int size = 0; - - public ArrayIntList(int initialCapacity) { - elements = new int[initialCapacity]; - } - - public void add(int elem) { - if (size == elements.length) - ensureCapacity(size + 1); - elements[size++] = elem; - } - - public int[] getElements() { - int[] out = new int[size]; - System.arraycopy(elements, 0, out, 0, size); - return out; - } - - public int get(int index) { - if (index >= size) - throwIndex(index); - return elements[index]; - } - - public int size() { - return size; - } - - private void ensureCapacity(int minCapacity) { - int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1); - int[] newElements = new int[newCapacity]; - System.arraycopy(elements, 0, newElements, 0, size); - elements = newElements; - } - - private void throwIndex(int index) { - throw new IndexOutOfBoundsException("index: " + index + ", size: " + size); - } - - public String toString() { - return Arrays.toString(elements); - } - - /** - * returns the first few positions (without offsets); debug only - */ - @SuppressWarnings("unused") - public String toString(int stride) { - int s = size() / stride; - int len = Math.min(10, s); // avoid printing huge lists - StringBuilder buf = new StringBuilder(4 * len); - buf.append("["); - for (int i = 0; i < len; i++) { - buf.append(get(i * stride)); - if (i < len - 1) - buf.append(", "); - } - if (len != s) - buf.append(", ..."); // and some more... - buf.append("]"); - return buf.toString(); - } - } - /* * The main datastructure holding information about the lucene documents. * @@ -885,15 +815,15 @@ public String toString(int stride) { public class RelationshipLinkedHashMap extends LinkedHashMap { private static final long serialVersionUID = -356203002886265188L; int slimit; - List references; - List citations; + List references; + List citations; public RelationshipLinkedHashMap(int initialSize, float ratio, boolean accessOrder, int limit, Float sizeInPercent) { super(initialSize, ratio, accessOrder); slimit = limit; - references = new ArrayList(0); // just to prevent NPE - normally, is - citations = new ArrayList(0); // initialized in initializeCitationCache + references = new ArrayList<>(0); // just to prevent NPE - normally, is + citations = new ArrayList<>(0); // initialized in initializeCitationCache } @SuppressWarnings("rawtypes") @@ -910,9 +840,9 @@ protected boolean removeEldestEntry(Map.Entry eldest) { public int[] getReferences(int docid) { if (docid < references.size() && references.get(docid) != null) { - ArrayIntList c = references.get(docid); + IntArrayList c = references.get(docid); if (c != null) - return c.getElements(); + return c.toIntArray(); } return null; } @@ -927,16 +857,16 @@ public int relationshipsDataSize() { public int[] getCitations(int docid) { if (docid < citations.size() && citations.get(docid) != null) { - ArrayIntList c = citations.get(docid); + IntArrayList c = citations.get(docid); if (c != null) - return c.getElements(); + return c.toIntArray(); } return null; } public void initializeCitationCache(int maxDocSize) { - references = new ArrayList(maxDocSize); - citations = new ArrayList(maxDocSize); + references = new ArrayList<>(maxDocSize); + citations = new ArrayList<>(maxDocSize); // i was hoping thi sis not necessary, but set(index, value) // throws errors otherwise @@ -974,12 +904,12 @@ public void addCitation(int sourceDocid, Integer targetDocid) { _add(citations, sourceDocid, targetDocid); } - private void _add(List target, int sourceDocid, int targetDocid) { + private void _add(List target, int sourceDocid, int targetDocid) { // System.out.println("_add(" + sourceDocid + "," + targetDocid+")"); if (target.get(sourceDocid) == null) { - ArrayIntList pointer = new ArrayIntList(1); + IntArrayList pointer = new IntArrayList(1); pointer.add(targetDocid); target.set(sourceDocid, pointer); } else { @@ -989,7 +919,7 @@ private void _add(List target, int sourceDocid, int targetDocid) { public void inferCitationsFromReferences() { int i = -1; - for (ArrayIntList refs : references) { + for (IntArrayList refs : references) { i += 1; if (refs == null) { continue; @@ -1004,7 +934,7 @@ public void inferCitationsFromReferences() { public void inferReferencesFromCitations() { int i = -1; - for (ArrayIntList refs : citations) { + for (IntArrayList refs : citations) { i += 1; if (refs == null) { continue; @@ -1030,11 +960,11 @@ public int[][] next() { throw new NoSuchElementException(); int[][] out = new int[2][]; - ArrayIntList v1 = references.get(cursor); - ArrayIntList v2 = citations.get(cursor); + IntArrayList v1 = references.get(cursor); + IntArrayList v2 = citations.get(cursor); - out[0] = v1 != null ? v1.getElements() : new int[0]; - out[1] = v2 != null ? v2.getElements() : new int[0]; + out[0] = v1 != null ? v1.toIntArray() : new int[0]; + out[1] = v2 != null ? v2.toIntArray() : new int[0]; cursor = i + 1; return out; From 01cc3845874d5691b59e3c4391480ee8e725bdb0 Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Tue, 16 Jan 2024 01:43:24 -0500 Subject: [PATCH 06/10] Test inner relationship map --- .../solr/search/TestCitationCacheSolr.java | 92 ++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java index b8ccac5f1..228b77ac0 100644 --- a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java +++ b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java @@ -16,22 +16,24 @@ */ package org.apache.solr.search; +import it.unimi.dsi.fastutil.ints.*; import monty.solr.util.MontySolrAbstractTestCase; -import monty.solr.util.MontySolrSetup; import monty.solr.util.SolrTestSetup; import org.apache.solr.request.SolrQueryRequest; import org.junit.BeforeClass; import org.junit.Test; -import java.io.IOException; import java.lang.ref.SoftReference; import java.lang.ref.WeakReference; import java.util.Arrays; import java.util.Iterator; +import java.util.Random; @SuppressWarnings({"rawtypes", "unchecked"}) public class TestCitationCacheSolr extends MontySolrAbstractTestCase { + private Random random; + @BeforeClass public static void beforeClass() throws Exception { schemaString = "solr/collection1/conf/schema-citations-transformer.xml"; @@ -114,6 +116,7 @@ public void createIndex() throws Exception { public void setUp() throws Exception { super.setUp(); createIndex(); + random = new Random(0L); } @Override @@ -656,6 +659,91 @@ public void test() throws Exception { } + @Test + public void testRelationshipMap() throws Exception { + CitationLRUCache.RelationshipLinkedHashMap map = + new CitationLRUCache.RelationshipLinkedHashMap<>(11, 0.75f, false, 1000, 0.75f); + map.initializeCitationCache(10); + + for (int i = 0; i < 10; i++) { + map.put("a"+i, i); + } + + assertEquals(10, map.size()); + + // Test citation map construction & recall + Int2ObjectMap citationMap = new Int2ObjectOpenHashMap<>(); + + for (int i = 0; i < 10; i++) { + int citations = random.nextInt(9); + IntSet set = new IntOpenHashSet(); + + for (int j = 0; j < citations; j++) { + int citedDoc = random.nextInt(10); + while (set.contains(citedDoc) || citedDoc == i) { + citedDoc = random.nextInt(10); + } + set.add(citedDoc); + + map.addCitation(i, citedDoc); + } + + citationMap.put(i, set); + } + + for (int i = 0; i < 10; i++) { + int[] citations = map.getCitations(i); + if (citations == null) + citations = new int[0]; + + int[] expected = citationMap.get(i).toIntArray(); + + Arrays.sort(citations != null ? citations : new int[0]); + Arrays.sort(expected != null ? expected : new int[0]); + + assertArrayEquals(expected, citations); + } + + // Test reference inference from citations + Int2ObjectMap referenceMap = new Int2ObjectOpenHashMap<>(); + + for (int i = 0; i < 10; i++) { + for (int j : citationMap.getOrDefault(i, new IntOpenHashSet())) { + int finalI = i; + referenceMap.compute(j, (k, v) -> { + if (v == null) { + v = new IntOpenHashSet(); + } + v.add(finalI); + return v; + }); + } + } + + map.inferReferencesFromCitations(); + + for (int i = 0; i < 10; i++) { + int[] references = map.getReferences(i); + if (references == null) + references = new int[0]; + + int[] expected = referenceMap.get(i).toIntArray(); + + Arrays.sort(references); + Arrays.sort(expected != null ? expected : new int[0]); + + assertArrayEquals(expected, references); + } + + // Test iterator + Iterator it = map.getRelationshipsIterator(); + for (int i = 0; i < 10; i++) { + int[][] data = it.next(); + assertEquals(referenceMap.getOrDefault(i, new IntArraySet()).size(), data[0].length); + assertEquals(citationMap.getOrDefault(i, new IntArraySet()).size(), data[1].length); + } + } + private int[][][] getCache(CitationLRUCache cache) { int[][][] results = new int[cache.getCitationsIteratorSize()][2][]; Iterator it = cache.getCitationGraph(); From 10d6faecd0f92c6896a733fa2dbf026e58c1bc34 Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Tue, 16 Jan 2024 01:43:42 -0500 Subject: [PATCH 07/10] Sort data before comparison --- .../apache/solr/search/TestCitationCacheSolr.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java index 228b77ac0..72ba5c639 100644 --- a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java +++ b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java @@ -618,17 +618,17 @@ public void test() throws Exception { if (cacheName.contains("from-references")) { int[][][] expected = new int[][][]{ - new int[][]{new int[]{3, 4, 2}, new int[0]}, + new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 10, 11}}, new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 11}}, new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11}}, - new int[][]{new int[]{3, 4, 2}, new int[0]}, + new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[0], new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, - new int[][]{new int[]{4, 2, 2}, new int[0]}, + new int[][]{new int[]{2, 2, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, }; @@ -637,9 +637,9 @@ public void test() throws Exception { int[][][] expected = new int[][][]{ new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, - new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 9, 3, 4, 5, 6, 7, 10, 11, 2}}, - new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 9, 3, 4, 5, 6, 7, 11, 2}}, - new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 9, 3, 4, 5, 6, 7, 10, 11, 2}}, + new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11}}, + new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 11}}, + new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11}}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, @@ -751,6 +751,9 @@ private int[][][] getCache(CitationLRUCache cache) { int j = 0; while (it.hasNext()) { int[][] data = it.next(); + Arrays.sort(data[0]); + Arrays.sort(data[1]); + results[j] = data; j += 1; } From 342ac5146e913b4be5dfdeb4935476705c3f380d Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Tue, 16 Jan 2024 01:43:50 -0500 Subject: [PATCH 08/10] Sort data before comparison --- .../apache/lucene/search/TestCitationsSearch.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java b/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java index a05926920..49e3d2348 100644 --- a/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java +++ b/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java @@ -150,8 +150,9 @@ public void testCitesCollector() throws Exception { int docid = es.getKey(); int[] docids = es.getValue(); for (int reference : docids) { - List a = Arrays.stream(citations.get(reference)).boxed().collect(Collectors.toList()); - List b = Arrays.stream(citationsWrapper.getLuceneDocIds(reference)).boxed().collect(Collectors.toList()); + List a = Arrays.stream(citations.get(reference)).boxed().sorted().collect(Collectors.toList()); + List b = Arrays.stream(citationsWrapper.getLuceneDocIds(reference)).boxed().sorted().collect(Collectors.toList()); + assertTrue(a.contains(docid)); assertTrue(b.contains(docid)); assertEquals(a, b); @@ -162,10 +163,9 @@ public void testCitesCollector() throws Exception { int docid = es.getKey(); int[] docids = es.getValue(); for (int reference : docids) { - List a = Arrays.stream(references.get(reference)).boxed().collect(Collectors.toList()); - List b = Arrays.stream(referencesWrapper.getLuceneDocIds(reference)).boxed().collect(Collectors.toList()); - Collections.sort(a); - Collections.sort(b); + List a = Arrays.stream(references.get(reference)).boxed().sorted().collect(Collectors.toList()); + List b = Arrays.stream(referencesWrapper.getLuceneDocIds(reference)).boxed().sorted().collect(Collectors.toList()); + assertTrue(a.contains(docid)); assertTrue(b.contains(docid)); assertEquals(docid + " produced diff cache results", a, b); From 1f10655b7d4260d41e2429ff47420ea5e5243e08 Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Tue, 16 Jan 2024 01:44:55 -0500 Subject: [PATCH 09/10] Use a map instead of an array to store citations --- .../apache/solr/search/CitationLRUCache.java | 74 ++++++++++--------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java index 3312eb6d1..622e3fd0c 100644 --- a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java +++ b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java @@ -17,6 +17,8 @@ package org.apache.solr.search; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import it.unimi.dsi.fastutil.ints.IntArrayList; import org.apache.commons.lang.NotImplementedException; import org.apache.lucene.index.*; @@ -27,9 +29,9 @@ import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.schema.*; -import org.apache.solr.uninverting.UninvertingReader; -import org.apache.solr.uninverting.UninvertingReader.Type; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -221,11 +223,11 @@ public int getCitationsIteratorSize() { } public void insertCitation(int sourceDocid, int targetDocid) { - ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addCitation(sourceDocid, targetDocid); + ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addCitation(sourceDocid, targetDocid); } public void insertReference(int sourceDocid, int targetDocid) { - ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addReference(sourceDocid, targetDocid); + ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addReference(sourceDocid, targetDocid); } public int[] getCitations(K key) { @@ -812,18 +814,19 @@ public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, * maxdoc size, so that no evictions happen */ @SuppressWarnings("hiding") - public class RelationshipLinkedHashMap extends LinkedHashMap { + public static class RelationshipLinkedHashMap extends LinkedHashMap { private static final long serialVersionUID = -356203002886265188L; int slimit; - List references; - List citations; + int maxDocSize; + Int2ObjectMap references; + Int2ObjectMap citations; public RelationshipLinkedHashMap(int initialSize, float ratio, boolean accessOrder, int limit, Float sizeInPercent) { super(initialSize, ratio, accessOrder); slimit = limit; - references = new ArrayList<>(0); // just to prevent NPE - normally, is - citations = new ArrayList<>(0); // initialized in initializeCitationCache + references = new Int2ObjectOpenHashMap<>(0); // just to prevent NPE - normally, is + citations = new Int2ObjectOpenHashMap<>(0); // initialized in initializeCitationCache } @SuppressWarnings("rawtypes") @@ -839,7 +842,7 @@ protected boolean removeEldestEntry(Map.Entry eldest) { } public int[] getReferences(int docid) { - if (docid < references.size() && references.get(docid) != null) { + if (references.containsKey(docid)) { IntArrayList c = references.get(docid); if (c != null) return c.toIntArray(); @@ -852,11 +855,11 @@ public Iterator getRelationshipsIterator() { } public int relationshipsDataSize() { - return citations.size(); + return maxDocSize; } public int[] getCitations(int docid) { - if (docid < citations.size() && citations.get(docid) != null) { + if (citations.containsKey(docid)) { IntArrayList c = citations.get(docid); if (c != null) return c.toIntArray(); @@ -865,16 +868,13 @@ public int[] getCitations(int docid) { } public void initializeCitationCache(int maxDocSize) { - references = new ArrayList<>(maxDocSize); - citations = new ArrayList<>(maxDocSize); - - // i was hoping thi sis not necessary, but set(index, value) - // throws errors otherwise - for (int i = 0; i < maxDocSize; i++) { - references.add(null); - citations.add(null); - } + references = new Int2ObjectOpenHashMap<>(maxDocSize); + citations = new Int2ObjectOpenHashMap<>(maxDocSize); + + references.defaultReturnValue(null); + citations.defaultReturnValue(null); + this.maxDocSize = maxDocSize; } public void addReference(int sourceDocid, Object value) { @@ -904,45 +904,47 @@ public void addCitation(int sourceDocid, Integer targetDocid) { _add(citations, sourceDocid, targetDocid); } - private void _add(List target, int sourceDocid, int targetDocid) { + private void _add(Int2ObjectMap target, int sourceDocid, int targetDocid) { // System.out.println("_add(" + sourceDocid + "," + targetDocid+")"); if (target.get(sourceDocid) == null) { IntArrayList pointer = new IntArrayList(1); pointer.add(targetDocid); - target.set(sourceDocid, pointer); + target.put(sourceDocid, pointer); } else { target.get(sourceDocid).add(targetDocid); } } public void inferCitationsFromReferences() { - int i = -1; - for (IntArrayList refs : references) { - i += 1; + for (Int2ObjectMap.Entry entry : references.int2ObjectEntrySet()) { + int i = entry.getIntKey(); + IntArrayList refs = entry.getValue(); if (refs == null) { continue; } + for (int j = 0; j < refs.size(); j++) { - if (refs.get(j) == -1) + if (refs.getInt(j) == -1) continue; - addCitation(refs.get(j), i); + addCitation(refs.getInt(j), i); } } } public void inferReferencesFromCitations() { - int i = -1; - for (IntArrayList refs : citations) { - i += 1; + for (Int2ObjectMap.Entry entry : citations.int2ObjectEntrySet()) { + int i = entry.getIntKey(); + IntArrayList refs = entry.getValue(); if (refs == null) { continue; } + for (int j = 0; j < refs.size(); j++) { - if (refs.get(j) == -1) + if (refs.getInt(j) == -1) continue; - addReference(refs.get(j), i); + addReference(refs.getInt(j), i); } } } @@ -951,12 +953,12 @@ private class CitationDataIterator implements Iterator { int cursor = 0; // index of next element to return public boolean hasNext() { - return cursor != citations.size(); + return cursor < maxDocSize; } public int[][] next() { int i = cursor; - if (i >= citations.size()) + if (i >= maxDocSize) throw new NoSuchElementException(); int[][] out = new int[2][]; From ad57af3bb6a4336576177f6e0816fc7e48a6124e Mon Sep 17 00:00:00 2001 From: Jean-Claude Paquin Date: Tue, 16 Jan 2024 02:05:29 -0500 Subject: [PATCH 10/10] Remove some more boxing where easy --- .../apache/solr/search/CitationLRUCache.java | 26 ++++++++++++------- .../solr/search/TestCitationCacheSolr.java | 2 +- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java index 622e3fd0c..31c143a99 100644 --- a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java +++ b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java @@ -125,7 +125,7 @@ public Object init(Map args, Object persistence, CacheRegenerator regenerator) { citationFields = ((String) args.get("citationFields")).split(","); } - Float sizeInPercent = null; + float sizeInPercent = 1.0f; String str = (String) args.get("size"); if (str != null && str.endsWith("%")) { @@ -139,7 +139,7 @@ public Object init(Map args, Object persistence, CacheRegenerator regenerator) { final int initialSize = Math.min(str == null ? 1024 : Integer.parseInt(str), limit); description = generateDescription(limit, initialSize); - relationships = new RelationshipLinkedHashMap(initialSize, 0.75f, true, limit, sizeInPercent); + relationships = new RelationshipLinkedHashMap(initialSize, 0.75f, true, limit); if (persistence == null) { // must be the first time a cache of this type is being created @@ -352,6 +352,7 @@ public void warm(SolrIndexSearcher searcher, SolrCache old) { log.error("Failed loading persisted cache " + name(), e); } } else { + //noinspection AutoBoxing log.info("Will not load the cache {} current index generation differs; dump:{} != index:{}", name(), CitationCacheReaderWriter.getCacheGeneration(getCacheStorageDir(searcher)), CitationCacheReaderWriter.getIndexGeneration(searcher)); } @@ -431,7 +432,7 @@ public void set(int docbase, int docid, Object value) { if (treatIdentifiersAsText && value instanceof Integer) { value = Integer.toString((Integer) value); } - put((K) value, (V) (Integer) (docbase + docid)); + put((K) value, (V) Integer.valueOf(docbase + docid)); } }); @@ -489,7 +490,7 @@ private void warmIncrementally(SolrIndexSearcher searcher, SolrCache old) @SuppressWarnings("unchecked") @Override public void set(int docbase, int docid, Object value) { - put((K) value, (V) (Integer) (docbase + docid)); + put((K) value, (V) Integer.valueOf(docbase + docid)); } }); @@ -551,7 +552,7 @@ public void set(int docbase, int docid, Object value) { private List getFields(SolrIndexSearcher searcher, String[] listOfFields) { - List out = new ArrayList(); + List out = new ArrayList<>(); IndexSchema schema = searcher.getCore().getLatestSchema(); if (schema.getUniqueKeyField() == null) { @@ -741,7 +742,7 @@ public String getSource() { return "$URL: http://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_4_0/solr/core/src/java/org/apache/solr/search/LRUCache.java $"; } - @SuppressWarnings({"rawtypes", "unchecked"}) + @SuppressWarnings({"rawtypes", "unchecked", "AutoBoxing"}) public NamedList getStatistics() { NamedList lst = new SimpleOrderedMap(); synchronized (relationships) { @@ -821,8 +822,7 @@ public static class RelationshipLinkedHashMap extends LinkedHashMap Int2ObjectMap references; Int2ObjectMap citations; - public RelationshipLinkedHashMap(int initialSize, float ratio, boolean accessOrder, int limit, - Float sizeInPercent) { + public RelationshipLinkedHashMap(int initialSize, float ratio, boolean accessOrder, int limit) { super(initialSize, ratio, accessOrder); slimit = limit; references = new Int2ObjectOpenHashMap<>(0); // just to prevent NPE - normally, is @@ -890,6 +890,10 @@ public void addReference(int sourceDocid, Integer targetDocid) { _add(references, sourceDocid, targetDocid); } + public void addReference(int sourceDocId, int targetDocId) { + _add(references, sourceDocId, targetDocId); + } + public void addCitation(int sourceDocid, Object value) { // System.out.println("addCitation(" + sourceDocid + ", " + value + ")"); if (this.containsKey(value)) { @@ -904,6 +908,10 @@ public void addCitation(int sourceDocid, Integer targetDocid) { _add(citations, sourceDocid, targetDocid); } + public void addCitation(int sourceDocId, int targetDocId) { + _add(citations, sourceDocId, targetDocId); + } + private void _add(Int2ObjectMap target, int sourceDocid, int targetDocid) { // System.out.println("_add(" + sourceDocid + "," + targetDocid+")"); @@ -981,7 +989,7 @@ public void remove() { @Override public void initializeCitationCache(int maxDocs) { - relationships = new RelationshipLinkedHashMap(maxDocs, 0.75f, true, 1024, 100f); + relationships = new RelationshipLinkedHashMap(maxDocs, 0.75f, true, 1024); ((RelationshipLinkedHashMap) relationships).initializeCitationCache(maxDocs); if (stats == null) stats = new CumulativeStats(); diff --git a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java index 72ba5c639..fe921b854 100644 --- a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java +++ b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java @@ -662,7 +662,7 @@ public void test() throws Exception { @Test public void testRelationshipMap() throws Exception { CitationLRUCache.RelationshipLinkedHashMap map = - new CitationLRUCache.RelationshipLinkedHashMap<>(11, 0.75f, false, 1000, 0.75f); + new CitationLRUCache.RelationshipLinkedHashMap<>(11, 0.75f, false, 1000); map.initializeCitationCache(10); for (int i = 0; i < 10; i++) {