Skip to content

Commit

Permalink
Merge pull request #517 from apache/5.0.X-backport
Browse files Browse the repository at this point in the history
5.0.x backport
  • Loading branch information
jmalkin authored Mar 6, 2024
2 parents b50100d + 14cf83b commit 4a020cc
Show file tree
Hide file tree
Showing 27 changed files with 594 additions and 410 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
Expand All @@ -46,7 +46,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
uses: github/codeql-action/autobuild@v3

# ℹ️ Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
Expand All @@ -59,6 +59,6 @@ jobs:
# ./location_of_script_within_repo/buildscript.sh

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"
2 changes: 1 addition & 1 deletion src/main/java/org/apache/datasketches/fdt/FdtSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
*
* @author Lee Rhodes
*/
public class FdtSketch extends ArrayOfStringsSketch {
public final class FdtSketch extends ArrayOfStringsSketch {

/**
* Create new instance of Frequent Distinct Tuples sketch with the given
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
* @author Lee Rhodes
* @author Kevin Lang
*/
class CouponHashSet extends CouponList {
final class CouponHashSet extends CouponList {

/**
* Constructs this sketch with the intent of loading it with data
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/org/apache/datasketches/hll/CouponList.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,23 @@ class CouponList extends AbstractCoupons {
int couponCount;
int[] couponIntArr;

/**
 * Pass-through check of lgConfigK that can be used as an argument expression.
 * When the mode is SET, asserts that lgConfigK is greater than 7; other modes are not checked.
 * Being an assert, the check is only evaluated when assertions are enabled.
 * @param curMode the current mode
 * @param lgConfigK the configured Lg K
 * @return lgConfigK, unchanged
 */
private static int checkLgConfigK(final CurMode curMode, final int lgConfigK) {
  assert (curMode != CurMode.SET) || (lgConfigK > 7);
  return lgConfigK;
}

/**
* New instance constructor for LIST or SET.
* @param lgConfigK the configured Lg K
* @param tgtHllType the configured HLL target
* @param curMode LIST or SET
*/
CouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode) {
super(lgConfigK, tgtHllType, curMode);
super(checkLgConfigK(curMode, lgConfigK), tgtHllType, curMode);
if (curMode == CurMode.LIST) {
lgCouponArrInts = LG_INIT_LIST_SIZE;
} else { //SET
lgCouponArrInts = LG_INIT_SET_SIZE;
assert lgConfigK > 7;
}
couponIntArr = new int[1 << lgCouponArrInts];
couponCount = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
/**
* @author Lee Rhodes
*/
class DirectAuxHashMap implements AuxHashMap {
final class DirectAuxHashMap implements AuxHashMap {
private final DirectHllArray host; //hosts the WritableMemory and read-only Memory
private final boolean readOnly;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
/**
* @author Lee Rhodes
*/
class DirectCouponHashSet extends DirectCouponList {
final class DirectCouponHashSet extends DirectCouponList {

//Constructs this sketch with data.
DirectCouponHashSet(final int lgConfigK, final TgtHllType tgtHllType,
Expand Down
18 changes: 10 additions & 8 deletions src/main/java/org/apache/datasketches/hll/DirectCouponList.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,21 @@ class DirectCouponList extends AbstractCoupons {
Memory mem;
final boolean compact;

//called from newInstance, writableWrap and DirectCouponHashSet
DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode,
final WritableMemory wmem) {
super(lgConfigK, tgtHllType, curMode);
/**
 * Asserts that the compact flag extracted from the given memory is not set and then
 * returns lgConfigK unchanged, so that the check can be used as an argument expression.
 * Being an assert, the flag is only extracted and checked when assertions are enabled.
 * @param wmem the memory handed to extractCompactFlag
 * @param lgConfigK the value to pass through
 * @return lgConfigK, unchanged
 */
private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) {
assert !extractCompactFlag(wmem);
return lgConfigK;
}

//called from newInstance, writableWrap and DirectCouponHashSet, must not be compact
DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, final WritableMemory wmem) {
super(checkMemCompactFlag(wmem, lgConfigK), tgtHllType, curMode);
this.wmem = wmem;
mem = wmem;
compact = extractCompactFlag(wmem);
assert !compact;
}

//called from HllSketch.wrap and from DirectCouponHashSet constructor, may be compact
DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode,
final Memory mem) {
//called from HllSketch.wrap and from DirectCouponHashSet constructor, may or may not be compact
DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, final Memory mem) {
super(lgConfigK, tgtHllType, curMode);
wmem = null;
this.mem = mem;
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/org/apache/datasketches/hll/DirectHllArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,19 @@ abstract class DirectHllArray extends AbstractHllArray {
long memAdd;
final boolean compact;

/**
 * Asserts that the compact flag extracted from the given memory is not set and then
 * returns lgConfigK unchanged, so that the check can be used as an argument expression.
 * Being an assert, the flag is only extracted and checked when assertions are enabled.
 * @param wmem the memory handed to extractCompactFlag
 * @param lgConfigK the value to pass through
 * @return lgConfigK, unchanged
 */
private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) {
assert !extractCompactFlag(wmem);
return lgConfigK;
}

//Memory must be already initialized and may have data
DirectHllArray(final int lgConfigK, final TgtHllType tgtHllType, final WritableMemory wmem) {
super(lgConfigK, tgtHllType, CurMode.HLL);
super(checkMemCompactFlag(wmem, lgConfigK), tgtHllType, CurMode.HLL);
this.wmem = wmem;
mem = wmem;
memObj = wmem.getArray();
memAdd = wmem.getCumulativeOffset(0L);
compact = extractCompactFlag(mem);
assert !compact;
insertEmptyFlag(wmem, false);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
* @author Lee Rhodes
* @author Kevin Lang
*/
class HeapAuxHashMap implements AuxHashMap {
final class HeapAuxHashMap implements AuxHashMap {
private final int lgConfigK; //required for #slot bits
private int lgAuxArrInts;
private int auxCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
* @author Alexander Saydakov
* @author Kevin Lang
*/
public class UniqueCountMap {
public final class UniqueCountMap {
private static final String LS = System.getProperty("line.separator");
private static final int NUM_LEVELS = 10; // total of single coupon + traverse + coupon maps + hll
private static final int NUM_TRAVERSE_MAPS = 3;
Expand Down
152 changes: 143 additions & 9 deletions src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,19 @@
import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH;

import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Objects;

import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries;
import org.apache.datasketches.quantilescommon.PartitioningFeature;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.datasketches.quantilescommon.QuantilesAPI;
import org.apache.datasketches.quantilescommon.QuantilesGenericAPI;
import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator;

Expand Down Expand Up @@ -154,7 +155,7 @@ public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searc
@Override
public GenericPartitionBoundaries<T> getPartitionBoundaries(final int numEquallySized,
final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); }
refreshSortedView();
return kllItemsSV.getPartitionBoundaries(numEquallySized, searchCrit);
}
Expand Down Expand Up @@ -307,13 +308,6 @@ MemoryRequestServer getMemoryRequestServer() {
@Override
abstract int getMinMaxSizeBytes();

/**
 * Returns the cached sorted view, constructing it from this sketch on first use.
 * @return the sorted view held in kllItemsSV
 */
private final KllItemsSketchSortedView<T> refreshSortedView() {
  if (kllItemsSV == null) {
    kllItemsSV = new KllItemsSketchSortedView<>(this);
  }
  return kllItemsSV;
}

abstract T[] getRetainedItemsArray();

@Override
Expand Down Expand Up @@ -374,4 +368,144 @@ void setWritableMemory(final WritableMemory wmem) {
throw new SketchesArgumentException(UNSUPPORTED_MSG + "Sketch not writable.");
}

/**
 * Folds the given item into the tracked minimum and maximum items.
 * If the sketch is empty the item becomes both the minimum and the maximum; otherwise
 * each is replaced by the result of Util.minT / Util.maxT against the current value.
 * @param item the item to fold in
 */
void updateMinMax(final T item) {
  if (isEmpty()) {
    setMinItem(item);
    setMaxItem(item);
    return;
  }
  // The minimum is read and written before the maximum is read, as in the original ordering.
  final T newMin = Util.minT(getMinItem(), item, comparator);
  setMinItem(newMin);
  final T newMax = Util.maxT(getMaxItem(), item, comparator);
  setMaxItem(newMax);
}

/**
 * Returns the cached sorted view, building it through CreateSortedView on first use.
 * @return the sorted view held in kllItemsSV
 */
private final KllItemsSketchSortedView<T> refreshSortedView() {
  if (kllItemsSV != null) { return kllItemsSV; }
  kllItemsSV = new CreateSortedView().getSV();
  return kllItemsSV;
}

/**
 * Builds the quantiles and cumulative-weights arrays for a KllItemsSketchSortedView
 * from the state of the enclosing sketch.
 */
private final class CreateSortedView {
// Retained items with the leading free space removed; allocated in getSV().
T[] quantiles;
// One weight per entry of quantiles; converted by KllHelper.convertToCumulative at the end.
long[] cumWeights;

/**
 * Creates the sorted view from the enclosing sketch.
 * @return a new KllItemsSketchSortedView
 * @throws SketchesArgumentException if the sketch is empty
 */
KllItemsSketchSortedView<T> getSV() {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
// NOTE(review): this looks redundant with the isEmpty() check above - confirm.
if (getN() == 0) { throw new SketchesArgumentException(EMPTY_MSG); }
final T[] srcQuantiles = getTotalItemsArray();
final int[] srcLevels = levelsArr;
final int srcNumLevels = getNumLevels();

// Level zero occupies [srcLevels[0], srcLevels[1]) and is sorted here if not flagged as sorted.
if (!isLevelZeroSorted()) {
Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1], comparator);
// The sorted flag is only recorded when the sketch has no Memory.
// NOTE(review): presumably srcQuantiles is a copy in the Memory case - confirm.
if (!hasMemory()) { setLevelZeroSorted(true); }
}
final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove free space
quantiles = (T[]) Array.newInstance(serDe.getClassOfT(), numQuantiles);
cumWeights = new long[numQuantiles];
populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles);
return new KllItemsSketchSortedView<>(quantiles, cumWeights, getN(), comparator, getMaxItem(), getMinItem());
}

/**
 * Copies the retained items into quantiles, assigns each item its level weight,
 * merge-sorts the levels together and converts the weights to cumulative form.
 * @param srcQuantiles the source items array
 * @param srcLevels the level boundaries into srcQuantiles
 * @param srcNumLevels the number of levels described by srcLevels
 * @param numItems the number of items to copy, starting at srcLevels[0]
 */
private void populateFromSketch(final Object[] srcQuantiles, final int[] srcLevels,
final int srcNumLevels, final int numItems) {
// Boundaries of the non-empty levels, re-based so that the first item is at index 0.
final int[] myLevels = new int[srcNumLevels + 1];
final int offset = srcLevels[0];
System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems);
int srcLevel = 0;
int dstLevel = 0;
// Weight of an item at the current source level: starts at 1 and doubles per level.
long weight = 1;
while (srcLevel < srcNumLevels) {
final int fromIndex = srcLevels[srcLevel] - offset;
final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive
if (fromIndex < toIndex) { // if equal, skip empty level
Arrays.fill(cumWeights, fromIndex, toIndex, weight);
myLevels[dstLevel] = fromIndex;
myLevels[dstLevel + 1] = toIndex;
dstLevel++;
}
srcLevel++;
weight *= 2;
}
// Number of non-empty levels recorded in myLevels.
final int numLevels = dstLevel;
blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels, comparator); //create unit weights
KllHelper.convertToCumulative(cumWeights);
}
}

/**
 * Merge-sorts the blocks of the quantiles array in place, moving each weight together
 * with its quantile. Block i spans the index range [levels[i], levels[i + 1]).
 * Assumes each block is already ordered by comp - TODO confirm against callers.
 * @param quantiles the items to sort; receives the merged result
 * @param weights the weight of each item, kept in tandem with quantiles
 * @param levels the block boundaries; entries 0 through numLevels are read
 * @param numLevels the number of blocks
 * @param comp the comparator used to order the items
 */
private static <T> void blockyTandemMergeSort(final Object[] quantiles, final long[] weights,
    final int[] levels, final int numLevels, final Comparator<? super T> comp) {
  // Zero or one block needs no merging. The guard is "<= 1" rather than "== 1" because the
  // recursion only terminates at exactly one level and must never be entered with zero.
  if (numLevels <= 1) { return; }

  // duplicate the input in preparation for the "ping-pong" copy reduction strategy.
  final Object[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length);
  final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one here

  // The originals are the destination of the top-level merge, so the result lands in them.
  blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels, comp);
}

/**
 * Recursive step of the blocky tandem merge sort. The given range of levels is split
 * into a lower and an upper group, each group is processed recursively with the source
 * and destination arrays exchanged, and the two groups are then merged from the source
 * arrays into the destination arrays.
 * @param quantilesSrc the source items
 * @param weightsSrc the source weights
 * @param quantilesDst the destination items
 * @param weightsDst the destination weights
 * @param levels the block boundaries
 * @param startingLevel the index of the first level of the range
 * @param numLevels the number of levels in the range
 * @param comp the comparator used to order the items
 */
private static <T> void blockyTandemMergeSortRecursion(
    final Object[] quantilesSrc, final long[] weightsSrc,
    final Object[] quantilesDst, final long[] weightsDst,
    final int[] levels, final int startingLevel, final int numLevels, final Comparator<? super T> comp) {
  if (numLevels == 1) { return; }

  final int lowerCount = numLevels / 2;
  final int upperCount = numLevels - lowerCount;
  assert lowerCount >= 1;
  assert upperCount >= lowerCount;
  final int upperStart = startingLevel + lowerCount;

  // Both recursive calls run with src and dst exchanged (the "ping-pong" step).
  blockyTandemMergeSortRecursion(
      quantilesDst, weightsDst, quantilesSrc, weightsSrc,
      levels, startingLevel, lowerCount, comp);
  blockyTandemMergeSortRecursion(
      quantilesDst, weightsDst, quantilesSrc, weightsSrc,
      levels, upperStart, upperCount, comp);

  // Merge the two groups from src into dst.
  tandemMerge(
      quantilesSrc, weightsSrc, quantilesDst, weightsDst,
      levels, startingLevel, lowerCount, upperStart, upperCount, comp);
}

/**
 * Merges two runs of the source arrays into the destination arrays, moving each weight
 * together with its item. The first run is [levelStarts[startingLevel1],
 * levelStarts[startingLevel1 + numLevels1]) and the second run is
 * [levelStarts[startingLevel2], levelStarts[startingLevel2 + numLevels2]).
 * Output is written starting at the first index of the first run.
 * @param quantilesSrc the source items
 * @param weightsSrc the source weights
 * @param quantilesDst the destination items
 * @param weightsDst the destination weights
 * @param levelStarts the block boundaries
 * @param startingLevel1 the first level of the first run
 * @param numLevels1 the number of levels in the first run
 * @param startingLevel2 the first level of the second run
 * @param numLevels2 the number of levels in the second run
 * @param comp the comparator used to order the items
 */
private static <T> void tandemMerge(
    final Object[] quantilesSrc, final long[] weightsSrc,
    final Object[] quantilesDst, final long[] weightsDst,
    final int[] levelStarts,
    final int startingLevel1, final int numLevels1,
    final int startingLevel2, final int numLevels2, final Comparator<? super T> comp) {
  final int begin1 = levelStarts[startingLevel1];
  final int end1 = levelStarts[startingLevel1 + numLevels1]; // exclusive
  final int begin2 = levelStarts[startingLevel2];
  final int end2 = levelStarts[startingLevel2 + numLevels2]; // exclusive
  int left = begin1;
  int right = begin2;
  int out = begin1;

  // While both runs have items, the first run's item is taken only when Util.lt says so;
  // otherwise the second run's item is taken.
  for (; (left < end1) && (right < end2); out++) {
    final boolean takeLeft = Util.lt((T) quantilesSrc[left], (T) quantilesSrc[right], comp);
    final int pick = takeLeft ? left++ : right++;
    quantilesDst[out] = quantilesSrc[pick];
    weightsDst[out] = weightsSrc[pick];
  }

  // At most one run still has items; copy its tail across in bulk.
  final int remaining1 = end1 - left;
  final int remaining2 = end2 - right;
  if (remaining1 > 0) {
    System.arraycopy(quantilesSrc, left, quantilesDst, out, remaining1);
    System.arraycopy(weightsSrc, left, weightsDst, out, remaining1);
  } else if (remaining2 > 0) {
    System.arraycopy(quantilesSrc, right, quantilesDst, out, remaining2);
    System.arraycopy(weightsSrc, right, weightsDst, out, remaining2);
  }
}

}
Loading

0 comments on commit 4a020cc

Please sign in to comment.