diff --git a/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java b/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java index 1d1745f1a..851939889 100644 --- a/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java +++ b/src/test/java/com/yahoo/sketches/BinomialBoundsNTest.java @@ -135,6 +135,22 @@ public void checkThetaLimits1() { BinomialBoundsN.getUpperBound(100, 1.1, 1, false); } + @Test + public void boundsExample() { + println("BinomialBoundsN Example:"); + int k = 500; + double theta = 0.001; + int stdDev = 2; + double ub = BinomialBoundsN.getUpperBound(k, theta, stdDev, false); + double est = k/theta; + double lb = BinomialBoundsN.getLowerBound(k, theta, stdDev, false); + println("K="+k+", Theta="+theta+", SD="+stdDev); + println("UB: "+ub); + println("Est: "+est); + println("LB: "+lb); + println(""); + } + @Test public void printlnTest() { println("PRINTING: "+this.getClass().getName()); diff --git a/src/test/java/com/yahoo/sketches/benchmark/BenchmarkMain.java b/src/test/java/com/yahoo/sketches/benchmark/BenchmarkMain.java deleted file mode 100644 index bf7981a20..000000000 --- a/src/test/java/com/yahoo/sketches/benchmark/BenchmarkMain.java +++ /dev/null @@ -1,160 +0,0 @@ -package com.yahoo.sketches.benchmark; - -import com.yahoo.sketches.hll.HllSketch; -import com.yahoo.sketches.hll.HllSketchBuilder; -import com.yahoo.sketches.hll.Preamble; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -/** - */ -public class BenchmarkMain -{ - @SuppressWarnings("serial") - public static void main(String[] args) - { - final int lgK = 12; - - List benchmarks = new ArrayList(){{ - this.add(new ThetaMemoryBenchmark(lgK)); - this.add(new ThetaBenchmark(lgK)); - - HllSketchBuilder sparseBob = HllSketch.builder().setPreamble(Preamble.fromLogK(lgK)); - HllSketchBuilder denseBob = sparseBob.copy().setDenseMode(true); - this.add(new HllSketchBenchmark("HLL Sketch", new Random(lgK), sparseBob, denseBob)); - this.add( - new 
HllSketchBenchmark( - "HLL Non-Compressed to Compressed", - new Random(lgK), sparseBob, denseBob.copy().setCompressedDense(true) - ) - ); - this.add( - new HllSketchBenchmark( - "HLL Compressed to Non-Compressed", - new Random(lgK), sparseBob.copy().setCompressedDense(true), denseBob - ) - ); - this.add( - new HllSketchBenchmark( - "HLL All Compressed", - new Random(lgK), denseBob.copy().setCompressedDense(true), denseBob.copy().setCompressedDense(true) - ) - ); - }}; - - runBenchmarks(benchmarks, 20, 100, powerLawDistribution); - } - - private static void runBenchmarks( - List benchmarks, - int increment, - int numTimes, - List distribution - ) - { - int numSketches = 0; - for (SketchBenchmark.Spec spec : distribution) { - numSketches += spec.getNumSketches(); - } - - for (SketchBenchmark benchmark : benchmarks) { - System.out.printf("Starting benchmark[%s]%n", benchmark); - long start = System.currentTimeMillis(); - benchmark.setup(numSketches, powerLawDistribution); - System.out.printf("benchmark[%s] setup done in %,d millis.%n", benchmark, System.currentTimeMillis() - start); - start = System.currentTimeMillis(); - benchmark.runNTimes(increment); - System.out.printf("benchmark[%s] priming[%s] done in %,d millis.%n", benchmark, increment, System.currentTimeMillis() - start); - doGC(); - - - for (int i = 0; i < numTimes; i+=increment) { - start = System.currentTimeMillis(); - benchmark.runNTimes(increment); - long time = System.currentTimeMillis() - start; - System.out.printf( - "Benchmark[%s], %,d runs => %,d millis (%,d ms/run), %,d/sec%n", - benchmark, - i + increment, - time, - (int) (time / (double) increment), - (int) ((1000 / (time / (double) increment)) * numSketches) - ); - doGC(); - } - System.out.printf("Done with benchmark[%s]%n", benchmark); - } - } - - private static void doGC() - { - for (int i = 0; i < 10; ++i) { - System.gc(); - } - } - - - @SuppressWarnings("serial") - public static List powerLawDistribution = new ArrayList(){{ - this.add(new 
SketchBenchmark.Spec(0, 44129)); - this.add(new SketchBenchmark.Spec(1, 431561)); - this.add(new SketchBenchmark.Spec(2, 129063)); - this.add(new SketchBenchmark.Spec(3, 64821)); - this.add(new SketchBenchmark.Spec(4, 67522)); - this.add(new SketchBenchmark.Spec(6, 20291)); - this.add(new SketchBenchmark.Spec(7, 15767)); - this.add(new SketchBenchmark.Spec(8, 22975)); - this.add(new SketchBenchmark.Spec(11, 22441)); - this.add(new SketchBenchmark.Spec(14, 14531)); - this.add(new SketchBenchmark.Spec(17, 13472)); - this.add(new SketchBenchmark.Spec(22, 13253)); - this.add(new SketchBenchmark.Spec(28, 9002)); - this.add(new SketchBenchmark.Spec(35, 8406)); - this.add(new SketchBenchmark.Spec(45, 7618)); - this.add(new SketchBenchmark.Spec(57, 6349)); - this.add(new SketchBenchmark.Spec(71, 5194)); - this.add(new SketchBenchmark.Spec(89, 4524)); - this.add(new SketchBenchmark.Spec(112, 4032)); - this.add(new SketchBenchmark.Spec(141, 3397)); - this.add(new SketchBenchmark.Spec(178, 2935)); - this.add(new SketchBenchmark.Spec(224, 2516)); - this.add(new SketchBenchmark.Spec(282, 2118)); - this.add(new SketchBenchmark.Spec(355, 1825)); - this.add(new SketchBenchmark.Spec(447, 1527)); - this.add(new SketchBenchmark.Spec(561, 1269)); - this.add(new SketchBenchmark.Spec(709, 1088)); - this.add(new SketchBenchmark.Spec(890, 900)); - this.add(new SketchBenchmark.Spec(1118, 767)); - this.add(new SketchBenchmark.Spec(1410, 654)); - this.add(new SketchBenchmark.Spec(1776, 550)); - this.add(new SketchBenchmark.Spec(2246, 469)); - this.add(new SketchBenchmark.Spec(2813, 353)); - this.add(new SketchBenchmark.Spec(3552, 325)); - this.add(new SketchBenchmark.Spec(4472, 252)); - this.add(new SketchBenchmark.Spec(5639, 249)); - this.add(new SketchBenchmark.Spec(7022, 187)); - this.add(new SketchBenchmark.Spec(8952, 150)); - this.add(new SketchBenchmark.Spec(11270, 138)); - this.add(new SketchBenchmark.Spec(14198, 106)); - this.add(new SketchBenchmark.Spec(17544, 74)); - this.add(new 
SketchBenchmark.Spec(22145, 81)); - this.add(new SketchBenchmark.Spec(27848, 50)); - this.add(new SketchBenchmark.Spec(35319, 58)); - this.add(new SketchBenchmark.Spec(44267, 33)); - this.add(new SketchBenchmark.Spec(55292, 22)); - this.add(new SketchBenchmark.Spec(72264, 10)); - this.add(new SketchBenchmark.Spec(88903, 13)); - this.add(new SketchBenchmark.Spec(111538, 12)); - this.add(new SketchBenchmark.Spec(136481, 11)); - this.add(new SketchBenchmark.Spec(178605, 6)); - this.add(new SketchBenchmark.Spec(215707, 5)); - this.add(new SketchBenchmark.Spec(273075, 5)); - this.add(new SketchBenchmark.Spec(362878, 5)); - this.add(new SketchBenchmark.Spec(546015, 1)); - this.add(new SketchBenchmark.Spec(1106004, 2)); - this.add(new SketchBenchmark.Spec(1766259, 2)); - }}; - -} diff --git a/src/test/java/com/yahoo/sketches/benchmark/HllSketchBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/HllSketchBenchmark.java deleted file mode 100644 index 8945308b0..000000000 --- a/src/test/java/com/yahoo/sketches/benchmark/HllSketchBenchmark.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.yahoo.sketches.benchmark; - -import com.yahoo.sketches.hll.HllSketch; -import com.yahoo.sketches.hll.HllSketchBuilder; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Random; - -/** - */ -public class HllSketchBenchmark implements SketchBenchmark -{ - private final String name; - private final Random rand; - private final HllSketchBuilder inputBob; - private final HllSketchBuilder unionBob; - - private List sketches; - - public HllSketchBenchmark(String name, Random rand, HllSketchBuilder inputBob, HllSketchBuilder unionBob) - { - this.name = name; - this.rand = rand; - this.inputBob = inputBob; - this.unionBob = unionBob; - } - - @Override - public void setup(int numSketches, List specs) - { - sketches = new ArrayList<>(numSketches); - - for (Spec spec : specs) { - for (int i = 0; i < spec.getNumSketches(); ++i) { - HllSketch 
sketch = inputBob.build(); - for (int j = 0; j < spec.getNumEntries(); ++j) { - sketch.update(new long[]{rand.nextLong()}); - } - sketches.add(sketch.asCompact()); - } - } - Collections.shuffle(sketches); - } - - @Override - public void runNTimes(int n) - { - for (int i = 0; i < n; ++i) { - HllSketch combined = unionBob.build(); - for (HllSketch toUnion : sketches) { - combined.union(toUnion); - } - } - } - - @Override - public void reset() - { - sketches = null; - } - - @Override - public String toString() - { - return name; - } -} diff --git a/src/test/java/com/yahoo/sketches/benchmark/SketchBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/SketchBenchmark.java deleted file mode 100644 index c59eee548..000000000 --- a/src/test/java/com/yahoo/sketches/benchmark/SketchBenchmark.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.yahoo.sketches.benchmark; - -import java.util.List; - -/** - */ -public interface SketchBenchmark -{ - void setup(int numSketches, List specs); - void runNTimes(int n); - void reset(); - - class Spec { - private final int numSketches; - private final long numEntries; - - public Spec(long numEntries, int numSketches) { - - this.numSketches = numSketches; - this.numEntries = numEntries; - } - - public int getNumSketches() - { - return numSketches; - } - - public long getNumEntries() - { - return numEntries; - } - } -} diff --git a/src/test/java/com/yahoo/sketches/benchmark/ThetaBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/ThetaBenchmark.java deleted file mode 100644 index 183a67463..000000000 --- a/src/test/java/com/yahoo/sketches/benchmark/ThetaBenchmark.java +++ /dev/null @@ -1,81 +0,0 @@ -package com.yahoo.sketches.benchmark; - -import com.yahoo.sketches.theta.CompactSketch; -import com.yahoo.sketches.theta.SetOperation; -import com.yahoo.sketches.theta.Sketch; -import com.yahoo.sketches.theta.Union; -import com.yahoo.sketches.theta.UpdateSketch; - -import java.util.ArrayList; -import java.util.Collections; -import 
java.util.List; -import java.util.Random; - -/** - */ -public class ThetaBenchmark implements SketchBenchmark -{ - private final int nominalEntries; - private final Random rand; - - private List sketches; - - public ThetaBenchmark(int lgK) { - this.nominalEntries = 1 << lgK; - this.rand = new Random(lgK); - } - - @Override - public void setup(int numSketches, List specs) - { - sketches = new ArrayList<>(numSketches); - - for (Spec spec : specs) { - for (int i = 0; i < spec.getNumSketches(); ++i) { - UpdateSketch sketch = UpdateSketch.builder().build(nominalEntries); - for (int j = 0; j < spec.getNumEntries(); ++j) { - sketch.update(rand.nextLong()); - } - - sketches.add(sketch.rebuild().compact(true, null)); - } - } - Collections.shuffle(sketches, rand); - - int numRetained = 0; - int numEstimating = 0; - for (CompactSketch sketch : sketches) { - numRetained += sketch.getRetainedEntries(true); - if (sketch.isEstimationMode()) { - ++numEstimating; - } - } - System.out.printf( - "%,d entries, %,d/sketch, %,d estimating (%.2f%%)%n", - numRetained, numRetained / sketches.size(), numEstimating, (100 * numEstimating) / (double) sketches.size() - ); - } - - @Override - public void runNTimes(int n) - { - for (int i = 0; i < n; ++i) { - Union combined = SetOperation.builder().buildUnion(nominalEntries); - for (Object toUnion : sketches) { - combined.update((Sketch) toUnion); - } - } - } - - @Override - public void reset() - { - sketches = null; - } - - @Override - public String toString() - { - return String.format("Theta OnHeap Benchmark(nominalEntries=%s)", nominalEntries); - } -} diff --git a/src/test/java/com/yahoo/sketches/benchmark/ThetaMemoryBenchmark.java b/src/test/java/com/yahoo/sketches/benchmark/ThetaMemoryBenchmark.java deleted file mode 100644 index c8f33d7f2..000000000 --- a/src/test/java/com/yahoo/sketches/benchmark/ThetaMemoryBenchmark.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.yahoo.sketches.benchmark; - -import com.yahoo.sketches.memory.Memory; 
-import com.yahoo.sketches.memory.NativeMemory; -import com.yahoo.sketches.theta.SetOperation; -import com.yahoo.sketches.theta.Sketch; -import com.yahoo.sketches.theta.Union; -import com.yahoo.sketches.theta.UpdateSketch; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Random; - -/** - */ -public class ThetaMemoryBenchmark implements SketchBenchmark -{ - private final int nominalEntries; - private final Random rand; - private final byte[] bytes; - - private List memories; - - public ThetaMemoryBenchmark(int lgK) { - this.nominalEntries = 1 << lgK; - this.rand = new Random(lgK); - this.bytes = new byte[Sketch.getMaxUpdateSketchBytes(nominalEntries) + 8]; - } - - @Override - public void setup(int numSketches, List specs) - { - memories = new ArrayList<>(numSketches); - - for (Spec spec : specs) { - for (int i = 0; i < spec.getNumSketches(); ++i) { - UpdateSketch sketch = UpdateSketch.builder().build(nominalEntries); - for (int j = 0; j < spec.getNumEntries(); ++j) { - sketch.update(rand.nextLong()); - } - memories.add(new NativeMemory(sketch.rebuild().compact(true, null).toByteArray())); - } - } - Collections.shuffle(memories, rand); - - int numRetained = 0; - int numEstimating = 0; - for (Memory mem : memories) { - Sketch sketch = Sketch.wrap(mem); - numRetained += sketch.getRetainedEntries(true); - if (sketch.isEstimationMode()) { - ++numEstimating; - } - } - System.out.printf( - "%,d entries, %,d/sketch, %,d estimating (%.2f%%)%n", - numRetained, numRetained / memories.size(), numEstimating, (100 * numEstimating) / (double) memories.size() - ); - } - - @Override - public void runNTimes(int n) - { - for (int i = 0; i < n; ++i) { - Union combined = SetOperation.builder().initMemory(new NativeMemory(bytes)).buildUnion(nominalEntries); - for (Memory toUnion : memories) { - combined.update(toUnion); - } - } - } - - @Override - public void reset() - { - memories = null; - } - - @Override - public String toString() 
- { - return String.format("Theta Memory Benchmark(nominalEntries=%s)", nominalEntries); - } -} diff --git a/src/test/java/com/yahoo/sketches/cmd/CommandLine.java b/src/test/java/com/yahoo/sketches/cmd/CommandLine.java deleted file mode 100644 index dc28adef7..000000000 --- a/src/test/java/com/yahoo/sketches/cmd/CommandLine.java +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright 2016, Yahoo! Inc. - * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. - */ - -package com.yahoo.sketches.cmd; - -import static com.yahoo.sketches.Util.LS; -import static com.yahoo.sketches.Util.TAB; -import static java.lang.Math.*; - -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.FileNotFoundException; -import java.io.InputStreamReader; - -import com.yahoo.sketches.theta.Sketches; -import com.yahoo.sketches.theta.UpdateSketch; -import com.yahoo.sketches.theta.UpdateSketchBuilder; -import com.yahoo.sketches.quantiles.QuantilesSketchBuilder; -import com.yahoo.sketches.quantiles.QuantilesSketch; -import com.yahoo.sketches.frequencies.FrequentItemsSketch; -import com.yahoo.sketches.frequencies.FrequentLongsSketch.Row; -import com.yahoo.sketches.frequencies.ErrorType; - -/** - * Command line access to the basic sketch functions. 
- */ -public class CommandLine { - private static final String BOLD = "\033[1m"; - private static final String OFF = "\033[0m"; - - public static void main(String[] args) { - if (args.length == 0) help(); - else parseType(args); - } - - static void parseType(String[] args) { - String token1 = args[0].toLowerCase(); - switch (token1) { - case "uniq": parseUniq(args); break; - case "rank": parseRank(args); break; - case "hist": parseHist(args); break; - case "loghist": parseLogHist(args); break; - case "freq": parseFreq(args); break; - case "help": help(); break; - default: { - printlnErr("Unrecognized TYPE: "+token1); - help(); - } - } - } - - private static int parseArgsCase(String[] args) { //we already know type, args[0] is valid - int len = args.length; - int ret = 0; - switch (len) { - case 1: ret = 1; break; //only type, assume default k, System.in - case 2: { - String token2 = args[1]; //2nd arg could be help, k (numeric) or a fileName - if (token2.equalsIgnoreCase("help")) { ret = 2; break; } //help - if (!isNumeric(token2)) { ret = 3; break; } //2nd arg not numeric, must be a filename - ret = 4; //2nd arg must be numeric, assume System.in - break; - } - default: { //3 or more - String token2 = args[1]; //2nd arg could be help, k (numeric) or a fileName - if (token2.equalsIgnoreCase("help")) { ret = 2; break; } //help - if (!isNumeric(token2)) { ret = 3; break; } //2nd arg not numeric, must be a filename - //2nd arg is numeric, 3rd arg must be filename - ret = 5; - break; - } - } - return ret; - } - - private static void parseUniq(String[] args) { - UpdateSketchBuilder bldr = Sketches.updateSketchBuilder(); - UpdateSketch sketch; - int argsCase = parseArgsCase(args); - switch (argsCase) { - case 1: - doUniq(getBR(null), bldr.build()); break; //[default k], [System.in] - case 2: - uniqHelp(); break; //help - case 3: //2nd arg not numeric, must be a filename - doUniq(getBR(args[1]), bldr.build()); break; //[default k], file - case 4: //2nd arg is numeric, no 
filename - sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k - doUniq(getBR(null), sketch); //user k, [System.in] - break; - case 5: //3 valid args - sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k - doUniq(getBR(args[2]), sketch); - } - } - - private static void doUniq(BufferedReader br, UpdateSketch sketch) { - String itemStr = ""; - try { - while ((itemStr = br.readLine()) != null) { - sketch.update(itemStr); - } - } catch (IOException e) { - printlnErr("Read Error: Item: "+itemStr +", "+br.toString()); - System.exit(1); - } - println(sketch.toString()); - } - - private static void parseRank(String[] args) { - QuantilesSketchBuilder bldr = new QuantilesSketchBuilder(); - QuantilesSketch sketch; - int argsCase = parseArgsCase(args); - switch (argsCase) { - case 1: - doRank(getBR(null), bldr.build()); break; //[default k], [System.in] - case 2: - rankHelp(); break; //help - case 3: //2nd arg not numeric, must be a filename - doRank(getBR(args[1]), bldr.build()); break; //[default k], file - case 4: //2nd arg is numeric, no filename - sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k - doRank(getBR(null), sketch); //user k, [System.in] - break; - case 5: //3 valid args - sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k - doRank(getBR(args[2]), sketch); - } - } - - private static void doRank(BufferedReader br, QuantilesSketch sketch) { - String itemStr = ""; - try { - while ((itemStr = br.readLine()) != null) { - double item = Double.parseDouble(itemStr); - sketch.update(item); - } - } catch (IOException | NumberFormatException e ) { - printlnErr("Read Error: Item: "+itemStr +", "+br.toString()); - System.exit(1); - } - int ranks = 101; - double[] valArr = sketch.getQuantiles(ranks); - println("Rank"+TAB+ "Value"); - for (int i=0; i sketch; - int defaultSize = 1 << 17; //128K - int argsCase = parseArgsCase(args); - switch (argsCase) { - case 1: - sketch = new 
FrequentItemsSketch(defaultSize); - doFreq(getBR(null), sketch); break; //[default k], [System.in] - case 2: - freqHelp(); break; //help - case 3: //2nd arg not numeric, must be a filename - sketch = new FrequentItemsSketch(defaultSize); - doFreq(getBR(args[1]), sketch); break; //[default k], file - case 4: //2nd arg is numeric, no filename - sketch = new FrequentItemsSketch(Integer.parseInt(args[1])); //args[1] is numeric = k - doFreq(getBR(null), sketch); //user k, [System.in] - break; - case 5: //3 valid args - sketch = new FrequentItemsSketch(Integer.parseInt(args[1])); //args[1] is numeric = k - doFreq(getBR(args[2]), sketch); - } - } - - private static void doFreq(BufferedReader br, FrequentItemsSketch sketch) { - String itemStr = ""; - try { - while ((itemStr = br.readLine()) != null) { - sketch.update(itemStr); - } - } catch (IOException e ) { - printlnErr("Read Error: Item: "+itemStr +", "+br.toString()); - System.exit(1); - } - //NFP is a subset of NFN - FrequentItemsSketch.Row[] rowArr = sketch.getFrequentItems(ErrorType.NO_FALSE_POSITIVES); - int len = rowArr.length; - println("Qualifying Rows: "+len); - println(Row.getRowHeader()); - for (int i=0; iThis demo computes a stream of values and feeds them first to - * an exact sort-based method of computing the number of unique values - * in the stream and then feeds a similar stream to two different types of - * sketches from the library. - * - *

This demo becomes most significant in the case where the number of uniques in the - * stream exceeds what the computer can hold in memory. - * - *

This demo utilizes the Unix sort and wc commands for the brute force compuation. - * So this needs to be run on a linux or mac machine. A windows machine with a similar unix - * library installed should also work, but it has not been tested. - */ -public class DemoImpl { - //Static constants - private static final String LS = System.getProperty("line.separator"); - private static final byte LS_BYTE = LS.getBytes()[0]; - private static Random rand = new Random(); - private static StandardOpenOption C = StandardOpenOption.CREATE; - private static StandardOpenOption W = StandardOpenOption.WRITE; - private static StandardOpenOption TE = StandardOpenOption.TRUNCATE_EXISTING; - - //Stream Configuration - private int byteBufCap_ = 1000000; //ByteBuffer capacity - private long n_ = (long)1E8; //stream length - private final int threshold_; //equivalent uniquesFraction on integer scale - - //Sketch configuration - private int lgK_ = 14; //16K - - //Internal sketch values - private int maxMemSkBytes_; - private double rse2_; //RSE for 95% confidence - private UpdateSketch tSketch_ = null; - private HllSketch hllSketch_ = null; - - //Other internal values - private Path path = Paths.get("tmp/test.txt"); - private long[] vArr_ = new long[1]; //reuse this array - private long fileBytes_ = 0; - private long u_ = 0; //unique count; - - /** - * Constuct the demo. - * @param streamLen The total stream length. - * @param uniquesFraction the fraction of streamLen values less than 1.0, that will be unique. - * The actual # of uniques will vary around this value, because it is computed statistically. 
- */ - public DemoImpl(long streamLen, double uniquesFraction) { - if (uniquesFraction == 1.0) { - this.threshold_ = Integer.MAX_VALUE; - } - else { - this.threshold_ = (int)(Integer.MAX_VALUE * uniquesFraction); - } - n_ = streamLen; - lgK_ = 14; //Log-base 2 of the configured sketch size = 16K - File dir = new File("tmp"); - if (!dir.exists()) { - try { - dir.mkdir(); - } catch(SecurityException e) { - throw new SecurityException(e); - } - } - } - - /** - * Run the demo - */ - public void runDemo() { - println("# COMPUTE DISTINCT COUNT EXACTLY:"); - long exactTimeMS; - - exactTimeMS = buildFile(); - //exactTimeMS = buildFileAndSketch(); //used instead only for testing - - println("## SORT & REMOVE DUPLICATES"); - String sortCmd = "sort -u -o tmp/sorted.txt tmp/test.txt"; - exactTimeMS += runUnixCmd("sort", sortCmd); - - println("\n## LINE COUNT"); - String wcCmd = "wc -l tmp/sorted.txt"; - exactTimeMS += runUnixCmd("wc", wcCmd); - - println("Total Exact "+getMinSec(exactTimeMS) +LS+LS); - - println("# COMPUTE DISTINCT COUNT USING SKETCHES"); - configureThetaSketch(); - long sketchTimeMS = buildSketch(); - double factor = exactTimeMS*1.0/sketchTimeMS; - println("Speedup Factor "+String.format("%.1f", factor) + LS); - - configureHLLSketch(); - sketchTimeMS = buildSketch(); - factor = exactTimeMS*1.0/sketchTimeMS; - println("Speedup Factor "+String.format("%.1f", factor)); - - } - - /** - * @return total test time in milliseconds - */ - private long buildFile() { - println("## BUILD FILE:"); - ByteBuffer byteBuf = ByteBuffer.allocate(byteBufCap_); - u_ = 0; - fileBytes_ = 0; - long testStartTime_mS = System.currentTimeMillis(); - try (SeekableByteChannel sbc = Files.newByteChannel(path, C, W, TE)) { - for (long i=0; i 0) { //write remainder - byteBuf.flip(); - fileBytes_ += sbc.write(byteBuf); - byteBuf.clear(); - } - } - catch (IOException e) { - e.printStackTrace(); - } - long testTime_mS = System.currentTimeMillis() - testStartTime_mS; - //Print common results - 
printCommon(testTime_mS, n_, u_); - //Print file results - println("File Size Bytes: "+String.format("%,d", fileBytes_) + LS); - return testTime_mS; - } - - /** - * @return total test time in milliseconds - */ - private static long runUnixCmd(String name, String cmd) { - StringBuilder sbOut = new StringBuilder(); - StringBuilder sbErr = new StringBuilder(); - String out = null; - String err = null; - Process p = null; - String[] envp = {"LC_ALL=C"}; //https://bugs.launchpad.net/ubuntu/+source/coreutils/+bug/846628 - long testStartTime_mS = System.currentTimeMillis(); - try { - // run the Unix cmd using the Runtime exec method: - p = Runtime.getRuntime().exec(cmd, envp); - BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); - BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); - - // read the output from the command - boolean outFlag = true; - while ((out = stdInput.readLine()) != null) { - if (outFlag) { - sbOut.append("Output from "+name+" command:").append(LS); - outFlag = false; - } - sbOut.append(out).append(LS); - } - - // read any errors from the attempted command - boolean errFlag = true; - while ((err = stdError.readLine()) != null) { - if (errFlag) { - sbErr.append("\nError from "+name+" command:").append(LS); - errFlag = false; - } - sbErr.append(err).append(LS); - } - } - catch (IOException e) { - System.out.println("Exception: "); - e.printStackTrace(); - System.exit( -1); - } - if ((p != null) && (p.isAlive())) { - p.destroy(); - } - long testTime_mS = System.currentTimeMillis() - testStartTime_mS; - println("Unix cmd: "+cmd); - println(getMinSec(testTime_mS)); - if (sbOut.length() > 0) { println(sbOut.toString()); } - if (sbErr.length() > 0) { println(sbErr.toString()); } - return testTime_mS; - } - - /** - * @return total test time in milliseconds - */ - private long buildSketch() { - u_ = 0; //unique counter for accuracy computation - long testStartTime_mS = 
System.currentTimeMillis(); - - if (tSketch_ != null) { //Theta Sketch - for (long i = 0; i < n_; i++) { - long v = nextValue(); - tSketch_.update(v); - } - } - else { //HLL Sketch - for (long i = 0; i < n_; i++) { - long v = nextValue(); - hllSketch_.update(v); - } - } - long testTime_mS = System.currentTimeMillis() - testStartTime_mS; - - //Print sketch name - String sk = (tSketch_ != null)? "THETA" : "HLL"; - println("## USING "+sk+" SKETCH"); - //Print common results - printCommon(testTime_mS, n_, u_); - - //Print sketch results - printSketchResults(u_, maxMemSkBytes_, rse2_); - return testTime_mS; - } - - /** - * Used in testing - * @return total test time in milliseconds - */ - @SuppressWarnings("unused") - private long buildFileAndSketch() { - println("## BUILD FILE AND SKETCH:"); - ByteBuffer byteBuf = ByteBuffer.allocate(byteBufCap_); - u_ = 0; - fileBytes_ = 0; - long testStartTime_mS = System.currentTimeMillis(); - try (SeekableByteChannel sbc = Files.newByteChannel(path, C, W, TE)) { - if (tSketch_ != null) { - long v = nextValue(); - tSketch_.update(v); - - //build file - String s = Long.toHexString(v); - if (byteBuf.remaining() < 25) { - byteBuf.flip(); - fileBytes_ += sbc.write(byteBuf); - byteBuf.clear(); - } - byteBuf.put(s.getBytes()).put(LS_BYTE); - } - else { //HLL Sketch - long v = nextValue(); - hllSketch_.update(v); - - //build file - String s = Long.toHexString(v); - if (byteBuf.remaining() < 25) { - byteBuf.flip(); - fileBytes_ += sbc.write(byteBuf); - byteBuf.clear(); - } - byteBuf.put(s.getBytes()).put(LS_BYTE); - } - - if (byteBuf.position() > 0) { - byteBuf.flip(); - fileBytes_ += sbc.write(byteBuf); - byteBuf.clear(); - } - } - catch (IOException e) { - e.printStackTrace(); - } - long testTime_mS = System.currentTimeMillis() - testStartTime_mS; - - //Print common results - printCommon(testTime_mS, n_, u_); - //Print file results - println("File Size Bytes: "+String.format("%,d", fileBytes_)); - - //Print sketch results - 
printSketchResults(u_, maxMemSkBytes_, rse2_); - return testTime_mS; - } - - /** - * @return next hashed long value - */ - private long nextValue() { - if (((rand.nextInt() >>> 1) < threshold_) || (u_ == 0)) { - u_++; - } - vArr_[0] = u_; - return hash(vArr_, 0L)[0]; - } - -// private long nextValue() { //Faster version, always 100% uniques -// vArr_[0] = ++u_; -// return hash(vArr_, 0L)[0]; -// } - - private final void configureThetaSketch() { - int k = 1 << lgK_; //14 - hllSketch_ = null; - maxMemSkBytes_ = k *16; //includs full hash table - rse2_ = 2.0/sqrt(k); //Error for 95% confidence - tSketch_ = Sketches.updateSketchBuilder(). - setResizeFactor(ResizeFactor.X1). - setFamily(Family.ALPHA).build(k ); - } - - private final void configureHLLSketch() { - int k = 1 << lgK_; //14 - boolean compressed = true; - boolean hipEstimator = true; - boolean denseMode = true; - tSketch_ = null; - maxMemSkBytes_ = (compressed)? k/2 : k; - rse2_ = 2.0 * ((hipEstimator)? 0.836/sqrt(k) : 1.04/sqrt(k)); //for 95% confidence - hllSketch_ = HllSketch.builder().setLogBuckets(lgK_). - setHipEstimator(hipEstimator). - setDenseMode(denseMode). - setCompressedDense(compressed). - build(); - } - - private static void printCommon(long testTime, long n, long u) { - println(getMinSec(testTime)); - println("Total Values: "+String.format("%,d",n)); - int nSecRate = (int) (testTime *1000000.0/n); - println("Build Rate: "+ String.format("%d nSec/Value", nSecRate)); - println("Exact Uniques: "+String.format("%,d", u)); - } - - private void printSketchResults(long u, int maxMemSkBytes, double rse2) { - String sk = (tSketch_ != null)? "THETA" : "HLL"; - println("## USING "+sk+" SKETCH"); - double rounded = Math.round((tSketch_ != null)? tSketch_.getEstimate() : hllSketch_.getEstimate()); - println("Sketch Estimate of Uniques: "+ String.format("%,d", (long)rounded)); - double err = (u == 0)? 
0 : (rounded/u - 1.0); - println("Sketch Relative Error: "+String.format("%.3f%%, +/- %.3f%%", err*100, rse2*100)); - println("Max Sketch Size Bytes: "+ String.format("%,d", maxMemSkBytes)); - } - - private static String getMinSec(long mSec) { - int totSec = (int)(mSec/1000.0); - int min = totSec/60; - int sec = totSec%60; - int ms = (int)(mSec - totSec * 1000); - String t = String.format("Time Min:Sec.mSec = %d:%02d.%03d", min, sec, ms); - return t; - } - - private static void println(String s) { System.out.println(s); } - -} diff --git a/src/test/java/com/yahoo/sketches/demo/ExactVsSketchDemo.java b/src/test/java/com/yahoo/sketches/demo/ExactVsSketchDemo.java deleted file mode 100644 index cbc664013..000000000 --- a/src/test/java/com/yahoo/sketches/demo/ExactVsSketchDemo.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2015, Yahoo! Inc. - * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. - */ - -package com.yahoo.sketches.demo; - -import com.yahoo.sketches.demo.DemoImpl; - -/** - *

This demo computes a stream of values and feeds them first to - * an exact sort-based method of computing the number of unique values - * in the stream and then feeds a similar stream to two different types of - * sketches from the library. - * - *

This demo becomes most significant in the case where the number of uniques in the - * stream exceeds what the computer can hold in memory. - * - *

This demo utilizes the Unix/Linux/OS-X sort and wc commands for the brute force computation. - * So this needs to be run on a Linux or Mac machine. A Windows machine with a suitable Unix - * library installed might also work, but it has not been tested. - * - *

To run this demo from the command line:

- *
  • Clone the lastest snapshot from https://github.com/DataSketches/sketches-core.
  • - *
  • Change to the directory where you did the clone
  • - *
  • Do a Maven Install: "mvn install"
  • - *
  • In the following commands replace X.Y.Z with the actual jar version from the target - * directory:
    - * javac -cp target/sketches-core-X.Y.Z.jar src/test/java/com/yahoo/sketches/demo/*.java
    - * java -cp target/sketches-core-X.Y.Z.jar:src/test/java com.yahoo.sketches.demo.ExactVsSketchDemo - * 1E6
  • - *
  • The demo will output results to the console. You can change the 1E6 (1 million) to even - * larger values (e.g., 1E8) but be patient. The exact sort can take a long, long time!
  • - *
- * - */ -public class ExactVsSketchDemo { - - /** - * Runs the demo. - * - * @param args - *
  • arg[0]: (Optional) The stream length and can be expressed as a positive double value. - * The default is 1E6.
  • - *
  • arg[1] (Optional) The fraction of the stream length that will be unique, the remainder - * will be duplicates. The default is 1.0. Note that if this argument is less than 1.0, - * the actual number of exact uniques is statistically determined for each trial and then - * separately counted. That is, the number of exact uniques for the "sort" trial - * will be different from the exact uniques for each of the sketch trial.
  • - *
- */ - public static void main(String[] args) { - int argsLen = args.length; - long streamLen = (long)1E8; //The default stream length - double uFrac = 1.0; //The default fraction that are unique - if (argsLen == 1) { - streamLen = (long)(Double.parseDouble(args[0])); - } else if (argsLen > 1) { - streamLen = (long)(Double.parseDouble(args[0])); - uFrac = Double.parseDouble(args[1]); - } - - DemoImpl demo = new DemoImpl(streamLen, uFrac); - - demo.runDemo(); - } - -} diff --git a/src/test/java/com/yahoo/sketches/examples/ExamplesTest.java b/src/test/java/com/yahoo/sketches/examples/ExamplesTest.java deleted file mode 100644 index d95e77a14..000000000 --- a/src/test/java/com/yahoo/sketches/examples/ExamplesTest.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2015, Yahoo! Inc. - * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. - */ -package com.yahoo.sketches.examples; - -import org.testng.annotations.Test; - -import com.yahoo.sketches.BinomialBoundsN; -import com.yahoo.sketches.theta.AnotB; -import com.yahoo.sketches.theta.CompactSketch; -import com.yahoo.sketches.theta.Intersection; -import com.yahoo.sketches.theta.Sketches; -import com.yahoo.sketches.theta.Union; -import com.yahoo.sketches.theta.UpdateSketch; - -public class ExamplesTest { - - @Test - public void setOpsExample() { - println("Set Operations Example:"); - int k = 4096; - UpdateSketch skA = Sketches.updateSketchBuilder().build(k); - UpdateSketch skB = Sketches.updateSketchBuilder().build(k); - UpdateSketch skC = Sketches.updateSketchBuilder().build(k); - - for (int i=1; i<=10; i++) { skA.update(i); } - for (int i=1; i<=20; i++) { skB.update(i); } - for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set - - Union union = Sketches.setOperationBuilder().buildUnion(k); - union.update(skA); - union.update(skB); - // ... 
continue to iterate on the input sketches to union - - CompactSketch unionSk = union.getResult(); //the result union sketch - println("A U B : "+unionSk.getEstimate()); //the estimate of the union - - //Intersection is similar - - Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - inter.update(unionSk); - inter.update(skC); - // ... continue to iterate on the input sketches to intersect - - CompactSketch interSk = inter.getResult(); //the result intersection sketch - println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection - - //The AnotB operation is a little different as it is stateless: - - AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); - aNotB.update(skA, skC); - - CompactSketch not = aNotB.getResult(); - println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation - } - - @Test - public void boundsExample() { - println("BinomialBoundsN Example:"); - int k = 500; - double theta = 0.001; - int stdDev = 2; - double ub = BinomialBoundsN.getUpperBound(k, theta, stdDev, false); - double est = k/theta; - double lb = BinomialBoundsN.getLowerBound(k, theta, stdDev, false); - println("K="+k+", Theta="+theta+", SD="+stdDev); - println("UB: "+ub); - println("Est: "+est); - println("LB: "+lb); - println(""); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - - public static void main(String[] args) { - ExamplesTest ext = new ExamplesTest(); - ext.setOpsExample(); - ext.boundsExample(); - - } -} diff --git a/src/test/java/com/yahoo/sketches/performance/ProcessStats.java b/src/test/java/com/yahoo/sketches/performance/ProcessStats.java deleted file mode 100644 index dfe8160b7..000000000 --- a/src/test/java/com/yahoo/sketches/performance/ProcessStats.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright 2015, Yahoo! Inc. 
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. - */ -package com.yahoo.sketches.performance; - -import static java.lang.Math.abs; -import static java.lang.Math.sqrt; - -import java.util.Arrays; - -/** - * Processes the statistics collected from an array of Stats objects from a trial set - * and creates an output row - * - * @author Lee Rhodes - */ -public class ProcessStats { - private static final char TAB = '\t'; - //Quantile fractions computed from the standard normal cumulative distribution. - private static final double M2SD = 0.022750131948179; //minus 2 StdDev - private static final double M1SD = 0.158655253931457; //minus 1 StdDev - private static final double P1SD = 0.841344746068543; //plus 1 StdDev - private static final double P2SD = 0.977249868051821; //plus 2 StdDev - - /** - * Process the Stats[] array and place the output row into the dataStr. - * @param statsArr the input Stats array - * @param uPerTrial the number of uniques per trial for this trial set. - * @param lgK log base 2 of configured nominal entries, or k. - * @param p the probability sampling rate. 0 < p ≤ 1.0. - * @param dataStr The StringBuilder object that is reused for each row of output - */ - public static void process(Stats[] statsArr, int uPerTrial, int lgK, double p, StringBuilder dataStr) { - int k = 1 << lgK; - int trials = statsArr.length; - Arrays.sort(statsArr, 0, trials); - - //Computing the quantiles from the sorted array. 
- double min = statsArr[0].re; - double qM2SD = statsArr[quantileIndex(M2SD,trials)].re; - double qM1SD = statsArr[quantileIndex(M1SD,trials)].re; - double q50 = statsArr[quantileIndex(.5,trials)].re; - double qP1SD = statsArr[quantileIndex(P1SD,trials)].re; - double qP2SD = statsArr[quantileIndex(P2SD,trials)].re; - double max = statsArr[trials-1].re; - - int cntLB2 = 0, cntLB1 = 0, cntUB1 = 0, cntUB2 = 0; -// double sumLB2 = 0, sumLB1 = 0, sumUB1 = 0, sumUB2 = 0; - double sumEst = 0, sumEstErr = 0, sumSqEstErr = 0; - double sumUpdateTimePerU_nS = 0; - //Scan the sorted statsArr - for (int i=0; i stats.ub2est) cntUB2++; //should be < 2.275%; under estimate - if (uPerTrial > stats.ub1est) cntUB1++; //should be < 15.866%; under estimate - if (uPerTrial < stats.lb1est) cntLB1++; //should be < 15.866%; over estimate - if (uPerTrial < stats.lb2est) cntLB2++; //should be < 2.275%; over estimate -// sumLB2 += stats.lb2est; -// sumLB1 += stats.lb1est; -// sumUB1 += stats.ub1est; -// sumUB2 += stats.ub2est; - //divide by uPerTrial to normalize betweeen 0 and 1.0, sum over all trials - //Components for the mean and variance of the estimate error - sumEst += statsArr[i].estimate; - double estErr = statsArr[i].re; - sumEstErr += estErr; - sumSqEstErr += estErr*estErr; - - sumUpdateTimePerU_nS += statsArr[i].updateTimePerU_nS; - } - //normalize counts - double fracTgtUB2 = (double)cntUB2/trials; - double fracTgtUB1 = (double)cntUB1/trials; - double fracTltLB1 = (double)cntLB1/trials; - double fracTltLB2 = (double)cntLB2/trials; - - //Compute the average results over the trial set - double meanEst = sumEst/trials; - double meanEstErr = sumEstErr/trials; - double deltaSqEstErr = abs(sumSqEstErr - (sumEstErr*sumEstErr)/trials); - double varEstErr = (trials == 1)? 
deltaSqEstErr/trials : deltaSqEstErr/(trials-1); - double rse = sqrt(varEstErr); - //compute theoretical sketch RSE - double invKm1 = 1.0/(k-1); - double oneMinusKoverN = 1.0 - (double)k/uPerTrial; - double thrse = (sumEstErr == 0.0)? 0.0 : sqrt(invKm1 * oneMinusKoverN); - //compute Bernoulli RSE - double invUperTrial = 1.0/uPerTrial; - double varOverN = (p == 1.0)? 0.0 : 1.0/p - 1.0; - double prse = (p == 1.0)? 0.0 : sqrt(invUperTrial * varOverN); - - //Compute average of each of the bounds estimates -// double meanLB2est = sumLB2/(uPerTrial*trials) -1; -// double meanLB1est = sumLB1/(uPerTrial*trials) -1; -// double meanUB1est = sumUB1/(uPerTrial*trials) -1; -// double meanUB2est = sumUB2/(uPerTrial*trials) -1; - - //Speed - double meanUpdateTimePerU_nS = sumUpdateTimePerU_nS/trials; - - //OUTPUT - dataStr.setLength(0); - dataStr.append(uPerTrial).append(TAB). - - //Sketch estimates, mean, variance - append(meanEst).append(TAB). - append(meanEstErr).append(TAB). - append(rse).append(TAB). - append(thrse).append(TAB). - append(prse).append(TAB). - - //Quantiles measured from the actual distribution of values from all trials. - //Because of quantization effects these values will be noisier than the values - //computed statistically above. - append(min).append(TAB). - append(qM2SD).append(TAB). - append(qM1SD).append(TAB). - append(q50).append(TAB). - append(qP1SD).append(TAB). - append(qP2SD).append(TAB). - append(max).append(TAB). - - //Fractional Bounds measurements - append(fracTltLB2).append(TAB). - append(fracTltLB1).append(TAB). - append(fracTgtUB1).append(TAB). - append(fracTgtUB2).append(TAB). - - //The bounds estimates are computed mathematically based on the sketch - // estimate, the number of valid values in the cache and the value of theta. - // Because of this thes values will be relatively smooth from point to point along the - // unique value axis. -// append(meanLB2est).append(TAB). -// append(meanLB1est).append(TAB). 
-// append(meanUB1est).append(TAB). -// append(meanUB2est).append(TAB). - //Trials - append(trials).append(TAB). - //Speed - append(meanUpdateTimePerU_nS); - } - - /** - * Returns a column header row - * @return a column header row - */ - public static String getHeader() { - StringBuilder sb = new StringBuilder(); - sb. append("InU").append(TAB). - //Estimates - append("MeanEst").append(TAB). - append("MeanErr").append(TAB). - append("RSE").append(TAB). - append("thRSE").append(TAB). - append("pRSE").append(TAB). - //Quantiles - append("Min").append(TAB). - append("QM2SD").append(TAB). - append("QM1SD").append(TAB). - append("Q50").append(TAB). - append("QP1SD").append(TAB). - append("QP2SD").append(TAB). - append("Max").append(TAB). - //Fractional Bounds measurements - append("FracTltLB2").append(TAB). - append("FracTltLB1").append(TAB). - append("FracTgtUB1").append(TAB). - append("FracTgtUB2").append(TAB). - - //Trials - append("Trials").append(TAB). - //Speed - append("nS/u"); - return sb.toString(); - } - - /** - * Returns the trial index = floor(quantile-fraction, #trials) - * @param frac the desired quantile fraction (0.0 - 1.0) - * @param trials the number of total trials - * @return the trial index - */ - private static int quantileIndex(double frac, int trials) { - int idx1 = (int) Math.floor(frac*trials); - return (idx1 >= trials)? trials-1: idx1; - } -} diff --git a/src/test/java/com/yahoo/sketches/performance/SketchPerformance.java b/src/test/java/com/yahoo/sketches/performance/SketchPerformance.java deleted file mode 100644 index a722804f4..000000000 --- a/src/test/java/com/yahoo/sketches/performance/SketchPerformance.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright 2015, Yahoo! Inc. - * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. 
- */ -package com.yahoo.sketches.performance; - -import static java.lang.Math.floor; -import static java.lang.Math.pow; - -import com.yahoo.sketches.Family; -import com.yahoo.sketches.ResizeFactor; -import com.yahoo.sketches.hll.HllSketch; -import com.yahoo.sketches.hll.HllSketchBuilder; -import com.yahoo.sketches.theta.UpdateSketch; -import com.yahoo.sketches.theta.UpdateSketchBuilder; - -/** - * Used to generate data for plotting the error distribution or speed performance of a sketch. - * The X-axis is assumed to be the number of uniques fed to the sketch and varies from 1 to whatever - * is specified in the lgMaxU parameter. "lg" is shorthand for Log_base_2, so if lgMaxU is 12 then - * the highest number of uniques on the X-axis would be 4096. An exponential series is used for the - * unique values per trial so that a wide range of unique values (over many octaves) can be tested - * using a constant number of points per octave. This dramatically reduces the number of plotting - * points required and produces nice plots when plotted against a log axis. - * - *

See the main() method as an example of how to configure. - * - * @author Lee Rhodes - */ -public class SketchPerformance { - - /** - * This method drives the whole process. An exponential series is used for the unique - * counts per trial so that a wide range of unique values (over many octaves) can be tested using - * a constant number of points per octave. This dramatically reduces the number of plotting points - * required and produces nice plots when plotted against a log axis. See the main() method as an - * example of how to configure this. - * - * @param trialMgr TrialManager to be used - */ - public static void start(TrialManager trialMgr) { - long testStartTime_mS = System.currentTimeMillis(); - int lastGI = trialMgr.getMaximumGeneratingIndex(); - int ppo = trialMgr.getPPO(); - int lastU = 0; - println(ProcessStats.getHeader()); - StringBuilder dataStr = new StringBuilder(); - - //Each generating index (gi) will generate a new row of data - // representing N trials at a specific number of unique values. - for (int gi = 0; gi <= lastGI; gi++) { - int u = (int)floor(pow(2.0, (double)gi/ppo)); - if (u == lastU) continue; //at the low end skips over duplicate values of u - lastU = u; - int trials = trialMgr.getTrials(u); - int lgK = trialMgr.getLgK(); - double p = trialMgr.getP(); - Stats[] statsArr = processTrialSet(trialMgr, u, trials); - ProcessStats.process(statsArr, u, lgK, p, dataStr); - println(dataStr.toString()); - } - int testTime_S = (int)((System.currentTimeMillis() - testStartTime_mS)/1000.0); - int min = testTime_S/60; - int sec = testTime_S%60; - println("TestTime: "+min+":"+sec); - } - - /** - * A Trial Set is a number of trials at number of uniques per trial, uPerTrial. - * This is set up so that the number of trials may vary based on the number of uniques for the - * trial set. 
- * @param trialMgr manages the sketch and updating of a stats object - * @param uPerTrial uniques for every trial of a trial set - * @param trials number of trials per trial set - * @return the Stats array contains measurements for each trial of the trial set - */ - private static Stats[] processTrialSet(TrialManager trialMgr, int uPerTrial, int trials) { - Stats[] statsArr = new Stats[trials]; - System.gc(); - for (int t=0; t < trials; t++) { - if (statsArr[t] == null) statsArr[t] = new Stats(); - trialMgr.doTrial(statsArr[t], uPerTrial); - } - return statsArr; - } - - private static void println(String s) { System.out.println(s); } - - - /** - * This main method sets the configuration of the sketches, the TrialManager profile, and - * runs the test. - * @param args not used. - */ - public static void main(String[] args) { - //Common parameters - int lgK = 12; //4K - boolean udSketch = true; //set true if you want to use a theta UpdateSketch, false for HLL - - //Theta UpdateSketch parameters - Family family = Family.QUICKSELECT; - ResizeFactor rf = ResizeFactor.X1;// See javadocs. - boolean direct = false; //See javadocs and the setSketchProfile code - float p = 1.0F; - boolean rebuild = false; //set true if rebuild is desired to reduce size down to k. - - //HLL Parameters - boolean hip = true; - boolean dense = false; - - //Trials Profile Parameters - // For speed trials use min=4,5, max= 13,14,15,16 - // For accuracy trials use min=max= 10 or more - int lgMinTrials = 4; - int lgMaxTrials = 13; - int lgMaxU = 20; - int ppo = 16; - - //INITIALIZE - TrialManager trialMgr = new TrialManager(); - trialMgr.setTrialsProfile(lgMinTrials, lgMaxTrials, lgMaxU, ppo); - UpdateSketchBuilder udBldr = null; - HllSketchBuilder hllBldr = null; - - if (udSketch) { //UpdateSketch Builder - udBldr = UpdateSketch.builder().setNominalEntries(1 << lgK).setFamily(family).setP(p). 
- setResizeFactor(rf); - trialMgr.setUpdateSketchBuilder(udBldr, direct, rebuild); - } - else { //HLL Builder - hllBldr = HllSketch.builder().setLogBuckets(lgK).setHipEstimator(hip).setDenseMode(dense); - trialMgr.setHllSketchBuilder(hllBldr); - } - - //START THE TESTS - SketchPerformance.start(trialMgr); - - //PRINT SUMMARY - if (udBldr != null) println(udBldr.toString()); - if (hllBldr != null) println(hllBldr.toString()); - println(trialMgr.toString()); - } - -} diff --git a/src/test/java/com/yahoo/sketches/performance/Stats.java b/src/test/java/com/yahoo/sketches/performance/Stats.java deleted file mode 100644 index 5cefe78dd..000000000 --- a/src/test/java/com/yahoo/sketches/performance/Stats.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2015, Yahoo! Inc. - * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. - */ -package com.yahoo.sketches.performance; - -import com.yahoo.sketches.hll.HllSketch; -import com.yahoo.sketches.theta.UpdateSketch; - -/** - * Holds key metrics from a single trial - * - * @author Lee Rhodes - */ -public class Stats implements Comparable { - double estimate; //The estimate from the sketch - double re = 0; //Relative Error. Will sort by this - double lb2est; //LowerBound estimate at -2 StdDev - double lb1est; //LowerBound estimate at -1 StdDev - double ub1est; //UpperBound estimate at +1 StdDev - double ub2est; //UpperBound estimate at +2 StdDev - double updateTimePerU_nS; - - /** - * Update this Stats with a theta UpdateSketch - * @param sketch the sketch to update with - * @param uPerTrial the number of uniques fed to the sketch in this trial - * @param updateTime_nS the update time requred for all the updates in nanoSeconds. 
- */ - public void update(UpdateSketch sketch, int uPerTrial, long updateTime_nS) { - estimate = sketch.getEstimate(); - re = estimate/uPerTrial - 1.0; - lb2est = sketch.getLowerBound(2); - lb1est = sketch.getLowerBound(1); - ub1est = sketch.getUpperBound(1); - ub2est = sketch.getUpperBound(2); - updateTimePerU_nS = (double)updateTime_nS / uPerTrial; - } - - /** - * Update this Stats with an HLL Sketch - * @param sketch the sketch to update with - * @param uPerTrial the number of uniques fed to the sketch in this trial - * @param updateTime_nS the update time requred for all the updates in nanoSeconds. - */ - public void update(HllSketch sketch, int uPerTrial, long updateTime_nS) { - estimate = sketch.getEstimate(); - re = estimate/uPerTrial - 1.0; - lb2est = sketch.getLowerBound(2); - lb1est = sketch.getLowerBound(1); - ub1est = sketch.getUpperBound(1); - ub2est = sketch.getUpperBound(2); - updateTimePerU_nS = (double)updateTime_nS / uPerTrial; - } - - @Override - public int compareTo(Stats that) { - return (this.re < that.re)? -1 : (this.re > that.re)? 1 : 0; - } -} diff --git a/src/test/java/com/yahoo/sketches/performance/TrialManager.java b/src/test/java/com/yahoo/sketches/performance/TrialManager.java deleted file mode 100644 index 22f9714d1..000000000 --- a/src/test/java/com/yahoo/sketches/performance/TrialManager.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright 2015, Yahoo! Inc. - * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. 
- */ -package com.yahoo.sketches.performance; - -import static java.lang.Math.log; -import static java.lang.Math.pow; - -import com.yahoo.sketches.hll.HllSketch; -import com.yahoo.sketches.hll.HllSketchBuilder; -import com.yahoo.sketches.memory.Memory; -import com.yahoo.sketches.memory.NativeMemory; -import com.yahoo.sketches.theta.Sketch; -import com.yahoo.sketches.theta.UpdateSketch; -import com.yahoo.sketches.theta.UpdateSketchBuilder; - -/** - * Manages the execution of every trial. One of these for the entire process. - * - * @author Lee Rhodes - */ -public class TrialManager { - private static final double LN2 = log(2.0); - private UpdateSketch udSketch_ = null; - private HllSketchBuilder hllBuilder_ = null; - private int lgK_; - private double p_; - //Global counter that increments for every new unique value. - //Assures that all sketches are virtually independent. - private long vIn_; - private int lgBP_; //The break point - private int lgMinTrials_; - private int lgMaxTrials_; - private int lgMaxU_; - private int ppo_; - private double slope_; - private boolean rebuild_ = false; - - /** - * Sets the theta UpdateSketch builder used to create the theta UpdateSketches. - * @param udBldr the theta UpdateSketchBuilder - * @param direct true if direct (off heap) mode is desired. Instead of actual off heap memory - * this will emulate that behavior by using an on-heap byte array accessed by the Memory package. - * Performance-wise it is the same except for issues of garbage collection, which is not the - * purpose of this test. - * @param rebuild set true if rebuild is desired - */ - public void setUpdateSketchBuilder(UpdateSketchBuilder udBldr, boolean direct, boolean rebuild) { - lgK_ = udBldr.getLgNominalEntries(); - p_ = udBldr.getP(); - int k = 1 << lgK_; - lgBP_ = lgK_ + 1; //set the break point where the #trials starts to decrease. 
- Memory mem = null; - if (direct) { - int bytes = Sketch.getMaxUpdateSketchBytes(k); - byte[] memArr = new byte[bytes]; - mem = new NativeMemory(memArr); - udBldr.initMemory(mem); - } - udSketch_ = udBldr.initMemory(mem).build(k); - rebuild_ = rebuild; - } - - /** - * Sets the HLL builder used to create the HLL sketches. - * @param hllBldr the HllSketchBuilder - */ - public void setHllSketchBuilder(HllSketchBuilder hllBldr) { - lgK_ = hllBldr.getLogBuckets(); - p_ = 1.0; - udSketch_ = null; - hllBuilder_ = hllBldr; - } - - /** - * This sets the profile for how the number of trials vary with the number of uniques. - * The number of trials is the maximum until the number of uniques exceeds k, whereby - * the number of trials starts to decrease in a power-law fashion until the minimum - * number of trials is reached at the maximum number of uniques to be tested. - * @param lgMinTrials The minimum number of trials in a trial set specified as the - * exponent of 2. This will occur at the maximum uniques value. - * @param lgMaxTrials The maximum number of trials in a trial set specified as the - * exponent of 2. - * @param lgMaxU The maximum number of uniques for this entire test specified as the - * exponent of 2. The first trail set starts at uniques (u = 1). - * @param ppo The number of Points Per Octave along the unique value number line - * that will be used for generating trial sets. Recommended values are one point per octave - * to 16 points per octave. - */ - public void setTrialsProfile(int lgMinTrials, int lgMaxTrials, int lgMaxU, int ppo) { - lgMinTrials_ = lgMinTrials; - lgMaxTrials_ = lgMaxTrials; - lgMaxU_ = lgMaxU; - ppo_ = ppo; - slope_ = (double)(lgMaxTrials - lgMinTrials) / (lgBP_ - lgMaxU_); - } - - /** - * Create (or reset) a sketch and perform uPerTrial updates then update the given Stats. - * @param stats The given Stats object - * @param uPerTrial the number of updates for this trial. 
- */ - public void doTrial(Stats stats, int uPerTrial) { - if (udSketch_ != null) { //UpdateSketch - udSketch_.reset(); //reuse the same sketch - long startUpdateTime_nS = System.nanoTime(); - for (int u=uPerTrial; u--> 0; ) { udSketch_.update(vIn_++); } - long updateTime_nS = System.nanoTime() - startUpdateTime_nS; - if (rebuild_) { udSketch_.rebuild(); } //Resizes down to k. Only useful with QuickSelectSketch - stats.update(udSketch_, uPerTrial, updateTime_nS); - } - else { //HllSketch - HllSketch hllSketch = hllBuilder_.build(); - long startUpdateTime_nS = System.nanoTime(); - for (int u=uPerTrial; u--> 0; ) hllSketch.update(new long[]{vIn_++}); - long updateTime_nS = System.nanoTime() - startUpdateTime_nS; - stats.update(hllSketch, uPerTrial, updateTime_nS); - } - } - - /** - * Computes the number of trials for a given current number of uniques for a trial set. - * @param curU the given current number of uniques for a trial set. - * @return the number of trials for a given current number of uniques for a trial set. - */ - public int getTrials(int curU) { - if ((lgMinTrials_ == lgMaxTrials_) || (curU <= (1 << lgBP_))) { - return 1 << lgMaxTrials_; - } - double lgCurU = log(curU)/LN2; - double lgTrials = slope_ * (lgCurU - lgBP_) + lgMaxTrials_; - return (int) pow(2.0, lgTrials); - } - - /** - * Return the Log-base 2 of the configured nominal entries or k - * @return the Log-base 2 of the configured nominal entries or k - */ - public int getLgK() { - return lgK_; - } - - /** - * Return the probability sampling rate, p. - * @return the probability sampling rate, p. - */ - public double getP() { - return p_; - } - - /** - * Return the configured Points-Per-Octave. - * @return the configured Points-Per-Octave. - */ - public int getPPO() { - return ppo_; - } - - /** - * Return true if sketch rebuild is requested to bring sketch size down to k, if necessary. - * Only relevant for QuickSelectSketch. 
- * @return true if sketch rebuild is requested to bring sketch size down to k, if necessary. - */ - public boolean getRebuild() { - return rebuild_; - } - - /** - * Returns the maximum generating index (gi) from the log_base2 of the maximum number of uniques - * for the entire test run. - * @return the maximum generating index (gi) - */ - public int getMaximumGeneratingIndex() { - return ppo_*lgMaxU_; - } - - @Override - public String toString() { - return "Trials Profile: LgMinTrials: "+lgMinTrials_+", LgMaxTrials: "+lgMaxTrials_+ - ", lgMaxU: "+lgMaxU_+", PPO: "+ppo_+", Rebuild: "+rebuild_; - } - -} diff --git a/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java b/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java index ebb4d8d4b..a8ab57f0f 100644 --- a/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java +++ b/src/test/java/com/yahoo/sketches/theta/SetOperationTest.java @@ -328,9 +328,48 @@ public void checkValidSetOpID() { assertTrue(SetOperation.isValidSetOpID(UNION.getID())); assertTrue(SetOperation.isValidSetOpID(INTERSECTION.getID())); assertTrue(SetOperation.isValidSetOpID(A_NOT_B.getID())); + } + + @Test + public void setOpsExample() { + println("Set Operations Example:"); + int k = 4096; + UpdateSketch skA = Sketches.updateSketchBuilder().build(k); + UpdateSketch skB = Sketches.updateSketchBuilder().build(k); + UpdateSketch skC = Sketches.updateSketchBuilder().build(k); + + for (int i=1; i<=10; i++) { skA.update(i); } + for (int i=1; i<=20; i++) { skB.update(i); } + for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set + + Union union = Sketches.setOperationBuilder().buildUnion(k); + union.update(skA); + union.update(skB); + // ... 
continue to iterate on the input sketches to union + + CompactSketch unionSk = union.getResult(); //the result union sketch + println("A U B : "+unionSk.getEstimate()); //the estimate of the union + //Intersection is similar + + Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + inter.update(unionSk); + inter.update(skC); + // ... continue to iterate on the input sketches to intersect + + CompactSketch interSk = inter.getResult(); //the result intersection sketch + println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection + + //The AnotB operation is a little different as it is stateless: + + AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); + aNotB.update(skA, skC); + + CompactSketch not = aNotB.getResult(); + println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation } + @Test public void printlnTest() { println("PRINTING: "+this.getClass().getName());