sample() {
return this.unmodifiableSample;
}
- /**
- * Returns how many items should the algorithm skip given its state.
- *
- * The implementation of this method must only rely on the given arguments and not on the state of the instance.
- *
- * @param streamSize how many items have been fed to the sampler
- * @param sampleSize expected sample size
- * @param random the {@link Random} instance to use
- * @return how many items to skip
- */
- protected abstract long skipLength(long streamSize, int sampleSize, Random random);
-
- /**
- * Performs initialization logic.
- *
- * This method is invoked in the constructor.
- *
- * @param sampleSize expected sample size
- * @param random the {@link Random} instance assigned to this instance
- */
- protected void init(int sampleSize, Random random) {
- }
-
static class AtomicReferenceArrayList extends AbstractList implements List, RandomAccess {
private final AtomicReferenceArray array;
diff --git a/src/main/java/gr/james/sampling/LiLSampling.java b/src/main/java/gr/james/sampling/LiLSampling.java
index 6800f45..ea7efa6 100644
--- a/src/main/java/gr/james/sampling/LiLSampling.java
+++ b/src/main/java/gr/james/sampling/LiLSampling.java
@@ -21,8 +21,6 @@
* O(n(1 + log(N/n)))
*/
public class LiLSampling extends AbstractRandomSampling {
- private double W;
-
/**
* Construct a new instance of {@link LiLSampling} using the specified sample size and RNG. The implementation
* assumes that {@code random} conforms to the contract of {@link Random} and will perform no checks to ensure that.
@@ -34,7 +32,7 @@ public class LiLSampling extends AbstractRandomSampling {
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
public LiLSampling(int sampleSize, Random random) {
- super(sampleSize, random);
+ super(sampleSize, random, LiLSkipFunction::new);
}
/**
@@ -60,41 +58,6 @@ public static RandomSamplingCollector collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new LiLSampling<>(sampleSize, random));
}
- /**
- * {@inheritDoc}
- *
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- */
- @Override
- protected void init(int sampleSize, Random random) {
- // W = Math.exp(Math.log(RandomSamplingUtils.randomExclusive(random)) / sampleSize);
- W = Math.pow(RandomSamplingUtils.randomExclusive(random), 1.0 / sampleSize);
- }
-
- /**
- * {@inheritDoc}
- *
- * @param streamSize {@inheritDoc}
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- * @return {@inheritDoc}
- */
- @Override
- protected long skipLength(long streamSize, int sampleSize, Random random) {
- final double random1 = RandomSamplingUtils.randomExclusive(random);
- final double random2 = RandomSamplingUtils.randomExclusive(random);
- long skip = (long) (Math.log(random1) / Math.log(1 - W));
- assert skip >= 0 || skip == Long.MIN_VALUE;
- if (skip == Long.MIN_VALUE) { // Sometimes when W is very small, 1 - W = 1 and Math.log(1) = +0 instead of -0
- skip = Long.MAX_VALUE; // This results in negative infinity skip
- }
- // W = W * Math.exp(Math.log(random2) / sampleSize);
- W = W * Math.pow(random2, 1.0 / sampleSize);
- return skip;
- }
-
- @Deprecated
private static class LiLSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;
diff --git a/src/main/java/gr/james/sampling/LiLSamplingThreadSafe.java b/src/main/java/gr/james/sampling/LiLSamplingThreadSafe.java
index 5a1a90a..c0c7972 100644
--- a/src/main/java/gr/james/sampling/LiLSamplingThreadSafe.java
+++ b/src/main/java/gr/james/sampling/LiLSamplingThreadSafe.java
@@ -27,8 +27,6 @@
*/
public class LiLSamplingThreadSafe extends AbstractThreadSafeRandomSampling {
- private AtomicLong W;
-
/**
* Construct a new instance of {@link LiLSamplingThreadSafe} using the specified sample size and RNG. The
* implementation assumes that {@code random} conforms to the contract of {@link Random} and will perform no checks
@@ -40,7 +38,7 @@ public class LiLSamplingThreadSafe extends AbstractThreadSafeRandomSampling RandomSamplingCollector collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new LiLSamplingThreadSafe<>(sampleSize, random));
}
- /**
- * {@inheritDoc}
- *
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- */
- @Override
- protected void init(int sampleSize, Random random) {
- //W = Math.pow(RandomSamplingUtils.randomExclusive(random), 1.0 / sampleSize);
- W = new AtomicLong();
- W.set(Double.doubleToLongBits(Math.pow(RandomSamplingUtils.randomExclusive(random), 1.0 / sampleSize)));
- }
-
- /**
- * {@inheritDoc}
- *
- * @param streamSize {@inheritDoc}
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- * @return {@inheritDoc}
- */
- @Override
- protected long skipLength(long streamSize, int sampleSize, Random random) {
- final double random1 = RandomSamplingUtils.randomExclusive(random);
- final double random2 = RandomSamplingUtils.randomExclusive(random);
- double w = Double.longBitsToDouble(W.get());
- long skip = (long) (Math.log(random1) / Math.log(1 - w));
- assert skip >= 0 || skip == Long.MIN_VALUE;
- if (skip == Long.MIN_VALUE) { // Sometimes when W is very small, 1 - W = 1 and Math.log(1) = +0 instead of -0
- skip = Long.MAX_VALUE; // This results in negative infinity skip
- }
- // W = W * Math.pow(random2, 1.0 / sampleSize);
- W.set(Double.doubleToLongBits(w * Math.pow(random2, 1.0 / sampleSize)));
- return skip;
- }
-
- @Deprecated
private static class LiLThreadSafeSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;
diff --git a/src/main/java/gr/james/sampling/SkipFunction.java b/src/main/java/gr/james/sampling/SkipFunction.java
index bc4d7f0..629bf9b 100644
--- a/src/main/java/gr/james/sampling/SkipFunction.java
+++ b/src/main/java/gr/james/sampling/SkipFunction.java
@@ -5,9 +5,8 @@
* reservoir. The {@code SkipFunction} works similarly to an iterator: it's {@link #skip()} method returns the skip
* counts in temporal order as the stream increases.
*/
-@Deprecated
@FunctionalInterface
-interface SkipFunction {
+public interface SkipFunction {
/**
* Returns a {@code long} indicating how many elements the algorithm must skip.
*
diff --git a/src/main/java/gr/james/sampling/SkipFunctionFactory.java b/src/main/java/gr/james/sampling/SkipFunctionFactory.java
new file mode 100644
index 0000000..faad7f2
--- /dev/null
+++ b/src/main/java/gr/james/sampling/SkipFunctionFactory.java
@@ -0,0 +1,18 @@
+package gr.james.sampling;
+
+import java.util.Random;
+
+/**
+ * A factory of {@link SkipFunction} instances, e.g. supplied as a constructor reference ({@code Impl::new}).
+ */
+@FunctionalInterface
+public interface SkipFunctionFactory {
+ /**
+ * Create a new {@link SkipFunction} instance.
+ *
+ * @param sampleSize the sample size that the new {@link SkipFunction} is created for
+ * @param random the {@link Random} instance for the new {@link SkipFunction} to use
+ * @return the new {@link SkipFunction} instance
+ */
+ SkipFunction create(int sampleSize, Random random);
+}
diff --git a/src/main/java/gr/james/sampling/VitterXSampling.java b/src/main/java/gr/james/sampling/VitterXSampling.java
index 1080703..3a4f1d9 100644
--- a/src/main/java/gr/james/sampling/VitterXSampling.java
+++ b/src/main/java/gr/james/sampling/VitterXSampling.java
@@ -30,7 +30,7 @@ public class VitterXSampling extends AbstractRandomSampling {
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
public VitterXSampling(int sampleSize, Random random) {
- super(sampleSize, random);
+ super(sampleSize, random, VitterXSkipFunction::new);
}
/**
@@ -56,32 +56,6 @@ public static RandomSamplingCollector collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new VitterXSampling<>(sampleSize, random));
}
- /**
- * {@inheritDoc}
- *
- * @param streamSize {@inheritDoc}
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- * @return {@inheritDoc}
- */
- @Override
- protected long skipLength(long streamSize, int sampleSize, Random random) {
- streamSize++;
-
- final double r = random.nextDouble();
- long skipCount = 0;
-
- double quot = (streamSize - sampleSize) / (double) streamSize;
- while (quot > r && streamSize > 0) {
- skipCount++;
- streamSize++;
- quot = (quot * (streamSize - sampleSize)) / (double) streamSize;
- }
-
- return skipCount;
- }
-
- @Deprecated
private static class VitterXSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;
diff --git a/src/main/java/gr/james/sampling/VitterZSampling.java b/src/main/java/gr/james/sampling/VitterZSampling.java
index d1de0aa..3b3c1dc 100644
--- a/src/main/java/gr/james/sampling/VitterZSampling.java
+++ b/src/main/java/gr/james/sampling/VitterZSampling.java
@@ -19,8 +19,6 @@
* @see Random Sampling with a Reservoir
*/
public class VitterZSampling extends AbstractRandomSampling {
- private double W;
-
/**
* Construct a new instance of {@link VitterZSampling} using the specified sample size and RNG. The implementation
* assumes that {@code random} conforms to the contract of {@link Random} and will perform no checks to ensure that.
@@ -32,7 +30,7 @@ public class VitterZSampling extends AbstractRandomSampling {
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
public VitterZSampling(int sampleSize, Random random) {
- super(sampleSize, random);
+ super(sampleSize, random, VitterZSkipFunction::new);
}
/**
@@ -58,63 +56,6 @@ public static RandomSamplingCollector collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new VitterZSampling<>(sampleSize, random));
}
- /**
- * {@inheritDoc}
- *
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- */
- @Override
- protected void init(int sampleSize, Random random) {
- W = Math.pow(random.nextDouble(), -1.0 / sampleSize);
- }
-
- /**
- * {@inheritDoc}
- *
- * @param streamSize {@inheritDoc}
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- * @return {@inheritDoc}
- */
- @Override
- protected long skipLength(long streamSize, int sampleSize, Random random) {
- double term = streamSize - sampleSize + 1;
- while (true) {
- // Generate U and X
- double U = RandomSamplingUtils.randomExclusive(random);
- double X = streamSize * (this.W - 1.0);
- long G = (long) X;
- // Test if U <= h(G) / cg(X)
- double lhs = Math.pow(((U * Math.pow(((streamSize + 1) / term), 2)) * (term + G)) / (streamSize + X), 1.0 / sampleSize);
- double rhs = (((streamSize + X) / (term + G)) * term) / streamSize;
- if (lhs < rhs) {
- this.W = rhs / lhs;
- return G;
- }
- // Test if U <= f(G) / cg(X)
- double y = (((U * (streamSize + 1)) / term) * (streamSize + G + 1)) / (streamSize + X);
- double denom;
- double numer_lim;
- if (sampleSize < G) {
- denom = streamSize;
- numer_lim = term + G;
- } else {
- denom = streamSize - sampleSize + G;
- numer_lim = streamSize + 1;
- }
- for (long numer = streamSize + G; numer >= numer_lim; numer--) {
- y = (y * numer) / denom;
- denom = denom - 1;
- }
- this.W = Math.pow(random.nextDouble(), -1.0 / sampleSize);
- if (Math.pow(y, 1.0 / sampleSize) <= (streamSize + X) / streamSize) {
- return G;
- }
- }
- }
-
- @Deprecated
private static class VitterZSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;
diff --git a/src/main/java/gr/james/sampling/WatermanSampling.java b/src/main/java/gr/james/sampling/WatermanSampling.java
index f941dd6..43de387 100644
--- a/src/main/java/gr/james/sampling/WatermanSampling.java
+++ b/src/main/java/gr/james/sampling/WatermanSampling.java
@@ -30,7 +30,7 @@ public class WatermanSampling extends AbstractRandomSampling {
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
public WatermanSampling(int sampleSize, Random random) {
- super(sampleSize, random);
+ super(sampleSize, random, WatermanSkipFunction::new);
}
/**
@@ -56,26 +56,6 @@ public static RandomSamplingCollector collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new WatermanSampling<>(sampleSize, random));
}
- /**
- * {@inheritDoc}
- *
- * @param streamSize {@inheritDoc}
- * @param sampleSize {@inheritDoc}
- * @param random {@inheritDoc}
- * @return {@inheritDoc}
- */
- @Override
- protected long skipLength(long streamSize, int sampleSize, Random random) {
- streamSize++;
- long skipCount = 0;
- while (random.nextDouble() * streamSize >= sampleSize && streamSize > 0) {
- streamSize++;
- skipCount++;
- }
- return skipCount;
- }
-
- @Deprecated
private static class WatermanSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;