diff --git a/src/main/java/gr/james/sampling/VitterZSampling.java b/src/main/java/gr/james/sampling/VitterZSampling.java index e59f379..05cf23e 100644 --- a/src/main/java/gr/james/sampling/VitterZSampling.java +++ b/src/main/java/gr/james/sampling/VitterZSampling.java @@ -99,4 +99,62 @@ long skipLength(long streamSize, int sampleSize, Random random) { } } } + + @Deprecated + private static class VitterZSkipFunction implements SkipFunction { + private final int sampleSize; + private final Random random; + private long streamSize; + private double W; + + public VitterZSkipFunction(int sampleSize, Random random) { + this.sampleSize = sampleSize; + this.random = random; + this.streamSize = sampleSize; + this.W = Math.pow(random.nextDouble(), -1.0 / sampleSize); + } + + @Override + public long skip() throws StreamOverflowException { + if (streamSize == Long.MAX_VALUE) { + throw new StreamOverflowException(); + } + + double term = streamSize - sampleSize + 1; + while (true) { + // Generate U and X + double U = RandomSamplingUtils.randomExclusive(random); + double X = streamSize * (this.W - 1.0); + long G = (long) X; + // Test if U <= h(G) / cg(X) + double lhs = Math.pow(((U * Math.pow(((streamSize + 1) / term), 2)) * (term + G)) / (streamSize + X), 1.0 / sampleSize); + double rhs = (((streamSize + X) / (term + G)) * term) / streamSize; + if (lhs < rhs) { + this.W = rhs / lhs; + streamSize += G + 1; // increase stream size + return G; + } + // Test if U <= f(G) / cg(X) + double y = (((U * (streamSize + 1)) / term) * (streamSize + G + 1)) / (streamSize + X); + double denom; + double numer_lim; + if (sampleSize < G) { + denom = streamSize; + numer_lim = term + G; + } else { + denom = streamSize - sampleSize + G; + numer_lim = streamSize + 1; + } + for (long numer = streamSize + G; numer >= numer_lim; numer--) { + y = (y * numer) / denom; + denom = denom - 1; + } + this.W = Math.pow(random.nextDouble(), -1.0 / sampleSize); + if (Math.pow(y, 1.0 / sampleSize) <= (streamSize + X) / streamSize) { + streamSize += G + 1; // increase stream size + return G; + } + } + } + } }