Skip to content

Commit

Permalink
Incorporate skip function in abstract classes (#53)
Browse files Browse the repository at this point in the history
* Implement SkipFunctionFactory in AbstractRandomSampling constructor

* Utilize skip function instead of abstract skipLength method

* Remove init and skipLength methods

* Make SkipFunction public
  • Loading branch information
gstamatelat authored Sep 8, 2022
1 parent b696686 commit 4903fac
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 258 deletions.
47 changes: 12 additions & 35 deletions src/main/java/gr/james/sampling/AbstractRandomSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@
/**
* This class provides a skeletal implementation of the {@link RandomSampling} interface to minimize the effort required
* to implement that interface.
* <p>
* This class requires the implementation of 2 methods:
* <ul>
* <li>{@link #skipLength(long, int, Random)}</li>
* <li>{@link #init(int, Random)}</li>
* </ul>
*
* @param <T> the item type
* @author Giorgos Stamatelatos
Expand Down Expand Up @@ -46,30 +40,36 @@ public abstract class AbstractRandomSampling<T> implements RandomSampling<T> {
*/
protected long skip;

/**
* The skip function.
*/
protected final SkipFunction skipFunction;

/**
* Construct a new instance of this class using the specified sample size and RNG. The implementation assumes that
* {@code random} conforms to the contract of {@link Random} and will perform no checks to ensure that. If this
* contract is violated, the behavior is undefined.
*
* @param sampleSize the sample size
* @param random the RNG to use
* @param sampleSize the sample size
* @param random the RNG to use
* @param skipFunctionFactory the factory for the skip function
* @throws NullPointerException if {@code random} or {@code skipFunctionFactory} is {@code null}
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
protected AbstractRandomSampling(int sampleSize, Random random) {
protected AbstractRandomSampling(int sampleSize, Random random, SkipFunctionFactory skipFunctionFactory) {
if (random == null) {
throw new NullPointerException("Random was null");
}
if (sampleSize < 1) {
throw new IllegalArgumentException("Sample size was less than 1");
}
init(sampleSize, random);
this.random = random;
this.sampleSize = sampleSize;
this.streamSize = 0;
this.sample = new ArrayList<>(sampleSize);
this.skip = skipLength(sampleSize, sampleSize, random);
this.unmodifiableSample = Collections.unmodifiableList(sample);
this.skipFunction = skipFunctionFactory.create(sampleSize, random);
this.skip = skipFunction.skip();
}

/**
Expand Down Expand Up @@ -111,7 +111,7 @@ public boolean feed(T item) {
// Accept and generate new skip
assert skip == 0;
sample.set(random.nextInt(sampleSize), item);
skip = skipLength(streamSize, sampleSize, random);
skip = skipFunction.skip();
assert this.skip >= 0;
return true;
}
Expand Down Expand Up @@ -176,27 +176,4 @@ public long streamSize() {
public Collection<T> sample() {
return this.unmodifiableSample;
}

/**
* Returns how many items the algorithm should skip given its state.
* <p>
* The implementation of this method relies on the given arguments and not on the state of the instance.
*
* @param streamSize how many items have been fed to the sampler
* @param sampleSize expected sample size
* @param random the {@link Random} instance to use
* @return how many items to skip
*/
protected abstract long skipLength(long streamSize, int sampleSize, Random random);

/**
* Performs initialization logic.
* <p>
* This method is invoked in the constructor.
*
* @param sampleSize expected sample size
* @param random the {@link Random} instance assigned to this instance
*/
protected void init(int sampleSize, Random random) {
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
/**
* This class provides a skeletal implementation of the thread-safe variant of the {@link RandomSampling} interface to
* minimize the effort required to implement that interface.
* <p>
* This class requires the implementation of 2 methods:
* <ul>
* <li>{@link #skipLength(long, int, Random)}</li>
* <li>{@link #init(int, Random)}</li>
* </ul>
*
* @param <T> the item type
* @author Giorgos Stamatelatos
Expand Down Expand Up @@ -55,31 +49,37 @@ public abstract class AbstractThreadSafeRandomSampling<T> implements RandomSampl
*/
protected AtomicLong skip;

/**
* The skip function.
*/
protected final SkipFunction skipFunction;

/**
* Construct a new instance of this class using the specified sample size and RNG. The implementation assumes that
* {@code random} conforms to the contract of {@link Random} and will perform no checks to ensure that. If this
* contract is violated, the behavior is undefined.
*
* @param sampleSize the sample size
* @param random the RNG to use
* @param sampleSize the sample size
* @param random the RNG to use
* @param skipFunctionFactory the factory for the skip function
* @throws NullPointerException if {@code random} or {@code skipFunctionFactory} is {@code null}
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
protected AbstractThreadSafeRandomSampling(int sampleSize, Random random) {
protected AbstractThreadSafeRandomSampling(int sampleSize, Random random, SkipFunctionFactory skipFunctionFactory) {
if (random == null) {
throw new NullPointerException("Random was null");
}
if (sampleSize < 1) {
throw new IllegalArgumentException("Sample size was less than 1");
}
init(sampleSize, random);
this.random = random;
this.sampleSize = sampleSize;
this.streamSize = new AtomicLong(0);
this.sample = new AtomicReferenceArray<>(sampleSize);
this.samplesCount = new AtomicInteger(0);
this.skip = new AtomicLong(skipLength(sampleSize, sampleSize, random));
this.unmodifiableSample = new AtomicReferenceArrayList<>(sample, samplesCount);
this.skipFunction = skipFunctionFactory.create(sampleSize, random);
this.skip = new AtomicLong(skipFunction.skip());
}

/**
Expand Down Expand Up @@ -125,7 +125,7 @@ public final boolean feed(T item) {
}
} else {
assert currentSkipValue == 0;
long nextSkipValue = skipLength(streamSize, sampleSize, random);
long nextSkipValue = skipFunction.skip();
boolean skipCountUpdated = skip.compareAndSet(currentSkipValue, nextSkipValue);
if (skipCountUpdated) {
sample.set(random.nextInt(sampleSize), item);
Expand Down Expand Up @@ -197,29 +197,6 @@ public final Collection<T> sample() {
return this.unmodifiableSample;
}

/**
* Returns how many items the algorithm should skip given its state.
* <p>
* The implementation of this method must only rely on the given arguments and not on the state of the instance.
*
* @param streamSize how many items have been fed to the sampler
* @param sampleSize expected sample size
* @param random the {@link Random} instance to use
* @return how many items to skip
*/
protected abstract long skipLength(long streamSize, int sampleSize, Random random);

/**
* Performs initialization logic.
* <p>
* This method is invoked in the constructor.
*
* @param sampleSize expected sample size
* @param random the {@link Random} instance assigned to this instance
*/
protected void init(int sampleSize, Random random) {
}

static class AtomicReferenceArrayList<T> extends AbstractList<T> implements List<T>, RandomAccess {

private final AtomicReferenceArray<T> array;
Expand Down
39 changes: 1 addition & 38 deletions src/main/java/gr/james/sampling/LiLSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
* O(n(1 + log(N/n)))</a>
*/
public class LiLSampling<T> extends AbstractRandomSampling<T> {
private double W;

/**
* Construct a new instance of {@link LiLSampling} using the specified sample size and RNG. The implementation
* assumes that {@code random} conforms to the contract of {@link Random} and will perform no checks to ensure that.
Expand All @@ -34,7 +32,7 @@ public class LiLSampling<T> extends AbstractRandomSampling<T> {
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
public LiLSampling(int sampleSize, Random random) {
super(sampleSize, random);
super(sampleSize, random, LiLSkipFunction::new);
}

/**
Expand All @@ -60,41 +58,6 @@ public static <E> RandomSamplingCollector<E> collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new LiLSampling<>(sampleSize, random));
}

/**
* {@inheritDoc}
*
* @param sampleSize {@inheritDoc}
* @param random {@inheritDoc}
*/
@Override
protected void init(int sampleSize, Random random) {
// W = Math.exp(Math.log(RandomSamplingUtils.randomExclusive(random)) / sampleSize);
W = Math.pow(RandomSamplingUtils.randomExclusive(random), 1.0 / sampleSize);
}

/**
* {@inheritDoc}
*
* @param streamSize {@inheritDoc}
* @param sampleSize {@inheritDoc}
* @param random {@inheritDoc}
* @return {@inheritDoc}
*/
@Override
protected long skipLength(long streamSize, int sampleSize, Random random) {
final double random1 = RandomSamplingUtils.randomExclusive(random);
final double random2 = RandomSamplingUtils.randomExclusive(random);
long skip = (long) (Math.log(random1) / Math.log(1 - W));
assert skip >= 0 || skip == Long.MIN_VALUE;
if (skip == Long.MIN_VALUE) { // Sometimes when W is very small, 1 - W = 1 and Math.log(1) = +0 instead of -0
skip = Long.MAX_VALUE; // This results in negative infinity skip
}
// W = W * Math.exp(Math.log(random2) / sampleSize);
W = W * Math.pow(random2, 1.0 / sampleSize);
return skip;
}

@Deprecated
private static class LiLSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;
Expand Down
41 changes: 1 addition & 40 deletions src/main/java/gr/james/sampling/LiLSamplingThreadSafe.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
*/
public class LiLSamplingThreadSafe<T> extends AbstractThreadSafeRandomSampling<T> {

private AtomicLong W;

/**
* Construct a new instance of {@link LiLSamplingThreadSafe} using the specified sample size and RNG. The
* implementation assumes that {@code random} conforms to the contract of {@link Random} and will perform no checks
Expand All @@ -40,7 +38,7 @@ public class LiLSamplingThreadSafe<T> extends AbstractThreadSafeRandomSampling<T
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
public LiLSamplingThreadSafe(int sampleSize, Random random) {
super(sampleSize, random);
super(sampleSize, random, LiLThreadSafeSkipFunction::new);
}

/**
Expand All @@ -66,43 +64,6 @@ public static <E> RandomSamplingCollector<E> collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new LiLSamplingThreadSafe<>(sampleSize, random));
}

/**
* {@inheritDoc}
*
* @param sampleSize {@inheritDoc}
* @param random {@inheritDoc}
*/
@Override
protected void init(int sampleSize, Random random) {
//W = Math.pow(RandomSamplingUtils.randomExclusive(random), 1.0 / sampleSize);
W = new AtomicLong();
W.set(Double.doubleToLongBits(Math.pow(RandomSamplingUtils.randomExclusive(random), 1.0 / sampleSize)));
}

/**
* {@inheritDoc}
*
* @param streamSize {@inheritDoc}
* @param sampleSize {@inheritDoc}
* @param random {@inheritDoc}
* @return {@inheritDoc}
*/
@Override
protected long skipLength(long streamSize, int sampleSize, Random random) {
final double random1 = RandomSamplingUtils.randomExclusive(random);
final double random2 = RandomSamplingUtils.randomExclusive(random);
double w = Double.longBitsToDouble(W.get());
long skip = (long) (Math.log(random1) / Math.log(1 - w));
assert skip >= 0 || skip == Long.MIN_VALUE;
if (skip == Long.MIN_VALUE) { // Sometimes when W is very small, 1 - W = 1 and Math.log(1) = +0 instead of -0
skip = Long.MAX_VALUE; // This results in negative infinity skip
}
// W = W * Math.pow(random2, 1.0 / sampleSize);
W.set(Double.doubleToLongBits(w * Math.pow(random2, 1.0 / sampleSize)));
return skip;
}

@Deprecated
private static class LiLThreadSafeSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/gr/james/sampling/SkipFunction.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
* reservoir. The {@code SkipFunction} works similarly to an iterator: its {@link #skip()} method returns the skip
* counts in temporal order as the stream increases.
*/
@Deprecated
@FunctionalInterface
interface SkipFunction {
public interface SkipFunction {
/**
* Returns a {@code long} indicating how many elements the algorithm must skip.
* <p>
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/gr/james/sampling/SkipFunctionFactory.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package gr.james.sampling;

import java.util.Random;

/**
* A factory of {@link SkipFunction} instances.
* <p>
* This is a functional interface whose single abstract method is {@link #create(int, Random)}. It can therefore be
* supplied as a lambda expression or as a constructor reference to a {@link SkipFunction} implementation that
* declares a constructor accepting an {@code int} sample size and a {@link Random}.
* <p>
* The factory lets a sampling algorithm defer the construction of its {@link SkipFunction} until the sample size and
* the RNG are known, so that the skip function can be bound to those values when it is created.
*/
@FunctionalInterface
public interface SkipFunctionFactory {
/**
* Create a new {@link SkipFunction} instance bound to the given sample size and RNG.
* <p>
* This interface performs no validation of its arguments; whether {@code sampleSize} and {@code random} are checked
* is up to the caller and to the concrete implementation.
*
* @param sampleSize the sample size that the returned {@link SkipFunction} will be used with
* @param random the {@link Random} instance that the returned {@link SkipFunction} may draw from
* @return the new {@link SkipFunction} instance
*/
SkipFunction create(int sampleSize, Random random);
}
28 changes: 1 addition & 27 deletions src/main/java/gr/james/sampling/VitterXSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class VitterXSampling<T> extends AbstractRandomSampling<T> {
* @throws IllegalArgumentException if {@code sampleSize} is less than 1
*/
public VitterXSampling(int sampleSize, Random random) {
super(sampleSize, random);
super(sampleSize, random, VitterXSkipFunction::new);
}

/**
Expand All @@ -56,32 +56,6 @@ public static <E> RandomSamplingCollector<E> collector(int sampleSize, Random ra
return new RandomSamplingCollector<>(() -> new VitterXSampling<>(sampleSize, random));
}

/**
* {@inheritDoc}
*
* @param streamSize {@inheritDoc}
* @param sampleSize {@inheritDoc}
* @param random {@inheritDoc}
* @return {@inheritDoc}
*/
@Override
protected long skipLength(long streamSize, int sampleSize, Random random) {
streamSize++;

final double r = random.nextDouble();
long skipCount = 0;

double quot = (streamSize - sampleSize) / (double) streamSize;
while (quot > r && streamSize > 0) {
skipCount++;
streamSize++;
quot = (quot * (streamSize - sampleSize)) / (double) streamSize;
}

return skipCount;
}

@Deprecated
private static class VitterXSkipFunction implements SkipFunction {
private final int sampleSize;
private final Random random;
Expand Down
Loading

0 comments on commit 4903fac

Please sign in to comment.