diff --git a/README.md b/README.md index f6414b7..d9e3a20 100644 --- a/README.md +++ b/README.md @@ -72,12 +72,12 @@ Select 2 terms from a vocabulary, based on their weight. ```java WeightedRandomSampling rs = new EfraimidisSampling<>(2, new Random()); -rs.feed("collection", 1) - .feed("algorithms", 2) - .feed("java", 2) - .feed("random", 3) - .feed("sampling", 4) - .feed("reservoir", 5); +rs.feed("collection", 1); +rs.feed("algorithms", 2); +rs.feed("java", 2); +rs.feed("random", 3); +rs.feed("sampling", 4); +rs.feed("reservoir", 5); System.out.println(rs.sample()); ``` diff --git a/demo/src/SelectWeightedFromVocabulary.java b/demo/src/SelectWeightedFromVocabulary.java index 3aeaa12..e7c6bfe 100644 --- a/demo/src/SelectWeightedFromVocabulary.java +++ b/demo/src/SelectWeightedFromVocabulary.java @@ -9,12 +9,12 @@ public final class SelectWeightedFromVocabulary { public static void main(String[] args) { WeightedRandomSampling rs = new EfraimidisSampling<>(2, new Random()); - rs.feed("collection", 1) - .feed("algorithms", 2) - .feed("java", 2) - .feed("random", 3) - .feed("sampling", 4) - .feed("reservoir", 5); + rs.feed("collection", 1); + rs.feed("algorithms", 2); + rs.feed("java", 2); + rs.feed("random", 3); + rs.feed("sampling", 4); + rs.feed("reservoir", 5); System.out.println(rs.sample()); } } diff --git a/src/main/java/gr/james/sampling/AbstractRandomSampling.java b/src/main/java/gr/james/sampling/AbstractRandomSampling.java index 00a0cf6..3d89a41 100644 --- a/src/main/java/gr/james/sampling/AbstractRandomSampling.java +++ b/src/main/java/gr/james/sampling/AbstractRandomSampling.java @@ -52,7 +52,7 @@ abstract class AbstractRandomSampling implements RandomSampling { * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} */ @Override - public RandomSampling feed(T item) { + public final boolean feed(T item) { // Checks if (item == null) { throw new NullPointerException("Item was null"); @@ -68,21 +68,21 @@ public 
RandomSampling feed(T item) { // Skip items and add to reservoir if (sample.size() < sampleSize) { sample.add(item); + assert sample.size() == Math.min(sampleSize(), streamSize()); + return true; } else { assert sample.size() == sampleSize; if (skip > 0) { skip--; + return false; } else { assert skip == 0; sample.set(random.nextInt(sampleSize), item); skip = skipLength(streamSize, sampleSize, random); + assert this.skip >= 0; + return true; } } - - assert sample.size() == Math.min(sampleSize(), streamSize()); - assert this.skip >= 0; - - return this; } /** @@ -91,12 +91,11 @@ public RandomSampling feed(T item) { * @param items {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} + * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} */ @Override - public RandomSampling feed(Iterator items) { - RandomSampling.super.feed(items); - return this; + public final boolean feed(Iterator items) { + return RandomSampling.super.feed(items); } /** @@ -105,12 +104,11 @@ public RandomSampling feed(Iterator items) { * @param items {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} + * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} */ @Override - public RandomSampling feed(Iterable items) { - RandomSampling.super.feed(items); - return this; + public final boolean feed(Iterable items) { + return RandomSampling.super.feed(items); } /** diff --git a/src/main/java/gr/james/sampling/ChaoSampling.java b/src/main/java/gr/james/sampling/ChaoSampling.java index db90e8f..c79d0c6 100644 --- a/src/main/java/gr/james/sampling/ChaoSampling.java +++ b/src/main/java/gr/james/sampling/ChaoSampling.java @@ -112,10 +112,11 @@ public static WeightedRandomSamplingCollector weightedCollector(int sampl * @return {@inheritDoc} * @throws NullPointerException 
{@inheritDoc} * @throws IllegalWeightException if {@code weight} is outside the range (0,+Inf) - * @throws StreamOverflowException {@inheritDoc} + * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the + * weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override - public ChaoSampling feed(T item, double weight) { + public boolean feed(T item, double weight) { // Checks if (item == null) { throw new NullPointerException("Item was null"); @@ -144,7 +145,7 @@ public ChaoSampling feed(T item, double weight) { // The first k items go straight into the A list if (streamSize <= sampleSize) { this.impossible.add(new Weighted<>(item, weight)); - return this; + return true; } // First order inclusion probability of the new item @@ -204,7 +205,7 @@ public ChaoSampling feed(T item, double weight) { assert impossible.size() + sample.size() == sampleSize; - return this; + return w > add; } /** @@ -215,13 +216,13 @@ public ChaoSampling feed(T item, double weight) { * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} * @throws IllegalArgumentException {@inheritDoc} - * @throws IllegalWeightException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} + * @throws IllegalWeightException if {@code weight} is outside the range (0,+Inf) + * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of + * the weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override - public ChaoSampling feed(Iterator items, Iterator weights) { - WeightedRandomSampling.super.feed(items, weights); - return this; + public boolean feed(Iterator items, Iterator weights) { + return WeightedRandomSampling.super.feed(items, weights); } /** @@ -230,13 +231,13 @@ public ChaoSampling feed(Iterator items, Iterator weights) { * @param items {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws 
IllegalWeightException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} + * @throws IllegalWeightException if {@code weight} is outside the range (0,+Inf) + * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the + * weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override - public ChaoSampling feed(Map items) { - WeightedRandomSampling.super.feed(items); - return this; + public boolean feed(Map items) { + return WeightedRandomSampling.super.feed(items); } /** @@ -277,12 +278,12 @@ public final long streamSize() { * @param item {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} + * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the + * weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override - public ChaoSampling feed(T item) { - feed(item, 1.0); - return this; + public boolean feed(T item) { + return WeightedRandomSampling.super.feed(item); } /** @@ -291,12 +292,12 @@ public ChaoSampling feed(T item) { * @param items {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} + * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the + * weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override - public ChaoSampling feed(Iterator items) { - WeightedRandomSampling.super.feed(items); - return this; + public boolean feed(Iterator items) { + return WeightedRandomSampling.super.feed(items); } /** @@ -305,11 +306,11 @@ public ChaoSampling feed(Iterator items) { * @param items {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} + * @throws StreamOverflowException if the number of items 
feeded exceeds {@link Long#MAX_VALUE} or if the sum of the + * weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override - public ChaoSampling feed(Iterable items) { - WeightedRandomSampling.super.feed(items); - return this; + public boolean feed(Iterable items) { + return WeightedRandomSampling.super.feed(items); } } diff --git a/src/main/java/gr/james/sampling/EfraimidisSampling.java b/src/main/java/gr/james/sampling/EfraimidisSampling.java index 2720c15..8d1eb13 100644 --- a/src/main/java/gr/james/sampling/EfraimidisSampling.java +++ b/src/main/java/gr/james/sampling/EfraimidisSampling.java @@ -102,7 +102,7 @@ public static WeightedRandomSamplingCollector weightedCollector(int sampl * @throws IllegalWeightException if {@code weight} is outside the range (0,+Inf) */ @Override - public EfraimidisSampling feed(T item, double weight) { + public boolean feed(T item, double weight) { // Checks if (item == null) { throw new NullPointerException("Item was null"); @@ -127,18 +127,17 @@ public EfraimidisSampling feed(T item, double weight) { // Add item to reservoir if (pq.size() < sampleSize) { pq.add(newItem); + return true; } else if (pq.peek().weight < newItem.weight) { // Seems unfair for equal weight items to not have a chance to get in the sample // Of course in the long run it hardly matters assert pq.size() == sampleSize(); pq.poll(); pq.add(newItem); + return true; } - assert !pq.isEmpty(); - assert pq.stream().allMatch(Objects::nonNull); - - return this; + return false; } /** @@ -152,9 +151,8 @@ public EfraimidisSampling feed(T item, double weight) { * @throws IllegalWeightException {@inheritDoc} */ @Override - public EfraimidisSampling feed(Iterator items, Iterator weights) { - WeightedRandomSampling.super.feed(items, weights); - return this; + public boolean feed(Iterator items, Iterator weights) { + return WeightedRandomSampling.super.feed(items, weights); } /** @@ -166,9 +164,8 @@ public EfraimidisSampling feed(Iterator items, Iterator 
weights) { * @throws IllegalWeightException {@inheritDoc} */ @Override - public EfraimidisSampling feed(Map items) { - WeightedRandomSampling.super.feed(items); - return this; + public boolean feed(Map items) { + return WeightedRandomSampling.super.feed(items); } /** @@ -210,9 +207,8 @@ public final long streamSize() { * @throws NullPointerException {@inheritDoc} */ @Override - public EfraimidisSampling feed(T item) { - WeightedRandomSampling.super.feed(item); - return this; + public boolean feed(T item) { + return WeightedRandomSampling.super.feed(item); } /** @@ -223,9 +219,8 @@ public EfraimidisSampling feed(T item) { * @throws NullPointerException {@inheritDoc} */ @Override - public EfraimidisSampling feed(Iterator items) { - WeightedRandomSampling.super.feed(items); - return this; + public boolean feed(Iterator items) { + return WeightedRandomSampling.super.feed(items); } /** @@ -236,8 +231,7 @@ public EfraimidisSampling feed(Iterator items) { * @throws NullPointerException {@inheritDoc} */ @Override - public EfraimidisSampling feed(Iterable items) { - WeightedRandomSampling.super.feed(items); - return this; + public boolean feed(Iterable items) { + return WeightedRandomSampling.super.feed(items); } } diff --git a/src/main/java/gr/james/sampling/IdentityRandomSampling.java b/src/main/java/gr/james/sampling/IdentityRandomSampling.java index e29ba61..080ebc9 100644 --- a/src/main/java/gr/james/sampling/IdentityRandomSampling.java +++ b/src/main/java/gr/james/sampling/IdentityRandomSampling.java @@ -54,27 +54,24 @@ public int size() { } @Override - public RS feed(T item) { + public boolean feed(T item) { if (item == null) { throw new NullPointerException(); } if (!set.add(item)) { throw new UnsupportedOperationException(); } - source.feed(item); - return source; + return source.feed(item); } @Override - public RS feed(Iterator items) { - source.feed(items); - return source; + public boolean feed(Iterator items) { + return source.feed(items); } @Override - public 
RS feed(Iterable items) { - source.feed(items); - return source; + public boolean feed(Iterable items) { + return source.feed(items); } @Override diff --git a/src/main/java/gr/james/sampling/RandomSampling.java b/src/main/java/gr/james/sampling/RandomSampling.java index 998e963..9592a7a 100644 --- a/src/main/java/gr/james/sampling/RandomSampling.java +++ b/src/main/java/gr/james/sampling/RandomSampling.java @@ -38,11 +38,11 @@ public interface RandomSampling { * Feed an item from the stream to the algorithm. * * @param item the item to feed to the algorithm - * @return this instance + * @return {@code true} if the sample was modified as a result of this operation * @throws NullPointerException if {@code item} is {@code null} * @throws StreamOverflowException if the internal state of the algorithm has overflown */ - RandomSampling feed(T item); + boolean feed(T item); /** * Feed an {@link Iterator} of items of type {@code T} to the algorithm. @@ -55,16 +55,17 @@ public interface RandomSampling { * * * @param items the items to feed to the algorithm - * @return this instance + * @return {@code true} if the sample was modified as a result of this operation * @throws NullPointerException if {@code items} is {@code null} or any item in {@code items} is {@code null} * @throws StreamOverflowException if any subsequent calls to {@link #feed(Object)} causes * {@code StreamOverflowException} */ - default RandomSampling feed(Iterator items) { + default boolean feed(Iterator items) { + boolean r = false; while (items.hasNext()) { - feed(items.next()); + r = feed(items.next()) || r; } - return this; + return r; } /** @@ -78,16 +79,17 @@ default RandomSampling feed(Iterator items) { * * * @param items the items to feed to the algorithm - * @return this instance + * @return {@code true} if the sample was modified as a result of this operation * @throws NullPointerException if {@code items} is {@code null} or any item in {@code items} is {@code null} * @throws 
StreamOverflowException if any subsequent calls to {@link #feed(Object)} causes * {@code StreamOverflowException} */ - default RandomSampling feed(Iterable items) { + default boolean feed(Iterable items) { + boolean r = false; for (T item : items) { - feed(item); + r = feed(item) || r; } - return this; + return r; } /** diff --git a/src/main/java/gr/james/sampling/VitterXSampling.java b/src/main/java/gr/james/sampling/VitterXSampling.java index b257a28..c279fa6 100644 --- a/src/main/java/gr/james/sampling/VitterXSampling.java +++ b/src/main/java/gr/james/sampling/VitterXSampling.java @@ -1,6 +1,5 @@ package gr.james.sampling; -import java.util.Iterator; import java.util.Random; /** @@ -60,46 +59,4 @@ long skipLength(long streamSize, int sampleSize, Random random) { return skipCount; } - - /** - * {@inheritDoc} - * - * @param item {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public VitterXSampling feed(T item) { - super.feed(item); - return this; - } - - /** - * {@inheritDoc} - * - * @param items {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public VitterXSampling feed(Iterator items) { - super.feed(items); - return this; - } - - /** - * {@inheritDoc} - * - * @param items {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public VitterXSampling feed(Iterable items) { - super.feed(items); - return this; - } } diff --git a/src/main/java/gr/james/sampling/VitterZSampling.java b/src/main/java/gr/james/sampling/VitterZSampling.java index a035f04..280509a 100644 --- a/src/main/java/gr/james/sampling/VitterZSampling.java +++ b/src/main/java/gr/james/sampling/VitterZSampling.java @@ -1,6 +1,5 @@ package gr.james.sampling; -import java.util.Iterator; 
import java.util.Random; /** @@ -87,46 +86,4 @@ long skipLength(long streamSize, int sampleSize, Random random) { } } } - - /** - * {@inheritDoc} - * - * @param item {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public VitterZSampling feed(T item) { - super.feed(item); - return this; - } - - /** - * {@inheritDoc} - * - * @param items {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public VitterZSampling feed(Iterator items) { - super.feed(items); - return this; - } - - /** - * {@inheritDoc} - * - * @param items {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public VitterZSampling feed(Iterable items) { - super.feed(items); - return this; - } } diff --git a/src/main/java/gr/james/sampling/WatermanSampling.java b/src/main/java/gr/james/sampling/WatermanSampling.java index b6f3b58..c49c087 100644 --- a/src/main/java/gr/james/sampling/WatermanSampling.java +++ b/src/main/java/gr/james/sampling/WatermanSampling.java @@ -1,6 +1,5 @@ package gr.james.sampling; -import java.util.Iterator; import java.util.Random; /** @@ -54,46 +53,4 @@ long skipLength(long streamSize, int sampleSize, Random random) { } return skipCount; } - - /** - * {@inheritDoc} - * - * @param item {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public WatermanSampling feed(T item) { - super.feed(item); - return this; - } - - /** - * {@inheritDoc} - * - * @param items {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public WatermanSampling feed(Iterator items) { - 
super.feed(items); - return this; - } - - /** - * {@inheritDoc} - * - * @param items {@inheritDoc} - * @return {@inheritDoc} - * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException {@inheritDoc} - */ - @Override - public WatermanSampling feed(Iterable items) { - super.feed(items); - return this; - } } diff --git a/src/main/java/gr/james/sampling/WeightedRandomSampling.java b/src/main/java/gr/james/sampling/WeightedRandomSampling.java index c5e71c3..e46d38d 100644 --- a/src/main/java/gr/james/sampling/WeightedRandomSampling.java +++ b/src/main/java/gr/james/sampling/WeightedRandomSampling.java @@ -47,12 +47,12 @@ public interface WeightedRandomSampling extends RandomSampling { * * @param item the item to feed to the algorithm * @param weight the weight assigned to this item - * @return this instance + * @return {@code true} if the sample was modified as a result of this operation * @throws NullPointerException if {@code item} is {@code null} * @throws IllegalWeightException if {@code weight} is incompatible with the algorithm * @throws StreamOverflowException if the internal state of the algorithm has overflown */ - WeightedRandomSampling feed(T item, double weight); + boolean feed(T item, double weight); /** * Feed an {@link Iterator} of items of type {@code T} along with their weights to the algorithm. 
@@ -72,7 +72,7 @@ public interface WeightedRandomSampling extends RandomSampling { * * @param items the items to feed to the algorithm * @param weights the weights assigned to the {@code items} - * @return this instance + * @return {@code true} if the sample was modified as a result of this operation * @throws NullPointerException if {@code items} is {@code null} or {@code weights} is {@code null} or any item * in {@code items} is {@code null} or any weight in {@code weights} is * {@code null} @@ -81,14 +81,15 @@ public interface WeightedRandomSampling extends RandomSampling { * @throws StreamOverflowException if any subsequent calls to {@link #feed(Object, double)} causes * {@code StreamOverflowException} */ - default WeightedRandomSampling feed(Iterator items, Iterator weights) { + default boolean feed(Iterator items, Iterator weights) { + boolean r = false; while (items.hasNext() && weights.hasNext()) { - feed(items.next(), weights.next()); + r = feed(items.next(), weights.next()) || r; } if (items.hasNext() || weights.hasNext()) { throw new IllegalArgumentException("Items and weights size mismatch"); } - return this; + return r; } /** @@ -102,7 +103,7 @@ default WeightedRandomSampling feed(Iterator items, Iterator weigh * * * @param items the items to feed to the algorithm - * @return this instance + * @return {@code true} if the sample was modified as a result of this operation * @throws NullPointerException if {@code items} is {@code null} or any key or value in {@code items} is * {@code null} * @throws IllegalWeightException if any of the weights in the values of {@code items} is incompatible with the @@ -110,11 +111,12 @@ default WeightedRandomSampling feed(Iterator items, Iterator weigh * @throws StreamOverflowException if any subsequent calls to {@link #feed(Object, double)} causes * {@code StreamOverflowException} */ - default WeightedRandomSampling feed(Map items) { + default boolean feed(Map items) { + boolean r = false; for (Map.Entry e : 
items.entrySet()) { - feed(e.getKey(), e.getValue()); + r = feed(e.getKey(), e.getValue()) || r; } - return this; + return r; } /** @@ -131,9 +133,8 @@ default WeightedRandomSampling feed(Map items) { * @throws StreamOverflowException {@inheritDoc} */ @Override - default WeightedRandomSampling feed(T item) { - feed(item, 1.0); - return this; + default boolean feed(T item) { + return feed(item, 1.0); } /** @@ -147,11 +148,8 @@ default WeightedRandomSampling feed(T item) { * @throws StreamOverflowException {@inheritDoc} */ @Override - default WeightedRandomSampling feed(Iterator items) { - while (items.hasNext()) { - feed(items.next()); - } - return this; + default boolean feed(Iterator items) { + return RandomSampling.super.feed(items); } /** @@ -165,34 +163,7 @@ default WeightedRandomSampling feed(Iterator items) { * @throws StreamOverflowException {@inheritDoc} */ @Override - default WeightedRandomSampling feed(Iterable items) { - for (T item : items) { - feed(item); - } - return this; + default boolean feed(Iterable items) { + return RandomSampling.super.feed(items); } - - /** - * {@inheritDoc} - * - * @return {@inheritDoc} - */ - @Override - int sampleSize(); - - /** - * {@inheritDoc} - * - * @return {@inheritDoc} - */ - @Override - long streamSize(); - - /** - * {@inheritDoc} - * - * @return {@inheritDoc} - */ - @Override - Collection sample(); } diff --git a/src/test/java/gr/james/sampling/RandomSamplingTest.java b/src/test/java/gr/james/sampling/RandomSamplingTest.java index 92a0093..e483649 100644 --- a/src/test/java/gr/james/sampling/RandomSamplingTest.java +++ b/src/test/java/gr/james/sampling/RandomSamplingTest.java @@ -134,7 +134,8 @@ public void feedAlternative() { public void sampleView() { final RandomSampling rs = impl.get(); Collection sample = rs.sample(); - rs.feed(1).feed(2); + rs.feed(1); + rs.feed(2); Assert.assertTrue(RandomSamplingUtils.samplesEquals(sample, new HashSet<>(Arrays.asList(1, 2)))); } @@ -178,4 +179,19 @@ public void 
sampleOnDifferentTime() { } } + /** + * If {@link RandomSampling#feed(Object)} returned {@code true}, then the sample has definitely changed, assuming + * unique stream elements. + */ + @Test + public void feedReturnValue() { + final RandomSampling rs = impl.get(); + Collection sample = new ArrayList<>(); + for (int i = 0; i < 65536; i++) { + final boolean changed = rs.feed(i); + Assert.assertEquals(changed, !RandomSamplingUtils.samplesEquals(sample, rs.sample())); + sample = new ArrayList<>(rs.sample()); + } + } + }