diff --git a/src/main/java/gr/james/sampling/ChaoSampling.java b/src/main/java/gr/james/sampling/ChaoSampling.java index 8367130..1df4a75 100644 --- a/src/main/java/gr/james/sampling/ChaoSampling.java +++ b/src/main/java/gr/james/sampling/ChaoSampling.java @@ -8,8 +8,8 @@ * According to this algorithm, the probability of an item to be in the final sample is proportional to its relative * weight. Weights are the range (0,+Inf), otherwise an {@link IllegalWeightException} is thrown. *

- * This implementation throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} are feeded or if the - * sum of the weights of the items feeded is {@link Double#POSITIVE_INFINITY}, whichever occurs first. + * This implementation throws {@link StreamOverflowException} if the sum of the weights of the items feeded is + * {@link Double#POSITIVE_INFINITY}. *

* The space complexity of this class is {@code O(k)}, where {@code k} is the sample size. * @@ -115,8 +115,7 @@ public static WeightedRandomSamplingCollector weightedCollector(int sampl * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} * @throws IllegalWeightException if {@code weight} is outside the range (0,+Inf) - * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the - * weights of the items feeded is {@link Double#POSITIVE_INFINITY} + * @throws StreamOverflowException if the sum of the weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override public boolean feed(T item, double weight) { @@ -124,9 +123,6 @@ public boolean feed(T item, double weight) { if (item == null) { throw new NullPointerException("Item was null"); } - if (streamSize == Long.MAX_VALUE) { - throw new StreamOverflowException(); - } if (weight <= 0) { throw new IllegalWeightException("Weight was not positive, must be in (0,+Inf)"); } @@ -136,7 +132,6 @@ public boolean feed(T item, double weight) { // Increase stream size this.streamSize++; - assert this.streamSize > 0; // Increase weight sum this.weightSum += weight; @@ -146,7 +141,8 @@ public boolean feed(T item, double weight) { assert this.weightSum > 0; // The first k items go straight into the A list - if (streamSize <= sampleSize) { + if (this.impossible.size() + this.sample.size() < sampleSize) { + assert this.sample.isEmpty(); this.impossible.add(new Weighted<>(item, weight)); return true; } @@ -220,8 +216,8 @@ public boolean feed(T item, double weight) { * @throws NullPointerException {@inheritDoc} * @throws IllegalArgumentException {@inheritDoc} * @throws IllegalWeightException if {@code weight} is outside the range (0,+Inf) - * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of - * the weights of the items feeded is {@link Double#POSITIVE_INFINITY} + * @throws 
StreamOverflowException if the sum of the weights of the items feeded is + * {@link Double#POSITIVE_INFINITY} */ @Override public boolean feed(Iterator items, Iterator weights) { @@ -235,8 +231,7 @@ public boolean feed(Iterator items, Iterator weights) { * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} * @throws IllegalWeightException if {@code weight} is outside the range (0,+Inf) - * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the - * weights of the items feeded is {@link Double#POSITIVE_INFINITY} + * @throws StreamOverflowException if the sum of the weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override public boolean feed(Map items) { @@ -265,8 +260,10 @@ public final int sampleSize() { } /** - * Get the number of items that have been feeded to the algorithm during the lifetime of this instance, which is a - * non-negative {@code long} value. + * Get the number of items that have been feeded to the algorithm during the lifetime of this instance. + *

+ * If more than {@link Long#MAX_VALUE} items have been feeded to the instance, {@code streamSize()} will overflow and + * wrap around, continuing from {@link Long#MIN_VALUE}. *

* This method runs in constant time. * @@ -274,7 +271,6 @@ public final int sampleSize() { */ @Override public final long streamSize() { - assert this.streamSize >= 0; return this.streamSize; } @@ -284,8 +280,7 @@ public final long streamSize() { * @param item {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the - * weights of the items feeded is {@link Double#POSITIVE_INFINITY} + * @throws StreamOverflowException if the sum of the weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override public boolean feed(T item) { @@ -298,8 +293,7 @@ public boolean feed(T item) { * @param items {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the - * weights of the items feeded is {@link Double#POSITIVE_INFINITY} + * @throws StreamOverflowException if the sum of the weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override public boolean feed(Iterator items) { @@ -312,8 +306,7 @@ public boolean feed(Iterator items) { * @param items {@inheritDoc} * @return {@inheritDoc} * @throws NullPointerException {@inheritDoc} - * @throws StreamOverflowException if the number of items feeded exceeds {@link Long#MAX_VALUE} or if the sum of the - * weights of the items feeded is {@link Double#POSITIVE_INFINITY} + * @throws StreamOverflowException if the sum of the weights of the items feeded is {@link Double#POSITIVE_INFINITY} */ @Override public boolean feed(Iterable items) {