Skip to content

Commit

Permalink
Implementation specific default weight (gstamatelat#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
gstamatelat committed Jul 23, 2020
1 parent f0075b9 commit eb00d03
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 21 deletions.
5 changes: 3 additions & 2 deletions src/main/java/gr/james/sampling/ChaoSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
* Implementation of the algorithm by Chao in <b>A general purpose unequal probability sampling plan</b>.
* <p>
* According to this algorithm, the probability of an item to be in the final sample is proportional to its relative
* weight. Weights must be in the range (0,+Inf), otherwise an {@link IllegalWeightException} is thrown.
* weight. Weights must be in the range (0,+Inf), otherwise an {@link IllegalWeightException} is thrown. The default
* weight in this implementation is {@code 1.0}.
* <p>
* This implementation throws {@link StreamOverflowException} if the sum of the weights of the items fed is
* {@link Double#POSITIVE_INFINITY}.
Expand Down Expand Up @@ -295,7 +296,7 @@ public final long streamSize() {
*/
@Override
public boolean feed(T item) {
return WeightedRandomSampling.super.feed(item);
return this.feed(item, 1.0);
}

/**
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/gr/james/sampling/EfraimidisSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* <p>
* According to this algorithm, the relative weight determines the probability that the item is selected in each of the
* explicit or implicit item selections of the sampling procedure. Weights must be in the range (0,+Inf), otherwise an
* {@link IllegalWeightException} is thrown.
* {@link IllegalWeightException} is thrown. The default weight in this implementation is {@code 1.0}.
* <p>
* This implementation never throws {@link StreamOverflowException}.
* <p>
Expand Down Expand Up @@ -226,7 +226,7 @@ public final long streamSize() {
*/
@Override
public boolean feed(T item) {
return WeightedRandomSampling.super.feed(item);
return this.feed(item, 1.0);
}

/**
Expand Down
8 changes: 3 additions & 5 deletions src/main/java/gr/james/sampling/ParetoSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
* <p>
* Weights are not assigned a particular meaning or physical interpretation, but the resulting inclusion
* probabilities are an approximation of the exact model ({@link ChaoSampling}). Weights must be in the range (0,+Inf)
* but not the value {@code 1.0}, otherwise an {@link IllegalWeightException} is thrown. A side effect of this is that
* the signatures {@link #feed(Object)}, {@link #feed(Iterable)} and {@link #feed(Iterator)} will always throw
* {@link IllegalWeightException}.
* but not the value {@code 1.0}, otherwise an {@link IllegalWeightException} is thrown. The default weight in this
* implementation is {@code 0.5}.
* <p>
* This implementation never throws {@link StreamOverflowException}.
* <p>
Expand Down Expand Up @@ -141,7 +140,6 @@ public boolean feed(T item, double weight) {

// Calculate item weight
final Weighted<T> newItem = new Weighted<>(item, (r * (1 - weight)) / ((1 - r) * weight));
assert newItem.weight >= 0.0; // weight can also be 0.0 because of double precision

// Add item to reservoir
if (pq.size() < sampleSize) {
Expand Down Expand Up @@ -232,7 +230,7 @@ public final long streamSize() {
*/
@Override
public boolean feed(T item) {
return WeightedRandomSampling.super.feed(item);
return this.feed(item, 0.5);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* <p>
* Weights are not assigned a particular meaning or physical interpretation, but the resulting inclusion
* probabilities are an approximation of the exact model ({@link ChaoSampling}). Weights must be in the range (0,+Inf),
* otherwise an {@link IllegalWeightException} is thrown.
* otherwise an {@link IllegalWeightException} is thrown. The default weight in this implementation is {@code 1.0}.
* <p>
* This implementation never throws {@link StreamOverflowException}.
* <p>
Expand Down Expand Up @@ -226,7 +226,7 @@ public final long streamSize() {
*/
@Override
public boolean feed(T item) {
return WeightedRandomSampling.super.feed(item);
return this.feed(item, 1.0);
}

/**
Expand Down
9 changes: 5 additions & 4 deletions src/main/java/gr/james/sampling/WeightedRandomSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,10 @@ default boolean feed(Map<T, Double> items) {
/**
* {@inheritDoc}
* <p>
* This method uses the value {@code 1.0} as weight and is equivalent to
* This method uses as weight a value {@code z}, chosen by the specific implementation, that is guaranteed to be
* legal. Hence, this method is equivalent to
* <pre><code>
* feed(item, 1.0);
* feed(item, z);
* </code></pre>
*
* @param item {@inheritDoc}
Expand All @@ -143,7 +144,7 @@ default boolean feed(T item) {
/**
* {@inheritDoc}
* <p>
* This method uses the value {@code 1.0} as weight.
* This method uses an implementation specific value as weight.
*
* @param items {@inheritDoc}
* @return {@inheritDoc}
Expand All @@ -158,7 +159,7 @@ default boolean feed(Iterator<T> items) {
/**
* {@inheritDoc}
* <p>
* This method uses the value {@code 1.0} as weight.
* This method uses an implementation specific value as weight.
*
* @param items {@inheritDoc}
* @return {@inheritDoc}
Expand Down
8 changes: 2 additions & 6 deletions src/test/java/gr/james/sampling/RandomSamplingTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,7 @@ public void correctness() {
final RandomSampling<Integer> alg = impl.get();

for (int i = 0; i < STREAM; i++) {
if (alg instanceof ParetoSampling) {
((ParetoSampling<Integer>) alg).feed(i, 0.5);
} else {
alg.feed(i);
}
alg.feed(i);
}

for (int s : alg.sample()) {
Expand Down Expand Up @@ -106,7 +102,7 @@ public void correctness() {
int c = d.get(i);
final double expected = (double) REPS * Math.min(SAMPLE, STREAM) / STREAM;
final double actual = (double) c;
assertEquals(1, actual / expected, 1e-2);
assertEquals(String.format("Correctness failed for streamSize %d", STREAM), 1, actual / expected, 1e-2);
}
}
}
Expand Down

0 comments on commit eb00d03

Please sign in to comment.