Skip to content

Commit

Permalink
RandomSampling.sample always returns the same reference (gstamatelat#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
gstamatelat committed Jun 30, 2018
1 parent 14937e6 commit 78a0456
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ abstract class AbstractUnweightedRandomSampling<T> implements RandomSampling<T>
private final int sampleSize;
private final Random random;
private final List<T> sample;
private final Collection<T> unmodifiableSample;
private long streamSize;
private long skip;

Expand All @@ -39,6 +40,7 @@ abstract class AbstractUnweightedRandomSampling<T> implements RandomSampling<T>
this.streamSize = 0;
this.sample = new ArrayList<>(sampleSize);
this.skip = skipLength(sampleSize, sampleSize, random);
this.unmodifiableSample = Collections.unmodifiableList(sample);
}

/**
Expand Down Expand Up @@ -140,7 +142,7 @@ public final long streamSize() {
*/
@Override
public final Collection<T> sample() {
// NOTE(review): this hunk is rendered without +/- markers, so two return statements appear here.
// Presumably the first one is the removed pre-commit line, which wrapped the backing list in a new
// unmodifiable view on each call -- confirm against the raw diff.
return Collections.unmodifiableList(sample);
// Hands back the view held in the final unmodifiableSample field, so repeated calls yield the same reference.
return this.unmodifiableSample;
}

/**
Expand Down
62 changes: 32 additions & 30 deletions src/main/java/gr/james/sampling/ChaoSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public class ChaoSampling<T> implements WeightedRandomSampling<T> {
private final Random random;
private final List<T> sample;
private final TreeSet<Weighted<T>> impossible;
private final Collection<T> unmodifiableSample;
private long streamSize;
private double weightSum;

Expand All @@ -47,6 +48,36 @@ public ChaoSampling(int sampleSize, Random random) {
this.sample = new ArrayList<>(sampleSize);
this.impossible = new TreeSet<>();
this.weightSum = 0;
this.unmodifiableSample = new AbstractCollection<T>() {
@Override
public Iterator<T> iterator() {
return new Iterator<T>() {
final Iterator<T> sampleIt = sample.iterator();
final Iterator<Weighted<T>> impossibleIt = impossible.iterator();

@Override
public boolean hasNext() {
return sampleIt.hasNext() || impossibleIt.hasNext();
}

@Override
public T next() {
if (sampleIt.hasNext()) {
return sampleIt.next();
} else if (impossibleIt.hasNext()) {
return impossibleIt.next().object;
} else {
throw new NoSuchElementException();
}
}
};
}

@Override
public int size() {
return sample.size() + impossible.size();
}
};
}

/**
Expand Down Expand Up @@ -215,36 +246,7 @@ public ChaoSampling<T> feed(Map<T, Double> items) {
*/
@Override
public Collection<T> sample() {
// NOTE(review): this hunk is rendered without +/- markers. Presumably the anonymous collection below
// is the removed pre-commit body, which created a new view object on every call -- confirm against
// the raw diff.
return new AbstractCollection<T>() {
@Override
public Iterator<T> iterator() {
return new Iterator<T>() {
// Two underlying iterators: the sample list is drained first, then the impossible set.
final Iterator<T> sampleIt = sample.iterator();
final Iterator<Weighted<T>> impossibleIt = impossible.iterator();

@Override
public boolean hasNext() {
return sampleIt.hasNext() || impossibleIt.hasNext();
}

@Override
public T next() {
if (sampleIt.hasNext()) {
return sampleIt.next();
} else if (impossibleIt.hasNext()) {
// Entries of impossible are Weighted wrappers; only the wrapped object is exposed.
return impossibleIt.next().object;
} else {
throw new NoSuchElementException();
}
}
};
}

@Override
public int size() {
// The view covers both backing containers.
return sample.size() + impossible.size();
}
};
// Hands back the view held in the final unmodifiableSample field, so repeated calls yield the same reference.
return this.unmodifiableSample;
}

/**
Expand Down
48 changes: 25 additions & 23 deletions src/main/java/gr/james/sampling/EfraimidisSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class EfraimidisSampling<T> implements WeightedRandomSampling<T> {
private final int sampleSize;
private final Random random;
private final PriorityQueue<Weighted<T>> pq;
private final Collection<T> unmodifiableSample;
private long streamSize;

/**
Expand All @@ -42,6 +43,29 @@ public EfraimidisSampling(int sampleSize, Random random) {
this.sampleSize = sampleSize;
this.streamSize = 0;
this.pq = new PriorityQueue<>(sampleSize);
this.unmodifiableSample = new AbstractCollection<T>() {
@Override
public Iterator<T> iterator() {
return new Iterator<T>() {
final Iterator<Weighted<T>> it = pq.iterator();

@Override
public boolean hasNext() {
return it.hasNext();
}

@Override
public T next() {
return it.next().object;
}
};
}

@Override
public int size() {
return pq.size();
}
};
}

/**
Expand Down Expand Up @@ -154,29 +178,7 @@ public EfraimidisSampling<T> feed(Map<T, Double> items) {
*/
@Override
public Collection<T> sample() {
// NOTE(review): this hunk is rendered without +/- markers. Presumably the anonymous collection below
// is the removed pre-commit body, which created a new view object on every call -- confirm against
// the raw diff.
return new AbstractCollection<T>() {
@Override
public Iterator<T> iterator() {
return new Iterator<T>() {
// Delegates to the priority queue's own iterator.
final Iterator<Weighted<T>> it = pq.iterator();

@Override
public boolean hasNext() {
return it.hasNext();
}

@Override
public T next() {
// Queue entries are Weighted wrappers; only the wrapped object is exposed.
return it.next().object;
}
};
}

@Override
public int size() {
return pq.size();
}
};
// Hands back the view held in the final unmodifiableSample field, so repeated calls yield the same reference.
return this.unmodifiableSample;
}

/**
Expand Down
13 changes: 12 additions & 1 deletion src/main/java/gr/james/sampling/RandomSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,18 @@ default RandomSampling<T> feed(Iterable<T> items) {
* <p>
* This method returns a readonly {@link Collection} view of the items in the sample which is backed by the
* instance; subsequent modification of the instance (using any of the {@code feed} methods) will reflect on this
* collection. The items returned are in no particular order unless otherwise specified.
* collection. In fact you can treat {@code sample()} as a read-only (i.e. {@code final}) field as it will always
* return the same reference:
* <pre><code>
* final RandomSampling&lt;T&gt; rs = ...;
* // Do anything with rs
* Collection&lt;T&gt; sample1 = rs.sample();
* // Do anything with rs
* Collection&lt;T&gt; sample2 = rs.sample();
* // Do anything with rs
* assert sample1 == sample2;
* </code></pre>
* The items returned are in no particular order inside the sample collection unless otherwise specified.
* <p>
* The {@link Collection} returned cannot be {@code null} but it can be empty if and only if no items have been
* fed to the implementation. The {@link Collection} may also contain duplicate elements if an object has been
Expand Down
13 changes: 13 additions & 0 deletions src/test/java/gr/james/sampling/RandomSamplingTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,17 @@ public void firstElements() {
}
}

/**
 * Checks that {@link RandomSampling#sample()} keeps handing back one and the same collection
 * instance, both before any item is fed and after each subsequent feed.
 */
@Test
public void sampleOnDifferentTime() {
    final RandomSampling<Integer> sampler = impl.get();
    // Captured once up front; every later call must return this exact reference.
    final Collection<Integer> initialView = sampler.sample();
    int item = 0;
    while (item < 1000) {
        sampler.feed(item);
        Assert.assertSame(initialView, sampler.sample());
        item++;
    }
}

}

0 comments on commit 78a0456

Please sign in to comment.