Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector.RangeIterator;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;

Expand Down Expand Up @@ -270,42 +271,38 @@ protected boolean compareRunEndEncodedVectors(Range range) {
RunEndEncodedVector leftVector = (RunEndEncodedVector) left;
RunEndEncodedVector rightVector = (RunEndEncodedVector) right;

final int leftRangeEnd = range.getLeftStart() + range.getLength();
final int rightRangeEnd = range.getRightStart() + range.getLength();
final RunEndEncodedVector.RangeIterator leftIterator =
new RunEndEncodedVector.RangeIterator(leftVector, range.getLeftStart(), range.getLength());
final RunEndEncodedVector.RangeIterator rightIterator =
new RunEndEncodedVector.RangeIterator(
rightVector, range.getRightStart(), range.getLength());

FieldVector leftValuesVector = leftVector.getValuesVector();
FieldVector rightValuesVector = rightVector.getValuesVector();

RangeEqualsVisitor innerVisitor = createInnerVisitor(leftValuesVector, rightValuesVector, null);

int leftLogicalIndex = range.getLeftStart();
int rightLogicalIndex = range.getRightStart();
while (nextRun(leftIterator, rightIterator)) {
int leftPhysicalIndex = leftIterator.getRunIndex();
int rightPhysicalIndex = rightIterator.getRunIndex();

while (leftLogicalIndex < leftRangeEnd) {
// TODO: implement it more efficient
// https://github.com/apache/arrow/issues/44157
int leftPhysicalIndex = leftVector.getPhysicalIndex(leftLogicalIndex);
int rightPhysicalIndex = rightVector.getPhysicalIndex(rightLogicalIndex);
if (leftValuesVector.accept(
innerVisitor, new Range(leftPhysicalIndex, rightPhysicalIndex, 1))) {
int leftRunEnd = leftVector.getRunEnd(leftLogicalIndex);
int rightRunEnd = rightVector.getRunEnd(rightLogicalIndex);

int leftRunLength = Math.min(leftRunEnd, leftRangeEnd) - leftLogicalIndex;
int rightRunLength = Math.min(rightRunEnd, rightRangeEnd) - rightLogicalIndex;

if (leftRunLength != rightRunLength) {
if (leftIterator.getRunLength() != rightIterator.getRunLength()) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While the original code had this problem too, maybe it's better to check the run length first? That's presumably a cheaper check and so we can bail out earlier.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modified as you suggested.

return false;
} else {
leftLogicalIndex = leftRunEnd;
rightLogicalIndex = rightRunEnd;
}
} else {
return false;
}
}

return true;
return leftIterator.isEnd() && rightIterator.isEnd();
}

/**
 * Moves both iterators forward to their next run.
 *
 * <p>Each iterator is advanced unconditionally, left first and then right; the second call is
 * never skipped when the first one reports exhaustion, so the state of both iterators is
 * updated on every invocation.
 *
 * @param leftIterator iterator over the left-hand range
 * @param rightIterator iterator over the right-hand range
 * @return {@code true} only if both iterators moved onto another run
 */
private static boolean nextRun(RangeIterator leftIterator, RangeIterator rightIterator) {
  // Deliberately not written as "a.nextRun() && b.nextRun()": short-circuiting would leave
  // the right iterator un-advanced whenever the left one is already finished.
  final boolean leftAdvanced = leftIterator.nextRun();
  final boolean rightAdvanced = rightIterator.nextRun();
  if (!leftAdvanced) {
    return false;
  }
  return rightAdvanced;
}

protected RangeEqualsVisitor createInnerVisitor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.memory.util.ByteFunctionHelpers;
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BaseIntVector;
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.BigIntVector;
Expand Down Expand Up @@ -820,4 +821,67 @@ static int getPhysicalIndex(FieldVector runEndVector, int logicalIndex) {

return result;
}

/**
 * Cursor over the part of a {@link RunEndEncodedVector} that falls inside the logical range
 * {@code [startIndex, startIndex + length)}.
 *
 * <p>The cursor can be advanced run by run with {@link #nextRun()} or value by value with
 * {@link #nextValue()}. It starts positioned <em>before</em> the range, so one of those methods
 * must return {@code true} before {@link #getRunIndex()} or {@link #getRunLength()} are
 * meaningful.
 */
public static class RangeIterator {

  private final RunEndEncodedVector runEndEncodedVector;
  // Exclusive logical end of the iterated range.
  private final int rangeEnd;
  // Index of the current run in the run-ends vector.
  private int runIndex;
  // Exclusive logical end of the current run, as read from the run-ends vector.
  private int runEnd;
  // Current logical position inside the parent vector.
  private int logicalPos;

  /**
   * Creates an iterator positioned just before {@code startIndex}.
   *
   * @param runEndEncodedVector the vector to iterate over
   * @param startIndex logical index of the first value in the range
   * @param length number of logical values in the range
   * @throws IllegalArgumentException if {@code startIndex} is negative or {@code startIndex +
   *     length} is greater than the vector's value count
   */
  public RangeIterator(RunEndEncodedVector runEndEncodedVector, int startIndex, int length) {
    int rangeEnd = startIndex + length;
    Preconditions.checkArgument(
        startIndex >= 0, "startIndex %s must be non negative.", startIndex);
    Preconditions.checkArgument(
        rangeEnd <= runEndEncodedVector.getValueCount(),
        "(startIndex + length) %s out of range[0, %s].",
        rangeEnd,
        runEndEncodedVector.getValueCount());

    this.rangeEnd = rangeEnd;
    this.runEndEncodedVector = runEndEncodedVector;
    // Start one run before the run containing startIndex; the first advance steps onto it.
    this.runIndex = runEndEncodedVector.getPhysicalIndex(startIndex) - 1;
    // Pretend the (virtual) previous run ends exactly at startIndex so that the first
    // nextRun()/nextValue() call lands on startIndex and loads the real run.
    this.runEnd = startIndex;
    // One before the first value of the range. Using a fixed -1 here made nextValue() begin
    // at logical index 0 instead of startIndex whenever startIndex > 0.
    this.logicalPos = startIndex - 1;
  }

  /**
   * Moves to the start of the next run inside the range.
   *
   * @return {@code true} if the iterator is now positioned on a run, {@code false} if the end
   *     of the range has been reached
   */
  public boolean nextRun() {
    // The next run begins where the current one ends.
    logicalPos = runEnd;
    if (logicalPos >= rangeEnd) {
      return false;
    }
    updateRun();
    return true;
  }

  /** Steps onto the next run and loads its end position from the run-ends vector. */
  private void updateRun() {
    runIndex++;
    runEnd = (int) ((BaseIntVector) runEndEncodedVector.runEndsVector).getValueAsLong(runIndex);
  }

  /**
   * Moves to the next logical value inside the range, switching to the following run when the
   * current one is exhausted.
   *
   * @return {@code true} if the iterator is now positioned on a value, {@code false} if the end
   *     of the range has been reached
   */
  public boolean nextValue() {
    logicalPos++;
    if (logicalPos >= rangeEnd) {
      return false;
    }
    if (logicalPos == runEnd) {
      updateRun();
    }
    return true;
  }

  /**
   * Returns the index of the current run in the run-ends vector.
   *
   * @return the current run index
   */
  public int getRunIndex() {
    return runIndex;
  }

  /**
   * Returns the number of logical values from the current position to the end of the current
   * run, clipped to the end of the range.
   *
   * @return the remaining length of the current run within the range
   */
  public int getRunLength() {
    return Math.min(runEnd, rangeEnd) - logicalPos;
  }

  /**
   * Reports whether the current position has reached or passed the end of the range.
   *
   * @return {@code true} once the range is exhausted
   */
  public boolean isEnd() {
    return logicalPos >= rangeEnd;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,18 @@ public void testRangeCompare() {
assertTrue(
constantVector.accept(
new RangeEqualsVisitor(constantVector, constantVector), new Range(1, 2, 13)));
assertFalse(
constantVector.accept(
new RangeEqualsVisitor(constantVector, constantVector), new Range(1, 10, 10)));
assertFalse(
constantVector.accept(
new RangeEqualsVisitor(constantVector, constantVector), new Range(10, 1, 10)));

// throws an exception if the range end is out of the bounds of the vector
assertThrows(
IllegalArgumentException.class,
() ->
constantVector.accept(
new RangeEqualsVisitor(constantVector, constantVector), new Range(1, 10, 10)));
assertThrows(
IllegalArgumentException.class,
() ->
constantVector.accept(
new RangeEqualsVisitor(constantVector, constantVector), new Range(10, 1, 10)));

// Create REE vector representing: [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5].
RunEndEncodedVector reeVector =
Expand Down
Loading