Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: DH-18300: Improve DataIndex performance. #6585

Merged
merged 24 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 37 additions & 10 deletions Base/src/main/java/io/deephaven/base/stats/Value.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,22 @@
//
package io.deephaven.base.stats;

public abstract class Value {
import java.text.DecimalFormat;
import java.util.concurrent.atomic.AtomicLongFieldUpdater;

protected long n = 0;
protected long last = 0;
protected long sum = 0;
protected long sum2 = 0;
protected long max = Long.MIN_VALUE;
protected long min = Long.MAX_VALUE;
public abstract class Value {
private static final AtomicLongFieldUpdater<Value> N_UPDATER = AtomicLongFieldUpdater.newUpdater(Value.class, "n");
private static final AtomicLongFieldUpdater<Value> SUM_UPDATER =
AtomicLongFieldUpdater.newUpdater(Value.class, "sum");
private static final AtomicLongFieldUpdater<Value> SUM2_UPDATER =
AtomicLongFieldUpdater.newUpdater(Value.class, "sum2");

volatile protected long n = 0;
cpwright marked this conversation as resolved.
Show resolved Hide resolved
volatile protected long last = 0;
volatile protected long sum = 0;
volatile protected long sum2 = 0;
volatile protected long max = Long.MIN_VALUE;
volatile protected long min = Long.MAX_VALUE;

private boolean alwaysUpdated = false;

Expand Down Expand Up @@ -52,10 +60,10 @@ protected Value(History history) {
}

public void sample(long x) {
n++;
N_UPDATER.incrementAndGet(this);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
SUM_UPDATER.addAndGet(this, x);
SUM2_UPDATER.addAndGet(this, x * x);
last = x;
sum += x;
sum2 += x * x;
if (x > max) {
cpwright marked this conversation as resolved.
Show resolved Hide resolved
max = x;
}
Expand Down Expand Up @@ -99,4 +107,23 @@ public void update(Item item, ItemUpdateListener listener, long logInterval, lon
}
}
}

@Override
public String toString() {
final DecimalFormat format = new DecimalFormat("#,###");
final DecimalFormat avgFormat = new DecimalFormat("#,###.###");
cpwright marked this conversation as resolved.
Show resolved Hide resolved

final double variance = n > 1 ? (sum2 - ((double) sum * sum / (double) n)) / (n - 1) : Double.NaN;

return "Value{" +
"n=" + format.format(n) +
(n > 0 ? ", sum=" + format.format(sum) +
", max=" + format.format(max) +
", min=" + format.format(min) +
", avg=" + avgFormat.format((n > 0 ? (double) sum / n : Double.NaN)) +
", std=" + avgFormat.format(Math.sqrt(variance))
: "")
+
'}';
}
}
5 changes: 5 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ For more information, see:
* [gh pr create](https://cli.github.com/manual/gh_pr_create)
* [CLI In Use](https://cli.github.com/manual/examples.html)

## Labels

- Each pull request msut have a `ReleaseNotesNeeded` label if a user can perceive the changes. Build or testing-only changes should have a `NoReleaseNotesNeeded` label.
cpwright marked this conversation as resolved.
Show resolved Hide resolved
- Each pull request must have a `DocumentationNeeded` label if the user guide should be updated. Pull requests that do not require a user guide update should have a `NoDocumentationNeeded` label.

## Styleguide
The [styleguide](style/README.md) is applied globally to the entire project, except for generated code that gets checked in.
To apply the styleguide, run `./gradlew spotlessApply`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,43 @@ default ColumnSource<?>[] keyColumns(@NotNull final ColumnSource<?>[] indexedCol
@FinalDefault
@NotNull
default ColumnSource<RowSet> rowSetColumn() {
return table().getColumnSource(rowSetColumnName(), RowSet.class);
return rowSetColumn(DataIndexOptions.DEFAULT);
}

/**
* Get the {@link RowSet} {@link ColumnSource} of the index {@link #table() table}.
*
cpwright marked this conversation as resolved.
Show resolved Hide resolved
* @return The {@link RowSet} {@link ColumnSource}
*/
@FinalDefault
@NotNull
default ColumnSource<RowSet> rowSetColumn(final DataIndexOptions options) {
return table(options).getColumnSource(rowSetColumnName(), RowSet.class);
}

/**
* Get the {@link Table} backing this data index.
*
* <p>
* The returned table is fully in-memory, equivalent to {@link #table(DataIndexOptions)} with default options.
* </p>
*
* @return The {@link Table}
*/
@NotNull
Table table();
default Table table() {
return table(DataIndexOptions.DEFAULT);
}

/**
* Get the {@link Table} backing this data index.
*
* @param options parameters to control the returned table
*
* @return The {@link Table}
*/
@NotNull
Table table(DataIndexOptions options);

/**
* Whether the index {@link #table()} {@link Table#isRefreshing() is refreshing}. Some transformations will force
Expand All @@ -115,4 +142,5 @@ default ColumnSource<RowSet> rowSetColumn() {
* otherwise
*/
boolean isRefreshing();

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ interface RowKeyLookup {
* @return A function that provides map-like lookup of index {@link #table()} row keys from an index lookup key
*/
@NotNull
RowKeyLookup rowKeyLookup();
default RowKeyLookup rowKeyLookup() {
return rowKeyLookup(DataIndexOptions.DEFAULT);
}

@NotNull
RowKeyLookup rowKeyLookup(DataIndexOptions options);
cpwright marked this conversation as resolved.
Show resolved Hide resolved

/**
* Return a {@link RowKeyLookup lookup function} function of index row keys for this index. If
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
package io.deephaven.engine.table;

import io.deephaven.annotations.BuildableStyle;
import io.deephaven.api.filter.Filter;
import org.immutables.value.Value;

/**
* Options for controlling the function of a {@link DataIndex}.
*
* <p>
* Presently, this is used for the {@link Table#where(Filter)} operation to more efficiently handle data index matches,
* without necessarily reading all RowSet information from disk across partitions.
cpwright marked this conversation as resolved.
Show resolved Hide resolved
* </p>
*/
@Value.Immutable
@BuildableStyle
public interface DataIndexOptions {
DataIndexOptions DEFAULT = DataIndexOptions.builder().build();
cpwright marked this conversation as resolved.
Show resolved Hide resolved

/**
* Does this operation use only a subset of the DataIndex?
*
* <p>
* The DataIndex implementation may use this hint to defer work for some row sets.
* </p>
*
* @return if this operation is only going to use a subset of this data index
*/
@Value.Default
default boolean operationUsesPartialTable() {
return false;
}

/**
* Create a new builder for a {@link DataIndexOptions}.
*
* @return
*/
static Builder builder() {
return ImmutableDataIndexOptions.builder();
}

/**
* The builder interface to construct a {@link DataIndexOptions}.
*/
interface Builder {
/**
* Set whether this operation only uses a subset of the data index.
*
* @param usesPartialTable true if this operation only uses a partial table
* @return this builder
*/
Builder operationUsesPartialTable(boolean usesPartialTable);

/**
* Build the {@link DataIndexOptions}.
*
* @return an immutable DataIndexOptions structure.
*/
DataIndexOptions build();
}
}
Loading