forked from deephaven/deephaven-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Wrap the new data indexing feature in Python (deephaven#5306)
* Wrap the new data indexing feature * Fix spotless check failures * Respond to review comments * To force rerun of CI checks * Apply suggestions from code review Co-authored-by: Chip Kent <[email protected]> * Respond to review comments * Apply suggestions from code review Co-authored-by: Chip Kent <[email protected]> * Renaming create_data_index to data_index --------- Co-authored-by: Chip Kent <[email protected]>
- Loading branch information
1 parent
7d05df2
commit 14f8ca3
Showing
3 changed files
with
152 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# | ||
# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending | ||
# | ||
"""This module provides the ability to create, check, and retrieve DataIndex objects from Deephaven tables.""" | ||
|
||
from typing import List, Optional | ||
|
||
import jpy | ||
|
||
from deephaven import DHError | ||
from deephaven._wrapper import JObjectWrapper | ||
from deephaven.jcompat import j_list_to_list | ||
from deephaven.table import Table | ||
|
||
_JDataIndexer = jpy.get_type("io.deephaven.engine.table.impl.indexer.DataIndexer") | ||
_JDataIndex = jpy.get_type("io.deephaven.engine.table.DataIndex") | ||
|
||
|
||
class DataIndex(JObjectWrapper):
    """An index over one or more key columns of a Deephaven table, used to speed up data access operations.

    A DataIndex is itself backed by a table, which is exposed through the ``table`` property.
    """

    j_object_type = _JDataIndex

    def __init__(self, j_data_index: jpy.JType):
        # Keep a reference to the underlying Java DataIndex; every property delegates to it.
        self._j_data_index = j_data_index

    @property
    def j_object(self) -> jpy.JType:
        """The wrapped Java DataIndex object."""
        return self._j_data_index

    @property
    def keys(self) -> List[str]:
        """The names of the columns indexed by the DataIndex."""
        j_key_names = self._j_data_index.keyColumnNames()
        return j_list_to_list(j_key_names)

    @property
    def table(self) -> Table:
        """The backing table of the DataIndex."""
        j_backing_table = self._j_data_index.table()
        return Table(j_backing_table)
|
||
|
||
def has_data_index(table: Table, key_cols: List[str]) -> bool:
    """Reports whether the table currently has a DataIndex for the given key columns.

    Args:
        table (Table): the table to check
        key_cols (List[str]): the names of the key columns indexed

    Returns:
        bool: True if the table has a DataIndex, False otherwise
    """
    j_table = table.j_table
    index_exists = _JDataIndexer.hasDataIndex(j_table, key_cols)
    return index_exists
|
||
|
||
def _get_data_index(table: Table, key_cols: List[str]) -> Optional[DataIndex]:
    """Gets the existing DataIndex for the given key columns, without creating one.

    Args:
        table (Table): the table to get the DataIndex from
        key_cols (List[str]): the names of the key columns indexed

    Returns:
        a DataIndex, or None if the Java lookup returns no index
    """
    j_di = _JDataIndexer.getDataIndex(table.j_table, key_cols)
    # Compare against None explicitly: the intent is a null check on the Java result, and the
    # truthiness of a Java proxy object is not something this wrapper should depend on.
    if j_di is None:
        return None
    return DataIndex(j_di)
|
||
|
||
def data_index(table: Table, key_cols: List[str], create_if_absent: bool = True) -> Optional[DataIndex]:
    """Gets the DataIndex for the given key columns on the provided table.

    When the DataIndex already exists, it is returned. When it doesn't already exist and create_if_absent is True,
    the DataIndex is created first and then returned; otherwise None is returned.

    Args:
        table (Table): the table to index
        key_cols (List[str]): the names of the key columns to index
        create_if_absent (bool): if True, create the DataIndex if it does not already exist, default is True

    Returns:
        a DataIndex or None

    Raises:
        DHError: if looking up or creating the DataIndex fails
    """
    try:
        if create_if_absent:
            return DataIndex(_JDataIndexer.getOrCreateDataIndex(table.j_table, key_cols))
        # Lookup only: never creates an index, and yields None when there is none.
        return _get_data_index(table, key_cols)
    except Exception as e:
        # Name the operation that was actually attempted; a lookup-only call never tries to create anything.
        action = "create" if create_if_absent else "get"
        raise DHError(e, f"failed to {action} DataIndex.") from e
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# | ||
# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending | ||
# | ||
|
||
import unittest | ||
|
||
from deephaven import empty_table, DHError | ||
from deephaven.experimental.data_index import data_index, has_data_index | ||
from tests.testbase import BaseTestCase | ||
|
||
|
||
class DataIndexTestCase(BaseTestCase):
    """Tests for data_index() and has_data_index() using small generated tables."""

    def setUp(self) -> None:
        super().setUp()
        formulas = ["Timestamp = now()", "X=i%3", "Y=`Deephaven` + String.valueOf(ii)", "Z=ii*2"]
        # A table that gets a DataIndex on (X, Y) up front.
        self.table = empty_table(10).update(formulas)
        self.data_index = data_index(self.table, ["X", "Y"])
        # A table built from the same formulas on which no DataIndex is ever requested.
        self.table_nodi = empty_table(10).update(formulas)

    def test_data_index(self):
        key_cols = ["X", "Z"]
        self.assertFalse(has_data_index(self.table, key_cols))
        di = data_index(self.table, key_cols)
        self.assertTrue(has_data_index(self.table, key_cols))
        self.assertIsNotNone(di)
        self.assertEqual(2, len(di.keys))
        self.assertEqual(10, di.table.size)

        # "W" is not one of the columns defined in setUp.
        with self.assertRaises(DHError):
            data_index(self.table, ["X", "W"])

    def test_data_index_not_create_if_absent(self):
        existing = data_index(self.table, ["X", "Y"], create_if_absent=False)
        self.assertIsNotNone(existing)
        for key_cols in (["X"], ["X", "Z"]):
            self.assertIsNone(data_index(self.table, key_cols, create_if_absent=False))
        self.assertIsNone(data_index(self.table_nodi, ["X", "Y"], create_if_absent=False))

    def test_has_data_index(self):
        self.assertTrue(has_data_index(self.table, ["X", "Y"]))
        for key_cols in (["X"], ["X", "Z"]):
            self.assertFalse(has_data_index(self.table, key_cols))
        for key_cols in (["X", "Y"], ["X"], ["X", "Z"]):
            self.assertFalse(has_data_index(self.table_nodi, key_cols))

    def test_keys(self):
        self.assertEqual(["X", "Y"], self.data_index.keys)

    def test_backing_table(self):
        backing = self.data_index.table
        self.assertEqual(3, len(backing.columns))
        self.assertEqual(10, self.data_index.table.size)
        # Index the backing table itself on the first key column only.
        first_key = self.data_index.keys[:1]
        di = data_index(self.data_index.table, first_key)
        self.assertEqual(1, len(di.keys))
        self.assertEqual(3, di.table.size)
|
||
|
||
if __name__ == "__main__":
    # Run this module's tests when the file is executed directly.
    unittest.main()