diff --git a/py/server/deephaven/experimental/data_index.py b/py/server/deephaven/experimental/data_index.py
new file mode 100644
index 00000000000..63fa893f7d8
--- /dev/null
+++ b/py/server/deephaven/experimental/data_index.py
@@ -0,0 +1,97 @@
+#
+# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
+#
+"""This module provides the ability to create, check, and retrieve DataIndex objects from Deephaven tables."""
+
+from typing import List, Optional
+
+import jpy
+
+from deephaven import DHError
+from deephaven._wrapper import JObjectWrapper
+from deephaven.jcompat import j_list_to_list
+from deephaven.table import Table
+
+_JDataIndexer = jpy.get_type("io.deephaven.engine.table.impl.indexer.DataIndexer")
+_JDataIndex = jpy.get_type("io.deephaven.engine.table.DataIndex")
+
+
+class DataIndex(JObjectWrapper):
+    """A DataIndex is an index used to improve the speed of data access operations for a Deephaven table. The index
+    applies to one or more indexed (key) column(s) of a Deephaven table.
+
+    Note that a DataIndex itself is backed by a table."""
+
+    j_object_type = _JDataIndex
+
+    def __init__(self, j_data_index: jpy.JType):
+        # Keep a reference to the wrapped Java DataIndex; every property below delegates to it.
+        self._j_data_index = j_data_index
+
+    @property
+    def j_object(self) -> jpy.JType:
+        """The wrapped Java DataIndex object."""
+        return self._j_data_index
+
+    @property
+    def keys(self) -> List[str]:
+        """The names of the columns indexed by the DataIndex."""
+        return j_list_to_list(self._j_data_index.keyColumnNames())
+
+    @property
+    def table(self) -> Table:
+        """The backing table of the DataIndex."""
+        return Table(self._j_data_index.table())
+
+
+def has_data_index(table: Table, key_cols: List[str]) -> bool:
+    """Checks if a table currently has a DataIndex for the given key columns.
+
+    Args:
+        table (Table): the table to check
+        key_cols (List[str]): the names of the key columns indexed
+
+    Returns:
+        bool: True if the table has a DataIndex, False otherwise
+    """
+    return _JDataIndexer.hasDataIndex(table.j_table, key_cols)
+
+
+def _get_data_index(table: Table, key_cols: List[str]) -> Optional[DataIndex]:
+    """Gets a DataIndex for the given key columns. Returns None if the DataIndex does not exist.
+
+    Args:
+        table (Table): the table to get the DataIndex from
+        key_cols (List[str]): the names of the key columns indexed
+
+    Returns:
+        a DataIndex or None
+    """
+    j_di = _JDataIndexer.getDataIndex(table.j_table, key_cols)
+    # Compare against None explicitly so the result never depends on the truthiness of the Java object.
+    return DataIndex(j_di) if j_di is not None else None
+
+
+def data_index(table: Table, key_cols: List[str], create_if_absent: bool = True) -> Optional[DataIndex]:
+    """Gets the DataIndex for the given key columns on the provided table. When the DataIndex already exists, returns it.
+    When the DataIndex doesn't already exist, if create_if_absent is True, creates the DataIndex first then returns it;
+    otherwise returns None.
+
+    Args:
+        table (Table): the table to index
+        key_cols (List[str]): the names of the key columns to index
+        create_if_absent (bool): if True, create the DataIndex if it does not already exist, default is True
+
+    Returns:
+        a DataIndex or None
+
+    Raises:
+        DHError: if the DataIndex cannot be retrieved or created
+    """
+    try:
+        if not create_if_absent:
+            # Lookup-only path: return the existing DataIndex, or None if there is none.
+            return _get_data_index(table, key_cols)
+        return DataIndex(_JDataIndexer.getOrCreateDataIndex(table.j_table, key_cols))
+    except Exception as e:
+        raise DHError(e, "failed to get or create DataIndex.") from e
diff --git a/py/server/deephaven/liveness_scope.py b/py/server/deephaven/liveness_scope.py
index c215f625967..613bf97efc4 100644
--- a/py/server/deephaven/liveness_scope.py
+++ b/py/server/deephaven/liveness_scope.py
@@ -56,7 +56,7 @@ def make_table_and_scope(a: int):
 
 import jpy
 from typing import Union, Iterator
-from warnings import warn
+
 
 from deephaven import DHError
 from deephaven._wrapper import JObjectWrapper
diff --git a/py/server/tests/test_data_index.py b/py/server/tests/test_data_index.py
new file mode 100644
index 00000000000..5b3aad01391
--- /dev/null
+++ b/py/server/tests/test_data_index.py
@@ -0,0 +1,62 @@
+#
+# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
+#
+
+import unittest
+
+from deephaven import empty_table, DHError
+from deephaven.experimental.data_index import data_index, has_data_index
+from tests.testbase import BaseTestCase
+
+
+class DataIndexTestCase(BaseTestCase):
+    """Tests for creating, looking up, and inspecting DataIndex objects."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        # self.table gets an index on (X, Y); self.table_nodi has the same columns but is never indexed here.
+        self.table = empty_table(10).update(
+            ["Timestamp = now()", "X=i%3", "Y=`Deephaven` + String.valueOf(ii)", "Z=ii*2"])
+        self.data_index = data_index(self.table, ["X", "Y"])
+        self.table_nodi = empty_table(10).update(["Timestamp = now()", "X=i%3", "Y=`Deephaven` + String.valueOf(ii)",
+                                                  "Z=ii*2"])
+
+    def test_data_index(self):
+        self.assertFalse(has_data_index(self.table, ["X", "Z"]))
+        di = data_index(self.table, ["X", "Z"])
+        self.assertTrue(has_data_index(self.table, ["X", "Z"]))
+        self.assertIsNotNone(di)
+        self.assertEqual(2, len(di.keys))
+        self.assertEqual(10, di.table.size)
+
+        with self.assertRaises(DHError):
+            data_index(self.table, ["X", "W"])
+
+    def test_data_index_not_create_if_absent(self):
+        self.assertIsNotNone(data_index(self.table, ["X", "Y"], create_if_absent=False))
+        self.assertIsNone(data_index(self.table, ["X"], create_if_absent=False))
+        self.assertIsNone(data_index(self.table, ["X", "Z"], create_if_absent=False))
+        self.assertIsNone(data_index(self.table_nodi, ["X", "Y"], create_if_absent=False))
+
+    def test_has_data_index(self):
+        self.assertTrue(has_data_index(self.table, ["X", "Y"]))
+        self.assertFalse(has_data_index(self.table, ["X"]))
+        self.assertFalse(has_data_index(self.table, ["X", "Z"]))
+        self.assertFalse(has_data_index(self.table_nodi, ["X", "Y"]))
+        self.assertFalse(has_data_index(self.table_nodi, ["X"]))
+        self.assertFalse(has_data_index(self.table_nodi, ["X", "Z"]))
+
+    def test_keys(self):
+        self.assertEqual(["X", "Y"], self.data_index.keys)
+
+    def test_backing_table(self):
+        self.assertEqual(3, len(self.data_index.table.columns))
+        self.assertEqual(10, self.data_index.table.size)
+        # The backing table is itself a Table, so it can be indexed in turn.
+        di = data_index(self.data_index.table, self.data_index.keys[0:1])
+        self.assertEqual(1, len(di.keys))
+        self.assertEqual(3, di.table.size)
+
+
+if __name__ == '__main__':
+    unittest.main()