Skip to content

Commit

Permalink
Wrap the new data indexing feature in Python (deephaven#5306)
Browse files Browse the repository at this point in the history
* Wrap the new data indexing feature

* Fix spotless check failures

* Respond to review comments

* To force rerun of CI checks

* Apply suggestions from code review

Co-authored-by: Chip Kent <[email protected]>

* Respond to review comments

* Apply suggestions from code review

Co-authored-by: Chip Kent <[email protected]>

* Renaming create_data_index to data_index

---------

Co-authored-by: Chip Kent <[email protected]>
  • Loading branch information
jmao-denver and chipkent authored Apr 3, 2024
1 parent 7d05df2 commit 14f8ca3
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 1 deletion.
93 changes: 93 additions & 0 deletions py/server/deephaven/experimental/data_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#
# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
#
"""This module provides the ability to create, check, and retrieve DataIndex objects from Deephaven tables."""

from typing import List, Optional

import jpy

from deephaven import DHError
from deephaven._wrapper import JObjectWrapper
from deephaven.jcompat import j_list_to_list
from deephaven.table import Table

_JDataIndexer = jpy.get_type("io.deephaven.engine.table.impl.indexer.DataIndexer")
_JDataIndex = jpy.get_type("io.deephaven.engine.table.DataIndex")


class DataIndex(JObjectWrapper):
    """An index over one or more key columns of a Deephaven table, used to speed up data access operations.

    A DataIndex wraps a Java DataIndex object and is itself backed by a table.
    """

    j_object_type = _JDataIndex

    def __init__(self, j_data_index: jpy.JType):
        """Wraps the given Java DataIndex object.

        Args:
            j_data_index (jpy.JType): the Java DataIndex object to wrap
        """
        self._j_data_index = j_data_index

    @property
    def j_object(self) -> jpy.JType:
        """The wrapped Java DataIndex object."""
        return self._j_data_index

    @property
    def keys(self) -> List[str]:
        """The names of the columns indexed by the DataIndex."""
        j_key_names = self._j_data_index.keyColumnNames()
        return j_list_to_list(j_key_names)

    @property
    def table(self) -> Table:
        """The backing table of the DataIndex."""
        j_backing_table = self._j_data_index.table()
        return Table(j_backing_table)


def has_data_index(table: Table, key_cols: List[str]) -> bool:
    """Reports whether a table currently has a DataIndex for the given key columns.

    Args:
        table (Table): the table to check
        key_cols (List[str]): the names of the key columns indexed

    Returns:
        bool: True if the table has a DataIndex, False otherwise
    """
    j_table = table.j_table
    return _JDataIndexer.hasDataIndex(j_table, key_cols)


def _get_data_index(table: Table, key_cols: List[str]) -> Optional[DataIndex]:
    """Looks up the existing DataIndex for the given key columns without creating one.

    Args:
        table (Table): the table to get the DataIndex from
        key_cols (List[str]): the names of the key columns indexed

    Returns:
        a DataIndex, or None if the lookup yields no DataIndex
    """
    j_di = _JDataIndexer.getDataIndex(table.j_table, key_cols)
    # A falsy lookup result means there is nothing to wrap.
    if not j_di:
        return None
    return DataIndex(j_di)


def data_index(table: Table, key_cols: List[str], create_if_absent: bool = True) -> Optional[DataIndex]:
    """Gets the DataIndex for the given key columns on the provided table.

    When create_if_absent is True (the default), the DataIndex is fetched if it already exists and created
    otherwise, so a DataIndex is always returned. When create_if_absent is False, only an existing DataIndex is
    returned; if there is none, the result is None.

    Args:
        table (Table): the table to index
        key_cols (List[str]): the names of the key columns to index
        create_if_absent (bool): if True, create the DataIndex if it does not already exist, default is True

    Returns:
        a DataIndex or None

    Raises:
        DHError: if any exception is raised while getting or creating the DataIndex
    """
    try:
        if create_if_absent:
            j_di = _JDataIndexer.getOrCreateDataIndex(table.j_table, key_cols)
            return DataIndex(j_di)
        # Lookup only: may yield None when no DataIndex exists for these key columns.
        return _get_data_index(table, key_cols)
    except Exception as e:
        raise DHError(e, "failed to create DataIndex.") from e
2 changes: 1 addition & 1 deletion py/server/deephaven/liveness_scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def make_table_and_scope(a: int):
import jpy

from typing import Union, Iterator
from warnings import warn


from deephaven import DHError
from deephaven._wrapper import JObjectWrapper
Expand Down
58 changes: 58 additions & 0 deletions py/server/tests/test_data_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
#

import unittest

from deephaven import empty_table, DHError
from deephaven.experimental.data_index import data_index, has_data_index
from tests.testbase import BaseTestCase


class DataIndexTestCase(BaseTestCase):
    """Tests for data_index() and has_data_index() and for the DataIndex wrapper's properties."""

    @staticmethod
    def _new_table():
        """Builds a fresh 10-row table with Timestamp, X, Y and Z columns."""
        return empty_table(10).update(
            ["Timestamp = now()", "X=i%3", "Y=`Deephaven` + String.valueOf(ii)", "Z=ii*2"])

    def setUp(self) -> None:
        super().setUp()
        # One table that gets an index on (X, Y), and an identically built table that is never indexed.
        self.table = self._new_table()
        self.data_index = data_index(self.table, ["X", "Y"])
        self.table_nodi = self._new_table()

    def test_data_index(self):
        """Creating an index on (X, Z) makes it visible; an unknown key column raises DHError."""
        key_cols = ["X", "Z"]
        self.assertFalse(has_data_index(self.table, key_cols))
        created = data_index(self.table, key_cols)
        self.assertTrue(has_data_index(self.table, key_cols))
        self.assertIsNotNone(created)
        self.assertEqual(2, len(created.keys))
        self.assertEqual(10, created.table.size)

        with self.assertRaises(DHError):
            data_index(self.table, ["X", "W"])

    def test_data_index_not_create_if_absent(self):
        """With create_if_absent=False only the index made in setUp is returned; every other lookup is None."""
        self.assertIsNotNone(data_index(self.table, ["X", "Y"], create_if_absent=False))
        for key_cols in (["X"], ["X", "Z"]):
            self.assertIsNone(data_index(self.table, key_cols, create_if_absent=False))
        self.assertIsNone(data_index(self.table_nodi, ["X", "Y"], create_if_absent=False))

    def test_has_data_index(self):
        """has_data_index is True only for the (X, Y) index on the indexed table."""
        self.assertTrue(has_data_index(self.table, ["X", "Y"]))
        for key_cols in (["X"], ["X", "Z"]):
            self.assertFalse(has_data_index(self.table, key_cols))
        for key_cols in (["X", "Y"], ["X"], ["X", "Z"]):
            self.assertFalse(has_data_index(self.table_nodi, key_cols))

    def test_keys(self):
        """The keys property reports the indexed column names in order."""
        self.assertEqual(["X", "Y"], self.data_index.keys)

    def test_backing_table(self):
        """The backing table has 3 columns and 10 rows and can itself be indexed on its first key."""
        self.assertEqual(3, len(self.data_index.table.columns))
        self.assertEqual(10, self.data_index.table.size)
        first_key_index = data_index(self.data_index.table, self.data_index.keys[:1])
        self.assertEqual(1, len(first_key_index.keys))
        self.assertEqual(3, first_key_index.table.size)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 14f8ca3

Please sign in to comment.