Skip to content

Commit 6a13611

Browse files
committed
Add IndexingAdapter mixin
1 parent d824a2d commit 6a13611

File tree

4 files changed

+61
-79
lines changed

4 files changed

+61
-79
lines changed

xarray/backends/zarr.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ def __getitem__(self, key):
242242
# could possibly have a work-around for 0d data here
243243

244244
async def async_getitem(self, key):
245+
print("async getting")
245246
array = self._array
246247
if isinstance(key, indexing.BasicIndexer):
247248
method = self._async_getitem

xarray/coding/variables.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def __getitem__(self, key) -> np.ndarray:
105105
return np.asarray(self.array[key], dtype=self.dtype)
106106

107107

108+
108109
def _apply_mask(
109110
data: np.ndarray,
110111
encoded_fill_values: list,

xarray/core/indexing.py

Lines changed: 53 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -516,16 +516,30 @@ def get_duck_array(self):
516516
return self.array
517517

518518

519-
class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed):
520-
__slots__ = ()
519+
class IndexingAdapter:
520+
"""Marker class for indexing adapters.
521+
522+
These classes translate between Xarray's indexing semantics and the underlying array's
523+
indexing semantics.
524+
"""
521525

522526
def get_duck_array(self):
523527
key = BasicIndexer((slice(None),) * self.ndim)
524528
return self[key]
525529

526530
async def async_get_duck_array(self):
527-
key = BasicIndexer((slice(None),) * self.ndim)
528-
return await self.async_getitem(key)
531+
"""These classes are applied to in-memory arrays, so specific async support isn't needed."""
532+
return self.get_duck_array()
533+
534+
535+
class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed):
536+
__slots__ = ()
537+
538+
def get_duck_array(self):
539+
raise NotImplementedError
540+
541+
async def async_get_duck_array(self):
542+
raise NotImplementedError
529543

530544
def _oindex_get(self, indexer: OuterIndexer):
531545
raise NotImplementedError(
@@ -650,37 +664,25 @@ def shape(self) -> _Shape:
650664
return self._shape
651665

652666
def get_duck_array(self):
653-
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
654-
array = apply_indexer(self.array, self.key)
655-
else:
656-
# If the array is not an ExplicitlyIndexedNDArrayMixin,
657-
# it may wrap a BackendArray so use its __getitem__
658-
array = self.array[self.key]
667+
from xarray.backends.common import BackendArray
659668

660-
# self.array[self.key] is now a numpy array when
661-
# self.array is a BackendArray subclass
662-
# and self.key is BasicIndexer((slice(None, None, None),))
663-
# so we need the explicit check for ExplicitlyIndexed
664-
if isinstance(array, ExplicitlyIndexed):
665-
array = array.get_duck_array()
669+
if isinstance(self.array, BackendArray):
670+
array = self.array[self.key]
671+
else:
672+
array = apply_indexer(self.array, self.key)
673+
if isinstance(array, ExplicitlyIndexed):
674+
array = array.get_duck_array()
666675
return _wrap_numpy_scalars(array)
667676

668677
async def async_get_duck_array(self):
669-
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
670-
array = apply_indexer(self.array, self.key)
671-
else:
672-
# If the array is not an ExplicitlyIndexedNDArrayMixin,
673-
# it may wrap a BackendArray so use its (async) getitem
674-
array = await self.array.async_getitem(self.key)
678+
from xarray.backends.common import BackendArray
675679

676-
# self.array[self.key] is now a numpy array when
677-
# self.array is a BackendArray subclass
678-
# and self.key is BasicIndexer((slice(None, None, None),))
679-
# so we need the explicit check for ExplicitlyIndexed
680-
if isinstance(array, ExplicitlyIndexed):
681-
# At this point, we have completed the possible async load from disk
682-
# and array is in-memory. So use the sync get
683-
array = array.get_duck_array()
680+
if isinstance(self.array, BackendArray):
681+
array = await self.array.async_getitem(self.key)
682+
else:
683+
array = apply_indexer(self.array, self.key)
684+
if isinstance(array, ExplicitlyIndexed):
685+
array = await array.async_get_duck_array()
684686
return _wrap_numpy_scalars(array)
685687

686688
def transpose(self, order):
@@ -744,36 +746,26 @@ def shape(self) -> _Shape:
744746
return np.broadcast(*self.key.tuple).shape
745747

746748
def get_duck_array(self):
747-
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
748-
array = apply_indexer(self.array, self.key)
749-
else:
750-
# If the array is not an ExplicitlyIndexedNDArrayMixin,
751-
# it may wrap a BackendArray so use its __getitem__
749+
from xarray.backends.common import BackendArray
750+
751+
if isinstance(self.array, BackendArray):
752752
array = self.array[self.key]
753-
# self.array[self.key] is now a numpy array when
754-
# self.array is a BackendArray subclass
755-
# and self.key is BasicIndexer((slice(None, None, None),))
756-
# so we need the explicit check for ExplicitlyIndexed
757-
if isinstance(array, ExplicitlyIndexed):
758-
array = array.get_duck_array()
753+
else:
754+
array = apply_indexer(self.array, self.key)
755+
if isinstance(array, ExplicitlyIndexed):
756+
array = array.get_duck_array()
759757
return _wrap_numpy_scalars(array)
760758

761759
async def async_get_duck_array(self):
762760
print("inside LazilyVectorizedIndexedArray.async_get_duck_array")
763-
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
764-
array = apply_indexer(self.array, self.key)
765-
else:
766-
# If the array is not an ExplicitlyIndexedNDArrayMixin,
767-
# it may wrap a BackendArray so use its __getitem__
761+
from xarray.backends.common import BackendArray
762+
763+
if isinstance(self.array, BackendArray):
768764
array = await self.array.async_getitem(self.key)
769-
# self.array[self.key] is now a numpy array when
770-
# self.array is a BackendArray subclass
771-
# and self.key is BasicIndexer((slice(None, None, None),))
772-
# so we need the explicit check for ExplicitlyIndexed
773-
if isinstance(array, ExplicitlyIndexed):
774-
# At this point, we have completed the possible async load from disk
775-
# and array is in-memory. So use the sync get
776-
array = array.get_duck_array()
765+
else:
766+
array = apply_indexer(self.array, self.key)
767+
if isinstance(array, ExplicitlyIndexed):
768+
array = await array.async_get_duck_array()
777769
return _wrap_numpy_scalars(array)
778770

779771
def _updated_key(self, new_key: ExplicitIndexer):
@@ -1589,7 +1581,7 @@ def is_fancy_indexer(indexer: Any) -> bool:
15891581
return True
15901582

15911583

1592-
class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin):
1584+
class NumpyIndexingAdapter(IndexingAdapter, ExplicitlyIndexedNDArrayMixin):
15931585
"""Wrap a NumPy array to use explicit indexing."""
15941586

15951587
__slots__ = ("array",)
@@ -1668,7 +1660,7 @@ def __init__(self, array):
16681660
self.array = array
16691661

16701662

1671-
class ArrayApiIndexingAdapter(ExplicitlyIndexedNDArrayMixin):
1663+
class ArrayApiIndexingAdapter(IndexingAdapter, ExplicitlyIndexedNDArrayMixin):
16721664
"""Wrap an array API array to use explicit indexing."""
16731665

16741666
__slots__ = ("array",)
@@ -1733,7 +1725,7 @@ def _assert_not_chunked_indexer(idxr: tuple[Any, ...]) -> None:
17331725
)
17341726

17351727

1736-
class DaskIndexingAdapter(ExplicitlyIndexedNDArrayMixin):
1728+
class DaskIndexingAdapter(IndexingAdapter, ExplicitlyIndexedNDArrayMixin):
17371729
"""Wrap a dask array to support explicit indexing."""
17381730

17391731
__slots__ = ("array",)
@@ -1809,7 +1801,7 @@ def transpose(self, order):
18091801
return self.array.transpose(order)
18101802

18111803

1812-
class PandasIndexingAdapter(ExplicitlyIndexedNDArrayMixin):
1804+
class PandasIndexingAdapter(IndexingAdapter, ExplicitlyIndexedNDArrayMixin):
18131805
"""Wrap a pandas.Index to preserve dtypes and handle explicit indexing."""
18141806

18151807
__slots__ = ("_dtype", "array")
@@ -1872,15 +1864,6 @@ def get_duck_array(self) -> np.ndarray | PandasExtensionArray:
18721864
return PandasExtensionArray(self.array.array)
18731865
return np.asarray(self)
18741866

1875-
async def async_get_duck_array(self) -> np.ndarray | PandasExtensionArray:
1876-
# TODO this must surely be wrong - it's not async yet
1877-
print("in PandasIndexingAdapter")
1878-
if pd.api.types.is_extension_array_dtype(self.array):
1879-
from xarray.core.extension_array import PandasExtensionArray
1880-
1881-
return PandasExtensionArray(self.array.array)
1882-
return np.asarray(self)
1883-
18841867
@property
18851868
def shape(self) -> _Shape:
18861869
return (len(self.array),)
@@ -2135,7 +2118,9 @@ def copy(self, deep: bool = True) -> Self:
21352118
return type(self)(array, self._dtype, self.level)
21362119

21372120

2138-
class CoordinateTransformIndexingAdapter(ExplicitlyIndexedNDArrayMixin):
2121+
class CoordinateTransformIndexingAdapter(
2122+
IndexingAdapter, ExplicitlyIndexedNDArrayMixin
2123+
):
21392124
"""Wrap a CoordinateTransform as a lazy coordinate array.
21402125
21412126
Supports explicit indexing (both outer and vectorized).

xarray/namedarray/pycompat.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -153,20 +153,15 @@ async def async_to_duck_array(
153153
from xarray.core.indexing import (
154154
ExplicitlyIndexed,
155155
ImplicitToExplicitIndexingAdapter,
156+
IndexingAdapter,
156157
)
157-
from xarray.namedarray.parallelcompat import get_chunked_array_type
158158

159159
print(type(data))
160-
161-
if is_chunked_array(data):
162-
chunkmanager = get_chunked_array_type(data)
163-
loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated]
164-
return loaded_data
165-
166-
if isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter):
160+
if isinstance(data, IndexingAdapter):
161+
# These wrap in-memory arrays, and async isn't needed
162+
return data.get_duck_array()
163+
elif isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter):
167164
print("async inside to_duck_array")
168165
return await data.async_get_duck_array() # type: ignore[no-untyped-call, no-any-return]
169-
elif is_duck_array(data):
170-
return data
171166
else:
172-
return np.asarray(data) # type: ignore[return-value]
167+
return to_duck_array(data, **kwargs)

0 commit comments

Comments
 (0)