|
4 | 4 |
|
5 | 5 | from typing import TYPE_CHECKING
|
6 | 6 |
|
7 |
| -import numba |
| 7 | +import math |
8 | 8 | import numpy as np
|
9 | 9 | import pandas as pd
|
10 | 10 |
|
11 | 11 | from .. import logging as logg
|
12 |
| -from .._compat import CSBase, njit, old_positionals |
| 12 | +from .._compat import CSBase, old_positionals |
13 | 13 | from .._utils import _check_use_raw, is_backed_type
|
14 | 14 | from ..get import _get_obs_rep
|
15 | 15 |
|
|
29 | 29 | _GetSubset = Callable[[_StrIdx], np.ndarray | CSBase]
|
30 | 30 |
|
31 | 31 |
|
32 |
| -@njit |
33 | 32 | def _get_sparce_nanmean_columns(
|
34 |
| - data: NDArray[Any], indicies: NDArray[np.int32], shape: tuple |
| 33 | + data: NDArray[Any], indices: NDArray[np.int32], shape: tuple |
35 | 34 | ) -> NDArray[np.float64]:
|
36 |
| - sums = np.zeros(shape[1], dtype=np.float64) |
37 |
| - counts = np.repeat(float(shape[0]), shape[1]) |
38 |
| - for data_index in numba.prange(len(data)): |
39 |
| - if np.isnan(data[data_index]): |
40 |
| - counts[indicies[data_index]] -= 1.0 |
41 |
| - continue |
42 |
| - sums[indicies[data_index]] += data[data_index] |
43 |
| - # if we have row column nans return nan (not inf) |
44 |
| - counts[counts == 0.0] = np.nan |
45 |
| - return sums / counts |
46 |
| - |
47 |
| - |
48 |
| -@njit |
| 35 | + sum_arr = np.zeros(shape[1], dtype = np.float64) |
| 36 | + nans_arr = np.zeros(shape[1], dtype = np.float64) |
| 37 | + np.add.at(sum_arr, indices, np.nan_to_num(data, nan=0.0)) |
| 38 | + np.add.at(nans_arr, indices, np.isnan(data)) |
| 39 | + nans_arr[nans_arr==shape[0]] = np.nan |
| 40 | + return sum_arr/(shape[0] - nans_arr) |
| 41 | + |
| 42 | + |
49 | 43 | def _get_sparce_nanmean_rows(
|
50 | 44 | data: NDArray[Any], indptr: NDArray[np.int32], shape: tuple
|
51 | 45 | ) -> NDArray[np.float64]:
|
52 |
| - sums = np.zeros(shape[0], dtype=np.float64) |
53 |
| - counts = np.repeat(float(shape[1]), shape[0]) |
54 |
| - for cur_row_index in numba.prange(shape[0]): |
55 |
| - for data_index in numba.prange( |
56 |
| - indptr[cur_row_index], indptr[cur_row_index + 1] |
57 |
| - ): |
58 |
| - if np.isnan(data[data_index]): |
59 |
| - counts[cur_row_index] -= 1.0 |
60 |
| - continue |
61 |
| - sums[cur_row_index] += data[data_index] |
62 |
| - # if we have row from nans return nan (not inf) |
63 |
| - counts[counts == 0.0] = np.nan |
64 |
| - return sums / counts |
| 46 | + sum_arr = np.add.reduceat(np.nan_to_num(data, nan=0.0), indptr[:-1], dtype=np.float64) |
| 47 | + nans_arr = np.add.reduceat(np.isnan(data), indptr[:-1], dtype=np.float64) |
| 48 | + return sum_arr/(shape[1] - nans_arr) |
65 | 49 |
|
66 | 50 |
|
67 | 51 | def _sparse_nanmean(X: CSBase, axis: Literal[0, 1]) -> NDArray[np.float64]:
|
|
0 commit comments