Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions albucore/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,27 @@ def _reduce_sum_per_channel_float32(arr: ImageFloat32, axes: tuple[int, ...], *,


def _reduce_sum_numpy(
arr: ImageType,
arr: np.ndarray,
axes: tuple[int, ...] | None,
*,
keepdims: bool,
) -> np.uint64 | np.float64 | np.ndarray:
acc = np.uint64 if arr.dtype == np.uint8 else np.float64
) -> np.generic | np.ndarray:
if np.issubdtype(arr.dtype, np.unsignedinteger):
acc: type = np.uint64
elif np.issubdtype(arr.dtype, np.floating):
acc = np.float64
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_reduce_sum_numpy selects np.int64 as the accumulator for all non-uint8, non-floating dtypes. This includes unsigned integer arrays (e.g. uint16/uint32/uint64), where casting to int64 can produce incorrect negative results for values > 2**63-1 and is generally inconsistent with unsigned semantics. Consider using a np.uint64 accumulator for np.unsignedinteger dtypes (and keep np.int64 for signed ints/bool), so sums remain correct across all integer dtypes.

Suggested change
acc = np.float64
acc = np.float64
elif np.issubdtype(arr.dtype, np.unsignedinteger):
acc = np.uint64

Copilot uses AI. Check for mistakes.
else:
# signed integers and bool
acc = np.int64
return np.sum(arr, axis=axes, dtype=acc, keepdims=keepdims)
Comment on lines 75 to 88
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_reduce_sum_numpy can now return np.int64 (for integer/bool inputs when axes=None), but its return annotation is still np.uint64 | np.float64 | np.ndarray. Please update the return type to include the int64 case (or use a broader scalar type like np.integer | np.floating) so type hints match runtime behavior.

Copilot uses AI. Check for mistakes.


def reduce_sum(
arr: ImageType,
arr: np.ndarray,
axis: AxisSpec = None,
*,
keepdims: bool = False,
) -> np.uint64 | np.float64 | np.ndarray:
) -> np.generic | np.ndarray:
r"""Sum over image tensor axes with benchmark-driven routing.

Routing:
Expand All @@ -105,27 +111,28 @@ def reduce_sum(
Alternative: ``mean`` / ``std`` / ``mean_std`` for normalised statistics.

Args:
arr: ``uint8`` or ``float32`` array with explicit channel dimension.
arr: Array with explicit channel dimension. Optimised paths for ``uint8`` and ``float32``;
other dtypes fall back to NumPy (float → float64 accumulator, integer/bool → int64).
axis: ``None`` / ``"global"`` → one scalar; ``"per_channel"`` → shape ``(C,)``;
Comment on lines 113 to 116
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring now states that reduce_sum accepts dtypes beyond uint8/float32 via a NumPy fallback, but the function signature still types arr as ImageType (uint8|float32). This mismatch will cause static type checkers to reject valid runtime usage (e.g. int64/bool). Consider widening arr’s type annotation (e.g. np.ndarray / NDArray[Any]) or adding a broader alias for stats inputs so the public API matches the documented behavior.

Copilot uses AI. Check for mistakes.
or explicit ``int`` / ``tuple[int, ...]`` (NumPy path).
keepdims: Same semantics as :func:`numpy.sum`.

Returns:
``numpy.uint64`` or ``numpy.float64`` scalar for a full reduction, else an array.
Scalar (``uint64`` / ``int64`` / ``float64``) for a full reduction, else an array.
The accumulator dtype follows the input: unsigned → uint64, float → float64,
signed int / bool → int64.
"""
axes = _resolve_axes(arr, axis)
if axes is None:
if _is_uint8_image(arr):
return _reduce_sum_global_uint8(arr, keepdims=keepdims)
if _is_float32_image(arr):
return _reduce_sum_global_float32(arr, keepdims=keepdims)
raise ValueError(f"Unsupported dtype {arr.dtype} for reduce_sum; use uint8 or float32.")
if axes == _per_channel_spatial_axes(arr):
elif axes == _per_channel_spatial_axes(arr):
if _is_uint8_image(arr):
return _reduce_sum_per_channel_uint8(arr, keepdims=keepdims)
if _is_float32_image(arr):
return _reduce_sum_per_channel_float32(arr, axes, keepdims=keepdims)
raise ValueError(f"Unsupported dtype {arr.dtype} for reduce_sum; use uint8 or float32.")
return _reduce_sum_numpy(arr, axes, keepdims=keepdims)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ requires = [ "setuptools>=45", "wheel" ]

[project]
name = "albucore"
version = "0.1.4"
version = "0.1.5"

description = "High-performance image processing functions for deep learning and computer vision."
readme = "README.md"
Expand Down
46 changes: 40 additions & 6 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,19 +350,53 @@ def test_std_custom_eps_matches_numpy(shape: tuple[int, ...], dtype: type) -> No
assert np.isclose(float(s), float(ref), rtol=1e-4, atol=1e-4)


@pytest.mark.parametrize("dtype", [np.int32, np.float64])
def test_unsupported_dtype_raises(dtype: type) -> None:
@pytest.mark.parametrize("dtype", [np.int32, np.float64, np.bool_, np.complex64])
def test_unsupported_dtype_raises_mean_std(dtype: type) -> None:
arr = np.ones((2, 2, 1), dtype=dtype)
with pytest.raises(ValueError, match="Unsupported dtype"):
mean(arr)
with pytest.raises(ValueError, match="Unsupported dtype"):
std(arr)
with pytest.raises(ValueError, match="Unsupported dtype"):
mean_std(arr)
with pytest.raises(ValueError, match="Unsupported dtype"):
reduce_sum(arr)
with pytest.raises(ValueError, match="Unsupported dtype"):
reduce_sum(arr, "per_channel")


@pytest.mark.parametrize(
    ("dtype", "expected_acc_dtype"),
    [
        (np.int32, np.int64),
        (np.int64, np.int64),
        (np.uint32, np.uint64),
        (np.uint64, np.uint64),
        (np.float64, np.float64),
        (np.bool_, np.int64),
    ],
)
def test_reduce_sum_accumulator_dtype(dtype: type, expected_acc_dtype: type) -> None:
    """reduce_sum must choose its accumulator from the input dtype.

    Unsigned ints accumulate in uint64, floats in float64, and signed
    ints / bool in int64 — on both the global and per-channel paths.
    """
    image = np.ones((4, 4, 3), dtype=dtype)
    # Exercise both reduction routes; the default axis is None (global).
    for axis_spec in (None, "per_channel"):
        assert reduce_sum(image, axis_spec).dtype == expected_acc_dtype


@pytest.mark.parametrize(
    ("dtype", "fill", "expected_acc_dtype"),
    [
        # near int32 max — would overflow into int32 but must not with int64 accumulator
        (np.int32, np.iinfo(np.int32).max, np.int64),
        # unsigned — must not sign-extend into int64
        (np.uint32, np.iinfo(np.uint32).max, np.uint64),
        # float64 precision — all-same values, check no precision loss in sum
        (np.float64, 1.0 / 3.0, np.float64),
    ],
)
def test_reduce_sum_overflow_and_precision(dtype: type, fill: float, expected_acc_dtype: type) -> None:
    """Sums near dtype limits must match NumPy under the widened accumulator."""
    image = np.full((8, 8, 1), fill, dtype=dtype)
    total = reduce_sum(image)
    # The widened accumulator dtype must survive into the result ...
    assert total.dtype == expected_acc_dtype
    # ... and the value must equal NumPy's sum with that same accumulator.
    assert total == np.sum(image, dtype=expected_acc_dtype)


@pytest.mark.parametrize("c", [1, 2, 3, 4])
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading