diff --git a/Src/Base/AMReX_IntSuperAccumulator.H b/Src/Base/AMReX_IntSuperAccumulator.H
new file mode 100644
index 0000000000..8186a40b24
--- /dev/null
+++ b/Src/Base/AMReX_IntSuperAccumulator.H
@@ -0,0 +1,292 @@
+#ifndef AMREX_INT_SUPERACCUMULATOR_H_
+#define AMREX_INT_SUPERACCUMULATOR_H_
+
+#include <AMReX_Config.H>
+
+#include <AMReX_Array4.H>
+#include <AMReX_BaseFab.H>
+#include <AMReX_Gpu.H>
+#include <AMReX_GpuAtomic.H>
+#include <AMReX_GpuQualifiers.H>
+#include <AMReX_Math.H>
+
+#include <cmath>
+#include <cstdint>
+
+namespace amrex {
+
+/**
+ * \brief Integer superaccumulator backed by BaseFab<Long> storage.
+ *
+ * The accumulator stores the exact sum of single-precision floating-point
+ * numbers by representing the scaled integer \f$\sum_i x_i 2^{-e_\text{min}}\f$
+ * in a radix-\f$2^{30}\f$ positional system. Each digit is stored as a 64-bit signed
+ * integer in a `BaseFab<Long>` component. This simplified variant maintains a single logical
+ * entry whose digits occupy the `digits_per_entry` components. Both
+ * `accumulate` and `finalize` operate in single precision; callers may cast to
+ * wider types if needed. All summands are assumed non-negative; subnormal
+ * inputs are treated as zero to reduce the number of required digits per entry.
+ */
+class IntSuperAccumulatorFab
+{
+public:
+    static constexpr int digit_bits = 30;                 //!< Width of each digit in bits.
+    static constexpr int digit_base = 1 << digit_bits;    //!< Base of each digit.
+    static constexpr int min_exponent = -149;             //!< Smallest exponent of scaled mantissa (binary32 minimum normal).
+    static constexpr int max_exponent =  104;             //!< Largest exponent of scaled mantissa (binary32 maximum normal).
+    static constexpr int significand_bits = 24;           //!< Binary32 significand bits including hidden bit.
+    static constexpr int extra_digits = 1;                //!< Safety margin for carry propagation.
+    static constexpr int digits_for_shift = ((max_exponent - min_exponent) / digit_bits) + 1; // covers exponent window
+    static constexpr int digits_for_value = 2;            //!< Worst-case mantissa occupies two digits at radix 2^30.
+    static constexpr int digits_per_entry = digits_for_shift + digits_for_value + extra_digits; //!< 12 digits per entry.
+
+    IntSuperAccumulatorFab () = default;
+
+    explicit IntSuperAccumulatorFab (Arena* arena) noexcept
+        : m_storage(arena) {}
+
+    IntSuperAccumulatorFab (const Box& box, Arena* arena=nullptr)
+        : m_storage(box, digits_per_entry, arena) {}
+
+    [[nodiscard]] static constexpr int entries () noexcept { return 1; }
+
+    void define (const Box& box, Arena* arena=nullptr)
+    {
+        m_storage.resize(box, digits_per_entry, arena);
+    }
+
+    [[nodiscard]] Array4<Long> array () noexcept { return m_storage.array(); }
+    [[nodiscard]] Array4<Long const> const_array () const noexcept { return m_storage.const_array(); }
+
+    void reset ()
+    {
+        if (amrex::Gpu::inLaunchRegion()) {
+            m_storage.template setVal<RunOn::Device>(0);
+        } else {
+            m_storage.template setVal<RunOn::Host>(0);
+        }
+    }
+
+    void reset (Box const& bx)
+    {
+        constexpr int ncomp = digits_per_entry;
+        if (amrex::Gpu::inLaunchRegion()) {
+            m_storage.template setVal<RunOn::Device>(0, bx, 0, ncomp);
+        } else {
+            m_storage.template setVal<RunOn::Host>(0, bx, 0, ncomp);
+        }
+    }
+
+    /**
+     * \brief Accumulate a floating-point value into the superaccumulator.
+     *
+     * The caller must provide the digit array for the single logical entry.
+     * All operations are carried out using integers; the routine does not
+     * allocate additional storage.
+     */
+    template <typename Array>
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE static void
+    accumulate (Array const& digits, int i, int j, int k, float value) noexcept
+    {
+        if (value <= 0.0F) {
+            if (value < 0.0F) {
+#ifndef AMREX_USE_GPU
+                amrex::Abort("IntSuperAccumulatorFab::accumulate: negative input not supported");
+#endif
+            }
+            return;
+        }
+
+        if (!amrex::Math::isfinite(value)) {
+#ifndef AMREX_USE_GPU
+            amrex::Abort("IntSuperAccumulatorFab::accumulate: non-finite input");
+#else
+            return;
+#endif
+        }
+
+        const auto bits = detail::decompose(value);
+        if (bits.is_zero) { return; }
+
+        const int shift = bits.exponent - min_exponent;
+        int digit_index = shift / digit_bits;
+        const int bit_offset = shift % digit_bits;
+
+        const unsigned long long magnitude = bits.magnitude;
+        unsigned long long chunk_value = magnitude << bit_offset;
+
+        constexpr int component_base = 0;
+        constexpr int component_limit = digits_per_entry;
+
+        while (chunk_value != 0ULL) {
+            const unsigned long long chunk = chunk_value & static_cast<unsigned long long>(digit_base - 1);
+            chunk_value >>= digit_bits;
+            if (component_base + digit_index >= component_limit) {
+#ifndef AMREX_USE_GPU
+                amrex::Abort("IntSuperAccumulatorFab: digit overflow");
+#else
+                break;
+#endif
+            }
+            detail::add_to_digit(digits, i, j, k,
+                                 component_base + digit_index,
+                                 component_limit,
+                                 chunk);
+            ++digit_index;
+        }
+    }
+
+    /**
+     * \brief Finalize the accumulator entry and return a binary32 result.
+     */
+    template <typename Array>
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE static float
+    finalize (Array const& digits, int i, int j, int k) noexcept
+    {
+        double result = 0.0;
+        for (int d = digits_per_entry - 1; d >= 0; --d) {
+            const Long digit = digits(i,j,k, d);
+            if (digit == 0) { continue; }
+            const int exponent = min_exponent + d * digit_bits;
+            result += std::ldexp(static_cast<double>(digit), exponent);
+        }
+        return static_cast<float>(result);
+    }
+
+    template <typename Array>
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE static void
+    clear_entry (Array const& digits, int i, int j, int k) noexcept
+    {
+        for (int d = 0; d < digits_per_entry; ++d) {
+            digits(i,j,k, d) = 0;
+        }
+    }
+
+private:
+    struct Decomposed
+    {
+        unsigned long long magnitude;
+        int exponent;
+        bool is_zero;
+    };
+
+    struct detail
+    {
+        template <typename Array>
+        AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE static void
+        add_to_digit (Array const& digits, int i, int j, int k,
+                      int component, int component_limit, unsigned long long value) noexcept
+        {
+            unsigned long long carry = value;
+            int current_component = component;
+            constexpr auto base = static_cast<unsigned long long>(digit_base);
+
+            while (carry != 0ULL) {
+                if (current_component >= component_limit) {
+#ifndef AMREX_USE_GPU
+                    amrex::Abort("IntSuperAccumulatorFab: carry overflow");
+#else
+                    break;
+#endif
+                }
+
+#if defined(AMREX_USE_GPU) && AMREX_DEVICE_COMPILE
+                // When we compile for the device we cannot rely on a carry-save digit layout, so
+                // each thread performs an immediate normalization using GPU atomics.
+                Long* const digit_ptr = &(digits(i,j,k,current_component));
+
+                // Break the carry into the current radix chunk and the carry to forward.
+                const unsigned long long chunk = carry & (base - 1ULL);
+                carry >>= digit_bits;
+
+                // Atomically accumulate this chunk.  Multiple threads may land on the same digit,
+                // which is safe because atomicAdd enforces a total order.
+                if (chunk != 0ULL) {
+                    amrex::Gpu::Atomic::Add(digit_ptr, static_cast<Long>(chunk));
+                }
+
+                // Track how many bases we successfully removed so they can be forwarded upward.
+                unsigned long long propagated = 0ULL;
+                while (true) {
+                    // Snapshot the digit atomically without changing it.  This avoids racing
+                    // against other threads that may be normalizing the same slot.
+                    const Long current_value = amrex::Gpu::Atomic::Add(digit_ptr, static_cast<Long>(0));
+                    if (current_value < digit_base) {
+                        break;
+                    }
+
+                    // Attempt to drain one full base from the digit.  The return value is the
+                    // pre-subtraction contents, so we know whether we actually claimed the carry.
+                    const Long before = amrex::Gpu::Atomic::Add(digit_ptr, static_cast<Long>(-digit_base));
+                    if (before >= digit_base) {
+                        // We consumed one unit of overflow and should forward it to the next digit.
+                        ++propagated;
+                        continue;
+                    } else {
+                        // Another thread already normalized the digit between the peek and our
+                        // subtract; restore the value and exit.
+                        amrex::Gpu::Atomic::Add(digit_ptr, static_cast<Long>(digit_base));
+                        break;
+                    }
+                }
+
+                // Fold any carries we successfully removed into the higher-order digits.
+                carry += propagated;
+#else
+                const unsigned long long sum = static_cast<unsigned long long>(digits(i,j,k,current_component)) + carry;
+                const unsigned long long remainder = sum & (base - 1ULL);
+                const unsigned long long quotient = sum >> digit_bits;
+
+                digits(i,j,k,current_component) = static_cast<Long>(remainder);
+                carry = quotient;
+#endif
+                ++current_component;
+            }
+        }
+
+        AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE static Decomposed
+        decompose (float value) noexcept
+        {
+            Decomposed bits{};
+
+            if (value <= 0.0F) {
+                bits.is_zero = true;
+                return bits;
+            }
+
+            union {
+                float f;
+                std::uint32_t u;
+            } pun{};
+            pun.f = value;
+            const std::uint32_t u = pun.u;
+
+            const std::uint32_t exponent_field = (u >> 23) & 0xFFU;
+            const std::uint32_t fraction = u & ((1U << 23) - 1U);
+
+            if (exponent_field == 0xFFU) {
+#ifndef AMREX_USE_GPU
+                amrex::Abort("IntSuperAccumulatorFab::decompose: NaN or Inf not supported (float)");
+#else
+                Decomposed tmp{}; tmp.is_zero = true; return tmp;
+#endif
+            } else if (exponent_field == 0U) {
+                // Treat subnormals as zero; caller is expected to pre-round.
+                bits.is_zero = true;
+                return bits;
+            } else {
+                bits.magnitude = (1U << 23) | fraction;
+                bits.exponent = static_cast<int>(exponent_field) - 127 - significand_bits + 1;
+            }
+
+            bits.is_zero = false;
+            return bits;
+        }
+    };
+
+    BaseFab<Long> m_storage;
+};
+
+} // namespace amrex
+
+#endif
diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt
index 89f406013d..07802d1d6b 100644
--- a/Src/Base/CMakeLists.txt
+++ b/Src/Base/CMakeLists.txt
@@ -137,6 +137,7 @@ foreach(D IN LISTS AMReX_SPACEDIM)
        AMReX_BaseFab.H
        AMReX_BaseFab.cpp
        AMReX_Array4.H
+       AMReX_IntSuperAccumulator.H
        AMReX_MakeType.H
        AMReX_TypeTraits.H
        AMReX_FabDataType.H
diff --git a/Src/Base/Make.package b/Src/Base/Make.package
index 675d37b2cc..7b7851076e 100644
--- a/Src/Base/Make.package
+++ b/Src/Base/Make.package
@@ -163,6 +163,7 @@ C$(AMREX_BASE)_headers += AMReX_TypeTraits.H
 C$(AMREX_BASE)_headers += AMReX_FabDataType.H
 
 C$(AMREX_BASE)_headers += AMReX_Array4.H
+C$(AMREX_BASE)_headers += AMReX_IntSuperAccumulator.H
 C$(AMREX_BASE)_sources += AMReX_BaseFab.cpp
 C$(AMREX_BASE)_headers += AMReX_BaseFab.H AMReX_BaseFabUtility.H
 C$(AMREX_BASE)_headers += AMReX_FabFactory.H
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index 3db6cccbf8..c0a4b82e25 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -126,7 +126,7 @@ else()
    # List of subdirectories to search for CMakeLists.
    #
    set( AMREX_TESTS_SUBDIRS Amr AsyncOut CallNoinline CLZ CommType CTOParFor DeviceGlobal
-                            Enum HeatEquation MultiBlock MultiPeriod ParmParse Parser
+                            Enum HeatEquation MultiBlock MultiPeriod Numerics ParmParse Parser
                             Parser2 ParserUserFn Reinit RoundoffDomain SmallMatrix)
 
    if (AMReX_PARTICLES)
diff --git a/Tests/Numerics/IntSuperAccumulator/CMakeLists.txt b/Tests/Numerics/IntSuperAccumulator/CMakeLists.txt
new file mode 100644
index 0000000000..7e8d9a8080
--- /dev/null
+++ b/Tests/Numerics/IntSuperAccumulator/CMakeLists.txt
@@ -0,0 +1,9 @@
+foreach(D IN LISTS AMReX_SPACEDIM)
+    set(_sources main.cpp)
+    set(_input_files )
+
+    setup_test(${D} _sources _input_files)
+
+    unset(_sources)
+    unset(_input_files)
+endforeach()
diff --git a/Tests/Numerics/IntSuperAccumulator/GNUmakefile b/Tests/Numerics/IntSuperAccumulator/GNUmakefile
new file mode 100644
index 0000000000..a42d27db03
--- /dev/null
+++ b/Tests/Numerics/IntSuperAccumulator/GNUmakefile
@@ -0,0 +1,26 @@
+AMREX_HOME ?= ../../../amrex
+
+PRECISION = FLOAT
+
+DEBUG   = FALSE
+
+DIM     = 3
+
+COMP    = gcc
+
+USE_MPI   = FALSE
+USE_OMP   = FALSE
+USE_CUDA  = FALSE
+USE_HIP   = FALSE
+USE_SYCL  = FALSE
+
+BL_NO_FORT = TRUE
+
+TINY_PROFILE = FALSE
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+include ./Make.package
+include $(AMREX_HOME)/Src/Base/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
diff --git a/Tests/Numerics/IntSuperAccumulator/Make.package b/Tests/Numerics/IntSuperAccumulator/Make.package
new file mode 100644
index 0000000000..6b4b865e8f
--- /dev/null
+++ b/Tests/Numerics/IntSuperAccumulator/Make.package
@@ -0,0 +1 @@
+CEXE_sources += main.cpp
diff --git a/Tests/Numerics/IntSuperAccumulator/main.cpp b/Tests/Numerics/IntSuperAccumulator/main.cpp
new file mode 100644
index 0000000000..b9577a26ed
--- /dev/null
+++ b/Tests/Numerics/IntSuperAccumulator/main.cpp
@@ -0,0 +1,155 @@
+#include <AMReX.H>
+#include <AMReX_Print.H>
+#include <AMReX_Box.H>
+#include <AMReX_IntVect.H>
+#include <AMReX_IntSuperAccumulator.H>
+#include <AMReX_Gpu.H>
+#include <AMReX_GpuContainers.H>
+
+#include <array>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <random>
+
+namespace
+{
+
+struct Range
+{
+    float min_value;
+    float max_value;
+    int   count;
+    int   seed;
+};
+
+} // namespace
+
+int
+main (int argc, char* argv[])
+{
+    amrex::Initialize(argc, argv);
+    int status = 0;
+
+    {
+        using amrex::IntSuperAccumulatorFab;
+
+        const amrex::Box bx(amrex::IntVect::TheZeroVector(), amrex::IntVect::TheZeroVector());
+        IntSuperAccumulatorFab accumulator(bx);
+
+        const float lowest_exponent_min = std::numeric_limits<float>::min();
+        const float lowest_exponent_max = std::nextafter(std::ldexp(1.0F, -125), 0.0F);
+        const float highest_exponent_min = std::ldexp(1.0F, 125);
+        const float highest_exponent_max = std::ldexp(1.0F, 126); // OVERFLOWS TO INF: std::numeric_limits<float>::max();
+
+        const std::array<Range, 6> ranges = {{
+            {lowest_exponent_min, lowest_exponent_max, 32768, 3},
+            {highest_exponent_min, highest_exponent_max, 4, 5},
+            {1.0e-12F, 1.0e-3F,   32768, 11},
+            {1.0e-4F,  1.0F,      65536, 23},
+            {1.0F,     1.0e6F,    65536, 37},
+            {1.0e-2F,  1.0e8F,   131072, 41}
+        }};
+
+        auto compute_reference = [] (amrex::Gpu::HostVector<float> const& values) -> float {
+            volatile double sum = 0.0;
+            volatile double corr = 0.0;
+            for (float v : values) {
+                const volatile double y = static_cast<double>(v) - corr;
+                const volatile double t = sum + y;
+                corr = (t - sum) - y;
+                sum = t;
+            }
+            return static_cast<float>(sum);
+        };
+
+        auto accumulate_values = [&] (amrex::Gpu::HostVector<float> const& values) -> float {
+            accumulator.reset(bx);
+            auto digits = accumulator.array();
+            constexpr int i = 0;
+            constexpr int j = 0;
+            constexpr int k = 0;
+
+            const int count = static_cast<int>(values.size());
+
+            if (amrex::Gpu::inLaunchRegion()) {
+                amrex::Gpu::DeviceVector<float> d_values(count);
+                amrex::Gpu::copy(amrex::Gpu::hostToDevice, values.begin(), values.end(), d_values.begin());
+
+                auto *ptr = d_values.data();
+                auto arr = digits;
+
+                amrex::ParallelFor(count, [=] AMREX_GPU_DEVICE (int n) {
+                    IntSuperAccumulatorFab::accumulate(arr, i, j, k, ptr[n]);
+                });
+
+                amrex::Gpu::streamSynchronize();
+
+                amrex::Gpu::DeviceScalar<float> d_result;
+                auto *res_ptr = d_result.dataPtr();
+                amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int) {
+                    res_ptr[0] = IntSuperAccumulatorFab::finalize(arr, i, j, k);
+                });
+                amrex::Gpu::streamSynchronize();
+                return d_result.dataValue();
+            } else {
+                for (float v : values) {
+                    IntSuperAccumulatorFab::accumulate(digits, i, j, k, v);
+                }
+                return IntSuperAccumulatorFab::finalize(digits, i, j, k);
+            }
+        };
+
+        for (const auto& cfg : ranges) {
+            std::mt19937 gen(cfg.seed);
+            std::uniform_real_distribution<float> dist(cfg.min_value, cfg.max_value);
+
+            amrex::Gpu::HostVector<float> values(cfg.count);
+            for (int n = 0; n < cfg.count; ++n) {
+                values[n] = dist(gen);
+            }
+
+            const float reference = compute_reference(values);
+            const float result = accumulate_values(values);
+
+            amrex::Print() << "Range [" << cfg.min_value << ", " << cfg.max_value
+                           << "] with count " << cfg.count
+                           << ": reference=" << reference
+                           << " accumulator=" << result << '\n';
+
+            if (result != reference) {
+                amrex::Print() << "Mismatch for range [" << cfg.min_value << ", "
+                               << cfg.max_value << "] with count " << cfg.count
+                               << ": reference=" << reference
+                               << " accumulator=" << result << '\n';
+                status = 1;
+                break;
+            }
+        }
+
+        if (status == 0) {
+            constexpr float value = 0.125F;
+            constexpr int repeats = 200000;
+
+            amrex::Gpu::HostVector<float> values(repeats);
+            for (int n = 0; n < repeats; ++n) {
+                values[n] = value;
+            }
+
+            const float reference = compute_reference(values);
+            const float result = accumulate_values(values);
+
+            amrex::Print() << "Constant accumulation: reference=" << reference
+                           << " accumulator=" << result << '\n';
+
+            if (result != reference) {
+                amrex::Print() << "Mismatch for constant accumulation: reference="
+                               << reference << " accumulator=" << result << '\n';
+                status = 1;
+            }
+        }
+    }
+
+    amrex::Finalize();
+    return status;
+}