diff --git a/.github/workflows/good_defines.txt b/.github/workflows/good_defines.txt index 9cbd6c8ede..a084e38fcc 100644 --- a/.github/workflows/good_defines.txt +++ b/.github/workflows/good_defines.txt @@ -24,6 +24,7 @@ SCREENING SCREEN_METHOD SDC SIMPLIFIED_SDC +SINGLE_PRECISION_JACOBIAN STRANG TRUE_SDC _OPENMP diff --git a/Docs/source/ode_integrators.rst b/Docs/source/ode_integrators.rst index 6ee5bc5804..4644a347a5 100644 --- a/Docs/source/ode_integrators.rst +++ b/Docs/source/ode_integrators.rst @@ -113,6 +113,23 @@ For the other networks (usually pynucastro networks), the implementation is provided in ``Microphysics/util/linpack.H`` and is templated on the number of equations. Pivoting can be disabled by setting ``integrator.linalg_do_pivoting=0``. +.. index:: USE_SINGLE_PRECISION_JACOBIAN + +.. tip:: + + The storage for the Jacobian can take up the most memory when + integrating the reaction system. It is possible to store the + Jacobian as single-precision, by building with: + + :: + + USE_SINGLE_PRECISION_JACOBIAN=TRUE + + This can speed up the integration prevent the code from running out + of memory when run on GPUs. + + + Integration errors ================== diff --git a/integration/BackwardEuler/be_type.H b/integration/BackwardEuler/be_type.H index 0efae612a7..04d7d8f190 100644 --- a/integration/BackwardEuler/be_type.H +++ b/integration/BackwardEuler/be_type.H @@ -48,7 +48,7 @@ struct be_t { amrex::Real rtol_enuc; amrex::Array1D y; - ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac; + ArrayUtil::MathArray2D jac; short jacobian_type; }; diff --git a/integration/Make.package b/integration/Make.package index 81f5c94887..9b8ab665f6 100644 --- a/integration/Make.package +++ b/integration/Make.package @@ -26,6 +26,9 @@ else CEXE_headers += integrator_setup_strang.H endif +ifeq ($(USE_SINGLE_PRECISION_JACOBIAN), TRUE) + DEFINES += -DSINGLE_PRECISION_JACOBIAN +endif ifeq ($(USE_ALL_NSE), TRUE) ifeq ($(USE_ALL_SDC), TRUE) diff --git a/integration/VODE/vode_type.H b/integration/VODE/vode_type.H index 3c246bfb50..a1df1f9bf2 100644 --- a/integration/VODE/vode_type.H +++ b/integration/VODE/vode_type.H @@ -143,11 +143,11 @@ struct dvode_t amrex::Array1D y; // Jacobian - ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac; + ArrayUtil::MathArray2D jac; #ifdef ALLOW_JACOBIAN_CACHING // Saved Jacobian - ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac_save; + ArrayUtil::MathArray2D jac_save; #endif // the Nordsieck history array diff --git a/integration/integrator_data.H b/integration/integrator_data.H index 2d4f7795cd..fb698318aa 100644 --- a/integration/integrator_data.H +++ b/integration/integrator_data.H @@ -52,6 +52,6 @@ constexpr int integrator_neqs () using IArray1D = amrex::Array1D; using RArray1D = amrex::Array1D; -using RArray2D = ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS>; +using RArray2D = ArrayUtil::MathArray2D; #endif diff --git a/integration/utils/circle_theorem.H b/integration/utils/circle_theorem.H index 957a2f3ada..85afc59d7e 100644 --- a/integration/utils/circle_theorem.H +++ b/integration/utils/circle_theorem.H @@ -17,7 +17,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void circle_theorem_sprad(const amrex::Real time, BurnT& state, T& int_state, amrex::Real& sprad) { - ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS> jac_array; + ArrayUtil::MathArray2D jac_array; if (integrator_rp::jacobian == 1) { jac(time, state, int_state, jac_array); diff --git a/interfaces/ArrayUtilities.H b/interfaces/ArrayUtilities.H index 37f6b51963..9b000eb77e 100644 --- a/interfaces/ArrayUtilities.H +++ b/interfaces/ArrayUtilities.H @@ -34,7 +34,7 @@ namespace ArrayUtil amrex::Real arr[(XHI-XLO+1)]; }; - template + template struct MathArray2D { AMREX_GPU_HOST_DEVICE AMREX_INLINE @@ -71,7 +71,7 @@ namespace ArrayUtil } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - amrex::Real get (const int i, const int j) const noexcept { + T get (const int i, const int j) const noexcept { AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI); return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)]; } @@ -84,18 +84,18 @@ namespace ArrayUtil } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const amrex::Real& operator() (int i, int j) const noexcept { + const T& operator() (int i, int j) const noexcept { AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI); return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)]; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - amrex::Real& operator() (int i, int j) noexcept { + T& operator() (int i, int j) noexcept { AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI); return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)]; } - amrex::Real arr[(XHI-XLO+1)*(YHI-YLO+1)]; + T arr[(XHI-XLO+1)*(YHI-YLO+1)]; }; namespace Math diff --git a/interfaces/burn_type.H b/interfaces/burn_type.H index bf7ce27b6d..5db4b8f8bd 100644 --- a/interfaces/burn_type.H +++ b/interfaces/burn_type.H @@ -56,7 +56,15 @@ const int SVAR = SEDEN+1; const int SVAR_EVOLVE = SFX; // this is the data type of the dense Jacobian that the network wants. -using JacNetArray2D = ArrayUtil::MathArray2D<1, neqs, 1, neqs>; + +// the Jacobian can have a different type that our system +#ifdef SINGLE_PRECISION_JACOBIAN +using jac_t = float; +#else +using jac_t = amrex::Real; +#endif + +using JacNetArray2D = ArrayUtil::MathArray2D; struct burn_t { diff --git a/networks/rhs.H b/networks/rhs.H index 91ab01bf47..bd69cc4b22 100644 --- a/networks/rhs.H +++ b/networks/rhs.H @@ -1357,7 +1357,7 @@ void rhs (const burn_t& burn_state, amrex::Array1D& ydot) // Analytical Jacobian AMREX_GPU_HOST_DEVICE AMREX_INLINE -void jac (const burn_t& burn_state, ArrayUtil::MathArray2D<1, neqs, 1, neqs>& jac) +void jac (const burn_t& burn_state, ArrayUtil::MathArray2D& jac) { rhs_state_t rhs_state; @@ -1521,7 +1521,7 @@ void actual_rhs (const burn_t& state, amrex::Array1D& ydot } AMREX_GPU_HOST_DEVICE AMREX_INLINE -void actual_jac (const burn_t& state, ArrayUtil::MathArray2D<1, neqs, 1, neqs>& jac) +void actual_jac (const burn_t& state, ArrayUtil::MathArray2D& jac) { RHS::jac(state, jac); } diff --git a/unit_test/test_linear_algebra/test_linear_algebra.H b/unit_test/test_linear_algebra/test_linear_algebra.H index 0762163da3..caf5677f84 100644 --- a/unit_test/test_linear_algebra/test_linear_algebra.H +++ b/unit_test/test_linear_algebra/test_linear_algebra.H @@ -175,7 +175,7 @@ void linear_algebra() { // now try to output a Jacobian mask based on the actual Jacobian - ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS> jac; + ArrayUtil::MathArray2D jac; burn_t burn_state; burn_state.rho = 1.e8; diff --git a/unit_test/test_rhs/rhs_zones.H b/unit_test/test_rhs/rhs_zones.H index 87a3ec559a..54a7bfaba0 100644 --- a/unit_test/test_rhs/rhs_zones.H +++ b/unit_test/test_rhs/rhs_zones.H @@ -41,7 +41,7 @@ bool do_rhs (int i, int j, int k, amrex::Array4 const& state, const amrex::Array1D ydot; - ArrayUtil::MathArray2D<1, neqs, 1, neqs> jac; + ArrayUtil::MathArray2D jac; #ifdef NEW_NETWORK_IMPLEMENTATION RHS::rhs(burn_state, ydot);