Skip to content

Commit 1be7257

Browse files
authored
FabArray: Option to use a single contiguous chunk of memory (#3857)
This adds an option to use a single contiguous chunk of memory for all the data in the Fabs of a FabArray/MultiFab/iMultiFab. One can change the strategy for an individual MultiFab via MFInfo::SetAllocSingleChunk(bool), and for all MultiFabs by default via the ParmParse parameter amrex.mf.alloc_single_chunk=1. This is considered an experimental feature. Please let us know if you notice any issues.
1 parent 67523bb commit 1be7257

File tree

8 files changed

+215
-28
lines changed

8 files changed

+215
-28
lines changed

Docs/sphinx_documentation/source/Basics.rst

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2735,10 +2735,26 @@ covered by fine level grids.
27352735
Memory Allocation
27362736
=================
27372737

2738-
Some constructors of :cpp:`MultiFab`, :cpp:`FArrayBox`, etc. can take
2739-
an :cpp:`Arena` argument for memory allocation. This is usually not
2740-
important for CPU codes, but very important for GPU codes. We will
2741-
present more details in :ref:`sec:gpu:memory` in Chapter GPU.
2738+
Some constructors of :cpp:`MultiFab`, :cpp:`FArrayBox`, etc. can take an
2739+
:cpp:`Arena` argument for memory allocation. Some constructors of
2740+
:cpp:`MultiFab` can take an optional argument :cpp:`MFInfo`, which can be
2741+
used to set the arena. This is usually not important for CPU codes, but
2742+
very important for GPU codes. We will present more details about memory
2743+
arenas in :ref:`sec:gpu:memory` in Chapter GPU.
2744+
2745+
Every :cpp:`FArrayBox` in a :cpp:`MultiFab` has a contiguous chunk of memory
2746+
for floating point data, whereas by default :cpp:`MultiFab` as a collection
2747+
of multiple :cpp:`FArrayBox`\ s does not store all floating point data in
2748+
a single contiguous chunk of memory. This behavior can be changed for all
2749+
:cpp:`MultiFab`\ s with the :cpp:`ParmParse` parameter,
2750+
``amrex.mf.alloc_single_chunk=1``, or for a specific :cpp:`MultiFab` by
2751+
passing a :cpp:`MFInfo` object (e.g.,
2752+
``MFInfo().SetAllocSingleChunk(true)``) to the constructor. One can call
2753+
:cpp:`MultiFab::singleChunkPtr()` to obtain a pointer to the single chunk
2754+
memory. Note that the function returns a null pointer if the :cpp:`MultiFab`
2755+
does not use a single contiguous chunk of memory. One can also call
2756+
:cpp:`MultiFab::singleChunkSize()` to obtain the size in bytes of the single
2757+
chunk memory.
27422758

27432759
AMReX has a Fortran module, :fortran:`amrex_mempool_module`, that can be used to
27442760
allocate memory for Fortran pointers. The reason that such a module exists in

Src/Base/AMReX_FArrayBox.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,7 @@ FABio_8bit::write (std::ostream& os,
809809
const Real mn = f.min<RunOn::Host>(k+comp);
810810
const Real mx = f.max<RunOn::Host>(k+comp);
811811
const Real* dat = f.dataPtr(k+comp);
812-
Real rng = std::fabs(mx-mn);
812+
Real rng = std::abs(mx-mn);
813813
rng = (rng < eps) ? 0.0_rt : 255.0_rt/(mx-mn);
814814
for(Long i(0); i < siz; ++i) {
815815
Real v = rng*(dat[i]-mn);

Src/Base/AMReX_FabArray.H

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,14 @@ Long nBytesOwned (BaseFab<T> const& fab) noexcept { return fab.nBytesOwned(); }
6565
struct MFInfo {
6666
// alloc: allocate memory or not
6767
bool alloc = true;
68+
bool alloc_single_chunk = FabArrayBase::getAllocSingleChunk();
6869
Arena* arena = nullptr;
6970
Vector<std::string> tags;
7071

7172
MFInfo& SetAlloc (bool a) noexcept { alloc = a; return *this; }
7273

74+
MFInfo& SetAllocSingleChunk (bool a) noexcept { alloc_single_chunk = a; return *this; }
75+
7376
MFInfo& SetArena (Arena* ar) noexcept { arena = ar; return *this; }
7477

7578
MFInfo& SetTag () noexcept { return *this; }
@@ -436,6 +439,22 @@ public:
436439
#endif
437440
}
438441

442+
//! Return the data pointer to the single chunk memory if this object
443+
//! uses a single contiguous chunk of memory, nullptr otherwise.
444+
[[nodiscard]] value_type* singleChunkPtr () noexcept {
445+
return m_single_chunk_arena ? (value_type*)m_single_chunk_arena->data() : nullptr;
446+
}
447+
448+
//! Return the data pointer to the single chunk memory if this object
449+
//! uses a single contiguous chunk of memory, nullptr otherwise.
450+
[[nodiscard]] value_type const* singleChunkPtr () const noexcept {
451+
return m_single_chunk_arena ? (value_type const*)m_single_chunk_arena->data() : nullptr;
452+
}
453+
454+
//! Return the size in bytes of the single chunk memory if this object uses a
455+
//! single contiguous chunk of memory, 0 otherwise.
456+
[[nodiscard]] std::size_t singleChunkSize () const noexcept { return m_single_chunk_size; }
457+
439458
bool isAllRegular () const noexcept {
440459
#ifdef AMREX_USE_EB
441460
const auto *const f = dynamic_cast<EBFArrayBoxFactory const*>(m_factory.get());
@@ -1233,6 +1252,8 @@ protected:
12331252

12341253
std::unique_ptr<FabFactory<FAB> > m_factory;
12351254
DataAllocator m_dallocator;
1255+
std::unique_ptr<detail::SingleChunkArena> m_single_chunk_arena;
1256+
Long m_single_chunk_size = 0;
12361257

12371258
//! has define() been called?
12381259
bool define_function_called = false;
@@ -1306,7 +1327,8 @@ private:
13061327
using Iterator = typename std::vector<FAB*>::iterator;
13071328

13081329
void AllocFabs (const FabFactory<FAB>& factory, Arena* ar,
1309-
const Vector<std::string>& tags);
1330+
const Vector<std::string>& tags,
1331+
bool alloc_single_chunk);
13101332

13111333
void setFab_assert (int K, FAB const& fab) const;
13121334

@@ -1696,6 +1718,7 @@ FabArray<FAB>::release (int K)
16961718
{
16971719
const int li = localindex(K);
16981720
if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
1721+
AMREX_ASSERT(m_single_chunk_arena == nullptr);
16991722
Long nbytes = amrex::nBytesOwned(*m_fabs_v[li]);
17001723
if (nbytes > 0) {
17011724
for (auto const& t : m_tags) {
@@ -1715,6 +1738,7 @@ FabArray<FAB>::release (const MFIter& mfi)
17151738
{
17161739
const int li = mfi.LocalIndex();
17171740
if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
1741+
AMREX_ASSERT(m_single_chunk_arena == nullptr);
17181742
Long nbytes = amrex::nBytesOwned(*m_fabs_v[li]);
17191743
if (nbytes > 0) {
17201744
for (auto const& t : m_tags) {
@@ -1755,6 +1779,12 @@ FabArray<FAB>::clear ()
17551779
updateMemUsage(t, -nbytes, nullptr);
17561780
}
17571781
}
1782+
1783+
if (m_single_chunk_arena) {
1784+
m_single_chunk_arena.reset();
1785+
}
1786+
m_single_chunk_size = 0;
1787+
17581788
m_tags.clear();
17591789

17601790
FabArrayBase::clear();
@@ -1880,6 +1910,8 @@ FabArray<FAB>::FabArray (FabArray<FAB>&& rhs) noexcept
18801910
: FabArrayBase (static_cast<FabArrayBase&&>(rhs))
18811911
, m_factory (std::move(rhs.m_factory))
18821912
, m_dallocator (std::move(rhs.m_dallocator))
1913+
, m_single_chunk_arena(std::move(rhs.m_single_chunk_arena))
1914+
, m_single_chunk_size(std::exchange(rhs.m_single_chunk_size,0))
18831915
, define_function_called(rhs.define_function_called)
18841916
, m_fabs_v (std::move(rhs.m_fabs_v))
18851917
#ifdef AMREX_USE_GPU
@@ -1909,6 +1941,8 @@ FabArray<FAB>::operator= (FabArray<FAB>&& rhs) noexcept
19091941
FabArrayBase::operator=(static_cast<FabArrayBase&&>(rhs));
19101942
m_factory = std::move(rhs.m_factory);
19111943
m_dallocator = std::move(rhs.m_dallocator);
1944+
m_single_chunk_arena = std::move(rhs.m_single_chunk_arena);
1945+
std::swap(m_single_chunk_size, rhs.m_single_chunk_size);
19121946
define_function_called = rhs.define_function_called;
19131947
std::swap(m_fabs_v, rhs.m_fabs_v);
19141948
#ifdef AMREX_USE_GPU
@@ -2008,7 +2042,7 @@ FabArray<FAB>::define (const BoxArray& bxs,
20082042
addThisBD();
20092043

20102044
if(info.alloc) {
2011-
AllocFabs(*m_factory, m_dallocator.m_arena, info.tags);
2045+
AllocFabs(*m_factory, m_dallocator.m_arena, info.tags, info.alloc_single_chunk);
20122046
#ifdef BL_USE_TEAM
20132047
ParallelDescriptor::MyTeam().MemoryBarrier();
20142048
#endif
@@ -2018,8 +2052,11 @@ FabArray<FAB>::define (const BoxArray& bxs,
20182052
template <class FAB>
20192053
void
20202054
FabArray<FAB>::AllocFabs (const FabFactory<FAB>& factory, Arena* ar,
2021-
const Vector<std::string>& tags)
2055+
const Vector<std::string>& tags, bool alloc_single_chunk)
20222056
{
2057+
if (shmem.alloc) { alloc_single_chunk = false; }
2058+
if constexpr (!IsBaseFab_v<FAB>) { alloc_single_chunk = false; }
2059+
20232060
const int n = indexArray.size();
20242061
const int nworkers = ParallelDescriptor::TeamSize();
20252062
shmem.alloc = (nworkers > 1);
@@ -2029,6 +2066,18 @@ FabArray<FAB>::AllocFabs (const FabFactory<FAB>& factory, Arena* ar,
20292066
FabInfo fab_info;
20302067
fab_info.SetAlloc(alloc).SetShared(shmem.alloc).SetArena(ar);
20312068

2069+
if (alloc_single_chunk) {
2070+
m_single_chunk_size = 0L;
2071+
for (int i = 0; i < n; ++i) {
2072+
int K = indexArray[i];
2073+
const Box& tmpbox = fabbox(K);
2074+
m_single_chunk_size += factory.nBytes(tmpbox, n_comp, K);
2075+
}
2076+
AMREX_ASSERT(m_single_chunk_size >= 0); // 0 is okay.
2077+
m_single_chunk_arena = std::make_unique<detail::SingleChunkArena>(ar, m_single_chunk_size);
2078+
fab_info.SetArena(m_single_chunk_arena.get());
2079+
}
2080+
20322081
m_fabs_v.reserve(n);
20332082

20342083
Long nbytes = 0L;
@@ -2136,6 +2185,7 @@ FabArray<FAB>::setFab_assert (int K, FAB const& fab) const
21362185
AMREX_ASSERT(!boxarray.empty());
21372186
AMREX_ASSERT(fab.box() == fabbox(K));
21382187
AMREX_ASSERT(distributionMap[K] == ParallelDescriptor::MyProc());
2188+
AMREX_ASSERT(m_single_chunk_arena == nullptr);
21392189
}
21402190

21412191
template <class FAB>

Src/Base/AMReX_FabArrayBase.H

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <AMReX_Config.H>
44

55
#include <AMReX_BoxArray.H>
6+
#include <AMReX_DataAllocator.H>
67
#include <AMReX_DistributionMapping.H>
78
#include <AMReX_ParallelDescriptor.H>
89
#include <AMReX_ParallelReduce.H>
@@ -721,8 +722,46 @@ public:
721722
};
722723
static AMREX_EXPORT FabArrayStats m_FA_stats;
723724

725+
static AMREX_EXPORT bool m_alloc_single_chunk;
726+
727+
[[nodiscard]] static bool getAllocSingleChunk () { return m_alloc_single_chunk; }
724728
};
725729

730+
namespace detail {
731+
class SingleChunkArena final
732+
: public Arena
733+
{
734+
public:
735+
SingleChunkArena (Arena* a_arena, std::size_t a_size);
736+
~SingleChunkArena () override;
737+
738+
SingleChunkArena () = delete;
739+
SingleChunkArena (const SingleChunkArena& rhs) = delete;
740+
SingleChunkArena (SingleChunkArena&& rhs) = delete;
741+
SingleChunkArena& operator= (const SingleChunkArena& rhs) = delete;
742+
SingleChunkArena& operator= (SingleChunkArena&& rhs) = delete;
743+
744+
[[nodiscard]] void* alloc (std::size_t sz) override;
745+
void free (void* pt) override;
746+
747+
// isDeviceAccessible and isHostAccessible can both be true.
748+
[[nodiscard]] bool isDeviceAccessible () const override;
749+
[[nodiscard]] bool isHostAccessible () const override;
750+
751+
[[nodiscard]] bool isManaged () const override;
752+
[[nodiscard]] bool isDevice () const override;
753+
[[nodiscard]] bool isPinned () const override;
754+
755+
[[nodiscard]] void* data () const noexcept { return (void*) m_root; }
756+
757+
private:
758+
DataAllocator m_dallocator;
759+
char* m_root = nullptr;
760+
char* m_free = nullptr;
761+
std::size_t m_size = 0;
762+
};
763+
}
764+
726765
[[nodiscard]] int nComp (FabArrayBase const& fa);
727766
[[nodiscard]] IntVect nGrowVect (FabArrayBase const& fa);
728767
[[nodiscard]] BoxArray const& boxArray (FabArrayBase const& fa);

Src/Base/AMReX_FabArrayBase.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ FabArrayBase::FabArrayStats FabArrayBase::m_FA_stats;
8686
std::map<std::string,FabArrayBase::meminfo> FabArrayBase::m_mem_usage;
8787
std::vector<std::string> FabArrayBase::m_region_tag;
8888

89+
bool FabArrayBase::m_alloc_single_chunk = false;
90+
8991
namespace
9092
{
9193
bool initialized = false;
@@ -122,6 +124,9 @@ FabArrayBase::Initialize ()
122124
MaxComp = 1;
123125
}
124126

127+
ParmParse ppmf("amrex.mf");
128+
ppmf.queryAdd("alloc_single_chunk", FabArrayBase::m_alloc_single_chunk);
129+
125130
amrex::ExecOnFinalize(FabArrayBase::Finalize);
126131

127132
#ifdef AMREX_MEM_PROFILING
@@ -2696,6 +2701,54 @@ FabArrayBase::flushParForCache ()
26962701

26972702
#endif
26982703

2704+
namespace detail {
2705+
2706+
SingleChunkArena::SingleChunkArena (Arena* a_arena, std::size_t a_size)
2707+
: m_dallocator(a_arena),
2708+
m_root((char*)m_dallocator.alloc(a_size)),
2709+
m_free(m_root),
2710+
m_size(a_size)
2711+
{}
2712+
2713+
SingleChunkArena::~SingleChunkArena ()
2714+
{
2715+
if (m_root) {
2716+
m_dallocator.free(m_root);
2717+
}
2718+
}
2719+
2720+
void* SingleChunkArena::alloc (std::size_t sz)
2721+
{
2722+
amrex::ignore_unused(m_size);
2723+
auto* p = (void*)m_free;
2724+
AMREX_ASSERT(sz <= m_size && ((m_free-m_root)+sz <= m_size));
2725+
m_free += sz;
2726+
return p;
2727+
}
2728+
2729+
void SingleChunkArena::free (void* /*pt*/) {}
2730+
2731+
bool SingleChunkArena::isDeviceAccessible () const {
2732+
return m_dallocator.arena()->isDeviceAccessible();
2733+
}
2734+
2735+
bool SingleChunkArena::isHostAccessible () const {
2736+
return m_dallocator.arena()->isHostAccessible();
2737+
}
2738+
2739+
bool SingleChunkArena::isManaged () const {
2740+
return m_dallocator.arena()->isManaged();
2741+
}
2742+
2743+
bool SingleChunkArena::isDevice () const {
2744+
return m_dallocator.arena()->isDevice();
2745+
}
2746+
2747+
bool SingleChunkArena::isPinned () const {
2748+
return m_dallocator.arena()->isPinned();
2749+
}
2750+
}
2751+
26992752
int nComp (FabArrayBase const& fa)
27002753
{
27012754
return fa.nComp();

Src/Base/AMReX_FabFactory.H

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <AMReX_MakeType.H>
99
#include <AMReX_Vector.H>
1010
#include <AMReX_Arena.H>
11+
#include <AMReX_TypeTraits.H>
1112

1213
namespace amrex
1314
{
@@ -59,8 +60,14 @@ public:
5960
AMREX_NODISCARD
6061
virtual FAB* create_alias (FAB const& /*rhs*/, int /*scomp*/, int /*ncomp*/) const { return nullptr; }
6162
virtual void destroy (FAB* fab) const = 0;
62-
AMREX_NODISCARD
63-
virtual FabFactory<FAB>* clone () const = 0;
63+
AMREX_NODISCARD virtual FabFactory<FAB>* clone () const = 0;
64+
AMREX_NODISCARD virtual Long nBytes (const Box& box, int ncomps, int /*box_index*/) const {
65+
if constexpr (IsBaseFab_v<FAB>) {
66+
return box.numPts() * ncomps * Long(sizeof(typename FAB::value_type));
67+
} else {
68+
return -1;
69+
}
70+
}
6471
};
6572

6673
template <class FAB>

Src/EB/AMReX_MultiCutFab.H

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,6 @@ private:
144144

145145
FabArray<CutFab> m_data;
146146
const FabArray<EBCellFlagFab>* m_cellflags = nullptr;
147-
148-
void remove ();
149147
};
150148

151149
}

0 commit comments

Comments
 (0)