Skip to content

Commit

Permalink
Merge branch 'fix_padded_dist_ktensor' into 'master'
Browse files (browse the repository at this point in the history)
Fix several DistKTensorUpdate approaches when ktensors are padded

See merge request etphipp/genten!50
  • Loading branch information
etphipp committed Oct 25, 2024
2 parents c077d83 + b4f49db commit 6107310
Show file tree
Hide file tree
Showing 19 changed files with 131 additions and 70 deletions.
4 changes: 2 additions & 2 deletions data/aminoacid-cpals-dense.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"initial-guess": "rand",
"distributed-guess": "serial",
"seed": 12345,
Expand All @@ -35,7 +35,7 @@
},
"iterations":
{
"value": 8,
"value": 5,
"absolute-tolerance": 1
}
}
Expand Down
4 changes: 2 additions & 2 deletions data/aminoacid-cpals.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "serial",
Expand All @@ -33,7 +33,7 @@
},
"iterations":
{
"value": 8,
"value": 5,
"absolute-tolerance": 1
}
}
Expand Down
4 changes: 2 additions & 2 deletions data/aminoacid-cpopt-lbfgsb.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "serial",
Expand All @@ -30,7 +30,7 @@
},
"iterations":
{
"value": 39,
"value": 47,
"absolute-tolerance": 2
}
}
Expand Down
4 changes: 2 additions & 2 deletions data/aminoacid-cpopt-rol-hess.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "serial",
Expand All @@ -30,7 +30,7 @@
},
"iterations":
{
"value": 19,
"value": 20,
"absolute-tolerance": 3
}
}
Expand Down
4 changes: 2 additions & 2 deletions data/aminoacid-cpopt-rol.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "serial",
Expand All @@ -30,7 +30,7 @@
},
"iterations":
{
"value": 63,
"value": 68,
"absolute-tolerance": 2
}
}
Expand Down
2 changes: 1 addition & 1 deletion data/aminoacid-gcpfed.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "parallel-drew",
Expand Down
2 changes: 1 addition & 1 deletion data/aminoacid-gcpopt-lbfgsb.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "serial",
Expand Down
4 changes: 2 additions & 2 deletions data/aminoacid-gcpopt-rol.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "serial",
Expand All @@ -32,7 +32,7 @@
},
"iterations":
{
"value": 80,
"value": 68,
"absolute-tolerance": 2
}
}
Expand Down
4 changes: 2 additions & 2 deletions data/aminoacid-gcpsgd-dense.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"initial-guess": "rand",
"distributed-guess": "parallel-drew",
"seed": 12345,
Expand Down Expand Up @@ -44,7 +44,7 @@
},
"iterations":
{
"value": 73,
"value": 75,
"absolute-tolerance": 20
}
}
Expand Down
8 changes: 4 additions & 4 deletions data/aminoacid-gcpsgd.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"k-tensor":
{
"rank": 16,
"rank": 15,
"output-file": "aminoacid.ktn",
"initial-guess": "rand",
"distributed-guess": "parallel-drew",
Expand All @@ -29,7 +29,7 @@
"seed": 31415,
"fuse": false,
"hash": true,
"gnzs": 100,
"gnzs": 150,
"gzs": 0,
"fnzs": 10000,
"fzs": 0,
Expand All @@ -44,8 +44,8 @@
},
"iterations":
{
"value": 73,
"absolute-tolerance": 20
"value": 60,
"absolute-tolerance": 30
}
}
}
27 changes: 12 additions & 15 deletions src/Genten_DistKtensorUpdate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,13 @@ KtensorOneSidedUpdate(const DistTensor<ExecSpace>& X,
parallel = pmap != nullptr && pmap->gridSize() > 1;
if (parallel) {
const unsigned nd = u.ndims();
padded.resize(nd);
sizes.resize(nd);
sizes_r.resize(nd);
offsets.resize(nd);
offsets_r.resize(nd);
for (unsigned n=0; n<nd; ++n) {
padded[n] = u[n].isPadded();
const unsigned np = pmap->subCommSize(n);

// Get number of rows on each processor
Expand Down Expand Up @@ -170,7 +172,9 @@ createOverlapKtensor(const KtensorT<ExecSpace>& u) const
for (unsigned n=0; n<nd; ++n) {
const unsigned np = offsets[n].size();
const ttb_indx nrows = offsets[n][np-1]+sizes[n][np-1];
FacMatrixT<ExecSpace> mat(nrows, nc);
// Create factor matrix to have same padding as u[n] for consistent
// dimensions in import/export
FacMatrixT<ExecSpace> mat(nrows, nc, nullptr, true, padded[n]);
u_overlapped.set_factor(n, mat);
}
u_overlapped.setProcessorMap(u.getProcessorMap());
Expand Down Expand Up @@ -785,17 +789,18 @@ KtensorTwoSidedUpdate<ExecSpace>::
KtensorTwoSidedUpdate(const DistTensor<ExecSpace>& X,
const KtensorT<ExecSpace>& u,
const AlgParams& a) :
pmap(u.getProcessorMap()), algParams(a), nc(u.ncomponents())
pmap(u.getProcessorMap()), algParams(a), nd(u.ndims()), nc(u.ncomponents())
{
parallel = pmap != nullptr && pmap->gridSize() > 1;
if (parallel) {
const unsigned nd = u.ndims();
padded.resize(nd);
sizes.resize(nd);
sizes_r.resize(nd);
offsets.resize(nd);
offsets_r.resize(nd);
offsets_dev.resize(nd);
for (unsigned n=0; n<nd; ++n) {
padded[n] = u[n].isPadded();
const unsigned np = pmap->subCommSize(n);

// Get number of rows on each processor
Expand Down Expand Up @@ -839,7 +844,6 @@ extractRowRecvsHost()
{
GENTEN_START_TIMER("extract row recvs host");
const ttb_indx nnz = X_sparse.nnz();
const unsigned nd = X_sparse.ndims();
if (maps.empty()) {
maps.resize(nd);
row_recvs_for_proc.resize(nd);
Expand Down Expand Up @@ -953,7 +957,6 @@ extractRowRecvsDevice()

GENTEN_START_TIMER("extract row recvs device");
const ttb_indx nnz = X_sparse.nnz();
const unsigned nd = X_sparse.ndims();
const unsigned nc_ = nc;
if (num_row_recvs_dev.empty()) {
num_row_recvs_dev.resize(nd);
Expand Down Expand Up @@ -1073,7 +1076,6 @@ updateTensor(const DistTensor<ExecSpace>& X)
if (sparse && parallel) {
GENTEN_START_TIMER("initialize");
X_sparse = X.getSptensor();
const unsigned nd = X_sparse.ndims();
if (num_row_sends.empty()) {
num_row_sends.resize(nd);
num_row_recvs.resize(nd);
Expand Down Expand Up @@ -1186,16 +1188,16 @@ createOverlapKtensor(const KtensorT<ExecSpace>& u) const
if (!parallel)
return u;

const unsigned nd = u.ndims();
const unsigned nc = u.ncomponents();
KtensorT<ExecSpace> u_overlapped = KtensorT<ExecSpace>(nc, nd);
for (unsigned n=0; n<nd; ++n) {
const unsigned np = offsets[n].size();
const ttb_indx nrows = offsets[n][np-1]+sizes[n][np-1];
FacMatrixT<ExecSpace> mat(nrows, nc);
// Create factor matrix to have same padding as u[n] for consistent
// dimensions in import/export
FacMatrixT<ExecSpace> mat(nrows, nc, nullptr, true, padded[n]);
u_overlapped.set_factor(n, mat);
}
u_overlapped.setProcessorMap(u.getProcessorMap());
u_overlapped.setProcessorMap(pmap);
return u_overlapped;
}

Expand All @@ -1206,7 +1208,6 @@ initOverlapKtensor(KtensorT<ExecSpace>& u) const
{
GENTEN_TIME_MONITOR("k-tensor init");
if (parallel && sparse) {
const unsigned nd = u.ndims();
const unsigned nc_ = nc;
for (unsigned n=0; n<nd; ++n) {
const unsigned np = pmap->subCommSize(n);
Expand Down Expand Up @@ -1324,7 +1325,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
doImportSparse(const KtensorT<ExecSpace>& u_overlapped,
const KtensorT<ExecSpace>& u) const
{
const unsigned nd = u.ndims();
for (unsigned n=0; n<nd; ++n) {
doImportSparse(u_overlapped, u, n);
}
Expand Down Expand Up @@ -1393,7 +1393,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
doImportDense(const KtensorT<ExecSpace>& u_overlapped,
const KtensorT<ExecSpace>& u) const
{
const unsigned nd = u.ndims();
for (unsigned n=0; n<nd; ++n)
doImportDense(u_overlapped, u, n);
}
Expand Down Expand Up @@ -1421,7 +1420,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
doExportSparse(const KtensorT<ExecSpace>& u,
const KtensorT<ExecSpace>& u_overlapped) const
{
const unsigned nd = u.ndims();
for (unsigned n=0; n<nd; ++n) {
doExportSparse(u, u_overlapped, n);
}
Expand Down Expand Up @@ -1491,7 +1489,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
doExportDense(const KtensorT<ExecSpace>& u,
const KtensorT<ExecSpace>& u_overlapped) const
{
const unsigned nd = u.ndims();
for (unsigned n=0; n<nd; ++n)
doExportDense(u, u_overlapped, n);
}
Expand Down
11 changes: 10 additions & 1 deletion src/Genten_DistKtensorUpdate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,17 +338,21 @@ class KtensorAllGatherReduceUpdate : public DistKtensorUpdate<ExecSpace> {
std::vector< std::vector<int> > offsets_r;
std::vector< std::vector<int> > sizes_r;

std::vector<unsigned> padded;

public:
KtensorAllGatherReduceUpdate(const KtensorT<ExecSpace>& u) :
pmap(u.getProcessorMap())
{
const unsigned nd = u.ndims();
padded.resize(nd);
sizes.resize(nd);
sizes_r.resize(nd);
offsets.resize(nd);
offsets_r.resize(nd);
for (unsigned n=0; n<nd; ++n) {
if (pmap != nullptr) {
padded[n] = u[n].isPadded();
const unsigned np = pmap->subCommSize(n);

// Get number of rows on each processor
Expand Down Expand Up @@ -401,7 +405,9 @@ class KtensorAllGatherReduceUpdate : public DistKtensorUpdate<ExecSpace> {
for (unsigned n=0; n<nd; ++n) {
const unsigned np = offsets[n].size();
const ttb_indx nrows = offsets[n][np-1]+sizes[n][np-1];
FacMatrixT<ExecSpace> mat(nrows, nc);
// Create factor matrix to have same padding as u[n] for consistent
// dimensions in import/export
FacMatrixT<ExecSpace> mat(nrows, nc, nullptr, true, padded[n]);
u_overlapped.set_factor(n, mat);
}
u_overlapped.setProcessorMap(u.getProcessorMap());
Expand Down Expand Up @@ -523,6 +529,7 @@ class KtensorOneSidedUpdate :

bool sparse;
SptensorT<ExecSpace> X_sparse;
std::vector<unsigned> padded;

public:
using unordered_map_type =
Expand Down Expand Up @@ -639,7 +646,9 @@ class KtensorTwoSidedUpdate :

bool sparse;
SptensorT<ExecSpace> X_sparse;
unsigned nd;
unsigned nc;
std::vector<unsigned> padded;

using offsets_type = Kokkos::View<int*,ExecSpace>;
// Set the host execution space to be ExecSpace if it is a host execution
Expand Down
Loading

0 comments on commit 6107310

Please sign in to comment.