diff --git a/data/aminoacid-cpals-dense.json b/data/aminoacid-cpals-dense.json
index 15462124a0..67a078404d 100644
--- a/data/aminoacid-cpals-dense.json
+++ b/data/aminoacid-cpals-dense.json
@@ -9,7 +9,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "initial-guess": "rand",
         "distributed-guess": "serial",
         "seed": 12345,
@@ -35,7 +35,7 @@
         },
         "iterations":
         {
-            "value": 8,
+            "value": 5,
             "absolute-tolerance": 1
         }
     }
diff --git a/data/aminoacid-cpals.json b/data/aminoacid-cpals.json
index 2249e109b0..ddaec3d61b 100644
--- a/data/aminoacid-cpals.json
+++ b/data/aminoacid-cpals.json
@@ -8,7 +8,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "serial",
@@ -33,7 +33,7 @@
         },
         "iterations":
         {
-            "value": 8,
+            "value": 5,
             "absolute-tolerance": 1
         }
     }
diff --git a/data/aminoacid-cpopt-lbfgsb.json b/data/aminoacid-cpopt-lbfgsb.json
index 8d0aec3a77..3b9af43454 100644
--- a/data/aminoacid-cpopt-lbfgsb.json
+++ b/data/aminoacid-cpopt-lbfgsb.json
@@ -8,7 +8,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "serial",
@@ -30,7 +30,7 @@
         },
         "iterations":
         {
-            "value": 39,
+            "value": 47,
             "absolute-tolerance": 2
         }
     }
diff --git a/data/aminoacid-cpopt-rol-hess.json b/data/aminoacid-cpopt-rol-hess.json
index 26eb9da357..05c5f4d39d 100644
--- a/data/aminoacid-cpopt-rol-hess.json
+++ b/data/aminoacid-cpopt-rol-hess.json
@@ -8,7 +8,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "serial",
@@ -30,7 +30,7 @@
         },
         "iterations":
         {
-            "value": 19,
+            "value": 20,
             "absolute-tolerance": 3
         }
     }
diff --git a/data/aminoacid-cpopt-rol.json b/data/aminoacid-cpopt-rol.json
index cb21daf372..b407e27a04 100644
--- a/data/aminoacid-cpopt-rol.json
+++ b/data/aminoacid-cpopt-rol.json
@@ -8,7 +8,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "serial",
@@ -30,7 +30,7 @@
         },
         "iterations":
         {
-            "value": 63,
+            "value": 68,
             "absolute-tolerance": 2
         }
     }
diff --git a/data/aminoacid-gcpfed.json b/data/aminoacid-gcpfed.json
index 4782d8c6f9..b3e36b9329 100644
--- a/data/aminoacid-gcpfed.json
+++ b/data/aminoacid-gcpfed.json
@@ -7,7 +7,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "parallel-drew",
diff --git a/data/aminoacid-gcpopt-lbfgsb.json b/data/aminoacid-gcpopt-lbfgsb.json
index da2014ef35..7997006093 100644
--- a/data/aminoacid-gcpopt-lbfgsb.json
+++ b/data/aminoacid-gcpopt-lbfgsb.json
@@ -8,7 +8,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "serial",
diff --git a/data/aminoacid-gcpopt-rol.json b/data/aminoacid-gcpopt-rol.json
index 6acda4f9f2..6105822244 100644
--- a/data/aminoacid-gcpopt-rol.json
+++ b/data/aminoacid-gcpopt-rol.json
@@ -8,7 +8,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "serial",
@@ -32,7 +32,7 @@
         },
         "iterations":
         {
-            "value": 80,
+            "value": 68,
             "absolute-tolerance": 2
         }
     }
diff --git a/data/aminoacid-gcpsgd-dense.json b/data/aminoacid-gcpsgd-dense.json
index 2af2fb34d6..0cb971b39c 100644
--- a/data/aminoacid-gcpsgd-dense.json
+++ b/data/aminoacid-gcpsgd-dense.json
@@ -9,7 +9,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "initial-guess": "rand",
         "distributed-guess": "parallel-drew",
         "seed": 12345,
@@ -44,7 +44,7 @@
         },
         "iterations":
         {
-            "value": 73,
+            "value": 75,
             "absolute-tolerance": 20
         }
     }
diff --git a/data/aminoacid-gcpsgd.json b/data/aminoacid-gcpsgd.json
index 3ca0c4c61e..883ab3daeb 100644
--- a/data/aminoacid-gcpsgd.json
+++ b/data/aminoacid-gcpsgd.json
@@ -8,7 +8,7 @@
     },
     "k-tensor":
     {
-        "rank": 16,
+        "rank": 15,
         "output-file": "aminoacid.ktn",
         "initial-guess": "rand",
         "distributed-guess": "parallel-drew",
@@ -29,7 +29,7 @@
         "seed": 31415,
         "fuse": false,
         "hash": true,
-        "gnzs": 100,
+        "gnzs": 150,
         "gzs": 0,
         "fnzs": 10000,
         "fzs": 0,
@@ -44,8 +44,8 @@
         },
         "iterations":
         {
-            "value": 73,
-            "absolute-tolerance": 20
+            "value": 60,
+            "absolute-tolerance": 30
         }
     }
 }
diff --git a/src/Genten_DistKtensorUpdate.cpp b/src/Genten_DistKtensorUpdate.cpp
index ee6825a090..9b6121040f 100644
--- a/src/Genten_DistKtensorUpdate.cpp
+++ b/src/Genten_DistKtensorUpdate.cpp
@@ -52,11 +52,13 @@ KtensorOneSidedUpdate(const DistTensor<ExecSpace>& X,
   parallel = pmap != nullptr && pmap->gridSize() > 1;
   if (parallel) {
     const unsigned nd = u.ndims();
+    padded.resize(nd);
     sizes.resize(nd);
     sizes_r.resize(nd);
     offsets.resize(nd);
     offsets_r.resize(nd);
     for (unsigned n=0; n<nd; ++n) {
+      padded[n] = u[n].isPadded();
       const unsigned np = pmap->subCommSize(n);
 
       // Get number of rows on each processor
@@ -170,7 +172,9 @@ createOverlapKtensor(const KtensorT<ExecSpace>& u) const
   for (unsigned n=0; n<nd; ++n) {
     const unsigned np = offsets[n].size();
     const ttb_indx nrows = offsets[n][np-1]+sizes[n][np-1];
-    FacMatrixT<ExecSpace> mat(nrows, nc);
+    // Create factor matrix to have same padding as u[n] for consistent
+    // dimensions in import/export
+    FacMatrixT<ExecSpace> mat(nrows, nc, nullptr, true, padded[n]);
     u_overlapped.set_factor(n, mat);
   }
   u_overlapped.setProcessorMap(u.getProcessorMap());
@@ -785,17 +789,18 @@ KtensorTwoSidedUpdate<ExecSpace>::
 KtensorTwoSidedUpdate(const DistTensor<ExecSpace>& X,
                       const KtensorT<ExecSpace>& u,
                       const AlgParams& a) :
-  pmap(u.getProcessorMap()), algParams(a), nc(u.ncomponents())
+  pmap(u.getProcessorMap()), algParams(a), nd(u.ndims()), nc(u.ncomponents())
 {
   parallel = pmap != nullptr && pmap->gridSize() > 1;
   if (parallel) {
-    const unsigned nd = u.ndims();
+    padded.resize(nd);
     sizes.resize(nd);
     sizes_r.resize(nd);
     offsets.resize(nd);
     offsets_r.resize(nd);
     offsets_dev.resize(nd);
     for (unsigned n=0; n<nd; ++n) {
+      padded[n] = u[n].isPadded();
       const unsigned np = pmap->subCommSize(n);
 
       // Get number of rows on each processor
@@ -839,7 +844,6 @@ extractRowRecvsHost()
 {
   GENTEN_START_TIMER("extract row recvs host");
   const ttb_indx nnz = X_sparse.nnz();
-  const unsigned nd = X_sparse.ndims();
   if (maps.empty()) {
     maps.resize(nd);
     row_recvs_for_proc.resize(nd);
@@ -953,7 +957,6 @@ extractRowRecvsDevice()
 
   GENTEN_START_TIMER("extract row recvs device");
   const ttb_indx nnz = X_sparse.nnz();
-  const unsigned nd = X_sparse.ndims();
   const unsigned nc_ = nc;
   if (num_row_recvs_dev.empty()) {
     num_row_recvs_dev.resize(nd);
@@ -1073,7 +1076,6 @@ updateTensor(const DistTensor<ExecSpace>& X)
   if (sparse && parallel) {
     GENTEN_START_TIMER("initialize");
     X_sparse = X.getSptensor();
-    const unsigned nd = X_sparse.ndims();
     if (num_row_sends.empty()) {
       num_row_sends.resize(nd);
       num_row_recvs.resize(nd);
@@ -1186,16 +1188,16 @@ createOverlapKtensor(const KtensorT<ExecSpace>& u) const
   if (!parallel)
     return u;
 
-  const unsigned nd = u.ndims();
-  const unsigned nc = u.ncomponents();
   KtensorT<ExecSpace> u_overlapped = KtensorT<ExecSpace>(nc, nd);
   for (unsigned n=0; n<nd; ++n) {
     const unsigned np = offsets[n].size();
     const ttb_indx nrows = offsets[n][np-1]+sizes[n][np-1];
-    FacMatrixT<ExecSpace> mat(nrows, nc);
+    // Create factor matrix to have same padding as u[n] for consistent
+    // dimensions in import/export
+    FacMatrixT<ExecSpace> mat(nrows, nc, nullptr, true, padded[n]);
     u_overlapped.set_factor(n, mat);
   }
-  u_overlapped.setProcessorMap(u.getProcessorMap());
+  u_overlapped.setProcessorMap(pmap);
   return u_overlapped;
 }
 
@@ -1206,7 +1208,6 @@ initOverlapKtensor(KtensorT<ExecSpace>& u) const
 {
   GENTEN_TIME_MONITOR("k-tensor init");
   if (parallel && sparse) {
-    const unsigned nd = u.ndims();
     const unsigned nc_ = nc;
     for (unsigned n=0; n<nd; ++n) {
       const unsigned np = pmap->subCommSize(n);
@@ -1324,7 +1325,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
 doImportSparse(const KtensorT<ExecSpace>& u_overlapped,
                const KtensorT<ExecSpace>& u) const
 {
-  const unsigned nd = u.ndims();
   for (unsigned n=0; n<nd; ++n) {
     doImportSparse(u_overlapped, u, n);
   }
@@ -1393,7 +1393,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
 doImportDense(const KtensorT<ExecSpace>& u_overlapped,
               const KtensorT<ExecSpace>& u) const
 {
-  const unsigned nd = u.ndims();
   for (unsigned n=0; n<nd; ++n)
     doImportDense(u_overlapped, u, n);
 }
@@ -1421,7 +1420,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
 doExportSparse(const KtensorT<ExecSpace>& u,
                const KtensorT<ExecSpace>& u_overlapped) const
 {
-  const unsigned nd = u.ndims();
   for (unsigned n=0; n<nd; ++n) {
     doExportSparse(u, u_overlapped, n);
   }
@@ -1491,7 +1489,6 @@ KtensorTwoSidedUpdate<ExecSpace>::
 doExportDense(const KtensorT<ExecSpace>& u,
               const KtensorT<ExecSpace>& u_overlapped) const
 {
-  const unsigned nd = u.ndims();
   for (unsigned n=0; n<nd; ++n)
     doExportDense(u, u_overlapped, n);
 }
diff --git a/src/Genten_DistKtensorUpdate.hpp b/src/Genten_DistKtensorUpdate.hpp
index ea7e5f1a55..4b83cbc5a1 100644
--- a/src/Genten_DistKtensorUpdate.hpp
+++ b/src/Genten_DistKtensorUpdate.hpp
@@ -338,17 +338,21 @@ class KtensorAllGatherReduceUpdate : public DistKtensorUpdate<ExecSpace> {
   std::vector< std::vector<int> > offsets_r;
   std::vector< std::vector<int> > sizes_r;
 
+  std::vector<unsigned> padded;
+
 public:
   KtensorAllGatherReduceUpdate(const KtensorT<ExecSpace>& u) :
     pmap(u.getProcessorMap())
   {
     const unsigned nd = u.ndims();
+    padded.resize(nd);
     sizes.resize(nd);
     sizes_r.resize(nd);
     offsets.resize(nd);
     offsets_r.resize(nd);
     for (unsigned n=0; n<nd; ++n) {
       if (pmap != nullptr) {
+        padded[n] = u[n].isPadded();
         const unsigned np = pmap->subCommSize(n);
 
         // Get number of rows on each processor
@@ -401,7 +405,9 @@ class KtensorAllGatherReduceUpdate : public DistKtensorUpdate<ExecSpace> {
     for (unsigned n=0; n<nd; ++n) {
       const unsigned np = offsets[n].size();
       const ttb_indx nrows = offsets[n][np-1]+sizes[n][np-1];
-      FacMatrixT<ExecSpace> mat(nrows, nc);
+      // Create factor matrix to have same padding as u[n] for consistent
+      // dimensions in import/export
+      FacMatrixT<ExecSpace> mat(nrows, nc, nullptr, true, padded[n]);
       u_overlapped.set_factor(n, mat);
     }
     u_overlapped.setProcessorMap(u.getProcessorMap());
@@ -523,6 +529,7 @@ class KtensorOneSidedUpdate :
 
   bool sparse;
   SptensorT<ExecSpace> X_sparse;
+  std::vector<unsigned> padded;
 
 public:
   using unordered_map_type =
@@ -639,7 +646,9 @@ class KtensorTwoSidedUpdate :
 
   bool sparse;
   SptensorT<ExecSpace> X_sparse;
+  unsigned nd;
   unsigned nc;
+  std::vector<unsigned> padded;
 
   using offsets_type = Kokkos::View<int*,ExecSpace>;
   // Set the host execution space to be ExecSpace if it is a host execution
diff --git a/src/Genten_FacMatrix.cpp b/src/Genten_FacMatrix.cpp
index 871a83c40a..c5e8d6e49f 100644
--- a/src/Genten_FacMatrix.cpp
+++ b/src/Genten_FacMatrix.cpp
@@ -48,6 +48,7 @@
 #include "Genten_FacMatArray.hpp"
 #include "Genten_MathLibs_Wpr.hpp"
 #include "Genten_portability.hpp"
+#include "Kokkos_Random.hpp"
 #include "CMakeInclude.h"
 #include <algorithm>     // for std::max with MSVC compiler
 #include <cstring>
@@ -59,13 +60,13 @@
 template <typename ExecSpace>
 Genten::FacMatrixT<ExecSpace>::
 FacMatrixT(ttb_indx m, ttb_indx n, const ProcessorMap::FacMap* pmap_,
-           const bool zero) :
+           const bool zero, const bool pad) :
   pmap(pmap_)
 {
   // Don't use padding if Cuda, HIP or SYCL is the default execution space, so factor
   // matrices allocated on the host have the same shape.  We really need a
   // better way to do this.
-  if (Genten::is_gpu_space<DefaultExecutionSpace>::value) {
+  if (!pad || Genten::is_gpu_space<DefaultExecutionSpace>::value) {
     if (zero)
       data = view_type("Genten::FacMatrix::data",m,n);
     else
@@ -86,12 +87,12 @@ FacMatrixT(ttb_indx m, ttb_indx n, const ProcessorMap::FacMap* pmap_,
 template <typename ExecSpace>
 Genten::FacMatrixT<ExecSpace>::
 FacMatrixT(ttb_indx m, ttb_indx n, const ttb_real * cvec,
-           const ProcessorMap::FacMap* pmap_) : pmap(pmap_)
+           const ProcessorMap::FacMap* pmap_, const bool pad) : pmap(pmap_)
 {
   // Don't use padding if Cuda, HIP or SYCL is the default execution space, so factor
   // matrices allocated on the host have the same shape.  We really need a
   // better way to do this.
-  if (Genten::is_gpu_space<DefaultExecutionSpace>::value)
+  if (!pad || Genten::is_gpu_space<DefaultExecutionSpace>::value)
     data = view_type(Kokkos::view_alloc("Genten::FacMatrix::data",
                                         Kokkos::WithoutInitializing),m,n);
   else
@@ -112,8 +113,13 @@ template <typename ExecSpace>
 void Genten::FacMatrixT<ExecSpace>::
 rand() const
 {
-  auto data_1d = make_data_1d();
-  data_1d.rand();
+  // Don't fill as 1-D to get consistent values with and without padding
+
+  //auto data_1d = make_data_1d();
+  //data_1d.rand();
+
+  RandomMT cRNG(0);
+  scatter(false, false, cRNG);
 }
 
 template <typename ExecSpace>
@@ -122,8 +128,36 @@ scatter (const bool bUseMatlabRNG,
          const bool bUseParallelRNG,
          RandomMT &  cRMT) const
 {
-  auto data_1d = make_data_1d();
-  data_1d.scatter (bUseMatlabRNG, bUseParallelRNG, cRMT);
+  // Don't fill as 1-D to get consistent values with and without padding
+
+  //auto data_1d = make_data_1d();
+  //data_1d.scatter (bUseMatlabRNG, bUseParallelRNG, cRMT);
+
+  if (bUseParallelRNG)
+  {
+    const ttb_indx seed = cRMT.genrnd_int32();
+    Kokkos::Random_XorShift64_Pool<ExecSpace> rand_pool(seed);
+    ttb_real min_val = 0.0;
+    ttb_real max_val = 1.0;
+    Kokkos::fill_random(data, rand_pool, min_val, max_val);
+  }
+  else
+  {
+    const ttb_indx nrows = data.extent(0);
+    const ttb_indx ncols = data.extent(1);
+    auto d = create_mirror_view(data);
+    for (ttb_indx i=0; i<nrows; ++i) {
+      for (ttb_indx j=0; j<ncols; ++j) {
+        ttb_real  dNextRan;
+        if (bUseMatlabRNG)
+          dNextRan = cRMT.genMatlabMT();
+        else
+          dNextRan = cRMT.genrnd_double();
+        d(i,j) = dNextRan;
+      }
+    }
+    deep_copy(data, d);
+  }
 }
 
 template <typename ExecSpace>
diff --git a/src/Genten_FacMatrix.hpp b/src/Genten_FacMatrix.hpp
index 4a800eff93..f986ba420a 100644
--- a/src/Genten_FacMatrix.hpp
+++ b/src/Genten_FacMatrix.hpp
@@ -104,7 +104,7 @@ class FacMatrixT
      *  @param[in] n  Number of columns, should equal the number of components
      *                in the Ktensor.
      */
-    FacMatrixT(ttb_indx m, ttb_indx n, const ProcessorMap::FacMap* pmap_ = nullptr, const bool zero = true);
+    FacMatrixT(ttb_indx m, ttb_indx n, const ProcessorMap::FacMap* pmap_ = nullptr, const bool zero = true, const bool pad = true);
 
     //! Constructor to create a Factor Matrix of Size M x N using the
     // given view
@@ -117,7 +117,8 @@ class FacMatrixT
     // given data vector CVEC which is assumed to be stored *columnwise*.
     // Not currently used; therefore not part of doxygen API.
     FacMatrixT(ttb_indx m, ttb_indx n, const ttb_real * cvec,
-               const ProcessorMap::FacMap* pmap_ = nullptr);
+               const ProcessorMap::FacMap* pmap_ = nullptr,
+               const bool pad = true);
 
     //! @brief Create matrix from supplied view
     KOKKOS_INLINE_FUNCTION
@@ -245,6 +246,12 @@ class FacMatrixT
     void setProcessorMap(const ProcessorMap::FacMap* pmap_) { pmap = pmap_; }
     const ProcessorMap::FacMap* getProcessorMap() const { return pmap; }
 
+     KOKKOS_INLINE_FUNCTION
+     bool isPadded() const
+    {
+      return data.extent(0)*data.extent(1) != data.span();
+    }
+
     /** @} */
 
     /** ----------------------------------------------------------------
diff --git a/src/Genten_GCP_SGD.cpp b/src/Genten_GCP_SGD.cpp
index 5a435ab343..d8fe42f5e0 100644
--- a/src/Genten_GCP_SGD.cpp
+++ b/src/Genten_GCP_SGD.cpp
@@ -176,9 +176,24 @@ namespace Genten {
     const ttb_indx printIter = print_itn ? algParams.printitn : 0;
     const bool compute_fit = algParams.compute_fit;
 
-    // Create sampler
+    // Create iterator
+    Impl::GCP_SGD_Iter<TensorType,LossFunction> *itp =
+      Impl::createIter<LossFunction>(
+        X, u0, hist, penalty, mode_beg, mode_end, algParams);
+    Impl::GCP_SGD_Iter<TensorType,LossFunction>& it = *itp;
+
+    // Get vector/Ktensor for current solution (this is a view of the data)
+    VectorType u = it.getSolution();
+    KtensorT<exec_space> ut = u.getKtensor();
+    ut.setProcessorMap(pmap);
+
+    // Copy Ktensor for restoring previous solution
+    VectorType u_prev = u.clone();
+    u_prev.set(u);
+
+    // Create sampler -- must use u from iterator to get the right padding
     Sampler<TensorType,LossFunction> *sampler =
-      createSampler<LossFunction>(X, u0, algParams);
+      createSampler<LossFunction>(X, ut, algParams);
 
     // Create annealer
     auto annealer = getAnnealer(algParams);
@@ -221,21 +236,6 @@ namespace Genten {
     // Start timer for total execution time of the algorithm.
     timer.start(timer_sgd);
 
-    // Create iterator
-    Impl::GCP_SGD_Iter<TensorType,LossFunction> *itp =
-      Impl::createIter<LossFunction>(
-        X, u0, hist, penalty, mode_beg, mode_end, algParams);
-    Impl::GCP_SGD_Iter<TensorType,LossFunction>& it = *itp;
-
-    // Get vector/Ktensor for current solution (this is a view of the data)
-    VectorType u = it.getSolution();
-    KtensorT<exec_space> ut = u.getKtensor();
-    ut.setProcessorMap(pmap);
-
-    // Copy Ktensor for restoring previous solution
-    VectorType u_prev = u.clone();
-    u_prev.set(u);
-
     // Initialize sampler (sorting, hashing, ...)
     timer.start(timer_sort);
     Kokkos::Random_XorShift64_Pool<exec_space> rand_pool(seed);
diff --git a/src/rol/Genten_CP_RolObjective.hpp b/src/rol/Genten_CP_RolObjective.hpp
index b261ffa5a4..2436c167ca 100644
--- a/src/rol/Genten_CP_RolObjective.hpp
+++ b/src/rol/Genten_CP_RolObjective.hpp
@@ -121,8 +121,12 @@ namespace Genten {
   CP_RolObjective(const tensor_type& x,
                   const ktensor_type& m,
                   const AlgParams& algParams,
-                  PerfHistory& h) : M(m), cp_model(x, m, algParams), history(h),
-                                    timer(1)
+                  PerfHistory& h) :
+    M(m),
+    // create and pass a ktensor to cp_model through the KokkosVector so to
+    // ensure the same padding as vectors that will be used in the algorithm
+    cp_model(x, vector_type(m).getKtensor(), algParams), history(h),
+    timer(1)
     {
 #if COPY_KTENSOR
       const ttb_indx nc = M.ncomponents();
diff --git a/src/rol/Genten_GCP_Opt_Rol.cpp b/src/rol/Genten_GCP_Opt_Rol.cpp
index 0ccc5dba28..8bdf4dd24c 100644
--- a/src/rol/Genten_GCP_Opt_Rol.cpp
+++ b/src/rol/Genten_GCP_Opt_Rol.cpp
@@ -170,7 +170,7 @@ void gcp_opt_rol(const TensorT<ExecSpace>& x, KtensorT<ExecSpace>& u,
   if (algParams.printitn > 0) {
     if (algParams.compute_fit) {
       stream << "Final fit = " << std::setprecision(3) << std::scientific
-             <<  objective->computeFit(u) << std::endl;
+             <<  objective->computeFit(z->getKtensor()) << std::endl;
     }
     stream << "Total time = " << std::setprecision(2) << std::scientific
            << history.lastEntry().cum_time << std::endl
diff --git a/src/rol/Genten_GCP_RolObjective.cpp b/src/rol/Genten_GCP_RolObjective.cpp
index 1ee57b9cac..8a67127a9a 100644
--- a/src/rol/Genten_GCP_RolObjective.cpp
+++ b/src/rol/Genten_GCP_RolObjective.cpp
@@ -119,7 +119,11 @@ namespace Genten {
                    const loss_function_type& func,
                    const AlgParams& algParams,
                    PerfHistory& h) :
-    M(m), gcp_model(x, m, func, algParams), history(h), timer(1),
+    M(m),
+    // create and pass a ktensor to gcp_model through the KokkosVector so to
+    // ensure the same padding as vectors that will be used in the algorithm
+    gcp_model(x, vector_type(m).getKtensor(), func, algParams),
+    history(h), timer(1),
     compute_fit(algParams.compute_fit)
   {
 #if COPY_KTENSOR
@@ -142,6 +146,11 @@ namespace Genten {
   {
     GENTEN_TIME_MONITOR("GCP_RolObjective::update");
 
+    const vector_type& x = dynamic_cast<const vector_type&>(xx);
+
+    // Convert input vector to a Ktensor
+    M = x.getKtensor();
+
     gcp_model.update(M);
 
     // If the step was accepted, record the time and add a new working entry
diff --git a/src/rol/Genten_RolKokkosVector.hpp b/src/rol/Genten_RolKokkosVector.hpp
index b37489763e..62e8bfbd0d 100644
--- a/src/rol/Genten_RolKokkosVector.hpp
+++ b/src/rol/Genten_RolKokkosVector.hpp
@@ -63,15 +63,16 @@ namespace Genten {
     typedef typename kokkos_vector::Ktensor_type Ktensor_type;
 
     // This just copies the shape of V, not the values
-    RolKokkosVector(const Ktensor_type& V, const bool make_view,
-                    const DistKtensorUpdate<ExecSpace> *dku) : kv(V, dku)
+    RolKokkosVector(const Ktensor_type& V, const bool make_view = false,
+                    const DistKtensorUpdate<ExecSpace> *dku = nullptr) :
+      kv(V, dku)
     {
       gt_assert(make_view == false);
     }
 
     RolKokkosVector(const unsigned nc, const unsigned nd, const IndxArray& sz,
                     const ProcessorMap* pmap,
-                    const DistKtensorUpdate<ExecSpace> *dku) :
+                    const DistKtensorUpdate<ExecSpace> *dku = nullptr) :
       kv(nc,nd,sz,pmap,dku)
     {
     }