From a90b8b11e3b1ee694c9f2072c7d7f1fcc12a7509 Mon Sep 17 00:00:00 2001
From: Fritz Goebel <fritz.goebel@kit.edu>
Date: Tue, 17 Oct 2023 15:17:56 -0400
Subject: [PATCH 1/3] Update Ginkgo interface to accept data on GPU through
 mem_space

---
 src/LinAlg/hiopLinSolverSparseGinkgo.cpp | 51 +++++++++++++++++++-----
 src/LinAlg/hiopLinSolverSparseGinkgo.hpp |  2 +
 2 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
index 971ef699a..3096f7b3d 100644
--- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
+++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
@@ -283,12 +283,21 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
       nnz_{0},
       index_covert_CSR2Triplet_{nullptr},
       index_covert_extra_Diag2CSR_{nullptr}
-  {}
+  {
+    if(nlp_->options->GetString("mem_space") == "device") {
+      M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
+    }
+  }
 
   hiopLinSolverSymSparseGinkgo::~hiopLinSolverSymSparseGinkgo()
   {
     delete [] index_covert_CSR2Triplet_;
     delete [] index_covert_extra_Diag2CSR_;
+    
+    // If memory space is device, delete allocated host mirrors
+    if(nlp_->options->GetString("mem_space") == "device") {
+      delete M_host_;
+    }
   }
 
   void hiopLinSolverSymSparseGinkgo::firstCall()
@@ -304,7 +313,19 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
     auto gmres_restart = nlp_->options->GetInteger("ir_inner_restart");
     iterative_refinement_ = gmres_iter > 0;
 
-    host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
+    // If the matrix is on device, copy it to the host mirror
+    std::string mem_space = nlp_->options->GetString("mem_space");
+    if(mem_space == "device") {
+      auto host = exec_->get_master();
+      auto nnz = M_->numberOfNonzeros();
+      host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
+      host->copy_from(exec_.get(), nnz, M_->i_row(), M_host_->i_row());
+      host->copy_from(exec_.get(), nnz, M_->j_col(), M_host_->j_col());
+    } else {
+      M_host_ = M_;
+    } 
+
+    host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_host_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
     mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_);
     nnz_ = mtx_->get_num_stored_elements();
 
@@ -321,7 +342,15 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
     if( !mtx_ ) {
       this->firstCall();
     } else {
-      update_matrix(M_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
+      std::string mem_space = nlp_->options->GetString("mem_space");
+      if(mem_space == "device") {
+        auto host = exec_->get_master();
+        auto nnz = M_->numberOfNonzeros();
+        host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
+      } else {
+        M_host_ = M_;
+      } 
+      update_matrix(M_host_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
     }
     
     gko_solver_ = gko::share(reusable_factory_->generate(mtx_));
@@ -353,20 +382,22 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
     hiopVectorPar* x = dynamic_cast<hiopVectorPar*>(&x_);
     assert(x != NULL);
     hiopVectorPar* rhs = dynamic_cast<hiopVectorPar*>(x->new_copy());
+
+    std::string mem_space = nlp_->options->GetString("mem_space");
+    auto exec = host;
+    if(mem_space == "device") {
+      exec = exec_;
+    }
+
     double* dx = x->local_data();
     double* drhs = rhs->local_data();
     const auto size = gko::dim<2>{(long unsigned int)n_, 1};
-    auto dense_x_host = vec::create(host, size, arr::view(host, n_, dx), 1);
-    auto dense_x = vec::create(exec_, size);
-    dense_x->copy_from(dense_x_host.get());
-    auto dense_b_host = vec::create(host, size, arr::view(host, n_, drhs), 1);
-    auto dense_b = vec::create(exec_, size);
-    dense_b->copy_from(dense_b_host.get());
+    auto dense_x = vec::create(exec, size, arr::view(exec, n_, dx), 1);
+    auto dense_b = vec::create(exec, size, arr::view(exec, n_, drhs), 1);
 
     gko_solver_->apply(dense_b.get(), dense_x.get());
     nlp_->runStats.linsolv.tmTriuSolves.stop();
     
-    dense_x_host->copy_from(dense_x.get());
     delete rhs; rhs=nullptr;
     return 1;
   }
diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
index 058c606a5..198322595 100644
--- a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
+++ b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
@@ -96,6 +96,8 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse
 
   static const std::map<std::string, gko::solver::trisolve_algorithm> alg_map_;
 
+  hiopMatrixSparse* M_host_{ nullptr }; ///< Host mirror for the KKT matrix
+
 public:
 
   /** called the very first time a matrix is factored. Allocates space

From 23336498b1555617c3e676cc188dadd56a18d29a Mon Sep 17 00:00:00 2001
From: Fritz Goebel <fritz.goebel@kit.edu>
Date: Mon, 30 Oct 2023 17:37:08 -0400
Subject: [PATCH 2/3] Add Ginkgo as option for hybrid and GPU compute mode

---
 src/Drivers/Sparse/NlpSparseEx1Driver.cpp |  4 ++
 src/LinAlg/hiopLinSolverSparseGinkgo.cpp  | 50 +++++++++++++----------
 src/LinAlg/hiopLinSolverSparseGinkgo.hpp  |  1 +
 src/Optimization/hiopKKTLinSysSparse.cpp  | 28 +++++++++++--
 4 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index 3131fc024..25cc08e06 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -239,8 +239,12 @@ int main(int argc, char **argv)
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     if (use_ginkgo_cuda) {
+        nlp.options->SetStringValue("mem_space", "device");
+        nlp.options->SetStringValue("compute_mode", "gpu");
         nlp.options->SetStringValue("ginkgo_exec", "cuda");
     } else if (use_ginkgo_hip) {
+        nlp.options->SetStringValue("mem_space", "device");
+        nlp.options->SetStringValue("compute_mode", "gpu");
         nlp.options->SetStringValue("ginkgo_exec", "hip");
     } else {
         nlp.options->SetStringValue("ginkgo_exec", "reference");
diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
index 3096f7b3d..b957747f5 100644
--- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
+++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
@@ -284,6 +284,7 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
       index_covert_CSR2Triplet_{nullptr},
       index_covert_extra_Diag2CSR_{nullptr}
   {
+      std::cout << "START" << std::endl;
     if(nlp_->options->GetString("mem_space") == "device") {
       M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
     }
@@ -291,13 +292,13 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
 
   hiopLinSolverSymSparseGinkgo::~hiopLinSolverSymSparseGinkgo()
   {
-    delete [] index_covert_CSR2Triplet_;
-    delete [] index_covert_extra_Diag2CSR_;
-    
     // If memory space is device, delete allocated host mirrors
     if(nlp_->options->GetString("mem_space") == "device") {
       delete M_host_;
     }
+
+    delete [] index_covert_CSR2Triplet_;
+    delete [] index_covert_extra_Diag2CSR_;
   }
 
   void hiopLinSolverSymSparseGinkgo::firstCall()
@@ -315,21 +316,33 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
 
     // If the matrix is on device, copy it to the host mirror
     std::string mem_space = nlp_->options->GetString("mem_space");
+    std::cout << nlp_->options->GetString("ginkgo_exec") << " " << mem_space << std::endl;
+    auto M = M_;
     if(mem_space == "device") {
       auto host = exec_->get_master();
       auto nnz = M_->numberOfNonzeros();
-      host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
-      host->copy_from(exec_.get(), nnz, M_->i_row(), M_host_->i_row());
-      host->copy_from(exec_.get(), nnz, M_->j_col(), M_host_->j_col());
-    } else {
-      M_host_ = M_;
+      //host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
+      auto dv = gko::make_const_array_view(exec_, nnz, M_->M());
+      auto hv = gko::make_array_view(host, nnz, M_host_->M());
+      host->copy_from(exec_.get(), nnz, dv.get_const_data(), hv.get_data());
+      auto di = gko::make_const_array_view(exec_, nnz, M_->i_row());
+      auto hi = gko::make_array_view(host, nnz, M_host_->i_row());
+      host->copy_from(exec_.get(), nnz, di.get_const_data(), hi.get_data());
+      auto dj = gko::make_const_array_view(exec_, nnz, M_->j_col());
+      auto hj = gko::make_array_view(host, nnz, M_host_->j_col());
+      host->copy_from(exec_.get(), nnz, dj.get_const_data(), hj.get_data());
+      //host->copy_from(exec_.get(), nnz, M_->i_row(), M_host_->i_row());
+      //host->copy_from(exec_.get(), nnz, M_->j_col(), M_host_->j_col());
+      M = M_host_;
     } 
 
-    host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_host_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
+    host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
     mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_);
     nnz_ = mtx_->get_num_stored_elements();
 
     reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart);
+
+    dense_b_ = gko::matrix::Dense<double>::create(exec_, gko::dim<2>{n_, 1});
   }
 
   int hiopLinSolverSymSparseGinkgo::matrixChanged()
@@ -343,14 +356,14 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
       this->firstCall();
     } else {
       std::string mem_space = nlp_->options->GetString("mem_space");
+      auto M = M_;
       if(mem_space == "device") {
         auto host = exec_->get_master();
         auto nnz = M_->numberOfNonzeros();
         host->copy_from(exec_.get(), nnz, M_->M(), M_host_->M());
-      } else {
-        M_host_ = M_;
+        M = M_host_;
       } 
-      update_matrix(M_host_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
+      update_matrix(M, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_);
     }
     
     gko_solver_ = gko::share(reusable_factory_->generate(mtx_));
@@ -370,6 +383,7 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
 
   bool hiopLinSolverSymSparseGinkgo::solve ( hiopVector& x_ )
   {
+      std::cout << "SOLVE" << std::endl;
     using vec = gko::matrix::Dense<double>;
     using arr = gko::array<double>;
     auto host = exec_->get_master();
@@ -379,26 +393,20 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
 
     nlp_->runStats.linsolv.tmTriuSolves.start();
 
-    hiopVectorPar* x = dynamic_cast<hiopVectorPar*>(&x_);
-    assert(x != NULL);
-    hiopVectorPar* rhs = dynamic_cast<hiopVectorPar*>(x->new_copy());
-
     std::string mem_space = nlp_->options->GetString("mem_space");
     auto exec = host;
     if(mem_space == "device") {
       exec = exec_;
     }
 
-    double* dx = x->local_data();
-    double* drhs = rhs->local_data();
+    double* dx = x_.local_data();
     const auto size = gko::dim<2>{(long unsigned int)n_, 1};
     auto dense_x = vec::create(exec, size, arr::view(exec, n_, dx), 1);
-    auto dense_b = vec::create(exec, size, arr::view(exec, n_, drhs), 1);
+    dense_b_->copy_from(dense_x.get());
 
-    gko_solver_->apply(dense_b.get(), dense_x.get());
+    gko_solver_->apply(dense_b_.get(), dense_x.get());
     nlp_->runStats.linsolv.tmTriuSolves.stop();
     
-    delete rhs; rhs=nullptr;
     return 1;
   }
 
diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
index 198322595..d1c7a0419 100644
--- a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
+++ b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp
@@ -90,6 +90,7 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse
   std::shared_ptr<gko::Executor> exec_;
   std::shared_ptr<gko::matrix::Csr<double, int>> mtx_;
   std::shared_ptr<gko::matrix::Csr<double, int>> host_mtx_;
+  std::shared_ptr<gko::matrix::Dense<double>> dense_b_;
   std::shared_ptr<gko::LinOpFactory> reusable_factory_;
   std::shared_ptr<gko::LinOp> gko_solver_;
   bool iterative_refinement_;
diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp
index c41a8579a..3cee4380f 100644
--- a/src/Optimization/hiopKKTLinSysSparse.cpp
+++ b/src/Optimization/hiopKKTLinSysSparse.cpp
@@ -317,9 +317,7 @@ namespace hiop
         if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") {
           //ma57, pardiso and strumpack are not available or user requested ginkgo
 #ifdef HIOP_USE_GINKGO              
-          nlp_->log->printf(hovScalars,
-                            "KKT_SPARSE_XYcYd linsys: alloc GINKGO with matrix size %d (%d cons)\n",
-                            n, neq+nineq);
+          linsol_actual = "GINKGO";
           linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
 #endif  // HIOP_USE_GINKGO        
         }
@@ -376,6 +374,14 @@ namespace hiop
           linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_);
 #endif // HIOP_USE_PARDISO
         }
+        
+        if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") {
+          //ma57, pardiso and strumpack are not available or user requested ginkgo
+#ifdef HIOP_USE_GINKGO              
+          linsol_actual = "GINKGO";
+          linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
+#endif  // HIOP_USE_GINKGO        
+        }
 
         if(linSys_) {
           nlp_->log->printf(hovScalars,
@@ -747,6 +753,14 @@ namespace hiop
 #endif // HIOP_USE_PARDISO          
         }
 
+        if(nullptr == linSys_ && linear_solver == "ginkgo") {
+          //ginkgo is only selected here when the user explicitly requested it and linSys_ has not been set yet
+#ifdef HIOP_USE_GINKGO
+          linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
+          actual_lin_solver = "GINKGO";        
+#endif  // HIOP_USE_GINKGO        
+        }
+
         if(linSys_) {
           nlp_->log->printf(hovScalars,
                             "KKT_SPARSE_XDYcYd linsys: alloc [%s] size %d (%d cons) (hybrid)\n",
@@ -781,6 +795,14 @@ namespace hiop
           }
 #endif
         } //end resolve
+        
+        if(nullptr == linSys_ && linear_solver == "ginkgo") {
+          //ginkgo is only selected here when the user explicitly requested it and linSys_ has not been set yet
+#ifdef HIOP_USE_GINKGO
+          linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_);
+          actual_lin_solver = "GINKGO";        
+#endif  // HIOP_USE_GINKGO        
+        }
       } // end of compute mode gpu
     }
     assert(linSys_&& "KKT_SPARSE_XDYcYd linsys: cannot instantiate backend linear solver");

From 7630d4b3f72f3f75b7bf7f07f01c60be395a10ec Mon Sep 17 00:00:00 2001
From: Fritz Goebel <fritz.goebel@kit.edu>
Date: Thu, 2 Nov 2023 15:07:19 -0400
Subject: [PATCH 3/3] Remove device reset for Ginkgo Executor, add Ginkgo as
 option for NlpSparseRajaEx2

---
 src/Drivers/Sparse/CMakeLists.txt             |  3 +--
 src/Drivers/Sparse/NlpSparseEx1Driver.cpp     |  2 --
 src/Drivers/Sparse/NlpSparseEx2Driver.cpp     |  2 ++
 src/Drivers/Sparse/NlpSparseEx4Driver.cpp     |  2 ++
 src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp | 14 +++++++++++++-
 src/LinAlg/hiopLinSolverSparseGinkgo.cpp      |  9 ++-------
 6 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 91efb88a7..1c7ed6d8c 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -18,11 +18,10 @@ add_executable(NlpSparseEx4.exe NlpSparseEx4.cpp NlpSparseEx4Driver.cpp)
 target_link_libraries(NlpSparseEx4.exe HiOp::HiOp)
 
 if(HIOP_USE_RAJA)
-  if(HIOP_USE_GPU AND HIOP_USE_CUDA)
+  if(HIOP_USE_GPU)
     set_source_files_properties(
       NlpSparseRajaEx2.cpp 
       NlpSparseRajaEx2Driver.cpp 
-      PROPERTIES LANGUAGE CUDA
     )
   
     add_executable(NlpSparseRajaEx2.exe  NlpSparseRajaEx2Driver.cpp  NlpSparseRajaEx2.cpp)
diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
index 25cc08e06..3bc259bae 100644
--- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -239,11 +239,9 @@ int main(int argc, char **argv)
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     if (use_ginkgo_cuda) {
-        nlp.options->SetStringValue("mem_space", "device");
         nlp.options->SetStringValue("compute_mode", "gpu");
         nlp.options->SetStringValue("ginkgo_exec", "cuda");
     } else if (use_ginkgo_hip) {
-        nlp.options->SetStringValue("mem_space", "device");
         nlp.options->SetStringValue("compute_mode", "gpu");
         nlp.options->SetStringValue("ginkgo_exec", "hip");
     } else {
diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
index e61b866a0..27e3048d0 100644
--- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp
@@ -248,8 +248,10 @@ int main(int argc, char **argv)
       nlp.options->SetStringValue("linsol_mode", "speculative");
       nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
       if (use_ginkgo_cuda) {
+          nlp.options->SetStringValue("compute_mode", "gpu");
           nlp.options->SetStringValue("ginkgo_exec", "cuda");
       } else if (use_ginkgo_hip) {
+          nlp.options->SetStringValue("compute_mode", "gpu");
           nlp.options->SetStringValue("ginkgo_exec", "hip");
       } else {
           nlp.options->SetStringValue("ginkgo_exec", "reference");
diff --git a/src/Drivers/Sparse/NlpSparseEx4Driver.cpp b/src/Drivers/Sparse/NlpSparseEx4Driver.cpp
index 0200284f6..878a605e9 100644
--- a/src/Drivers/Sparse/NlpSparseEx4Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseEx4Driver.cpp
@@ -238,8 +238,10 @@ int main(int argc, char **argv)
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
     nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     if (use_ginkgo_cuda) {
+        nlp.options->SetStringValue("compute_mode", "gpu");
         nlp.options->SetStringValue("ginkgo_exec", "cuda");
     } else if (use_ginkgo_hip) {
+        nlp.options->SetStringValue("compute_mode", "gpu");
         nlp.options->SetStringValue("ginkgo_exec", "hip");
     } else {
         nlp.options->SetStringValue("ginkgo_exec", "reference");
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
index a2455ebe1..d079ed46e 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
@@ -256,12 +256,24 @@ int main(int argc, char **argv)
     // only support cusolverLU right now, 2023.02.28
     //lsq initialization of the duals fails for this example since the Jacobian is rank deficient
     //use zero initialization
-    nlp.options->SetStringValue("linear_solver_sparse", "resolve");
     if(use_resolve_cuda_rf) {
+      nlp.options->SetStringValue("linear_solver_sparse", "resolve");
       nlp.options->SetStringValue("resolve_refactorization", "rf");
       nlp.options->SetIntegerValue("ir_inner_maxit", 20);
       nlp.options->SetIntegerValue("ir_outer_maxit", 0);
     }
+    if (use_ginkgo) {
+      nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
+      nlp.options->SetIntegerValue("ir_outer_maxit", 0);
+      if (use_ginkgo_cuda) {
+        nlp.options->SetStringValue("ginkgo_exec", "cuda");
+      } else if (use_ginkgo_hip) {
+        nlp.options->SetStringValue("ginkgo_exec", "hip");
+      } else {
+        nlp.options->SetStringValue("ginkgo_exec", "reference");
+        nlp.options->SetStringValue("compute_mode", "cpu");
+      }
+    }
     nlp.options->SetStringValue("duals_init", "zero");
     nlp.options->SetStringValue("mem_space", "device");
     nlp.options->SetStringValue("fact_acceptor", "inertia_free");
diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
index b957747f5..e463500b2 100644
--- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
+++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp
@@ -212,13 +212,11 @@ std::shared_ptr<gko::Executor> create_exec(std::string executor_string)
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::ReferenceExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::ReferenceExecutor::create());
              }},
             {"dpcpp",
              [] {
@@ -284,7 +282,6 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
       index_covert_CSR2Triplet_{nullptr},
       index_covert_extra_Diag2CSR_{nullptr}
   {
-      std::cout << "START" << std::endl;
     if(nlp_->options->GetString("mem_space") == "device") {
       M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz);
     }
@@ -316,7 +313,6 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
 
     // If the matrix is on device, copy it to the host mirror
     std::string mem_space = nlp_->options->GetString("mem_space");
-    std::cout << nlp_->options->GetString("ginkgo_exec") << " " << mem_space << std::endl;
     auto M = M_;
     if(mem_space == "device") {
       auto host = exec_->get_master();
@@ -383,7 +379,6 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
 
   bool hiopLinSolverSymSparseGinkgo::solve ( hiopVector& x_ )
   {
-      std::cout << "SOLVE" << std::endl;
     using vec = gko::matrix::Dense<double>;
     using arr = gko::array<double>;
     auto host = exec_->get_master();