From 14b2184506e45f029201592d551ec2e2894511cc Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Mon, 3 Mar 2025 15:06:46 -0800
Subject: [PATCH 01/27] interface draft

---
 src/Interface/hiopInterface.hpp | 72 +++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/src/Interface/hiopInterface.hpp b/src/Interface/hiopInterface.hpp
index 904efb9d..8195d045 100644
--- a/src/Interface/hiopInterface.hpp
+++ b/src/Interface/hiopInterface.hpp
@@ -279,6 +279,78 @@ class hiopInterfaceBase
     return false;  // defaults to serial
   }

+  /**
+   * Computes action y of the mass matrix on a vector x, namely y=M*x
+   *
+   * The mass matrix is generally the weight matrix associated with L^2 finite element
+   * discretizations. This matrix is used internally to build and maintain Riesz-like
+   * representations of the dual variables (associated with bound constraints) to ensure
+   * mesh independent performance of the algorithm.
+   *
+   * @note If the variables are not coming from L^2 discretizations, this method should
+   * return false on its first call. HiOp will use internally the identity as the mass
+   * matrix in this case. Otherwise, it should return true on its first call; subsequent
+   * errors in matrix apply can be flagged with false return values.
+   *
+   * @param[in] n the (global) number of variables
+   * @param[in] x the array to which mass action is applied
+   * @param[out] y the result of apply
+   * @return true if successful, false otherwise (also see note above).
+   */
+  virtual bool applyM(const size_type& n, const double* x, double* y)
+  {
+    //the default impl. instructs HiOp to use Euclidean/l^2 variables (M=I) internally
+    return false;
+  }
+
+  /**
+   * Computes action y of the inner product weight matrix H on a vector x, namely y=H*x
+   *
+   * The weight matrix H is generally associated with L^2 or H^1 finite element
+   * discretizations and the corresponding weighted inner products <u,v>_H = u^T H v. For L^2,
+   * H is the mass matrix, while for H^1 it is the mass plus the stiffness matrix. This matrix
+   * is applied internally to obtain termination criteria and derive Hessian representations
+   * that are mesh independent and consistent with the function space nature of the problem.
+   *
+   * Additional info: C. G. Petra et al., On the implementation of a quasi-Newton
+   * interior-point method for PDE-constrained optimization using finite element
+   * discretizations, Optimiz. Meth. and Software, Vol. 38, 2023.
+   *
+   * @note If the variables are not coming from L^2 discretizations, this method should
+   * return false on its first call. HiOp will use internally H=I in this case. Otherwise,
+   * it should return true on its first call; subsequent errors in matrix apply can be
+   * flagged with false return values.
+   *
+   * @param[in] n the (global) number of variables
+   * @param[in] x the array to which the H matrix is applied
+   * @param[out] y the result of apply
+   * @return true if successful, false otherwise (also see note above)
+   */
+  virtual bool applyH(const size_type& n, const double* x, double* y)
+  {
+    //the default impl. instructs HiOp to use Euclidean/l^2 variables (H=I) internally
+    return false;
+  }
+
+  /**
+   * Computes action y of the inverse of H on a vector x, namely y=H^{-1}*x
+   *
+   * See @applyH for a discussion of H and additional notes. The inverse of H plays an
+   * important role in computing the "dual" norms and, in turn, in ensuring mesh
+   * independent behavior of the IPM solver(s). The inverse apply is also needed by the
+   * specialized solves HiOp performs when the quasi-Newton solver is used.
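+   *
+   * For illustration only, a minimal sketch of a user-side implementation, under the
+   * assumption of a diagonal (e.g., lumped) weight matrix stored by the user code in a
+   * hypothetical array h_diag_ (not part of HiOp); the inverse apply is then elementwise:
+   * \code
+   * bool applyHinv(const size_type& n, const double* x, double* y) override
+   * {
+   *   for(size_type i = 0; i < n; ++i) {
+   *     y[i] = x[i] / h_diag_[i];  // y = H^{-1}*x for a diagonal H
+   *   }
+   *   return true;
+   * }
+   * \endcode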
+   *
+   * @param[in] n the (global) number of variables
+   * @param[in] x the array to which the inverse is applied
+   * @param[out] y the result of apply
+   * @return see return of @applyH
+   */
+  virtual bool applyHinv(const size_type& n, const double* x, double* y)
+  {
+    //the default impl. instructs HiOp to use Euclidean/l^2 variables (H=I) internally
+    return false;
+  }
+
   /**
    * Method provides a primal or starting point. This point is subject to internal adjustments.
    *

From 14a243c92d34a741e1d1eb4665ea1e2a725070b7 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Wed, 5 Mar 2025 10:55:57 -0800
Subject: [PATCH 02/27] wrapper for WeiInProd in NlpFormulation

---
 src/Interface/hiopInterface.hpp         | 32 ++++++++++-----
 src/Optimization/hiopNlpFormulation.cpp | 54 +++++++++++++++++++++++++
 src/Optimization/hiopNlpFormulation.hpp | 17 +++++++-
 src/Utils/hiopRunStats.hpp              | 21 +++++++++-
 4 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/src/Interface/hiopInterface.hpp b/src/Interface/hiopInterface.hpp
index 8195d045..e21829c7 100644
--- a/src/Interface/hiopInterface.hpp
+++ b/src/Interface/hiopInterface.hpp
@@ -287,15 +287,15 @@ class hiopInterfaceBase
    * representations of the dual variables (associated with bound constraints) to ensure
    * mesh independent performance of the algorithm.
    *
-   * @note If the variables are not coming from L^2 discretizations, this method should
-   * return false on its first call. HiOp will use internally the identity as the mass
-   * matrix in this case. Otherwise, it should return true on its first call; subsequent
-   * errors in matrix apply can be flagged with false return values.
+   * @note If the variables are not coming from discretizations (specified by a false
+   * value returned by @useWeightedInnerProducts), this method should return false. HiOp
+   * will use internally M=I. Otherwise, it should return true or false depending on
+   * whether the user code successfully applied M.
    *
    * @param[in] n the (global) number of variables
    * @param[in] x the array to which mass action is applied
    * @param[out] y the result of apply
-   * @return true if successful, false otherwise (also see note above).
+   * @return see the note above.
    */
   virtual bool applyM(const size_type& n, const double* x, double* y)
   {
@@ -316,15 +316,15 @@ class hiopInterfaceBase
    * interior-point method for PDE-constrained optimization using finite element
    * discretizations, Optimiz. Meth. and Software, Vol. 38, 2023.
    *
-   * @note If the variables are not coming from L^2 discretizations, this method should
-   * return false on its first call. HiOp will use internally H=I in this case. Otherwise,
-   * it should return true on its first call; subsequent errors in matrix apply can be
-   * flagged with false return values.
+   * @note If the variables are not coming from discretizations (specified by a false
+   * value returned by @useWeightedInnerProducts), this method should return false. HiOp
+   * will use internally H=I. Otherwise, it should return true or false depending on
+   * whether the user code successfully applied H.
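+   *
+   * For illustration only, a hedged sketch of a user-side applyH, assuming the same
+   * hypothetical diagonal weight array h_diag_ (not part of HiOp); for an L^2
+   * discretization this array would hold the (lumped) mass matrix diagonal:
+   * \code
+   * bool applyH(const size_type& n, const double* x, double* y) override
+   * {
+   *   for(size_type i = 0; i < n; ++i) {
+   *     y[i] = h_diag_[i] * x[i];  // y = H*x for a diagonal H
+   *   }
+   *   return true;
+   * }
+   * \endcode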
   *
    * @param[in] n the (global) number of variables
    * @param[in] x the array to which the H matrix is applied
    * @param[out] y the result of apply
-   * @return true if successful, false otherwise (also see note above)
+   * @return see the note above
    */
   virtual bool applyH(const size_type& n, const double* x, double* y)
   {
@@ -351,6 +351,18 @@ class hiopInterfaceBase
     return false;
   }

+  /**
+   * Enables the use of weighted inner products via @applyM, @applyH, and @applyHinv
+   *
+   * See also the notes for @applyM, @applyH, and @applyHinv.
+   *
+   * @return true if enabled, false to default to the Euclidean space with <u,v>=u^T*v
+   */
+  virtual bool useWeightedInnerProducts()
+  {
+    return false;
+  }
+
   /**
    * Method provides a primal or starting point. This point is subject to internal adjustments.
    *
diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp
index 63000fd9..89b57d5d 100644
--- a/src/Optimization/hiopNlpFormulation.cpp
+++ b/src/Optimization/hiopNlpFormulation.cpp
@@ -54,6 +54,7 @@
  */
 #include "hiopNlpFormulation.hpp"
+#include "InnerProduct.hpp"
 #include "HessianDiagPlusRowRank.hpp"
 #include "hiopVector.hpp"
 #include "LinAlgFactory.hpp"
@@ -79,6 +80,7 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char
   nlp_transformations_(this),
   interface_base(interface_)
 {
+// inner_prod_ = new InnerProduct(this);
   strFixedVars_ = "";  // uninitialized
   dFixedVarsTol_ = -1.;  // uninitialized
   bool bret;
@@ -143,6 +145,8 @@
   temp_x_ = nullptr;
   nlp_scaling_ = nullptr;
   relax_bounds_ = nullptr;
+
+  inner_prod_ = new InnerProduct(this);
 }

 hiopNlpFormulation::~hiopNlpFormulation()
@@ -184,6 +188,8 @@
   delete temp_ineq_;
   delete temp_x_;
   /// nlp_scaling_ and relax_bounds_ are deleted inside nlp_transformations_
+
+  delete inner_prod_;
 }

 bool hiopNlpFormulation::finalizeInitialization()
@@ -757,6 +763,54 @@ bool hiopNlpFormulation::eval_grad_f(hiopVector& x, bool new_x, hiopVector& grad
   return bret;
 }

+bool hiopNlpFormulation::eval_M(const hiopVector& x, hiopVector& y)
+{
+  //x is in the reduced space used by HiOp
+  //x_full is x in the full/untransformed/user space
+  hiopVector* x_full = nlp_transformations_.apply_inv_to_x(const_cast<hiopVector&>(x), true/*new_x*/);
+
+  // We need to also transform y to a y_full, pass it to the user apply, and then the reverse, y_full back to y.
+  // The following works, unless a "remove fixed variables" NLP transformation is present.
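+  // For illustration, one possible shape of that general path, sketched with a
+  // hypothetical reverse-mapping helper (not provided by the transformations yet):
+  //   hiopVector* y_full = x_full->alloc_clone();  // buffer in the full/user space
+  //   interface_base.applyM(nlp_transformations_.n_pre(), x_full->local_data_const(), y_full->local_data());
+  //   copy_full_to_reduced(*y_full, y);            // hypothetical; see TO DO below
+  //   delete y_full;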
+  // TO DO: Either add functionality in hiopFixedVarRemovers to allow apply_inv_to_x for more than one vector
+  // simultaneously, or create y_full here and use apply_inv_to_x with copying from the return to y_full
+  hiopVector* y_full = &y;
+  assert(x_full->get_size() == y.get_size() &&
+         "weighted inner products not supported when fixed variables are removed");
+
+  runStats.tm_eval_M_apply.start();
+  bool bret = interface_base.applyM(nlp_transformations_.n_pre(),
+                                    x_full->local_data_const(),
+                                    y_full->local_data());
+  runStats.tm_eval_M_apply.stop();
+  runStats.n_eval_M_apply++;
+
+  // TODO: copy back to y from y_full
+
+  return bret;
+}
+
+bool hiopNlpFormulation::eval_H(const hiopVector& x, hiopVector& y)
+{
+  //x is in the reduced space used by HiOp
+  //x_full is x in the full/untransformed/user space
+  hiopVector* x_full = nlp_transformations_.apply_inv_to_x(const_cast<hiopVector&>(x), true/*new_x*/);
+
+  // TODO: see notes from eval_M
+  hiopVector* y_full = &y;
+  assert(x_full->get_size() == y.get_size() &&
+         "weighted inner products not supported when fixed variables are removed");
+
+  runStats.tm_eval_H_apply.start();
+  bool bret = interface_base.applyH(nlp_transformations_.n_pre(),
+                                    x_full->local_data_const(),
+                                    y_full->local_data());
+  runStats.tm_eval_H_apply.stop();
+  runStats.n_eval_H_apply++;
+
+  return bret;
+}
+
+
 bool hiopNlpFormulation::get_starting_point(hiopVector& x0_for_hiop,
                                             bool& duals_avail,
                                             hiopVector& zL0_for_hiop,
diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp
index 4d0df76a..b75f54c4 100644
--- a/src/Optimization/hiopNlpFormulation.hpp
+++ b/src/Optimization/hiopNlpFormulation.hpp
@@ -80,7 +80,8 @@ namespace hiop

 // some forward decls
 class hiopDualsLsqUpdate;
-
+class InnerProduct;
+
 /** Class for a general NlpFormulation with general constraints and bounds on the variables.
  * This class also acts as a factory for linear algebra objects (derivative
  * matrices, KKT system) whose types are decided based on the hiopInterfaceXXX object passed in the
@@ -118,6 +119,17 @@ class hiopNlpFormulation
   virtual bool eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) = 0;
   virtual bool eval_Jac_c_d(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d);

+  inline bool useWeightedInnerProd()
+  {
+    return interface_base.useWeightedInnerProducts();
+  }
+  /* Wrapper over user-defined M apply that also applies the NLP transformations. */
+  bool eval_M(const hiopVector& x, hiopVector& y);
+  /* Wrapper over user-defined H apply that also applies the NLP transformations. */
+  bool eval_H(const hiopVector& x, hiopVector& y);
+  /* Wrapper over user-defined inverse of H apply that also applies the NLP transformations. */
+  bool eval_H_inv(const hiopVector& x, hiopVector& y);
+
 protected:
   // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments
   virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) = 0;
@@ -353,6 +365,9 @@ class hiopNlpFormulation
   /* User provided interface */
   hiopInterfaceBase& interface_base;

+  /* Inner product wrapper */
+  InnerProduct* inner_prod_;
+
   /**
    * Flag to indicate whether to use evaluate all constraints once or separately for equalities
    * or inequalities.
Possible values diff --git a/src/Utils/hiopRunStats.hpp b/src/Utils/hiopRunStats.hpp index 0ee87903..c1e59e42 100644 --- a/src/Utils/hiopRunStats.hpp +++ b/src/Utils/hiopRunStats.hpp @@ -320,6 +320,14 @@ class hiopRunStats int nIter; + // measure user-defined calls other than the above, for example matrices for weighted inner products + hiopTimer tm_eval_M_apply; + hiopTimer tm_eval_H_apply; + hiopTimer tm_eval_Hinv_apply; + int n_eval_M_apply; + int n_eval_H_apply; + int n_eval_Hinv_apply; + hiopRunKKTSolStats kkt; hiopLinSolStats linsolv; inline virtual void initialize() @@ -329,6 +337,12 @@ class hiopRunStats nEvalObj = nEvalGrad_f = nEvalCons_eq = nEvalCons_ineq = nEvalJac_con_eq = nEvalJac_con_ineq = 0; nEvalHessL = 0; nIter = 0; + tm_eval_M_apply = 0.; + tm_eval_H_apply = 0.; + tm_eval_Hinv_apply = 0.; + n_eval_M_apply = 0; + n_eval_H_apply = 0; + n_eval_Hinv_apply = 0; } inline std::string get_summary(int masterRank = 0) @@ -364,7 +378,9 @@ class hiopRunStats << "s " << "( obj=" << tmEvalObj.getElapsedTime() << " grad=" << tmEvalGrad_f.getElapsedTime() << " cons=" << tmEvalCons.getElapsedTime() << " Jac=" << tmEvalJac_con.getElapsedTime() - << " Hess=" << tmEvalHessL.getElapsedTime() << ") " << std::endl; + << " Hess=" << tmEvalHessL.getElapsedTime() << ") " + << " WeigInProd=(" << tm_eval_M_apply.getElapsedTime() << " " << tm_eval_H_apply.getElapsedTime() << " " + << tm_eval_Hinv_apply.getElapsedTime() << ")" << std::endl; #ifdef HIOP_USE_MPI loc = tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + @@ -385,7 +401,8 @@ class hiopRunStats #endif ss << "Fcn/deriv #: obj " << nEvalObj << " grad " << nEvalGrad_f << " eq cons " << nEvalCons_eq << " ineq cons " - << nEvalCons_ineq << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq << std::endl; + << nEvalCons_ineq << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq + << "WeigInProd (" << n_eval_M_apply << " " << n_eval_H_apply << " " << n_eval_Hinv_apply << ")" << std::endl; return ss.str(); } From 89bc337adfc9313762f98efe0ea2abb5481a9ba6 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Wed, 5 Mar 2025 10:58:10 -0800 Subject: [PATCH 03/27] draft of InnerProd class --- src/Optimization/InnerProduct.hpp | 144 ++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 src/Optimization/InnerProduct.hpp diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp new file mode 100644 index 00000000..540018e6 --- /dev/null +++ b/src/Optimization/InnerProduct.hpp @@ -0,0 +1,144 @@ +// Copyright (c) 2025, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory (LLNL). +// LLNL-CODE-742473. All rights reserved. +// +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// Please also read "Additional BSD Notice" below. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// i. Redistributions of source code must retain the above copyright notice, this list +// of conditions and the disclaimer below. +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// other materials provided with the distribution. +// iii. 
Neither the name of the LLNS/LLNL nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S. Department
+// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under
+// Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC
+// nor any of their employees, makes any warranty, express or implied, or assumes any
+// liability or responsibility for the accuracy, completeness, or usefulness of any
+// information, apparatus, product, or process disclosed, or represents that its use would
+// not infringe privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.

+/**
+ * @file InnerProduct.hpp
+ *
+ * @author Cosmin G. Petra, LLNL
+ * @author Nai-Yuan Chiang, LLNL
+ *
+ */
+
+#ifndef HIOP_NLP_INNERPROD
+#define HIOP_NLP_INNERPROD
+
+#include "hiopVector.hpp"
+
+namespace hiop
+{
+
+// some forward decls
+class hiopNlpFormulation;
+
+/**
+ * Provides functionality required for using (weighted) inner products within the IPM algorithm(s).
+ *
+ * These weighted inner products appear when optimizing over (discretizations of) function spaces,
+ * such as in PDE-constrained optimization. The class wraps around user-provided methods for
+ * computing the mass matrix M and the weight matrix H generally associated with L^2 or H^1 finite
+ * element discretizations and the corresponding weighted inner products <u,v>_H = u^T H v. For L^2,
+ * H is the mass matrix, while for H^1 it is the mass plus the stiffness matrix. These user methods
+ * are called to perform various operations associated with Hilbert spaces, such as inner products
+ * and norms.
+ *
+ * Additional info: C. G. Petra et al., On the implementation of a quasi-Newton
+ * interior-point method for PDE-constrained optimization using finite element
+ * discretizations, Optimiz. Meth. and Software, Vol. 38, 2023.
+ *
+ * This class also covers Euclidean (i.e., non-weighted) inner products, for which M=H=I.
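+ *
+ * A hedged usage sketch (assumes nlp points to a fully initialized hiopNlpFormulation
+ * and x is a primal-space hiopVector; mirrors how the solver code added later in this
+ * series calls the class):
+ * \code
+ *   InnerProduct ip(nlp);
+ *   double nrm = ip.norm_M(x);  // sqrt(x^T*M*x) in the weighted case, ||x||_2 otherwise
+ * \endcode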
+ */ +class InnerProduct +{ +public: + InnerProduct(hiopNlpFormulation* nlp) + : nlp_(nlp) + { + if(nlp->useWeightedInnerProd()) { + vec_n_ = nlp_->alloc_primal_vec(); + } else { + vec_n_ = nullptr; + } + } + + virtual ~InnerProduct() + { + delete vec_n_; + } + + // Computes ||x||_M + double norm_M(const hiopVector& x) + { + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_M(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); + return ::std::sqrt(dp); + } else { + return x.twonorm(); + } + } + // Computes H primal norm + double norm_H_primal(const hiopVector& x) + { + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_H(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); + return ::std::sqrt(dp); + } else { + return x.twonorm(); + } + } + // Computes H dual norm + double norm_H_dual(const hiopVector& x) + { + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_H_inv(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); + return ::std::sqrt(dp); + } else { + return x.twonorm(); + } + } + +private: + // Pointer to "client" NLP + hiopNlpFormulation* nlp_; + + // Working vector in the size n of the variables, allocated only when for the weighted case + hiopVector* vec_n_; +}; + +} //end namespace +#endif From a1ae282e1058a35278e2c4987568303b1b058bd3 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Thu, 6 Mar 2025 12:43:26 -0800 Subject: [PATCH 04/27] cleaning up --- src/Optimization/hiopResidual.hpp | 13 ++++++++++--- src/Utils/hiopRunStats.hpp | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Optimization/hiopResidual.hpp b/src/Optimization/hiopResidual.hpp index 4ad02969..5f0f725b 100644 --- a/src/Optimization/hiopResidual.hpp +++ b/src/Optimization/hiopResidual.hpp @@ -87,7 +87,8 @@ class hiopResidual * c_soc = c_rhs - c_body and d_soc = d - d_body * * computed in c_eval and d_eval, respectively. - * The method modifies 'this', in particular ryd,ryc, rxl,rxu, rdl, rdu in an attemp + * The method modifies 'this', in particular ryd, ryc, rxl,rxu, rdl, rdu in an attempt + * to reuse storage/buffers */ virtual void update_soc(const hiopIterate& it, const hiopVector& c_soc, @@ -115,11 +116,14 @@ class hiopResidual inline double getInfeasInfNorm() const { return nrmInf_nlp_feasib; } /* get the previously computed Infeasibility */ inline double get_theta() const { return nrmOne_nlp_feasib; } - /* evaluate the Infeasibility at the new iterate, which has eq and ineq functions + + /** + * Evaluate the infeasibility at the new iterate, which has eq and ineq functions * computed in c_eval and d_eval, respectively. * The method modifies 'this', in particular ryd,ryc, rxl,rxu, rdl, rdu in an attempt * to reuse storage/buffers, but does not update the cached nrmInf_XXX members. 
- * It computes and returns the one norm of [ryc ryd] */ + * It computes and returns the one norm of [ryc ryd] + */ double compute_nlp_infeasib_onenorm(const hiopIterate& iter, const hiopVector& c_eval, const hiopVector& d_eval); /* residual printing function - calls hiopVector::print @@ -170,6 +174,7 @@ class hiopResidual * for the nlp (\mu=0) */ double nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem; + /** storage for the norm of [rx,rd], [rxl,...,rdu,ryc,ryd], and [rszl,...,rsvu] * for the barrier subproblem */ @@ -185,6 +190,8 @@ class hiopResidual /** inf norm of constraint violation */ double nrmInf_cons_violation; + + // and associated info from problem formulation hiopNlpFormulation* nlp; diff --git a/src/Utils/hiopRunStats.hpp b/src/Utils/hiopRunStats.hpp index c1e59e42..a635f1b6 100644 --- a/src/Utils/hiopRunStats.hpp +++ b/src/Utils/hiopRunStats.hpp @@ -376,10 +376,10 @@ class hiopRunStats << (tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + tmEvalJac_con.getElapsedTime() + tmEvalHessL.getElapsedTime()) << "s " - << "( obj=" << tmEvalObj.getElapsedTime() << " grad=" << tmEvalGrad_f.getElapsedTime() + << "(obj=" << tmEvalObj.getElapsedTime() << " grad=" << tmEvalGrad_f.getElapsedTime() << " cons=" << tmEvalCons.getElapsedTime() << " Jac=" << tmEvalJac_con.getElapsedTime() << " Hess=" << tmEvalHessL.getElapsedTime() << ") " - << " WeigInProd=(" << tm_eval_M_apply.getElapsedTime() << " " << tm_eval_H_apply.getElapsedTime() << " " + << " WIProd=(" << tm_eval_M_apply.getElapsedTime() << " " << tm_eval_H_apply.getElapsedTime() << " " << tm_eval_Hinv_apply.getElapsedTime() << ")" << std::endl; #ifdef HIOP_USE_MPI @@ -402,7 +402,7 @@ class hiopRunStats #endif ss << "Fcn/deriv #: obj " << nEvalObj << " grad " << nEvalGrad_f << " eq cons " << nEvalCons_eq << " ineq cons " << nEvalCons_ineq << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq - << "WeigInProd (" << n_eval_M_apply << " " << n_eval_H_apply << " " << n_eval_Hinv_apply << ")" << std::endl; + << " WIProd (" << n_eval_M_apply << " " << n_eval_H_apply << " " << n_eval_Hinv_apply << ")" << std::endl; return ss.str(); } From 826c5d28496d0f9e756c41491ad5ac55ee50216a Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Thu, 6 Mar 2025 12:44:57 -0800 Subject: [PATCH 05/27] temp code + notes --- src/Optimization/hiopAlgFilterIPM.cpp | 88 +++++++++++++++++++++++++++ src/Optimization/hiopAlgFilterIPM.hpp | 14 +++++ 2 files changed, 102 insertions(+) diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index 9ecb9747..86088236 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -715,6 +715,94 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, return true; } +bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, + const hiopResidual& resid, + const double& mu, + double& nlpoptim, + double& nlpfeas, + double& nlpcomplem, + double& nlpoverall, + double& logoptim, + double& logfeas, + double& logcomplem, + double& logoverall, + double& cons_violation) +{ + nlp->runStats.tmSolverInternal.start(); + + size_type n = nlp->n_complem(), m = nlp->m(); + // the one norms + // double nrmDualBou=it.normOneOfBoundDuals(); + // double nrmDualEqu=it.normOneOfEqualityDuals(); + double nrmDualBou, nrmDualEqu; + it.normOneOfDuals(nrmDualEqu, nrmDualBou); + + nlp->log->printf(hovScalars, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); + if(nrmDualBou > 1e+10) 
{
+    nlp->log->printf(hovWarning,
+                     "Unusually large bound dual variables (norm1=%g) occurred, "
+                     "which may cause numerical instabilities if it persists. Convergence "
+                     " issues or inaccurate optimal solutions may be experienced. Possible causes: "
+                     " tight bounds or bad scaling of the optimization variables.\n",
+                     nrmDualBou);
+    if(nlp->options->GetString("fixed_var") == "remove") {
+      nlp->log->printf(hovWarning,
+                       "For example, increase 'fixed_var_tolerance' to remove "
+                       "additional variables.\n");
+    } else if(nlp->options->GetString("fixed_var") == "relax") {
+      nlp->log->printf(hovWarning,
+                       "For example, increase 'fixed_var_tolerance' to relax "
+                       "additional (tight) variables and/or increase 'fixed_var_perturb' "
+                       "to decrease the tightness.\n");
+    } else {
+      nlp->log->printf(hovWarning,
+                       "Potential fixes: fix or relax variables with tight bounds "
+                       "(see 'fixed_var' option) or rescale variables.\n");
+    }
+  }
+
+  // scaling factors
+  //sd = max { p_smax, ||zl||_H + ||zu||_H + (||vl||_1 + ||vu||_1)/m } /p_smax
+  //sc = max { p_smax, ||zl||_H + ||zu||_H } /p_smax
+  //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv
+  //           ||yd+vl-vu||_inf
+  //nlpfeas = ||crhs- c||_inf
+  //          ||drs- d||_inf
+  //          ||x-sxl-xl||_H
+  //          ||x-sxu+xu||_H
+  //          ||d-sdl-dl||
+  //
+  double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax;
+  double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax;
+
+  sd = fmin(sd, 1e+8);
+  sc = fmin(sc, 1e+8);
+
+  // actual nlp errors
+  resid.getNlpErrors(nlpoptim, nlpfeas, nlpcomplem, cons_violation);
+
+  // finally, the scaled nlp error
+  nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc));
+
+  nlp->log->printf(hovScalars,
+                   "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n",
+                   nlpoverall,
+                   nlpoptim,
+                   sd,
+                   nlpfeas,
+                   nlpcomplem,
+                   cons_violation,
+                   sc);
+
+  // actual log errors
+  resid.getBarrierErrors(logoptim, logfeas, logcomplem);
+
+  // finally, the scaled barrier error
+  logoverall = fmax(logoptim / sd, fmax(cons_violation, logcomplem / sc));
+  nlp->runStats.tmSolverInternal.stop();
+  return true;
+}
+
 bool hiopAlgFilterIPMBase::evalNlp_funcOnly(hiopIterate& iter, double& f, hiopVector& c, hiopVector& d)
 {
   bool new_x = true;
diff --git a/src/Optimization/hiopAlgFilterIPM.hpp b/src/Optimization/hiopAlgFilterIPM.hpp
index 624fe60e..059aaafd 100644
--- a/src/Optimization/hiopAlgFilterIPM.hpp
+++ b/src/Optimization/hiopAlgFilterIPM.hpp
@@ -183,6 +183,20 @@ class hiopAlgFilterIPMBase
                                     double& logoverall,
                                     double& cons_violation);

+  virtual bool evalNlpAndLogErrors2(const hiopIterate& it,
+                                    const hiopResidual& resid,
+                                    const double& mu,
+                                    double& nlpoptim,
+                                    double& nlpfeas,
+                                    double& nlpcomplem,
+                                    double& nlpoverall,
+                                    double& logoptim,
+                                    double& logfeas,
+                                    double& logcomplem,
+                                    double& logoverall,
+                                    double& cons_violation);
+
+
   virtual double thetaLogBarrier(const hiopIterate& it, const hiopResidual& resid, const double& mu);

   /**
From 46c055f000923f321a8bdf90e8bc826dd53523f2 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Thu, 6 Mar 2025 12:45:39 -0800
Subject: [PATCH 06/27] added additional functionality for residual norm
 (weighted)

---
 src/Optimization/CMakeLists.txt         |   4 +-
 src/Optimization/InnerProduct.cpp       | 149 ++++++++++++++++++++++++
 src/Optimization/InnerProduct.hpp       |  61 +++-------
 src/Optimization/hiopNlpFormulation.cpp |  22 +++-
 src/Optimization/hiopNlpFormulation.hpp |   7 +-
 src/Optimization/hiopResidual.cpp       |  23 +++-
 6 files changed, 213 insertions(+), 53 deletions(-)
 create mode
100644 src/Optimization/InnerProduct.cpp diff --git a/src/Optimization/CMakeLists.txt b/src/Optimization/CMakeLists.txt index 30e35d54..160ccfaf 100644 --- a/src/Optimization/CMakeLists.txt +++ b/src/Optimization/CMakeLists.txt @@ -8,7 +8,8 @@ set(hiopOptimization_SRC hiopKKTLinSys.cpp KktLinSysLowRank.cpp hiopKKTLinSysMDS.cpp - HessianDiagPlusRowRank.cpp + HessianDiagPlusRowRank.cpp + InnerProduct.cpp hiopDualsUpdater.cpp hiopNlpTransforms.cpp hiopPDPerturbation.cpp @@ -29,6 +30,7 @@ set(hiopOptimization_INTERFACE_HEADERS hiopFactAcceptor.hpp hiopFilter.hpp HessianDiagPlusRowRank.hpp + InnerProduct.hpp hiopIterate.hpp hiopKKTLinSys.hpp hiopKKTLinSysDense.hpp diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp new file mode 100644 index 00000000..71441e29 --- /dev/null +++ b/src/Optimization/InnerProduct.cpp @@ -0,0 +1,149 @@ +// Copyright (c) 2025, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory (LLNL). +// LLNL-CODE-742473. All rights reserved. +// +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// Please also read "Additional BSD Notice" below. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// i. Redistributions of source code must retain the above copyright notice, this list +// of conditions and the disclaimer below. +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// other materials provided with the distribution. +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Additional BSD Notice +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// Contract No. DE-AC52-07NA27344 with the DOE. +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any +// information, apparatus, product, or process disclosed, or represents that its use would +// not infringe privately-owned rights. +// 3. 
Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.
+
+/**
+ * @file InnerProduct.cpp
+ *
+ * @author Cosmin G. Petra, LLNL
+ *
+ */
+
+
+#include "InnerProduct.hpp"
+#include "hiopNlpFormulation.hpp"
+
+namespace hiop
+{
+
+InnerProduct::InnerProduct(hiopNlpFormulation* nlp)
+  : nlp_(nlp)
+{
+  if(nlp->useWeightedInnerProd()) {
+    vec_n_ = nlp_->alloc_primal_vec();
+    vec_n2_ = nlp_->alloc_primal_vec();
+  } else {
+    vec_n_ = nullptr;
+    vec_n2_ = nullptr;
+  }
+}
+
+InnerProduct::~InnerProduct()
+{
+  delete vec_n2_;
+  delete vec_n_;
+}
+
+bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    return nlp_->eval_M(x, y);
+  } else {
+    y.copyFrom(x);
+    return true;
+  }
+}
+
+// Computes ||x||_M
+double InnerProduct::norm_M(const hiopVector& x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    nlp_->eval_M(x, *vec_n_);
+    auto dp = x.dotProductWith(*vec_n_);
+    return ::std::sqrt(dp);
+  } else {
+    return x.twonorm();
+  }
+}
+ // Computes H primal norm
+ double InnerProduct::norm_H_primal(const hiopVector& x)
+ {
+   if(nlp_->useWeightedInnerProd()) {
+     nlp_->eval_H(x, *vec_n_);
+     auto dp = x.dotProductWith(*vec_n_);
+     return ::std::sqrt(dp);
+   } else {
+     return x.twonorm();
+   }
+ }
+// Computes H dual norm
+double InnerProduct::norm_H_dual(const hiopVector& x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    nlp_->eval_H_inv(x, *vec_n_);
+    auto dp = x.dotProductWith(*vec_n_);
+    return ::std::sqrt(dp);
+  } else {
+    return x.twonorm();
+  }
+}
+
+double InnerProduct::norm_stationarity(const hiopVector& x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    nlp_->eval_H_inv(x, *vec_n_);
+    auto dp = x.dotProductWith(*vec_n_);
+    return ::std::sqrt(dp);
+  } else {
+    return x.infnorm();
+  }
+}
+
+// Compute norm one weighted by M, i.e., 1^T*M*|x|
+double InnerProduct::norm_M_one(const hiopVector&x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    vec_n_->copyFrom(x);
+    vec_n_->component_abs();
+    nlp_->eval_M(*vec_n_, *vec_n2_);
+    vec_n_->setToConstant(1.);
+    return vec_n_->dotProductWith(*vec_n2_);
+  } else {
+    return x.onenorm();
+  }
+}
+
+} // end namespace
diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp
index 540018e6..93be0d35 100644
--- a/src/Optimization/InnerProduct.hpp
+++ b/src/Optimization/InnerProduct.hpp
@@ -49,7 +49,6 @@
  * @file InnerProduct.hpp
  *
  * @author Cosmin G. Petra, LLNL
- * @author Nai-Yuan Chiang, LLNL
  *
  */

@@ -83,61 +82,35 @@ class hiopNlpFormulation;
 class InnerProduct
 {
 public:
-  InnerProduct(hiopNlpFormulation* nlp)
-    : nlp_(nlp)
-  {
-    if(nlp->useWeightedInnerProd()) {
-      vec_n_ = nlp_->alloc_primal_vec();
-    } else {
-      vec_n_ = nullptr;
-    }
-  }
+  InnerProduct(hiopNlpFormulation* nlp);

-  virtual ~InnerProduct()
-  {
-    delete vec_n_;
-  }
+  virtual ~InnerProduct();

+  // Compute y=M*x
+  bool apply_M(const hiopVector& x, hiopVector& y);
+
   // Computes ||x||_M
-  double norm_M(const hiopVector& x)
-  {
-    if(nlp_->useWeightedInnerProd()) {
-      nlp_->eval_M(x, *vec_n_);
-      auto dp = x.dotProductWith(*vec_n_);
-      return ::std::sqrt(dp);
-    } else {
-      return x.twonorm();
-    }
-  }
+  double norm_M(const hiopVector& x);
+
   // Computes H primal norm
-  double norm_H_primal(const hiopVector& x)
-  {
-    if(nlp_->useWeightedInnerProd()) {
-      nlp_->eval_H(x, *vec_n_);
-      auto dp = x.dotProductWith(*vec_n_);
-      return ::std::sqrt(dp);
-    } else {
-      return x.twonorm();
-    }
-  }
+  double norm_H_primal(const hiopVector& x);
+
   // Computes H dual norm
-  double norm_H_dual(const hiopVector& x)
-  {
-    if(nlp_->useWeightedInnerProd()) {
-      nlp_->eval_H_inv(x, *vec_n_);
-      auto dp = x.dotProductWith(*vec_n_);
-      return ::std::sqrt(dp);
-    } else {
-      return x.twonorm();
-    }
-  }
+  double norm_H_dual(const hiopVector& x);

+  // Computes norm of stationarity residual, using inf-norm for Euclidean spaces, H-inverse norm for Hilbert spaces
+  double norm_stationarity(const hiopVector& x);
+
+  // Compute norm one weighted by M, i.e., 1^T*M*|x|
+  double norm_M_one(const hiopVector&x);
 private:
   // Pointer to "client" NLP
   hiopNlpFormulation* nlp_;

   // Working vector in the size n of the variables, allocated only when for the weighted case
   hiopVector* vec_n_;
+  // Working vector in the size n of the variables, allocated only when for the weighted case
+  hiopVector* vec_n2_;
 };

 } //end namespace
diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp
index 89b57d5d..2578c831 100644
--- a/src/Optimization/hiopNlpFormulation.cpp
+++ b/src/Optimization/hiopNlpFormulation.cpp
@@ -80,7 +80,6 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char
   nlp_transformations_(this),
   interface_base(interface_)
 {
-// inner_prod_ = new InnerProduct(this);
   strFixedVars_ = "";  // uninitialized
   dFixedVarsTol_ = -1.;  // uninitialized
   bool bret;
@@ -810,6 +809,27 @@ bool hiopNlpFormulation::eval_H(const hiopVector& x, hiopVector& y)
   return bret;
 }

+bool hiopNlpFormulation::eval_H_inv(const hiopVector& x, hiopVector& y)
+{
+  //x is in the reduced space used by HiOp
+  //x_full is x in the full/untransformed/user space
+  hiopVector* x_full = nlp_transformations_.apply_inv_to_x(const_cast<hiopVector&>(x), true/*new_x*/);
+
+  // TODO: see notes from eval_M
+  hiopVector* y_full = &y;
+  assert(x_full->get_size() == y.get_size() &&
+         "weighted inner products not supported when fixed variables are removed");
+
+  runStats.tm_eval_Hinv_apply.start();
+  bool bret = interface_base.applyHinv(nlp_transformations_.n_pre(),
+                                       x_full->local_data_const(),
+                                       y_full->local_data());
+  runStats.tm_eval_Hinv_apply.stop();
+  runStats.n_eval_Hinv_apply++;
+
+  return bret;
+}
+

 bool hiopNlpFormulation::get_starting_point(hiopVector& x0_for_hiop,
                                             bool& duals_avail,
diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp
index b75f54c4..9a511d5a 100644
--- a/src/Optimization/hiopNlpFormulation.hpp
+++ b/src/Optimization/hiopNlpFormulation.hpp
@@ -66,7 +66,7 @@
#endif #include "hiopNlpTransforms.hpp" - +#include "InnerProduct.hpp" #include "hiopRunStats.hpp" #include "hiopLogger.hpp" #include "hiopOptions.hpp" @@ -80,7 +80,6 @@ namespace hiop // some forward decls class hiopDualsLsqUpdate; -class InnerProduct; /** Class for a general NlpFormulation with general constraints and bounds on the variables. * This class also acts as a factory for linear algebra objects (derivative @@ -130,6 +129,10 @@ class hiopNlpFormulation /* Wrapper over user-defined inverse of H apply that also applies the NLP transformations. */ bool eval_H_inv(const hiopVector& x, hiopVector& y); + InnerProduct* inner_prod() + { + return inner_prod_; + } protected: // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) = 0; diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 3f44bfe8..3b4ab1ad 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -177,19 +177,32 @@ int hiopResidual::update(const hiopIterate& it, assert(it.sxu->matchesPattern(nlp->get_ixu())); #endif // rx = -grad_f - J_c^t*x - J_d^t*x+zl-zu - linear damping term in x - rx->copyFrom(grad); + //rx->copyFrom(grad); + //jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); + //jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); + //rx->axpy(-1.0, *it.zl); + //rx->axpy(1.0, *it.zu); + + //using rxl as auxiliary variable to compute -M*zl+M*zu + rxl->copyFrom(*it.zu); + rxl->axpy(-1.0, *it.zl); + nlp->inner_prod()->apply_M(*rxl, *rx); + //continue adding to rx jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); - rx->axpy(-1.0, *it.zl); - rx->axpy(1.0, *it.zu); - buf = rx->infnorm_local(); + rx->axpy(1.0, grad); + + //buf = rx->infnorm_local(); + buf = nlp->inner_prod()->norm_stationarity(*rx); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); - nrmOne_nlp_optim += rx->onenorm(); + //nrmOne_nlp_optim += rx->onenorm(); + nrmOne_nlp_optim += nlp->inner_prod()->norm_M_one(*rx); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rx=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_x(1.0, *rx); rx->negate(); nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); nrmOne_bar_optim += rx->onenorm(); + //~ done with rx // rd rd->copyFrom(*it.yd); From 1caeb3304fc1764c37eea59cab5f568b9b0c3678 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 7 Mar 2025 09:26:31 -0800 Subject: [PATCH 07/27] IPM alg nlp errors --- src/Optimization/InnerProduct.cpp | 56 ++++++++++++++++------ src/Optimization/InnerProduct.hpp | 24 ++++++---- src/Optimization/hiopAlgFilterIPM.cpp | 55 ++++++++++++--------- src/Optimization/hiopIterate.cpp | 63 ++++++++++++++++++------- src/Optimization/hiopIterate.hpp | 5 +- src/Optimization/hiopNlpFormulation.hpp | 3 +- src/Optimization/hiopResidual.cpp | 39 +++++++++++---- 7 files changed, 174 insertions(+), 71 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index 71441e29..c9409981 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -77,7 +77,7 @@ InnerProduct::~InnerProduct() delete vec_n_; } -bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) +bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) const { if(nlp_->useWeightedInnerProd()) { return nlp_->eval_M(x, y); @@ -88,7 +88,7 @@ bool InnerProduct::apply_M(const hiopVector& x, hiopVector& 
y) } // Computes ||x||_M -double InnerProduct::norm_M(const hiopVector& x) +double InnerProduct::norm_M(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { nlp_->eval_M(x, *vec_n_); @@ -98,19 +98,19 @@ double InnerProduct::norm_M(const hiopVector& x) return x.twonorm(); } } - // Computes H primal norm - double InnerProduct::norm_H_primal(const hiopVector& x) - { - if(nlp_->useWeightedInnerProd()) { - nlp_->eval_H(x, *vec_n_); - auto dp = x.dotProductWith(*vec_n_); +// Computes H primal norm +double InnerProduct::norm_H_primal(const hiopVector& x) const +{ + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_H(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); return ::std::sqrt(dp); - } else { - return x.twonorm(); - } + } else { + return x.twonorm(); } +} // Computes H dual norm -double InnerProduct::norm_H_dual(const hiopVector& x) +double InnerProduct::norm_H_dual(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { nlp_->eval_H_inv(x, *vec_n_); @@ -121,7 +121,7 @@ double InnerProduct::norm_H_dual(const hiopVector& x) } } -double InnerProduct::norm_stationarity(const hiopVector& x) +double InnerProduct::norm_stationarity(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { nlp_->eval_H_inv(x, *vec_n_); @@ -133,9 +133,10 @@ double InnerProduct::norm_stationarity(const hiopVector& x) } // Compute norm one weighted by M, i.e., 1^T*M*|x| -double InnerProduct::norm_M_one(const hiopVector&x) +double InnerProduct::norm_M_one(const hiopVector&x) const { if(nlp_->useWeightedInnerProd()) { + //opt! pre-compute M*1 vec_n_->copyFrom(x); vec_n_->component_abs(); nlp_->eval_M(*vec_n_, *vec_n2_); @@ -146,4 +147,31 @@ double InnerProduct::norm_M_one(const hiopVector&x) } } +double InnerProduct::norm_complementarity(const hiopVector& x) const +{ + if(nlp_->useWeightedInnerProd()) { + //opt! pre-compute M*1 + vec_n_->copyFrom(x); + vec_n_->component_abs(); + nlp_->eval_M(*vec_n_, *vec_n2_); + vec_n_->setToConstant(1.); + return vec_n_->dotProductWith(*vec_n2_); + } else { + return x.infnorm(); + } +} + +// Computes the "volume" of the space, 1^T M*1 +double InnerProduct::volume() const +{ + if(nlp_->useWeightedInnerProd()) { + //opt! 
pre-compute M*1 + vec_n_->setToConstant(1.); + nlp_->eval_M(*vec_n_, *vec_n2_); + return vec_n2_->dotProductWith(*vec_n_); + } else { + return nlp_->n_complem(); + } +} + } // end namespace diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 93be0d35..3c185430 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -87,30 +87,36 @@ class InnerProduct virtual ~InnerProduct(); // Compute y=M*x - bool apply_M(const hiopVector& x, hiopVector& y); + bool apply_M(const hiopVector& x, hiopVector& y) const; // Computes ||x||_M - double norm_M(const hiopVector& x); + double norm_M(const hiopVector& x) const; // Computes H primal norm - double norm_H_primal(const hiopVector& x); + double norm_H_primal(const hiopVector& x) const; // Computes H dual norm - double norm_H_dual(const hiopVector& x); + double norm_H_dual(const hiopVector& x) const; // Computes norm of stationarity residual, using inf-norm for Euclidean spaces, H-inverse norm for Hilbert spaces - double norm_stationarity(const hiopVector& x); + double norm_stationarity(const hiopVector& x) const; - // Compute norm one weighted by M, i.e., 1^T*M*|x| - double norm_M_one(const hiopVector&x); + // Computes norm of complementarity, using inf-norm for Euclidean spaces, 1M norm for Hilbert spaces + double norm_complementarity(const hiopVector& x) const; + + // Computes 1M norm, i.e., ||x|| = 1^T*M*|x| + double norm_M_one(const hiopVector&x) const; + + // Computes the "volume" of the space, norm of 1 function + double volume() const; private: // Pointer to "client" NLP hiopNlpFormulation* nlp_; // Working vector in the size n of the variables, allocated only when for the weighted case - hiopVector* vec_n_; + mutable hiopVector* vec_n_; // Working vector in the size n of the variables, allocated only when for the weighted case - hiopVector* vec_n2_; + mutable hiopVector* vec_n2_; }; } //end namespace diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index 86088236..407326d4 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -656,10 +656,15 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, // the one norms // double nrmDualBou=it.normOneOfBoundDuals(); // double nrmDualEqu=it.normOneOfEqualityDuals(); - double nrmDualBou, nrmDualEqu; - it.normOneOfDuals(nrmDualEqu, nrmDualBou); + double sc; + double sd; + double nrmDualBou; + double nrmDualEqu; + if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { + return false; + } - nlp->log->printf(hovScalars, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); + nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); if(nrmDualBou > 1e+10) { nlp->log->printf(hovWarning, "Unusually large bound dual variables (norm1=%g) occured, " @@ -684,8 +689,11 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, } // scaling factors - double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; + //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + + sd = fmax(p_smax, sd) / p_smax; + sc = n == 0 ? 
0 : fmax(p_smax, sc) / p_smax; sd = fmin(sd, 1e+8); sc = fmin(sc, 1e+8); @@ -696,7 +704,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, // finally, the scaled nlp error nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); - nlp->log->printf(hovScalars, + nlp->log->printf(hovWarning, "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", nlpoverall, nlpoptim, @@ -731,13 +739,15 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, nlp->runStats.tmSolverInternal.start(); size_type n = nlp->n_complem(), m = nlp->m(); - // the one norms - // double nrmDualBou=it.normOneOfBoundDuals(); - // double nrmDualEqu=it.normOneOfEqualityDuals(); - double nrmDualBou, nrmDualEqu; - it.normOneOfDuals(nrmDualEqu, nrmDualBou); + double sc; + double sd; + double nrmDualBou; + double nrmDualEqu; + if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { + return false; + } - nlp->log->printf(hovScalars, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); + nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); if(nrmDualBou > 1e+10) { nlp->log->printf(hovWarning, "Unusually large bound dual variables (norm1=%g) occured, " @@ -762,18 +772,21 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, } // scaling factors - //sd = max { p_smax, ||zl||_H + ||zu||_H + (||vl||_1 + ||vu||_1)/m } /p_smax - //sc = max { p_smax, ||zl||_H + ||zu||_H } /p_smax + //sd = max { p_smax, ||zl||_M + ||zu||_M + (||vl||_1 + ||vu||_1)/m } /p_smax + //sc = max { p_smax, ||zl||_M + ||zu||_M } /p_smax //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv // ||yd+vl-vu||_inf //nlpfeas = ||crhs- c||_inf // ||drs- d||_inf - // ||x-sxl-xl||_H - // ||x-sxu+xu||_H + // ||x-sxl-xl||_inf + // ||x-sxu+xu||_inf // ||d-sdl-dl|| // - double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; + //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + + sd = fmax(p_smax, sd) / p_smax; + sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; sd = fmin(sd, 1e+8); sc = fmin(sc, 1e+8); @@ -784,7 +797,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, // finally, the scaled nlp error nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); - nlp->log->printf(hovScalars, + nlp->log->printf(hovWarning, "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", nlpoverall, nlpoptim, @@ -1159,7 +1172,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() solver_status_ = NlpSolve_Pending; while(true) { - bret = evalNlpAndLogErrors(*it_curr, + bret = evalNlpAndLogErrors2(*it_curr, *resid, _mu, _err_nlp_optim, @@ -1252,7 +1265,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() //! 
should perform only a partial update since NLP didn't change resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); - bret = evalNlpAndLogErrors(*it_curr, + bret = evalNlpAndLogErrors2(*it_curr, *resid, _mu, _err_nlp_optim, diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index 81c9d688..392d38e4 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -236,26 +236,55 @@ double hiopIterate::normOneOfEqualityDuals() const return nrm1; } -void hiopIterate::normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const +// void hiopIterate::normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const +// { +// #ifdef HIOP_DEEPCHECKS +// assert(zl->matchesPattern(nlp->get_ixl())); +// assert(zu->matchesPattern(nlp->get_ixu())); +// assert(vl->matchesPattern(nlp->get_idl())); +// assert(vu->matchesPattern(nlp->get_idu())); +// #endif +// // work locally with all the vectors. This will result in only one MPI_Allreduce call +// nrm1Bnd = zl->onenorm_local() + zu->onenorm_local(); +// #ifdef HIOP_USE_MPI +// double nrm1_global; +// int ierr = MPI_Allreduce(&nrm1Bnd, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrm1Bnd = nrm1_global; +// #endif +// nrm1Bnd += vl->onenorm_local() + vu->onenorm_local(); +// nrm1Eq = yc->onenorm_local() + yd->onenorm_local(); + +// nlp->log->printf(hovWarning, +// "sc=--- sd=--- volume_n=--- nrm zl=%12.5e zu=%12.5e vl=%12.5e vu=%12.5e yc=%12.5e yd=%12.5e\n", +// zl->onenorm(), zu->onenorm(), vl->onenorm(), vu->onenorm(), yc->onenorm(), yd->onenorm()); + +// } + +/// @brief Computes scaling factors sc and sd; also returns the "norms" of duals +bool hiopIterate::compute_sc_sd(double& sc, double& sd, double& nrmDualsEq, double& nrmDualsBou) const { -#ifdef HIOP_DEEPCHECKS - assert(zl->matchesPattern(nlp->get_ixl())); - assert(zu->matchesPattern(nlp->get_ixu())); - assert(vl->matchesPattern(nlp->get_idl())); - assert(vu->matchesPattern(nlp->get_idu())); -#endif - // work locally with all the vectors. 
This will result in only one MPI_Allreduce call - nrm1Bnd = zl->onenorm_local() + zu->onenorm_local(); -#ifdef HIOP_USE_MPI - double nrm1_global; - int ierr = MPI_Allreduce(&nrm1Bnd, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrm1Bnd = nrm1_global; -#endif - nrm1Bnd += vl->onenorm_local() + vu->onenorm_local(); - nrm1Eq = yc->onenorm_local() + yd->onenorm_local(); + const auto nrmzl = nlp->inner_prod()->norm_M_one(*zl); + const auto nrmzu = nlp->inner_prod()->norm_M_one(*zu); + const auto volume_n = nlp->inner_prod()->volume(); + const auto nrmvl = vl->onenorm_local(); + const auto nrmvu = vu->onenorm_local(); + nrmDualsBou = nrmzl + nrmzu + nrmvl + nrmvu; + sc = nrmDualsBou / volume_n; + + const auto volume_m = nlp->m(); + const auto nrmyc = yc->onenorm_local(); + const auto nrmyd = yd->onenorm_local(); + nrmDualsEq = nrmyc+nrmyd; + sd = (nrmDualsBou+nrmDualsEq)/(volume_n+volume_m); + + nlp->log->printf(hovWarning, + "sc=%g sd=%g volume_n=%12.5e norms zl=%12.5e zu=%12.5e vl=%12.5e vu=%12.5e yc=%12.5e yd=%12.5e\n", + sc, sd, volume_n, nrmzl, nrmzu, nrmvl, nrmvu, nrmyc, nrmyd); + return true; } + void hiopIterate::selectPattern() { sxl->selectPattern(nlp->get_ixl()); diff --git a/src/Optimization/hiopIterate.hpp b/src/Optimization/hiopIterate.hpp index a2ad86d2..a1d66383 100644 --- a/src/Optimization/hiopIterate.hpp +++ b/src/Optimization/hiopIterate.hpp @@ -142,7 +142,10 @@ class hiopIterate virtual double normOneOfBoundDuals() const; virtual double normOneOfEqualityDuals() const; /* same as above but computed in one shot to save on communication and computation */ - virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; + //virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; + + /// @brief Computes scaling factors sc and sd; also returns the "norms" of duals + bool compute_sc_sd(double& sc, double& sd, double& nrmDualsEq, double& nrmDualsBou) const; /// @brief Entries corresponding to zeros in ix are set to zero virtual void selectPattern(); diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp index 9a511d5a..8bb64761 100644 --- a/src/Optimization/hiopNlpFormulation.hpp +++ b/src/Optimization/hiopNlpFormulation.hpp @@ -129,10 +129,11 @@ class hiopNlpFormulation /* Wrapper over user-defined inverse of H apply that also applies the NLP transformations. 
*/ bool eval_H_inv(const hiopVector& x, hiopVector& y); - InnerProduct* inner_prod() + InnerProduct const* inner_prod() const { return inner_prod_; } + protected: // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) = 0; diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 3b4ab1ad..c61d8625 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -200,8 +200,10 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rx=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_x(1.0, *rx); rx->negate(); - nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); - nrmOne_bar_optim += rx->onenorm(); + //nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); + //nrmOne_bar_optim += rx->onenorm(); + nrmInf_bar_optim = fmax(nrmInf_bar_optim, nlp->inner_prod()->norm_stationarity(*rx)); + nrmOne_bar_optim += nlp->inner_prod()->norm_M_one(*rx); //~ done with rx // rd @@ -256,7 +258,7 @@ int hiopResidual::update(const hiopIterate& it, if(nlp->n_low_local() < nx_loc) { rxl->selectPattern(nlp->get_ixl()); } - buf = rxl->infnorm_local(); + buf = rxl->infnorm_local(); // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxl=%22.17e\n", buf); } @@ -307,10 +309,13 @@ int hiopResidual::update(const hiopIterate& it, if(nlp->n_low_local() < nx_loc) { rszl->selectPattern(nlp->get_ixl()); } - nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, rszl->infnorm_local()); - + // nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, rszl->infnorm_local()); + buf = nlp->inner_prod()->norm_complementarity(*rszl); + nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); + rszl->addConstant_w_patternSelect(mu, nlp->get_ixl()); - buf = rszl->infnorm_local(); + //buf = rszl->infnorm_local(); + buf = nlp->inner_prod()->norm_complementarity(*rszl); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } @@ -322,11 +327,13 @@ int hiopResidual::update(const hiopIterate& it, rszu->selectPattern(nlp->get_ixu()); } - buf = rszu->infnorm_local(); + //buf = rszu->infnorm_local(); + buf = nlp->inner_prod()->norm_complementarity(*rszu); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); rszu->addConstant_w_patternSelect(mu, nlp->get_ixu()); - buf = rszu->infnorm_local(); + //buf = rszu->infnorm_local(); + buf = nlp->inner_prod()->norm_complementarity(*rszu); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszu=%22.17e\n", buf); } @@ -364,6 +371,21 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvu=%22.17e\n", buf); } +// #ifdef HIOP_USE_MPI +// // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles +// // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double +// double aux[6] = +// {nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem, nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem}; +// double aux_g[6]; +// int ierr = MPI_Allreduce(aux, aux_g, 6, MPI_DOUBLE, MPI_MAX, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrmInf_nlp_optim = aux_g[0]; +// nrmInf_nlp_feasib = aux_g[1]; +// nrmInf_nlp_complem = 
aux_g[2]; +// nrmInf_bar_optim = aux_g[3]; +// nrmInf_bar_feasib = aux_g[4]; +// nrmInf_bar_complem = aux_g[5]; +// #endif #ifdef HIOP_USE_MPI // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double @@ -379,6 +401,7 @@ int hiopResidual::update(const hiopIterate& it, nrmInf_bar_feasib = aux_g[4]; nrmInf_bar_complem = aux_g[5]; #endif + nlp->runStats.tmSolverInternal.stop(); return true; } From e4edea2597f2ed9ef2f51938e28a0b61e427621b Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 7 Mar 2025 09:30:09 -0800 Subject: [PATCH 08/27] clean up, fixed warnings --- src/Optimization/hiopAlgFilterIPM.cpp | 7 +-- src/Optimization/hiopIterate.cpp | 76 +++++++++++++-------------- src/Optimization/hiopIterate.hpp | 4 +- 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index 407326d4..dc2e7aa6 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -652,10 +652,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, { nlp->runStats.tmSolverInternal.start(); - size_type n = nlp->n_complem(), m = nlp->m(); - // the one norms - // double nrmDualBou=it.normOneOfBoundDuals(); - // double nrmDualEqu=it.normOneOfEqualityDuals(); + size_type n = nlp->n_complem(); double sc; double sd; double nrmDualBou; @@ -738,7 +735,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, { nlp->runStats.tmSolverInternal.start(); - size_type n = nlp->n_complem(), m = nlp->m(); + size_type n = nlp->n_complem(); double sc; double sd; double nrmDualBou; diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index 392d38e4..e133ab94 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -196,45 +196,45 @@ void hiopIterate::setEqualityDualsToConstant(const double& v) yd->setToConstant(v); } -double hiopIterate::normOneOfBoundDuals() const -{ -#ifdef HIOP_DEEPCHECKS - assert(zl->matchesPattern(nlp->get_ixl())); - assert(zu->matchesPattern(nlp->get_ixu())); - assert(vl->matchesPattern(nlp->get_idl())); - assert(vu->matchesPattern(nlp->get_idu())); -#endif - // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. - double nrm1 = zl->onenorm_local() + zu->onenorm_local(); -#ifdef HIOP_USE_MPI - double nrm1_global; - int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrm1 = nrm1_global; -#endif - nrm1 += vl->onenorm_local() + vu->onenorm_local(); - return nrm1; -} +// double hiopIterate::normOneOfBoundDuals() const +// { +// #ifdef HIOP_DEEPCHECKS +// assert(zl->matchesPattern(nlp->get_ixl())); +// assert(zu->matchesPattern(nlp->get_ixu())); +// assert(vl->matchesPattern(nlp->get_idl())); +// assert(vu->matchesPattern(nlp->get_idu())); +// #endif +// // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. 
+// double nrm1 = zl->onenorm_local() + zu->onenorm_local(); +// #ifdef HIOP_USE_MPI +// double nrm1_global; +// int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrm1 = nrm1_global; +// #endif +// nrm1 += vl->onenorm_local() + vu->onenorm_local(); +// return nrm1; +// } -double hiopIterate::normOneOfEqualityDuals() const -{ -#ifdef HIOP_DEEPCHECKS - assert(zl->matchesPattern(nlp->get_ixl())); - assert(zu->matchesPattern(nlp->get_ixu())); - assert(vl->matchesPattern(nlp->get_idl())); - assert(vu->matchesPattern(nlp->get_idu())); -#endif - // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. - double nrm1 = zl->onenorm_local() + zu->onenorm_local(); -#ifdef HIOP_USE_MPI - double nrm1_global; - int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrm1 = nrm1_global; -#endif - nrm1 += vl->onenorm_local() + vu->onenorm_local() + yc->onenorm_local() + yd->onenorm_local(); - return nrm1; -} +// double hiopIterate::normOneOfEqualityDuals() const +// { +// #ifdef HIOP_DEEPCHECKS +// assert(zl->matchesPattern(nlp->get_ixl())); +// assert(zu->matchesPattern(nlp->get_ixu())); +// assert(vl->matchesPattern(nlp->get_idl())); +// assert(vu->matchesPattern(nlp->get_idu())); +// #endif +// // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. +// double nrm1 = zl->onenorm_local() + zu->onenorm_local(); +// #ifdef HIOP_USE_MPI +// double nrm1_global; +// int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrm1 = nrm1_global; +// #endif +// nrm1 += vl->onenorm_local() + vu->onenorm_local() + yc->onenorm_local() + yd->onenorm_local(); +// return nrm1; +// } // void hiopIterate::normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const // { diff --git a/src/Optimization/hiopIterate.hpp b/src/Optimization/hiopIterate.hpp index a1d66383..00ce9cd1 100644 --- a/src/Optimization/hiopIterate.hpp +++ b/src/Optimization/hiopIterate.hpp @@ -139,8 +139,8 @@ class hiopIterate hiopVector& grad_d) const; /** norms for individual parts of the iterate (on demand computation) */ - virtual double normOneOfBoundDuals() const; - virtual double normOneOfEqualityDuals() const; + //virtual double normOneOfBoundDuals() const; + //virtual double normOneOfEqualityDuals() const; /* same as above but computed in one shot to save on communication and computation */ //virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; From cab209f9f0d7abd4deae3449b47601437c29776f Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 7 Mar 2025 10:06:05 -0800 Subject: [PATCH 09/27] addressed MPI issues --- src/Drivers/Dense/NlpDenseConsEx1.hpp | 35 +++++ src/Optimization/InnerProduct.cpp | 3 + src/Optimization/hiopAlgFilterIPM.cpp | 193 ++++++++++++------------ src/Optimization/hiopAlgFilterIPM.hpp | 14 -- src/Optimization/hiopNlpFormulation.cpp | 9 +- src/Optimization/hiopResidual.cpp | 34 ++--- 6 files changed, 152 insertions(+), 136 deletions(-) diff --git a/src/Drivers/Dense/NlpDenseConsEx1.hpp b/src/Drivers/Dense/NlpDenseConsEx1.hpp index 33a3178d..8de2f80e 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.hpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.hpp @@ -257,6 +257,41 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints double alpha_du, double alpha_pr, int ls_trials); + + bool applyM(const size_type& n, const 
double* x_in, double* y_out) + { + x->copyFrom(x_in); + x->copyTo(y_out); + return true; + } + + /** + * Computes action y of the inner product weight matrix H on a vector x, namely y=H*x + */ + virtual bool applyH(const size_type& n, const double* x_in, double* y_out) + { + x->copyFrom(x_in); + x->copyTo(y_out); + return true; + } + + /** + * Computes action y of the inverse of H on a vector x, namely y=H^{-1}*x + */ + virtual bool applyHinv(const size_type& n, const double* x_in, double* y_out) + { + x->copyFrom(x_in); + x->copyTo(y_out); + return true; + } + + /** + * Enables the use of weighted inner products via @applyM, @applyH, and @applyHinv + */ + virtual bool useWeightedInnerProducts() + { + return false; + } private: int n_vars; diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index c9409981..95978da3 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -62,6 +62,8 @@ namespace hiop InnerProduct::InnerProduct(hiopNlpFormulation* nlp) : nlp_(nlp) { + printf("InnerProduct::InnerProduct begin\n"); fflush(stdout); + assert(nlp); if(nlp->useWeightedInnerProd()) { vec_n_ = nlp_->alloc_primal_vec(); vec_n2_ = nlp_->alloc_primal_vec(); @@ -69,6 +71,7 @@ InnerProduct::InnerProduct(hiopNlpFormulation* nlp) vec_n_ = nullptr; vec_n2_ = nullptr; } + printf("InnerProduct::InnerProduct end\n"); fflush(stdout); } InnerProduct::~InnerProduct() diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index dc2e7aa6..f479bb74 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -686,9 +686,8 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, } // scaling factors - //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; - + //c double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; + //c double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; sd = fmax(p_smax, sd) / p_smax; sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; @@ -720,98 +719,98 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, return true; } -bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, - const hiopResidual& resid, - const double& mu, - double& nlpoptim, - double& nlpfeas, - double& nlpcomplem, - double& nlpoverall, - double& logoptim, - double& logfeas, - double& logcomplem, - double& logoverall, - double& cons_violation) -{ - nlp->runStats.tmSolverInternal.start(); - - size_type n = nlp->n_complem(); - double sc; - double sd; - double nrmDualBou; - double nrmDualEqu; - if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { - return false; - } - - nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); - if(nrmDualBou > 1e+10) { - nlp->log->printf(hovWarning, - "Unusually large bound dual variables (norm1=%g) occured, " - "which may cause numerical instabilities if it persists. Convergence " - " issues or inacurate optimal solutions may be experienced. 
Possible causes: " - " tight bounds or bad scaling of the optimization variables.\n", - nrmDualBou); - if(nlp->options->GetString("fixed_var") == "remove") { - nlp->log->printf(hovWarning, - "For example, increase 'fixed_var_tolerance' to remove " - "additional variables.\n"); - } else if(nlp->options->GetString("fixed_var") == "relax") { - nlp->log->printf(hovWarning, - "For example, increase 'fixed_var_tolerance' to relax " - "aditional (tight) variables and/or increase 'fixed_var_perturb' " - "to decrease the tightness.\n"); - } else { - nlp->log->printf(hovWarning, - "Potential fixes: fix or relax variables with tight bounds " - "(see 'fixed_var' option) or rescale variables.\n"); - } - } - - // scaling factors - //sd = max { p_smax, ||zl||_M + ||zu||_M + (||vl||_1 + ||vu||_1)/m } /p_smax - //sc = max { p_smax, ||zl||_M + ||zu||_M } /p_smax - //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv - // ||yd+vl-vu||_inf - //nlpfeas = ||crhs- c||_inf - // ||drs- d||_inf - // ||x-sxl-xl||_inf - // ||x-sxu+xu||_inf - // ||d-sdl-dl|| - // - //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; - - sd = fmax(p_smax, sd) / p_smax; - sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; - - sd = fmin(sd, 1e+8); - sc = fmin(sc, 1e+8); - - // actual nlp errors - resid.getNlpErrors(nlpoptim, nlpfeas, nlpcomplem, cons_violation); - - // finally, the scaled nlp error - nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); - - nlp->log->printf(hovWarning, - "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", - nlpoverall, - nlpoptim, - sd, - nlpfeas, - nlpcomplem, - cons_violation, - sc); - - // actual log errors - resid.getBarrierErrors(logoptim, logfeas, logcomplem); - - // finally, the scaled barrier error - logoverall = fmax(logoptim / sd, fmax(cons_violation, logcomplem / sc)); - nlp->runStats.tmSolverInternal.stop(); - return true; -} +// bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, +// const hiopResidual& resid, +// const double& mu, +// double& nlpoptim, +// double& nlpfeas, +// double& nlpcomplem, +// double& nlpoverall, +// double& logoptim, +// double& logfeas, +// double& logcomplem, +// double& logoverall, +// double& cons_violation) +// { +// nlp->runStats.tmSolverInternal.start(); + +// size_type n = nlp->n_complem(); +// double sc; +// double sd; +// double nrmDualBou; +// double nrmDualEqu; +// if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { +// return false; +// } + +// nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); +// if(nrmDualBou > 1e+10) { +// nlp->log->printf(hovWarning, +// "Unusually large bound dual variables (norm1=%g) occured, " +// "which may cause numerical instabilities if it persists. Convergence " +// " issues or inacurate optimal solutions may be experienced. 
Possible causes: " +// " tight bounds or bad scaling of the optimization variables.\n", +// nrmDualBou); +// if(nlp->options->GetString("fixed_var") == "remove") { +// nlp->log->printf(hovWarning, +// "For example, increase 'fixed_var_tolerance' to remove " +// "additional variables.\n"); +// } else if(nlp->options->GetString("fixed_var") == "relax") { +// nlp->log->printf(hovWarning, +// "For example, increase 'fixed_var_tolerance' to relax " +// "aditional (tight) variables and/or increase 'fixed_var_perturb' " +// "to decrease the tightness.\n"); +// } else { +// nlp->log->printf(hovWarning, +// "Potential fixes: fix or relax variables with tight bounds " +// "(see 'fixed_var' option) or rescale variables.\n"); +// } +// } + +// // scaling factors +// //sd = max { p_smax, ||zl||_M + ||zu||_M + (||vl||_1 + ||vu||_1)/m } /p_smax +// //sc = max { p_smax, ||zl||_M + ||zu||_M } /p_smax +// //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv +// // ||yd+vl-vu||_inf +// //nlpfeas = ||crhs- c||_inf +// // ||drs- d||_inf +// // ||x-sxl-xl||_inf +// // ||x-sxu+xu||_inf +// // ||d-sdl-dl|| +// // +// //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; +// //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + +// sd = fmax(p_smax, sd) / p_smax; +// sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; + +// sd = fmin(sd, 1e+8); +// sc = fmin(sc, 1e+8); + +// // actual nlp errors +// resid.getNlpErrors(nlpoptim, nlpfeas, nlpcomplem, cons_violation); + +// // finally, the scaled nlp error +// nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); + +// nlp->log->printf(hovWarning, +// "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", +// nlpoverall, +// nlpoptim, +// sd, +// nlpfeas, +// nlpcomplem, +// cons_violation, +// sc); + +// // actual log errors +// resid.getBarrierErrors(logoptim, logfeas, logcomplem); + +// // finally, the scaled barrier error +// logoverall = fmax(logoptim / sd, fmax(cons_violation, logcomplem / sc)); +// nlp->runStats.tmSolverInternal.stop(); +// return true; +// } bool hiopAlgFilterIPMBase::evalNlp_funcOnly(hiopIterate& iter, double& f, hiopVector& c, hiopVector& d) { @@ -1169,7 +1168,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() solver_status_ = NlpSolve_Pending; while(true) { - bret = evalNlpAndLogErrors2(*it_curr, + bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, _err_nlp_optim, @@ -1262,7 +1261,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() //! 
should perform only a partial update since NLP didn't change resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); - bret = evalNlpAndLogErrors2(*it_curr, + bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, _err_nlp_optim, diff --git a/src/Optimization/hiopAlgFilterIPM.hpp b/src/Optimization/hiopAlgFilterIPM.hpp index 059aaafd..c7707b43 100644 --- a/src/Optimization/hiopAlgFilterIPM.hpp +++ b/src/Optimization/hiopAlgFilterIPM.hpp @@ -182,20 +182,6 @@ class hiopAlgFilterIPMBase double& logcomplem, double& logoverall, double& cons_violation); - - virtual bool evalNlpAndLogErrors2(const hiopIterate& it, - const hiopResidual& resid, - const double& mu, - double& nlpoptim, - double& nlpfeas, - double& nlpcomplem, - double& nlpoverall, - double& logoptim, - double& logfeas, - double& logcomplem, - double& logoverall, - double& cons_violation); - virtual double thetaLogBarrier(const hiopIterate& it, const hiopResidual& resid, const double& mu); diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index 2578c831..e90146f1 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -78,7 +78,8 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char prob_type_(hiopInterfaceBase::hiopNonlinear), nlp_evaluated_(false), nlp_transformations_(this), - interface_base(interface_) + interface_base(interface_), + inner_prod_(nullptr) { strFixedVars_ = ""; // uninitialized dFixedVarsTol_ = -1.; // uninitialized @@ -144,8 +145,6 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char temp_x_ = nullptr; nlp_scaling_ = nullptr; relax_bounds_ = nullptr; - - inner_prod_ = new InnerProduct(this); } hiopNlpFormulation::~hiopNlpFormulation() @@ -259,6 +258,10 @@ bool hiopNlpFormulation::finalizeInitialization() ixl_ = xu_->alloc_clone(); ixu_ = xu_->alloc_clone(); + // create NLP space (H-weighted or Euclidean) + delete inner_prod_; + inner_prod_ = new InnerProduct(this); + // // preprocess variables bounds - this is curently done on the CPU // diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index c61d8625..2cb0791d 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -210,17 +210,20 @@ int hiopResidual::update(const hiopIterate& it, rd->copyFrom(*it.yd); rd->axpy(1.0, *it.vl); rd->axpy(-1.0, *it.vu); - buf = rd->infnorm_local(); + //buf = rd->infnorm_local(); + buf = rd->infnorm(); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); nrmOne_nlp_optim += rd->onenorm(); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rd=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_d(-1.0, *rd); - nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm_local()); + //nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm_local()); + nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm()); nrmOne_bar_optim += rd->onenorm(); // ryc ryc->copyFrom(nlp->get_crhs()); ryc->axpy(-1.0, c); - buf = ryc->infnorm_local(); + //buf = ryc->infnorm_local(); + buf = ryc->infnorm(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryc->onenorm(); nrmInf_cons_violation = fmax(nrmInf_cons_violation, buf); @@ -244,7 +247,8 @@ int hiopResidual::update(const hiopIterate& it, // ryd ryd->copyFrom(*it.d); ryd->axpy(-1.0, d); - buf = ryd->infnorm_local(); + //buf = ryd->infnorm_local(); + buf = ryd->infnorm(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryd->onenorm(); 
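
A note on the infnorm_local() -> infnorm() switches in this file: the batched
MPI_Allreduce over aux[6] at the end of update() is removed by the last hunk of this
diff, so each residual norm now carries its own global reduction. A minimal sketch of
the semantics assumed of hiopVector::infnorm(), written as a hypothetical free function
over a raw array (the real method lives on the vector classes and follows the same
HIOP_USE_MPI pattern used throughout these patches):

    #include <mpi.h>
    #include <cassert>
    #include <cmath>

    // max_i |x[i]| across all MPI ranks; without MPI only the local loop remains
    double infnorm_sketch(const double* x, int n_local, MPI_Comm comm)
    {
      double nrm = 0.0;
      for(int i = 0; i < n_local; i++) {
        nrm = std::fmax(nrm, std::fabs(x[i]));   // local max
      }
      double nrm_glob;
      int ierr = MPI_Allreduce(&nrm, &nrm_glob, 1, MPI_DOUBLE, MPI_MAX, comm);
      assert(MPI_SUCCESS == ierr);
      return nrm_glob;                           // global max
    }
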
nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryd=%22.17e\n", buf); @@ -303,7 +307,7 @@ int hiopResidual::update(const hiopIterate& it, nrmOne_bar_feasib = nrmOne_nlp_feasib; // rszl = \mu e - sxl * zl - if(nlp->n_low_local() > 0) { + if(nlp->n_low_local() > 0) { //! n_low() rszl->setToZero(); rszl->axzpy(-1.0, *it.sxl, *it.zl); if(nlp->n_low_local() < nx_loc) { @@ -320,7 +324,7 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } // rszu = \mu e - sxu * zu - if(nlp->n_upp_local() > 0) { + if(nlp->n_upp_local() > 0) { //! rszu->setToZero(); rszu->axzpy(-1.0, *it.sxu, *it.zu); if(nlp->n_upp_local() < nx_loc) { @@ -366,7 +370,8 @@ int hiopResidual::update(const hiopIterate& it, // add mu rsvu->addConstant_w_patternSelect(mu, nlp->get_idu()); - buf = rsvu->infnorm_local(); + //buf = rsvu->infnorm_local(); + buf = rsvu->infnorm(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvu=%22.17e\n", buf); } @@ -386,21 +391,6 @@ int hiopResidual::update(const hiopIterate& it, // nrmInf_bar_feasib = aux_g[4]; // nrmInf_bar_complem = aux_g[5]; // #endif -#ifdef HIOP_USE_MPI - // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles - // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double - double aux[6] = - {nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem, nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem}; - double aux_g[6]; - int ierr = MPI_Allreduce(aux, aux_g, 6, MPI_DOUBLE, MPI_MAX, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrmInf_nlp_optim = aux_g[0]; - nrmInf_nlp_feasib = aux_g[1]; - nrmInf_nlp_complem = aux_g[2]; - nrmInf_bar_optim = aux_g[3]; - nrmInf_bar_feasib = aux_g[4]; - nrmInf_bar_complem = aux_g[5]; -#endif nlp->runStats.tmSolverInternal.stop(); return true; From bf79f0d93f9d5ab424b47adca5d611285c17f57b Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 14 Mar 2025 14:08:34 -0700 Subject: [PATCH 10/27] exposing mass matrix in the example --- src/Drivers/Dense/NlpDenseConsEx1.cpp | 10 +++++++++- src/Drivers/Dense/NlpDenseConsEx1.hpp | 9 ++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Drivers/Dense/NlpDenseConsEx1.cpp b/src/Drivers/Dense/NlpDenseConsEx1.cpp index ea5fcdfd..02883642 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.cpp @@ -76,7 +76,15 @@ bool Ex1Meshing1D::get_vecdistrib_info(size_type global_n, index_type* cols) for(int i = 0; i <= comm_size; i++) cols[i] = col_partition[i]; return true; } -void Ex1Meshing1D::applyM(DiscretizedFunction& f) { f.componentMult(*this->_mass); } +void Ex1Meshing1D::applyM(DiscretizedFunction& f) +{ + f.componentMult(*this->_mass); +} + +void Ex1Meshing1D::applyMinv(DiscretizedFunction& f) +{ + f.componentDiv(*this->_mass); +} // converts the local indexes to global indexes index_type Ex1Meshing1D::getGlobalIndex(index_type i_local) const diff --git a/src/Drivers/Dense/NlpDenseConsEx1.hpp b/src/Drivers/Dense/NlpDenseConsEx1.hpp index 8de2f80e..7c9be216 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.hpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.hpp @@ -68,8 +68,8 @@ class Ex1Meshing1D index_type* get_col_partition() const { return col_partition; } MPI_Comm get_comm() const { return comm; } - virtual void applyM(DiscretizedFunction& f); - + void applyM(DiscretizedFunction& f); + void applyMinv(DiscretizedFunction& f); 
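
For orientation: on this 1D mesh the mass matrix is kept diagonal, with _mass holding
the element lengths, so applyM and applyMinv above reduce to componentwise multiply and
divide (see the NlpDenseConsEx1.cpp hunk earlier in this patch). A minimal sketch of
the action on raw arrays, with hypothetical names:

    // y = M*x for a diagonal (lumped) mass matrix M = diag(mass)
    void apply_diag_mass(int n, const double* mass, const double* x, double* y)
    {
      for(int i = 0; i < n; i++) {
        y[i] = mass[i] * x[i];  // applyMinv instead does y[i] = x[i] / mass[i]
      }
    }
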
protected: hiop::hiopVector* _mass; // the length or the mass of the elements double _a, _b; // end points @@ -261,6 +261,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints bool applyM(const size_type& n, const double* x_in, double* y_out) { x->copyFrom(x_in); + _mesh->applyM(*x); x->copyTo(y_out); return true; } @@ -271,6 +272,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints virtual bool applyH(const size_type& n, const double* x_in, double* y_out) { x->copyFrom(x_in); + _mesh->applyM(*x); x->copyTo(y_out); return true; } @@ -281,6 +283,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints virtual bool applyHinv(const size_type& n, const double* x_in, double* y_out) { x->copyFrom(x_in); + _mesh->applyMinv(*x); x->copyTo(y_out); return true; } @@ -290,7 +293,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints */ virtual bool useWeightedInnerProducts() { - return false; + return false;//true; } private: From 5887c7539f68e6d560b90ecb4c12bdca194def2f Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 14 Mar 2025 14:09:36 -0700 Subject: [PATCH 11/27] added volume and lumped matrix --- src/Optimization/InnerProduct.cpp | 64 ++++++++++++++++++++++++++----- src/Optimization/InnerProduct.hpp | 12 ++++++ 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index 95978da3..1183088e 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -64,6 +64,7 @@ InnerProduct::InnerProduct(hiopNlpFormulation* nlp) { printf("InnerProduct::InnerProduct begin\n"); fflush(stdout); assert(nlp); + M_lump_ = nullptr; if(nlp->useWeightedInnerProd()) { vec_n_ = nlp_->alloc_primal_vec(); vec_n2_ = nlp_->alloc_primal_vec(); @@ -71,6 +72,7 @@ InnerProduct::InnerProduct(hiopNlpFormulation* nlp) vec_n_ = nullptr; vec_n2_ = nullptr; } + printf("InnerProduct::InnerProduct end\n"); fflush(stdout); } @@ -78,6 +80,7 @@ InnerProduct::~InnerProduct() { delete vec_n2_; delete vec_n_; + delete M_lump_; } bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) const @@ -153,12 +156,13 @@ double InnerProduct::norm_M_one(const hiopVector&x) const double InnerProduct::norm_complementarity(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { - //opt! pre-compute M*1 - vec_n_->copyFrom(x); - vec_n_->component_abs(); - nlp_->eval_M(*vec_n_, *vec_n2_); - vec_n_->setToConstant(1.); - return vec_n_->dotProductWith(*vec_n2_); + // //opt! pre-compute M*1 + // vec_n_->copyFrom(x); + // vec_n_->component_abs(); + // nlp_->eval_M(*vec_n_, *vec_n2_); + // vec_n_->setToConstant(1.); + // return vec_n_->dotProductWith(*vec_n2_); + return x.infnorm(); } else { return x.infnorm(); } @@ -168,13 +172,53 @@ double InnerProduct::norm_complementarity(const hiopVector& x) const double InnerProduct::volume() const { if(nlp_->useWeightedInnerProd()) { - //opt! 
pre-compute M*1 - vec_n_->setToConstant(1.); - nlp_->eval_M(*vec_n_, *vec_n2_); - return vec_n2_->dotProductWith(*vec_n_); + double vol_total = nlp_->m_ineq_low() + nlp_->m_ineq_upp(); + if(nlp_->n_low() > 0 || nlp_->n_upp() > 0) { + //compute ||1||_M + //vec_n_->setToConstant(1.); + const double vol_mult_bnds = M_lumped()->onenorm(); + if(nlp_->n_low() > 0) { + //For weighted Hilbert spaces we assume that if lower bounds are present, they are for all vars + vol_total += vol_mult_bnds; + } + if(nlp_->n_upp() > 0) { + //For weighted Hilbert spaces we assume that if lower bounds are present, they are for all vars + vol_total += vol_mult_bnds; + } + } + return vol_total; } else { return nlp_->n_complem(); } } +// Return vector containing the diagonals of the lumped mass matrix, possibly creating the internal object +const hiopVector* InnerProduct::M_lumped() const +{ + if(M_lump_ == nullptr) { + M_lump_ = nlp_->alloc_primal_vec(); + if(nlp_->useWeightedInnerProd()) { + vec_n_->setToConstant(1.); + apply_M(*vec_n_, *M_lump_); + } else { + M_lump_->setToConstant(1.); + } + } + return M_lump_; +} + +void InnerProduct:: +add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const +{ + if(nlp_->useWeightedInnerProd()) { + vec_n_->copyFrom(ixl); + vec_n_->axpy(-1.0, ixu); + vec_n_->componentMult(*M_lumped()); + x.axpy(ct, *vec_n_); + } else { + x.addLinearDampingTerm(ixl, ixu, 1.0, ct); + } +} + + } // end namespace diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 3c185430..13493ec6 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -109,6 +109,16 @@ class InnerProduct // Computes the "volume" of the space, norm of 1 function double volume() const; + + // Return vector containing the diagonals of the lumped mass matrix, possibly creating the internal object + const hiopVector* M_lumped() const; + + /* + * @brief add linear damping term + * Performs `this[i] = alpha*this[i] + sign[i]*ct*M_lumped[i]` where sign[i]=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. 
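+   * Per the InnerProduct.cpp hunk above, the net effect is x += ct * M_lumped .* (ixl - ixu):
+   * entries with only a lower bound receive +ct*M_lumped[i], entries with only an upper
+   * bound receive -ct*M_lumped[i], and entries with both or neither bound are unchanged.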
+ */ + void add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const; private: // Pointer to "client" NLP hiopNlpFormulation* nlp_; @@ -117,6 +127,8 @@ class InnerProduct mutable hiopVector* vec_n_; // Working vector in the size n of the variables, allocated only when for the weighted case mutable hiopVector* vec_n2_; + // Lumped mass matrix + mutable hiopVector* M_lump_; }; } //end namespace From f0de791941abe250f5cd43b2aaeee251f93d7f90 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 14 Mar 2025 14:17:18 -0700 Subject: [PATCH 12/27] adding weights through the code (linear system and resids) --- src/Optimization/KktLinSysLowRank.cpp | 3 +- src/Optimization/hiopIterate.cpp | 3 +- src/Optimization/hiopKKTLinSys.cpp | 45 +++++++++++++++++++++------ src/Optimization/hiopResidual.cpp | 8 +++-- 4 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/Optimization/KktLinSysLowRank.cpp b/src/Optimization/KktLinSysLowRank.cpp index 5bc5b49b..36291e9b 100644 --- a/src/Optimization/KktLinSysLowRank.cpp +++ b/src/Optimization/KktLinSysLowRank.cpp @@ -100,6 +100,7 @@ bool KktLinSysLowRank::update(const hiopIterate* iter, Dx_->setToZero(); Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl()); Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu()); + Dx_->componentMult(*nlp_->inner_prod()->M_lumped()); nlp_->log->write("Dx in KKT", *Dx_, hovMatrices); hess_low_rank->update_logbar_diag(*Dx_); @@ -218,7 +219,7 @@ bool KktLinSysLowRank::solveCompressed(hiopVector& rx, // some outputing nlp_->log->write("KKT Low rank: solve compressed SOL", hovIteration); nlp_->log->write(" dx: ", dx, hovIteration); - nlp_->log->write(" dyc: ", dyc, hovIteration); + nlp_->log->write(" dyc: ", dyc, hovWarning); nlp_->log->write(" dyd: ", dyd, hovIteration); delete r; #endif diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index e133ab94..aaffc04b 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -621,7 +621,8 @@ void hiopIterate::addLinearDampingTermToGrad_x(const double& mu, assert(x->get_local_size() == grad_x.get_local_size()); const double ct = kappa_d * mu * beta; - grad_x.addLinearDampingTerm(nlp->get_ixl(), nlp->get_ixu(), 1.0, ct); + //grad_x.addLinearDampingTerm(nlp->get_ixl(), nlp->get_ixu(), 1.0, ct); + nlp->inner_prod()->add_linear_damping_term(nlp->get_ixl(), nlp->get_ixu(), ct, grad_x); } void hiopIterate::addLinearDampingTermToGrad_d(const double& mu, diff --git a/src/Optimization/hiopKKTLinSys.cpp b/src/Optimization/hiopKKTLinSys.cpp index d388d1f1..80d11142 100644 --- a/src/Optimization/hiopKKTLinSys.cpp +++ b/src/Optimization/hiopKKTLinSys.cpp @@ -568,7 +568,7 @@ bool hiopKKTLinSysCompressedXYcYd::update(const hiopIterate* iter, Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl()); Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu()); nlp_->log->write("Dx in KKT", *Dx_, hovMatrices); - + assert(false); // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu Dd_->setToZero(); Dd_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); @@ -595,7 +595,7 @@ bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, /*********************************************************************** * perform the reduction to the compressed linear system - * rx_tilde = rx+Sxl^{-1}*[rszl-Zl*rxl] - Sxu^{-1}*(rszu-Zu*rxu) + * rx_tilde = rx+ M_lumped* Sxl^{-1}*[rszl-Zl*rxl] - M_lumped* Sxu^{-1}*(rszu-Zu*rxu) * ryd_tilde 
= ryd + [(Sdl^{-1}Vl+Sdu^{-1}Vu)]^{-1}* * [rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu)] * rd_tilde = rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu) @@ -603,23 +603,50 @@ bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, * yd_tilde = ryd + Dd_inv*rd_tilde */ rx_tilde_->copyFrom(*r.rx); + + // if(nlp_->n_low_local() > 0) { + // // rl:=rszl-Zl*rxl (using dir->x as working buffer) + // hiopVector& rl = *(dir->x); // temporary working buffer + // rl.copyFrom(*r.rszl); + // rl.axzpy(-1.0, *iter_->zl, *r.rxl); + // // rx_tilde = rx+Sxl^{-1}*rl + // rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); + // } + // if(nlp_->n_upp_local() > 0) { + // // ru:=rszu-Zu*rxu (using dir->x as working buffer) + // hiopVector& ru = *(dir->x); // temporary working buffer + // ru.copyFrom(*r.rszu); + // ru.axzpy(-1.0, *iter_->zu, *r.rxu); + // // rx_tilde = rx_tilde - Sxu^{-1}*ru + // rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + // } + + hiopVector& rx2 = *(dir->sxl); // temporary working buffer + rx2.setToZero(); if(nlp_->n_low_local() > 0) { - // rl:=rszl-Zl*rxl (using dir->x as working buffer) hiopVector& rl = *(dir->x); // temporary working buffer rl.copyFrom(*r.rszl); rl.axzpy(-1.0, *iter_->zl, *r.rxl); - // rx_tilde = rx+Sxl^{-1}*rl - rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); + //// rx_tilde = rx+Sxl^{-1}*rl + //rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); + rx2.axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); } + if(nlp_->n_upp_local() > 0) { - // ru:=rszu-Zu*rxu (using dir->x as working buffer) - hiopVector& ru = *(dir->x); // temporary working buffer + // ru:=rszu-Zu*rxu + hiopVector& ru = *(dir->x); ru.copyFrom(*r.rszu); ru.axzpy(-1.0, *iter_->zu, *r.rxu); - // rx_tilde = rx_tilde - Sxu^{-1}*ru - rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + //// rx_tilde = rx_tilde - Sxu^{-1}*ru + ////rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + rx2.axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + } + if(nlp_->n_low_local() > 0 || nlp_->n_upp_local() > 0) { + rx2.componentMult(*nlp_->inner_prod()->M_lumped()); + rx_tilde_->axpy(1.0, rx2); } + // for ryd_tilde: ryd_tilde_->copyFrom(*r.ryd); // 1. the diag (Sdl^{-1}Vl+Sdu^{-1}Vu)^{-1} has already computed in Dd_inv in 'update' diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 2cb0791d..4090def5 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -146,7 +146,7 @@ double hiopResidual::compute_nlp_infeasib_onenorm(const hiopIterate& it, const h rdu->axpy(-1.0, *it.d); rdu->selectPattern(nlp->get_idu()); } - + //! 
the ifs above seem to do nothing for the return value nlp->runStats.tmSolverInternal.stop(); return nrmOne_infeasib; } @@ -192,6 +192,8 @@ int hiopResidual::update(const hiopIterate& it, jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); rx->axpy(1.0, grad); + //nlp->log->write("rx w/o log terms:", *rx, hovWarning); + //buf = rx->infnorm_local(); buf = nlp->inner_prod()->norm_stationarity(*rx); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); @@ -205,6 +207,8 @@ int hiopResidual::update(const hiopIterate& it, nrmInf_bar_optim = fmax(nrmInf_bar_optim, nlp->inner_prod()->norm_stationarity(*rx)); nrmOne_bar_optim += nlp->inner_prod()->norm_M_one(*rx); + //nlp->log->write("rx with log terms:", *rx, hovWarning); + //~ done with rx // rd rd->copyFrom(*it.yd); @@ -470,7 +474,7 @@ void hiopResidual::update_soc(const hiopIterate& it, nrmInf_bar_optim = nrmInf_bar_feasib = nrmInf_bar_complem = 0; nrmOne_nlp_feasib = nrmOne_bar_feasib = 0.; nrmOne_nlp_optim = nrmOne_bar_optim = 0.; - + assert(false); size_type nx_loc = rx->get_local_size(); const double& mu = logprob.mu; double buf; From 67f98d9c197946652b6bf3a60fe27431e0c33490 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Sun, 16 Mar 2025 16:41:49 -0700 Subject: [PATCH 13/27] weighted log barrier reduce/sum --- src/LinAlg/hiopVector.hpp | 15 +++++++++++++++ src/LinAlg/hiopVectorCompoundPD.cpp | 12 ++++++++++++ src/LinAlg/hiopVectorCompoundPD.hpp | 1 + src/LinAlg/hiopVectorPar.cpp | 22 ++++++++++++++++++++++ src/LinAlg/hiopVectorPar.hpp | 1 + 5 files changed, 51 insertions(+) diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp index a2743be5..f5d2292b 100644 --- a/src/LinAlg/hiopVector.hpp +++ b/src/LinAlg/hiopVector.hpp @@ -643,6 +643,21 @@ class hiopVector */ virtual double logBarrier_local(const hiopVector& select) const = 0; + /** + * @brief Weighted sum of selected log(this[i]) + * + * @param[in] weights - weights vector + * @param[in] select - pattern selection + * + * @pre `this`, `weights`, and `select` have same partitioning. + * @pre Selected elements of `this` are > 0. + * @post `this`, `weights`, and `select` are not modified + * + * @warning This is local method only! 
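+   *
+   * (i.e., returns sum{ weights[i]*ln(this[i]) : select[i]==1 }; the MPI reduction of
+   * the per-rank partial sums is the caller's job, as with logBarrier_local above)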
+ */ + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const = 0; + + /** * @brief adds the gradient of the log barrier, namely this[i]=this[i]+alpha*1/select(x[i]) * diff --git a/src/LinAlg/hiopVectorCompoundPD.cpp b/src/LinAlg/hiopVectorCompoundPD.cpp index fda099cf..782d8b8d 100644 --- a/src/LinAlg/hiopVectorCompoundPD.cpp +++ b/src/LinAlg/hiopVectorCompoundPD.cpp @@ -712,6 +712,18 @@ double hiopVectorCompoundPD::logBarrier_local(const hiopVector& select) const return sum; } +double hiopVectorCompoundPD::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + double sum = 0.0; + const hiopVectorCompoundPD& ix = dynamic_cast(select); + const hiopVectorCompoundPD& w = dynamic_cast(weights); + assert(this->get_num_parts() == ix.get_num_parts()); + for(index_type i = 0; i < n_parts_; i++) { + sum += vectors_[i]->logBarrierWeighted_local(ix.getVector(i), w.getVector(i)); + } + return sum; +} + double hiopVectorCompoundPD::sum_local() const { double sum = 0.0; diff --git a/src/LinAlg/hiopVectorCompoundPD.hpp b/src/LinAlg/hiopVectorCompoundPD.hpp index 4b58b6e1..570a8cf1 100644 --- a/src/LinAlg/hiopVectorCompoundPD.hpp +++ b/src/LinAlg/hiopVectorCompoundPD.hpp @@ -241,6 +241,7 @@ class hiopVectorCompoundPD : public hiopVector virtual void negate(); virtual void invert(); virtual double logBarrier_local(const hiopVector& select) const; + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; virtual double sum_local() const; virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index a7ad806b..2686e094 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -875,6 +875,28 @@ double hiopVectorPar::logBarrier_local(const hiopVector& select) const return sum; } +double hiopVectorPar::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + double sum = 0.0; + double comp = 0.0; + const hiopVectorPar& ix = dynamic_cast(select); + const hiopVectorPar& w = dynamic_cast(weights); + assert(this->n_local_ == ix.n_local_); + assert(this->n_local_ == w.n_local_); + + const double* ix_vec = ix.data_; + const double* w_vec = w.data_; + for(int i = 0; i < n_local_; i++) { + if(ix_vec[i] == 1.) 
{ + double y = w_vec[i]*log(data_[i]) - comp; + double t = sum + y; + comp = (t - sum) - y; + sum = t; + } + } + return sum; +} + double hiopVectorPar::sum_local() const { double sum = 0.0; diff --git a/src/LinAlg/hiopVectorPar.hpp b/src/LinAlg/hiopVectorPar.hpp index fc7ca599..0063ea5f 100644 --- a/src/LinAlg/hiopVectorPar.hpp +++ b/src/LinAlg/hiopVectorPar.hpp @@ -227,6 +227,7 @@ class hiopVectorPar : public hiopVector virtual void negate(); virtual void invert(); virtual double logBarrier_local(const hiopVector& select) const; + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; virtual double sum_local() const; virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); From df028e9d9232b9d1d55c8e3c60074e0e1df66264 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Sun, 16 Mar 2025 17:15:48 -0700 Subject: [PATCH 14/27] log bar weighting functionality in Iterate --- src/Optimization/InnerProduct.cpp | 38 +++++++++++++++++++++++++++++++ src/Optimization/InnerProduct.hpp | 22 +++++++++++++++--- src/Optimization/hiopIterate.cpp | 20 ++++++++++------ 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index 1183088e..8c5a497d 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -220,5 +220,43 @@ add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const doub } } +double InnerProduct::log_barrier_eval_local(const hiopVector& x, const hiopVector& ix) const +{ + if(nlp_->useWeightedInnerProd()) { + return x.logBarrierWeighted_local(ix, *M_lumped()); + } else { + return x.logBarrier_local(ix); + } +} + +// Adds (to `gradx`) the gradient of the weighted log, namely gradx = gradx - mu * M_lumped * ix/s +void InnerProduct::log_barrier_grad_add(const double& mu, const hiopVector& s, const hiopVector& ix, hiopVector& gradx) const +{ + if(nlp_->useWeightedInnerProd()) { + vec_n_->copyFrom(ix); + vec_n_->componentDiv(s); + vec_n_->componentMult(*M_lumped()); + gradx.axpy(mu, *vec_n_); + } else { + gradx.addLogBarrierGrad(mu, s, ix); + } +} + +double InnerProduct::linear_damping_term_local(const hiopVector& s, + const hiopVector& ixl, + const hiopVector& ixr, + const double& mu, + const double& kappa_d) const +{ + if(nlp_->useWeightedInnerProd()) { + vec_n_->copyFrom(s); + vec_n_->componentMult(*M_lumped()); + return vec_n_->linearDampingTerm_local(ixl, ixr, mu, kappa_d); + } else { + return s.linearDampingTerm_local(ixl, ixr, mu, kappa_d); + } +} + + } // end namespace diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 13493ec6..90d739ed 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -113,12 +113,28 @@ class InnerProduct // Return vector containing the diagonals of the lumped mass matrix, possibly creating the internal object const hiopVector* M_lumped() const; - /* - * @brief add linear damping term + /** + * Compute linear damping terms required by the weighted log barrier subproblem + * + * Essentially computes kappa_d*mu* \sum { this[i] | ixl[i]==1 and ixr[i]==0 } + */ + double linear_damping_term_local(const hiopVector& s, + const hiopVector& ixl, + const hiopVector& ixr, + const double& mu, + const double& kappa_d) const; + /** + * @brief Add linear damping term * Performs `this[i] = alpha*this[i] + sign[i]*ct*M_lumped[i]` where sign[i]=1 when EXACTLY one of * ixleft[i] and ixright[i] is 1.0 and 
sign=0 otherwise. */ - void add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const; + void add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const; + + // Computes weighted (by the lumped matrix) sum of log of selected (by `ix`) entries of `x` + double log_barrier_eval_local(const hiopVector& x, const hiopVector& ix) const; + + // Adds (to `gradx`) the gradient of the weighted log, namely gradx = gradx - mu * M_lumped * ix/s + void log_barrier_grad_add(const double& mu, const hiopVector& s, const hiopVector& ix, hiopVector& gradx) const; private: // Pointer to "client" NLP hiopNlpFormulation* nlp_; diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index aaffc04b..aa1a97f5 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -569,8 +569,10 @@ bool hiopIterate::adjustDuals_primalLogHessian(const double& mu, const double& k double hiopIterate::evalLogBarrier() const { double barrier; - barrier = sxl->logBarrier_local(nlp->get_ixl()); - barrier += sxu->logBarrier_local(nlp->get_ixu()); + //barrier = sxl->logBarrier_local(nlp->get_ixl()); + //barrier += sxu->logBarrier_local(nlp->get_ixu()); + barrier = nlp->inner_prod()->log_barrier_eval_local(*sxl, nlp->get_ixl()); + barrier += nlp->inner_prod()->log_barrier_eval_local(*sxu, nlp->get_ixu()); #ifdef HIOP_USE_MPI double res; int ierr = MPI_Allreduce(&barrier, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); @@ -586,8 +588,10 @@ double hiopIterate::evalLogBarrier() const void hiopIterate::addLogBarGrad_x(const double& mu, hiopVector& gradx) const { // gradx = grad - mu / sxl = grad - mu * select/sxl - gradx.addLogBarrierGrad(-mu, *sxl, nlp->get_ixl()); - gradx.addLogBarrierGrad(mu, *sxu, nlp->get_ixu()); + //gradx.addLogBarrierGrad(-mu, *sxl, nlp->get_ixl()); + //gradx.addLogBarrierGrad(mu, *sxu, nlp->get_ixu()); + nlp->inner_prod()->log_barrier_grad_add(-mu, *sxl, nlp->get_ixl(), gradx); + nlp->inner_prod()->log_barrier_grad_add( mu, *sxu, nlp->get_ixu(), gradx); } void hiopIterate::addLogBarGrad_d(const double& mu, hiopVector& gradd) const @@ -598,9 +602,11 @@ void hiopIterate::addLogBarGrad_d(const double& mu, hiopVector& gradd) const double hiopIterate::linearDampingTerm(const double& mu, const double& kappa_d) const { - double term; - term = sxl->linearDampingTerm_local(nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); - term += sxu->linearDampingTerm_local(nlp->get_ixu(), nlp->get_ixl(), mu, kappa_d); + //double term; + //term = sxl->linearDampingTerm_local(nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); + //term += sxu->linearDampingTerm_local(nlp->get_ixu(), nlp->get_ixl(), mu, kappa_d); + double term = nlp->inner_prod()->linear_damping_term_local(*sxl, nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); + term += nlp->inner_prod()->linear_damping_term_local(*sxu, nlp->get_ixu(), nlp->get_ixl(), mu, kappa_d); #ifdef HIOP_USE_MPI double res; int ierr = MPI_Allreduce(&term, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); From a44d355d3e8c05b0ee5f75e8c15b20bc8093f6c3 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Mon, 17 Mar 2025 14:30:00 -0700 Subject: [PATCH 15/27] fixing ci runtime errors --- src/Optimization/InnerProduct.cpp | 19 +++++++++++++++---- src/Optimization/InnerProduct.hpp | 3 +++ src/Optimization/hiopResidual.cpp | 16 ++++++++-------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp 
b/src/Optimization/InnerProduct.cpp index 8c5a497d..493bb764 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -92,7 +92,15 @@ bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) const return true; } } - + +// Applies lumped mass matrix; +bool InnerProduct::apply_M_lumped(const hiopVector& x, hiopVector& y) const +{ + y.copyFrom(x); + y.componentMult(*M_lumped()); + return true; +} + // Computes ||x||_M double InnerProduct::norm_M(const hiopVector& x) const { @@ -130,9 +138,12 @@ double InnerProduct::norm_H_dual(const hiopVector& x) const double InnerProduct::norm_stationarity(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { - nlp_->eval_H_inv(x, *vec_n_); - auto dp = x.dotProductWith(*vec_n_); - return ::std::sqrt(dp); + //nlp_->eval_H_inv(x, *vec_n_); + //auto dp = x.dotProductWith(*vec_n_); + //return ::std::sqrt(dp); + vec_n_->copyFrom(x); + vec_n_->componentDiv(*M_lumped()); + return vec_n_->infnorm(); } else { return x.infnorm(); } diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 90d739ed..03cdad43 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -88,6 +88,9 @@ class InnerProduct // Compute y=M*x bool apply_M(const hiopVector& x, hiopVector& y) const; + + // Appply lumped mass matrix, y=M_lumped*x + bool apply_M_lumped(const hiopVector& x, hiopVector& y) const; // Computes ||x||_M double norm_M(const hiopVector& x) const; diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 4090def5..8ae309fb 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -186,7 +186,7 @@ int hiopResidual::update(const hiopIterate& it, //using rxl as auxiliary variable to compute -M*zl+M*zu rxl->copyFrom(*it.zu); rxl->axpy(-1.0, *it.zl); - nlp->inner_prod()->apply_M(*rxl, *rx); + nlp->inner_prod()->apply_M_lumped(*rxl, *rx); //continue adding to rx jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); @@ -258,7 +258,7 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryd=%22.17e\n", buf); // rxl=x-sxl-xl - if(nlp->n_low_local() > 0) { + if(nlp->n_low() > 0) { rxl->copyFrom(*it.x); rxl->axpy(-1.0, *it.sxl); rxl->axpy(-1.0, nlp->get_xl()); @@ -266,20 +266,20 @@ int hiopResidual::update(const hiopIterate& it, if(nlp->n_low_local() < nx_loc) { rxl->selectPattern(nlp->get_ixl()); } - buf = rxl->infnorm_local(); + buf = rxl->infnorm(); // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxl=%22.17e\n", buf); } // printf(" %10.4e (xl)", nrmInf_nlp_feasib); // rxu=-x-sxu+xu - if(nlp->n_upp_local() > 0) { + if(nlp->n_upp() > 0) { rxu->copyFrom(nlp->get_xu()); rxu->axpy(-1.0, *it.x); rxu->axpy(-1.0, *it.sxu); if(nlp->n_upp_local() < nx_loc) { rxu->selectPattern(nlp->get_ixu()); } - buf = rxu->infnorm_local(); + buf = rxu->infnorm(); // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxu=%22.17e\n", buf); } @@ -311,7 +311,7 @@ int hiopResidual::update(const hiopIterate& it, nrmOne_bar_feasib = nrmOne_nlp_feasib; // rszl = \mu e - sxl * zl - if(nlp->n_low_local() > 0) { //! n_low() + if(nlp->n_low() > 0) { //! 
n_low() rszl->setToZero(); rszl->axzpy(-1.0, *it.sxl, *it.zl); if(nlp->n_low_local() < nx_loc) { @@ -328,7 +328,7 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } // rszu = \mu e - sxu * zu - if(nlp->n_upp_local() > 0) { //! + if(nlp->n_upp() > 0) { rszu->setToZero(); rszu->axzpy(-1.0, *it.sxu, *it.zu); if(nlp->n_upp_local() < nx_loc) { @@ -474,7 +474,7 @@ void hiopResidual::update_soc(const hiopIterate& it, nrmInf_bar_optim = nrmInf_bar_feasib = nrmInf_bar_complem = 0; nrmOne_nlp_feasib = nrmOne_bar_feasib = 0.; nrmOne_nlp_optim = nrmOne_bar_optim = 0.; - assert(false); + size_type nx_loc = rx->get_local_size(); const double& mu = logprob.mu; double buf; From 82c02d6477e2c3916465ecf8d4b5a3883c38c570 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Mon, 17 Mar 2025 15:21:43 -0700 Subject: [PATCH 16/27] cuda and raja impl; also added tests --- src/LinAlg/VectorCudaKernels.cu | 29 ++++++++++++++++++ src/LinAlg/VectorCudaKernels.hpp | 2 ++ src/LinAlg/hiopVectorCuda.cpp | 10 +++++++ src/LinAlg/hiopVectorCuda.hpp | 2 ++ src/LinAlg/hiopVectorRaja.hpp | 1 + src/LinAlg/hiopVectorRajaImpl.hpp | 24 +++++++++++++++ tests/LinAlg/vectorTests.hpp | 49 +++++++++++++++++++++++++++++++ tests/testVector.cpp | 1 + 8 files changed, 118 insertions(+) diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu index b1c9a591..edff95f5 100644 --- a/src/LinAlg/VectorCudaKernels.cu +++ b/src/LinAlg/VectorCudaKernels.cu @@ -1042,6 +1042,35 @@ double log_barr_obj_kernel(int n, double* d1, const double* id) return sum; } +/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/ +double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w) +{ + thrust::device_ptr dev_v = thrust::device_pointer_cast(d1); + thrust::device_ptr id_v = thrust::device_pointer_cast(id); + thrust::device_ptr wei_v = thrust::device_pointer_cast(w); + + // wrap raw pointer with a device_ptr + thrust_log_select log_select_op; + thrust::plus plus_op; + thrust::multiplies mult_op; + + // TODO: how to avoid this temp vec? 
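+  // (one option, assuming the standard thrust API, would be a single fused
+  //  transform_reduce over a zip of (d1, id, w), e.g.
+  //    auto it = thrust::make_zip_iterator(thrust::make_tuple(dev_v, id_v, wei_v));
+  //    sum = thrust::transform_reduce(thrust::device, it, it+n, wlog_op(), 0.0, plus_op);
+  //  with a hypothetical functor wlog_op returning w*log(d) when id==1 and 0 otherwise;
+  //  that would avoid the temporary allocation/free below)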
+ thrust::device_ptr v_temp = thrust::device_malloc(n*sizeof(double)); + + // compute v=x*id + thrust::transform(thrust::device, dev_v, dev_v+n, id_v, v_temp, mult_op); + // compute v[i] = ( log(v[i]) if v[i]>0, otherwise 0 ) + thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, log_select_op); + // compute v[i] = w[i]*v[i] + thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, mult_op); + // sum up + const double sum = thrust::reduce(thrust::device, v_temp, v_temp+n, 0.0, plus_op); + + thrust::device_free(v_temp); + + return sum; +} + /** @brief compute sum(d1[i]) */ double thrust_sum_kernel(int n, double* d1) { diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 1a7e478b..06a674e2 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -187,6 +187,8 @@ void thrust_component_sqrt_kernel(int n, double* d1); void thrust_negate_kernel(int n, double* d1); /** @brief compute sum(log(d1[i])) forall i where id[i]=1*/ double log_barr_obj_kernel(int n, double* d1, const double* id); +/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/ +double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w); /** @brief compute sum(d1[i]) */ double thrust_sum_kernel(int n, double* d1); /** @brief Linear damping term */ diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index 1a81bc78..36d9e9ee 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -642,6 +642,16 @@ double hiopVectorCuda::logBarrier_local(const hiopVector& select) const return hiop::cuda::log_barr_obj_kernel(n_local_, data_, id); } +double hiopVectorCuda::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + assert(n_local_ == select.get_local_size()); + assert(n_local_ == weights.get_local_size()); + const double* id = select.local_data_const(); + const double* w = weights.local_data_const(); + + return hiop::cuda::log_barr_wei_obj_kernel(n_local_, data_, id, w); +} + /* @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x) */ void hiopVectorCuda::addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select) { diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index 5c719705..b4b750d9 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -233,6 +233,8 @@ class hiopVectorCuda : public hiopVector virtual void invert(); /// @brief compute log barrier term, that is sum{ln(x_i):i=1,..,n} virtual double logBarrier_local(const hiopVector& select) const; + /// @brief compute log barrier term, that is sum{weights[i]*ln(x_i):select[i]==1} + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; /// @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x) virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); /// @brief compute sum{(x_i):i=1,..,n} diff --git a/src/LinAlg/hiopVectorRaja.hpp b/src/LinAlg/hiopVectorRaja.hpp index 8541c0e0..1f68a735 100644 --- a/src/LinAlg/hiopVectorRaja.hpp +++ b/src/LinAlg/hiopVectorRaja.hpp @@ -224,6 +224,7 @@ class hiopVectorRaja : public hiopVector virtual void negate(); virtual void invert(); virtual double logBarrier_local(const hiopVector& select) const; + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; virtual double sum_local() const; virtual void 
addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index bfe1e67f..ca13545c 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -1388,6 +1388,30 @@ double hiopVectorRaja::logBarrier_local(const hiopVector& select) cons return sum.get(); } +template +double hiopVectorRaja::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + const hiopVectorRaja& sel = dynamic_cast&>(select); + const hiopVectorRaja& wei = dynamic_cast&>(weights); + assert(this->n_local_ == sel.n_local_); + assert(this->n_local_ == wei.n_local_);` + + double* data = data_dev_; + const double* id = sel.local_data_const(); + const double* w = wei.local_data_const(); + RAJA::ReduceSum sum(0.0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { +#ifdef HIOP_DEEPCHECKS + assert(id[i] == one || id[i] == zero); +#endif + sum += id[i]*w[i]*std::log(data[i]); + }); + + return sum.get(); +} + /** * @brief Sum all elements */ diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index 1a0b10d0..2baaacbb 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -1211,6 +1211,55 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } + /** + * @brief Test: + * sum{w[i]*ln(x[i]): pattern[i] = 1} + */ + bool vectorLogBarrierWeighted(hiop::hiopVector& x, + hiop::hiopVector& pattern, + hiop::hiopVector& weight, + const int rank) + { + const local_ordinal_type N = getLocalSize(&x); + assert(N == getLocalSize(&pattern)); + assert(N == getLocalSize(&weight)); + + // Ensure that only N-1 elements of x are + // used in the log calculation + pattern.setToConstant(one); + setLocalElement(&pattern, N - 1, zero); + + const real_type x_val = three; + x.setToConstant(x_val); + // Make sure pattern eliminates the correct element + setLocalElement(&x, N - 1, 1000 * three); + + const real_type w_val = 1./N; + weight.setToConstant(w_val); + // Make sure pattern eliminates the correct element + setLocalElement(&weight, N-1, 100); + + real_type expected = (N - 1) * std::log(x_val) / N; + real_type result = x.logBarrier_local(pattern); + + int fail = !isEqual(result, expected); + + // Make sure pattern eliminates the correct elements + pattern.setToConstant(zero); + setLocalElement(&pattern, N - 1, one); + x.setToConstant(zero); + setLocalElement(&x, N - 1, x_val); + weight.setToConstant(zero); + setLocalElement(&weight, N - 1, w_val); + + expected = std::log(x_val)*w_val; + result = x.logBarrier_local(pattern); + fail += !isEqual(result, expected); + + printMessage(fail, __func__, rank); + return reduceReturn(fail, &x); + } + /** * @brief Test: * sum{x[i]} diff --git a/tests/testVector.cpp b/tests/testVector.cpp index f0a362a7..2e264939 100644 --- a/tests/testVector.cpp +++ b/tests/testVector.cpp @@ -296,6 +296,7 @@ int runTests(const char* mem_space, MPI_Comm comm) fail += test.vectorNegate(*x, rank); fail += test.vectorInvert(*x, rank); fail += test.vectorLogBarrier(*x, *y, rank); + fail += test.vectorLogBarrierWeighted(*x, *y, *z, rank); fail += test.vector_sum_local(*x, rank); fail += test.vectorAddLogBarrierGrad(*x, *y, *z, rank); fail += test.vectorLinearDampingTerm(*x, *y, *z, rank); From 758ee139178528fa2d298bcdb1911a52d1703884 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Mon, 17 Mar 2025 16:02:53 -0700 Subject: [PATCH 17/27] fixed bug in test and syntax for 
---
 src/LinAlg/VectorCudaKernels.cu | 2 +-
 tests/LinAlg/vectorTests.hpp    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu
index edff95f5..e1ee3cd6 100644
--- a/src/LinAlg/VectorCudaKernels.cu
+++ b/src/LinAlg/VectorCudaKernels.cu
@@ -1062,7 +1062,7 @@ double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double
   // compute v[i] = ( log(v[i]) if v[i]>0, otherwise 0 )
   thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, log_select_op);
   // compute v[i] = w[i]*v[i]
-  thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, mult_op);
+  thrust::transform(thrust::device, v_temp, v_temp+n, wei_v, v_temp, mult_op);
   // sum up
   const double sum = thrust::reduce(thrust::device, v_temp, v_temp+n, 0.0, plus_op);
 
diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp
index 2baaacbb..215b3fdd 100644
--- a/tests/LinAlg/vectorTests.hpp
+++ b/tests/LinAlg/vectorTests.hpp
@@ -1240,8 +1240,8 @@
     setLocalElement(&weight, N-1, 100);
 
     real_type expected = (N - 1) * std::log(x_val) / N;
-    real_type result = x.logBarrier_local(pattern);
-
+    real_type result = x.logBarrierWeighted_local(pattern, weight);
+
     int fail = !isEqual(result, expected);
 
     // Make sure pattern eliminates the correct elements
@@ -1253,7 +1253,7 @@
     setLocalElement(&weight, N - 1, w_val);
 
     expected = std::log(x_val)*w_val;
-    result = x.logBarrier_local(pattern);
+    result = x.logBarrierWeighted_local(pattern, weight);
     fail += !isEqual(result, expected);
 
     printMessage(fail, __func__, rank);

From ecb755427ce95f672e6230432cf07b6e2d97a95e Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Mon, 17 Mar 2025 16:16:20 -0700
Subject: [PATCH 18/27] fixed typo in Raja Vec Impl

---
 src/LinAlg/hiopVectorRajaImpl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp
index ca13545c..70f0a74a 100644
--- a/src/LinAlg/hiopVectorRajaImpl.hpp
+++ b/src/LinAlg/hiopVectorRajaImpl.hpp
@@ -1394,7 +1394,7 @@ double hiopVectorRaja<MEMBACKEND, EXECPOLICYRAJA>::logBarrierWeighted_local(const hiopVector& sele
   const hiopVectorRaja& sel = dynamic_cast<const hiopVectorRaja&>(select);
   const hiopVectorRaja& wei = dynamic_cast<const hiopVectorRaja&>(weights);
   assert(this->n_local_ == sel.n_local_);
-  assert(this->n_local_ == wei.n_local_);`
+  assert(this->n_local_ == wei.n_local_);
 
   double* data = data_dev_;
   const double* id = sel.local_data_const();

From fc2b25dd0dcea1dc6f0989002fa1c6301c929f0d Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Tue, 18 Mar 2025 09:10:41 -0700
Subject: [PATCH 19/27] fixing logBarWeighted in RAJA vector

---
 src/LinAlg/hiopVectorRajaImpl.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp
index 70f0a74a..bf4387a5 100644
--- a/src/LinAlg/hiopVectorRajaImpl.hpp
+++ b/src/LinAlg/hiopVectorRajaImpl.hpp
@@ -1406,7 +1406,9 @@ double hiopVectorRaja<MEMBACKEND, EXECPOLICYRAJA>::logBarrierWeighted_local(const hiopVector& sele
 #ifdef HIOP_DEEPCHECKS
       assert(id[i] == one || id[i] == zero);
 #endif
-      sum += id[i]*w[i]*std::log(data[i]);
+      if(one == id[i]) {
+        sum += w[i]*std::log(data[i]);
+      }
     });
 
   return sum.get();

From 86c437e69315e3d051c13fb69366dc5dd9154e13 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Tue, 18 Mar 2025 09:22:39 -0700
Subject: [PATCH 20/27] strengthen test for logBarrierWeighted

---
 tests/LinAlg/vectorTests.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp
index 215b3fdd..5be67f8c 100644
--- a/tests/LinAlg/vectorTests.hpp
+++ b/tests/LinAlg/vectorTests.hpp
@@ -1247,9 +1247,9 @@
     // Make sure pattern eliminates the correct elements
     pattern.setToConstant(zero);
     setLocalElement(&pattern, N - 1, one);
-    x.setToConstant(zero);
+    x.setToConstant(three);
     setLocalElement(&x, N - 1, x_val);
-    weight.setToConstant(zero);
+    weight.setToConstant(three);
     setLocalElement(&weight, N - 1, w_val);
 
     expected = std::log(x_val)*w_val;

From 038229f94b51f9e8639170019ac5a22b25473881 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Tue, 18 Mar 2025 09:23:18 -0700
Subject: [PATCH 21/27] HIP implementation of logBarrierWeighted

---
 src/LinAlg/VectorHipKernels.cpp | 30 ++++++++++++++++++++++++++++++
 src/LinAlg/VectorHipKernels.hpp |  9 ++++++++-
 src/LinAlg/hiopVectorHip.cpp    | 11 +++++++++++
 src/LinAlg/hiopVectorHip.hpp    |  2 ++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp
index 55db4899..356b18ce 100644
--- a/src/LinAlg/VectorHipKernels.cpp
+++ b/src/LinAlg/VectorHipKernels.cpp
@@ -937,6 +937,36 @@ double log_barr_obj_kernel(int n, double* d1, const double* id)
   return sum;
 }
 
+/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/
+double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w)
+{
+  thrust::device_ptr<double> dev_v = thrust::device_pointer_cast(d1);
+  thrust::device_ptr<const double> id_v = thrust::device_pointer_cast(id);
+  thrust::device_ptr<const double> wei_v = thrust::device_pointer_cast(w);
+
+  // wrap raw pointer with a device_ptr
+  thrust_log_select log_select_op;
+  thrust::plus<double> plus_op;
+  thrust::multiplies<double> mult_op;
+
+  // TODO: how to avoid this temp vec?
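+  // One possible answer, sketched here but not tested: fuse the select/log/weight
+  // passes into a single thrust::transform_reduce over a zip iterator, which needs
+  // no temporary. The functor wei_log_select below is hypothetical (not part of
+  // this patch); everything else uses standard thrust calls:
+  //   struct wei_log_select {
+  //     __host__ __device__ double operator()(const thrust::tuple<double, double, double>& t) const
+  //     {
+  //       const double xi = thrust::get<0>(t) * thrust::get<1>(t);  // x[i]*id[i]
+  //       return xi > 0 ? thrust::get<2>(t) * log(xi) : 0.0;        // w[i]*log(x[i]) where id[i]=1
+  //     }
+  //   };
+  //   const double sum = thrust::transform_reduce(
+  //       thrust::device,
+  //       thrust::make_zip_iterator(thrust::make_tuple(dev_v, id_v, wei_v)),
+  //       thrust::make_zip_iterator(thrust::make_tuple(dev_v+n, id_v+n, wei_v+n)),
+  //       wei_log_select(), 0.0, thrust::plus<double>());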
+  thrust::device_ptr<double> v_temp = thrust::device_malloc<double>(n);
+
+  // compute v=x*id
+  thrust::transform(thrust::device, dev_v, dev_v+n, id_v, v_temp, mult_op);
+  // compute v[i] = ( log(v[i]) if v[i]>0, otherwise 0 )
+  thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, log_select_op);
+  // compute v[i] = w[i]*v[i]
+  thrust::transform(thrust::device, v_temp, v_temp+n, wei_v, v_temp, mult_op);
+  // sum up
+  const double sum = thrust::reduce(thrust::device, v_temp, v_temp+n, 0.0, plus_op);
+
+  thrust::device_free(v_temp);
+
+  return sum;
+}
+
+
 /** @brief compute sum(d1[i]) */
 double thrust_sum_kernel(int n, double* d1)
 {
diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp
index 2766d76b..10b3cb25 100644
--- a/src/LinAlg/VectorHipKernels.hpp
+++ b/src/LinAlg/VectorHipKernels.hpp
@@ -187,10 +187,17 @@ void thrust_component_sqrt_kernel(int n, double* d1);
 void thrust_negate_kernel(int n, double* d1);
 /** @brief compute sum(log(d1[i])) forall i where id[i]=1*/
 double log_barr_obj_kernel(int n, double* d1, const double* id);
+/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/
+double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w);
 /** @brief compute sum(d1[i]) */
 double thrust_sum_kernel(int n, double* d1);
 /** @brief Linear damping term */
-double linear_damping_term_kernel(int n, const double* vd, const double* ld, const double* rd, double mu, double kappa_d);
+double linear_damping_term_kernel(int n,
+                                  const double* vd,
+                                  const double* ld,
+                                  const double* rd,
+                                  double mu,
+                                  double kappa_d);
 /** @brief compute min(d1) */
 double min_local_kernel(int n, double* d1);
 /** @brief Checks if selected elements of `d1` are positive */
diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp
index b7ce752c..815002b6 100644
--- a/src/LinAlg/hiopVectorHip.cpp
+++ b/src/LinAlg/hiopVectorHip.cpp
@@ -642,6 +642,17 @@ void hiopVectorHip::negate() { hiop::hip::thrust_negate_kernel(n_local_, data_);
 /// @brief Invert (1/x) the elements of this
 void hiopVectorHip::invert() { hiop::hip::invert_kernel(n_local_, data_); }
 
+/** @brief Sum all selected weights[i]*log(this[i]) */
+double hiopVectorHip::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const
+{
+  assert(n_local_ == select.get_local_size());
+  assert(n_local_ == weights.get_local_size());
+  const double* id = select.local_data_const();
+  const double* w = weights.local_data_const();
+
+  return hiop::hip::log_barr_wei_obj_kernel(n_local_, data_, id, w);
+}
+
 /** @brief Sum all selected log(this[i]) */
 double hiopVectorHip::logBarrier_local(const hiopVector& select) const
 {
diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp
index 36922c1c..958f7d14 100644
--- a/src/LinAlg/hiopVectorHip.hpp
+++ b/src/LinAlg/hiopVectorHip.hpp
@@ -237,6 +237,8 @@ class hiopVectorHip : public hiopVector
   virtual void invert();
   /// @brief compute log barrier term, that is sum{ln(x_i):i=1,..,n}
   virtual double logBarrier_local(const hiopVector& select) const;
+  /// @brief compute weighted log barrier term, that is sum{w[i]*ln(x_i): select[i]==1}
+  virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& w) const;
   /// @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x)
   virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select);
   /// @brief compute sum{(x_i):i=1,..,n}

From 01495bf913be224b7a12ac92a4f06c723bc17155 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Wed, 19 Mar 2025 10:42:43 -0700
Subject: [PATCH 22/27] draft of weighted B0 in the LowRank quasi Newton solves

---
 src/Optimization/HessianDiagPlusRowRank.cpp | 44 +++++++++++++++------
 src/Optimization/HessianDiagPlusRowRank.hpp | 14 +++++--
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp
index 9446f6de..3a6174c6 100644
--- a/src/Optimization/HessianDiagPlusRowRank.cpp
+++ b/src/Optimization/HessianDiagPlusRowRank.cpp
@@ -160,6 +160,7 @@ HessianDiagPlusRowRank::HessianDiagPlusRowRank(hiopNlpDenseConstraints* nlp_in,
                     sigma_strategy.c_str());
 
   Dx_ = DhInv_->alloc_clone();
+  //B0_ = Dx_->alloc_clone();
 #ifdef HIOP_DEEPCHECKS
   Vmat_ = V_->alloc_clone();
 #endif
@@ -172,6 +173,7 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank()
 {
   delete DhInv_;
   delete Dx_;
+  //delete B0_;
 
   delete St_;
   delete Yt_;
@@ -238,7 +240,11 @@ void HessianDiagPlusRowRank::alloc_for_limited_mem(const size_type& mem_length)
 
 bool HessianDiagPlusRowRank::update_logbar_diag(const hiopVector& Dx)
{
-  DhInv_->setToConstant(sigma_);
+  // DhInv = (B0+Dx)^{-1}
+  //DhInv_->setToConstant(sigma_);
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
+  DhInv_->copyFrom(B0);
+  DhInv_->scale(sigma_);
   DhInv_->axpy(1.0, Dx);
   Dx_->copyFrom(Dx);
 #ifdef HIOP_DEEPCHECKS
@@ -256,8 +262,6 @@ void HessianDiagPlusRowRank::print(FILE* f, hiopOutVerbosity v, const char* msg)
   fprintf(f, "%s\n", msg);
 #ifdef HIOP_DEEPCHECKS
   nlp_->log->write("Dx", *Dx_, v);
-#else
-  fprintf(f, "Dx is not stored in this class, but it can be computed from Dx=DhInv^(1)-sigma");
 #endif
   nlp_->log->printf(v, "sigma=%22.16f;\n", sigma_);
   nlp_->log->write("DhInv", *DhInv_, v);
@@ -269,7 +273,7 @@ void HessianDiagPlusRowRank::print(FILE* f, hiopOutVerbosity v, const char* msg)
   nlp_->log->write("V", *Vmat_, v);
 #else
   fprintf(f,
-          "V matrix is available at this point (only its LAPACK factorization). Print it in "
+          "V matrix is not available at this point (only its LAPACK factorization). Print it in "
          "updateInternalBFGSRepresentation() instead, before factorizeV()\n");
 #endif
   nlp_->log->write("L", *L_, v);
@@ -383,9 +387,9 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
     }  // else of the switch
     // safe guard it
     sigma_ = fmax(fmin(sigma_safe_max_, sigma_), sigma_safe_min_);
-    nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: sigma was updated to %22.16e\n", sigma_);
+    nlp_->log->printf(hovScalars, "HessianDiagPlusRowRank: sigma was updated to %22.16e\n", sigma_);
   } else {  // sTy is too small or negative -> skip
-    nlp_->log->printf(hovLinAlgScalars,
+    nlp_->log->printf(hovScalars,
                       "HessianDiagPlusRowRank: s^T*y=%12.6e not positive enough... skipping the Hessian update\n",
                       sTy);
   }
@@ -422,7 +426,7 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
 * Namely it computes V, a symmetric 2lx2l given by
 *   V = [S'*B0*(DhInv*B0-I)*S    -L+S'*B0*DhInv*Y ]
 *       [-L'+Y'*Dhinv*B0*S       +D+Y'*Dhinv*Y    ]
- * In this function V is factorized and it will hold the factors at the end of the function
+ * Also, the method factorizes V. V will contain the factors at the end of this method.
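+ * (Here B0 denotes the initial Hessian approximation: with the weighted inner products
+ * B0 = sigma*M, where M is the lumped mass matrix from nlp_->inner_prod()->M_lumped();
+ * in the Euclidean case this reduces to the usual B0 = sigma*I.)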
 * Note that L, D, S, and Y are from the BFGS secant representation and are updated/computed in 'update'
 */
void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
@@ -457,9 +461,11 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
 
   //-- block (1,2)
   hiopMatrixDense& StB0DhInvYmL = DpYtDhInvY;  // just a rename
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
   hiopVector& B0DhInv = new_n_vec1(n);
   B0DhInv.copyFrom(*DhInv_);
-  B0DhInv.scale(sigma_);
+  //B0DhInv.scale(sigma_);
+  B0DhInv.axpy(sigma_, B0);
   mat_times_diag_times_mattrans_local(StB0DhInvYmL, *St_, B0DhInv, *Yt_);
 #ifdef HIOP_USE_MPI
   memcpy(buff1_lxlx3_ + l * l, StB0DhInvYmL.local_data(), buffsize);
@@ -470,10 +476,14 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
   V_->copyBlockFromMatrix(0, l, StB0DhInvYmL);
 #endif
 
-  //-- block (2,2)
+  //-- block (1,1)
   hiopVector& theDiag = B0DhInv;  // just a rename, also reuses values
   theDiag.addConstant(-1.0);      // at this point theDiag=DhInv*B0-I
+
+  //multiply with sigma*I
   theDiag.scale(sigma_);
+  theDiag.componentMult(B0);
+
   hiopMatrixDense& StDS = DpYtDhInvY;  // a rename
   sym_mat_times_diag_times_mattrans_local(0.0, StDS, 1.0, *St_, theDiag);
 #ifdef HIOP_USE_MPI
@@ -550,6 +560,9 @@ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x)
   hiopVector& B0DhInvx = new_n_vec1(n);
   B0DhInvx.copyFrom(x);    // it contains DhInv*res
   B0DhInvx.scale(sigma_);  // B0*(DhInv*res)
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
+  B0DhInvx.componentMult(B0);
+
   St_->timesVec(0.0, stx, 1.0, B0DhInvx);
 
   // 3. solve with V
@@ -562,6 +575,7 @@ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x)
   hiopVector& result = new_n_vec1(n);
   St_->transTimesVec(0.0, result, 1.0, spart);
   result.scale(sigma_);
+  result.componentMult(B0);
   Yt_->transTimesVec(1.0, result, 1.0, ypart);
   result.componentMult(*DhInv_);
 
@@ -613,7 +627,9 @@ void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double beta,
   auto& Y1 = new_Y1(X, *Yt_);  // both are kxl
   hiopVector& B0DhInv = new_n_vec1(n);
   B0DhInv.copyFrom(*DhInv_);
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
   B0DhInv.scale(sigma_);
+  B0DhInv.componentMult(B0);
   mat_times_diag_times_mattrans_local(S1, X, B0DhInv, *St_);
   mat_times_diag_times_mattrans_local(Y1, X, *DhInv_, *Yt_);
 
@@ -1060,7 +1076,8 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
   // we have B+=B-B*s*B*s'/(s'*B*s)+yy'/(y'*s)
   // B0 is sigma*I. There is an additional diagonal log-barrier term Dx_
-
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
+
   bool print = false;
   if(print) {
     nlp_->log->printf(hovMatrices, "---HessianDiagPlusRowRank::times_vec \n");
@@ -1106,6 +1123,7 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
       // compute ak by an inner loop
       a[k]->copyFrom(*sk);
       a[k]->scale(sigma_);
+      a[k]->componentMult(B0);
 
       for(int i = 0; i < k; i++) {
         double biTsk = b[i]->dotProductWith(*sk);
@@ -1125,8 +1143,12 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
     y.axzpy(alpha, x, *Dx_);
   }
 
-  y.axpy(alpha * sigma_, x);
+  hiopVector* aux = B0.new_copy();
+  aux->componentMult(x);
+  y.axpy(alpha * sigma_, *aux);
+  delete aux;
+
   for(int k = 0; k < l_curr_; k++) {
     double bkTx = b[k]->dotProductWith(x);
     double akTx = a[k]->dotProductWith(x);
diff --git a/src/Optimization/HessianDiagPlusRowRank.hpp b/src/Optimization/HessianDiagPlusRowRank.hpp
index 92d4ed6a..bec4d9a0 100644
--- a/src/Optimization/HessianDiagPlusRowRank.hpp
+++ b/src/Optimization/HessianDiagPlusRowRank.hpp
@@ -161,10 +161,18 @@ class HessianDiagPlusRowRank : public hiopMatrix
 private:
   // Vector for (B0+Dk)^{-1}
   hiopVector* DhInv_;
-  // Dx_ is needed in times_vec (for residual checking in solveCompressed). Can be recomputed from DhInv, but I decided to
-  // store it instead to avoid round-off errors
+
+  /** @brief Log-barrier diagonal term
+   *
+   * `Dx_` is needed in times_vec (for residual checking in solveCompressed). While it can be recomputed on the fly,
+   * for example from DhInv, I decided to store it to avoid round-off errors.
+   */
   hiopVector* Dx_;
+
+  //// The initial B0 Hessian approximation
+  //hiopVector* B0_;
+
+  // Flags recomputation of the internal inverse representation
   bool matrix_changed_;
 
   // These are matrices from the compact representation; they are updated at each iteration.
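  // (For reference, in the compact representation D = diag(s_k^T y_k) and L is strictly
  //  lower triangular with L_ij = s_i^T y_j for i > j, following Byrd, Nocedal & Schnabel.)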
@@ -178,7 +186,7 @@ class HessianDiagPlusRowRank : public hiopMatrix
   hiopMatrixDense* L_;
   /// Diagonal matrix from the compact representation
   hiopVector* D_;
-  // Matrix V from the representation of the inverse
+  // Matrix V from the representation of the inverse, or its factors
   hiopMatrixDense* V_;
 #ifdef HIOP_DEEPCHECKS
   // copy of the V matrix - needed to check the residual

From b283b07fd66df77c4d2c3955ed64f9860be5cf08 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Wed, 19 Mar 2025 13:40:28 -0700
Subject: [PATCH 23/27] fixed bug in new B0 code

---
 src/Optimization/HessianDiagPlusRowRank.cpp | 17 +++++++++--------
 src/Optimization/HessianDiagPlusRowRank.hpp |  5 +----
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp
index 3a6174c6..c0fea832 100644
--- a/src/Optimization/HessianDiagPlusRowRank.cpp
+++ b/src/Optimization/HessianDiagPlusRowRank.cpp
@@ -160,7 +160,7 @@ HessianDiagPlusRowRank::HessianDiagPlusRowRank(hiopNlpDenseConstraints* nlp_in,
                     sigma_strategy.c_str());
 
   Dx_ = DhInv_->alloc_clone();
-  //B0_ = Dx_->alloc_clone();
+
 #ifdef HIOP_DEEPCHECKS
   Vmat_ = V_->alloc_clone();
 #endif
@@ -173,7 +173,6 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank()
 {
   delete DhInv_;
   delete Dx_;
-  //delete B0_;
 
   delete St_;
   delete Yt_;
@@ -464,8 +463,8 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
   const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
   hiopVector& B0DhInv = new_n_vec1(n);
   B0DhInv.copyFrom(*DhInv_);
-  //B0DhInv.scale(sigma_);
-  B0DhInv.axpy(sigma_, B0);
+  B0DhInv.scale(sigma_);
+  B0DhInv.componentMult(B0);
   mat_times_diag_times_mattrans_local(StB0DhInvYmL, *St_, B0DhInv, *Yt_);
 #ifdef HIOP_USE_MPI
   memcpy(buff1_lxlx3_ + l * l, StB0DhInvYmL.local_data(), buffsize);
@@ -1143,11 +1142,13 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
     y.axzpy(alpha, x, *Dx_);
   }
 
-  hiopVector* aux = B0.new_copy();
-  aux->componentMult(x);
+  //y.axpy(alpha * sigma_, x);
+  assert(n_vec1_ != nullptr);
-  y.axpy(alpha * sigma_, *aux);
-  delete aux;
+  hiopVector& aux = *n_vec1_;
+  aux.copyFrom(B0);
+  aux.componentMult(x);
+  y.axpy(alpha*sigma_, aux);
 
   for(int k = 0; k < l_curr_; k++) {
     double bkTx = b[k]->dotProductWith(x);
diff --git a/src/Optimization/HessianDiagPlusRowRank.hpp b/src/Optimization/HessianDiagPlusRowRank.hpp
index bec4d9a0..887030aa 100644
--- a/src/Optimization/HessianDiagPlusRowRank.hpp
+++ b/src/Optimization/HessianDiagPlusRowRank.hpp
@@ -168,9 +168,6 @@ class HessianDiagPlusRowRank : public hiopMatrix
   * for example from DhInv, I decided to store it to avoid round-off errors.
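  * Specifically, Dx_ stores the log-barrier diagonal Zl*Sxl^{-1} + Zu*Sxu^{-1} that is
  * passed in through update_logbar_diag().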
  */
   hiopVector* Dx_;
-
-  //// The initial B0 Hessian approximation
-  //hiopVector* B0_;
 
   // Flags recomputation of the internal inverse representation
   bool matrix_changed_;
@@ -244,7 +241,7 @@ class HessianDiagPlusRowRank : public hiopMatrix
   hiopVector* l_vec1_;
   hiopVector* l_vec2_;
-  hiopVector* n_vec1_;
+  mutable hiopVector* n_vec1_;
   hiopVector* n_vec2_;
   hiopVector* twol_vec1_;
   hiopVector& new_l_vec1(int l);

From 1d54e3f0cfe40b01ea6ced0cbded38141cc9f547 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Fri, 21 Mar 2025 11:39:43 -0700
Subject: [PATCH 24/27] duals lsq computation

---
 src/Optimization/InnerProduct.hpp     |  2 +-
 src/Optimization/hiopDualsUpdater.cpp | 20 ++++++++++++--------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp
index 03cdad43..b7490777 100644
--- a/src/Optimization/InnerProduct.hpp
+++ b/src/Optimization/InnerProduct.hpp
@@ -108,7 +108,7 @@ class InnerProduct
   double norm_complementarity(const hiopVector& x) const;
 
   // Computes 1M norm, i.e., ||x|| = 1^T*M*|x|
-  double norm_M_one(const hiopVector&x) const;
+  double norm_M_one(const hiopVector& x) const;
 
   // Computes the "volume" of the space, norm of 1 function
   double volume() const;
diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index bd19c2a2..51c5a137 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -188,18 +188,18 @@ hiopDualsLsqUpdateLinsysRedDense::~hiopDualsLsqUpdateLinsysRedDense()
 
 /** Given xk, zk_l, zk_u, vk_l, and vk_u (contained in 'iter'), this method solves an LSQ problem
  * corresponding to dual infeasibility equation
- *    min_{y_c,y_d} || \nabla f(xk) + J^T_c(xk) y_c + J_d^T(xk) y_d - zk_l+zk_u ||^2
- *                  ||          - y_d - vk_l + vk_u                             ||_2,
+ *    min_{y_c,y_d} || \nabla f(xk) + J^T_c(xk) y_c + J_d^T(xk) y_d - M*(zk_l-zk_u) ||^2
+ *                  ||          - y_d - vk_l + vk_u                                 ||_2,
  * which is
- *    min_{y_c, y_d} || [ J_c^T  J_d^T ] [ y_c ]  -  [ -\nabla f(xk) + zk_l-zk_u ] ||^2
- *                   || [  0      I    ] [ y_d ]     [       - vk_l + vk_u       ] ||_2
+ *    min_{y_c, y_d} || [ J_c^T  J_d^T ] [ y_c ]  -  [ -\nabla f(xk) + M*(zk_l-zk_u) ] ||^2
+ *                   || [  0      I    ] [ y_d ]     [       - vk_l + vk_u           ] ||_2
 *
 * ******************************
 * NLPs with dense constraints
 * ******************************
 * For NLPs with dense constraints, the above LSQ problem is solved by solving the linear
 * system in y_c and y_d:
- *    [ J_c J_c^T   J_c J_d^T     ] [ y_c ]  =  [ J_c  0 ] [ -\nabla f(xk) + zk_l-zk_u ]
- *    [ J_d J_c^T   J_d J_d^T + I ] [ y_d ]     [ J_d  I ] [       - vk_l + vk_u       ]
+ *    [ J_c J_c^T   J_c J_d^T     ] [ y_c ]  =  [ J_c  0 ] [ -\nabla f(xk) + M*(zk_l-zk_u) ]
+ *    [ J_d J_c^T   J_d J_d^T + I ] [ y_d ]     [ J_d  I ] [       - vk_l + vk_u           ]
 * This linear system is small (of size m=m_E+m_I) (so it is replicated for all MPI ranks).
 *
 * The matrix of the above system is stored in the member variable M_ of this class and the
@@ -275,9 +275,13 @@ bool hiopDualsLsqUpdateLinsysRedDense::do_lsq_update(hiopIterate& iter,
   //  [ rhsd_ ]   [ J_d  I ] [ vecd ]
   // [vecx,vecd] = - [ -\nabla f(xk) + zk_l-zk_u, - vk_l + vk_u].
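+  // (with the weighted inner products, the first block becomes -\nabla f(xk) + M*(zk_l-zk_u);
+  //  the lumped mass M is applied below via componentMult)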
   hiopVector& vecx = *vec_n_;
-  vecx.copyFrom(grad_f);
+  //vecx.copyFrom(grad_f);
+  //vecx.axpy(-1.0, *iter.get_zl());
+  //vecx.axpy(1.0, *iter.get_zu());
+  vecx.copyFrom(*iter.get_zu());
   vecx.axpy(-1.0, *iter.get_zl());
-  vecx.axpy(1.0, *iter.get_zu());
+  vecx.componentMult(*nlp_->inner_prod()->M_lumped());
+  vecx.axpy(1.0, grad_f);
   hiopVector& vecd = *vec_mi_;
   vecd.copyFrom(*iter.get_vl());
   vecd.axpy(-1.0, *iter.get_vu());

From 63d8f88def6c50af5514bbd051b66fa066118861 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Fri, 21 Mar 2025 11:40:40 -0700
Subject: [PATCH 25/27] draft of Hessian approx - code works mesh independently

---
 src/Optimization/HessianDiagPlusRowRank.cpp | 23 ++++++++++++---------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp
index c0fea832..58459c11 100644
--- a/src/Optimization/HessianDiagPlusRowRank.cpp
+++ b/src/Optimization/HessianDiagPlusRowRank.cpp
@@ -305,12 +305,12 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
   hiopVector& s_new = new_n_vec1(n);
   s_new.copyFrom(*it_curr.x);
   s_new.axpy(-1., *it_prev_->x);
-  double s_infnorm = s_new.infnorm();
+  const double s_infnorm = s_new.infnorm();
   if(s_infnorm >= 100 * std::numeric_limits<double>::epsilon()) {  // norm of s not too small
     //  compute y_new = \grad J(x_curr,\lambda_curr) - \grad J(x_prev, \lambda_curr) (yes, J(x_prev, \lambda_curr))
     //    = graf_f_curr-grad_f_prev + (Jac_c_curr-Jac_c_prev)yc_curr+ (Jac_d_curr-Jac_c_prev)yd_curr -
-    //    zl_curr*s_new + zu_curr*s_new
+    //    M_lumped*zl_curr*s_new + M_lumped*zu_curr*s_new
     hiopVector& y_new = new_n_vec2(n);
     y_new.copyFrom(grad_f_curr);
     y_new.axpy(-1., *grad_f_prev_);
@@ -321,17 +321,20 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
     Jac_d_curr.transTimesVec(1.0, y_new, 1.0, *it_curr.yd);
     Jac_d_prev_->transTimesVec(1.0, y_new, -1.0, *it_curr.yd);
 
-    double sTy = s_new.dotProductWith(y_new), s_nrm2 = s_new.twonorm(), y_nrm2 = y_new.twonorm();
+    const double sTy = s_new.dotProductWith(y_new);
+    const double s_nrm2 = nlp_->inner_prod()->norm_M(s_new);//s_new.twonorm();
+    const double y_nrm2 = nlp_->inner_prod()->norm_H_dual(y_new);//y_new.twonorm();
 
-#ifdef HIOP_DEEPCHECKS
-    nlp_->log->printf(hovLinAlgScalarsVerb,
-                      "HessianDiagPlusRowRank: s^T*y=%20.14e ||s||=%20.14e ||y||=%20.14e\n",
+    nlp_->log->printf(hovWarning, //hovLinAlgScalarsVerb,
+                      "sigma HessianDiagPlusRowRank: s^T*y=%20.14e ||s||=%20.14e ||y||=%20.14e ||s||_inf=%20.14e\n",
                       sTy,
                       s_nrm2,
-                      y_nrm2);
+                      y_nrm2,
+                      s_infnorm);
+    nlp_->log->printf(hovWarning, "sigma y_new twonrm=%20.14e infnrm=%20.14e\n", y_nrm2, y_new.infnorm());
     nlp_->log->write("HessianDiagPlusRowRank s_new", s_new, hovIteration);
     nlp_->log->write("HessianDiagPlusRowRank y_new", y_new, hovIteration);
-#endif
+
     if(sTy > s_nrm2 * y_nrm2 * sqrt(std::numeric_limits<double>::epsilon())) {  // sTy far away from zero
       if(l_max_ > 0) {
@@ -357,12 +360,12 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
         l_curr_ = l_max_;
       }
     }  // end of l_max_>0
-#ifdef HIOP_DEEPCHECKS
+
     nlp_->log->printf(hovMatrices, "\nHessianDiagPlusRowRank: these are L and D from the BFGS compact representation\n");
     nlp_->log->write("L", *L_, hovMatrices);
     nlp_->log->write("D", *D_, hovMatrices);
    nlp_->log->printf(hovMatrices, "\n");
-#endif
+
     // update B0 (i.e., sigma)
     switch(sigma_update_strategy_) {
       case SIGMA_STRATEGY1:

From 3295382c9d873ad8db0f19d781f3223657ec1c91 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Fri, 21 Mar 2025 13:54:40 -0700
Subject: [PATCH 26/27] added test for Ex1 with L2

---
 src/Drivers/Dense/CMakeLists.txt            |  1 +
 src/Drivers/Dense/NlpDenseConsEx1.hpp       |  6 +++--
 src/Drivers/Dense/NlpDenseConsEx1Driver.cpp | 27 +++++++++++++++------
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/Drivers/Dense/CMakeLists.txt b/src/Drivers/Dense/CMakeLists.txt
index 76249621..24228d09 100644
--- a/src/Drivers/Dense/CMakeLists.txt
+++ b/src/Drivers/Dense/CMakeLists.txt
@@ -32,6 +32,7 @@ install(
 ##########################################################
 add_test(NAME NlpDenseCons1_5H COMMAND ${RUNCMD} "$" "500" "1.0" "-selfcheck")
 add_test(NAME NlpDenseCons1_5K COMMAND ${RUNCMD} "$" "5000" "1.0" "-selfcheck")
+add_test(NAME NlpDenseCons1_25K_InfDim COMMAND ${RUNCMD} "$" "25000" "1.0" "-selfcheck" "-use_L2")
 add_test(NAME NlpDenseCons1_50K COMMAND ${RUNCMD} "$" "50000" "1.0" "-selfcheck")
 if(HIOP_USE_MPI)
   add_test(NAME NlpDenseCons1_50K_mpi COMMAND ${MPICMD} -n 2 "$" "50000" "1.0" "-selfcheck")
diff --git a/src/Drivers/Dense/NlpDenseConsEx1.hpp b/src/Drivers/Dense/NlpDenseConsEx1.hpp
index 7c9be216..7101ec6e 100644
--- a/src/Drivers/Dense/NlpDenseConsEx1.hpp
+++ b/src/Drivers/Dense/NlpDenseConsEx1.hpp
@@ -112,9 +112,10 @@ class DiscretizedFunction : public hiop::hiopVectorPar
 class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints
 {
 public:
-  DenseConsEx1(int n_mesh_elem = 100, double mesh_ratio = 1.0)
+  DenseConsEx1(int n_mesh_elem, double mesh_ratio=1.0, bool use_weighted_vars=false)
     : n_vars(n_mesh_elem),
       comm(MPI_COMM_WORLD),
+      use_weighted_space_(use_weighted_vars),
       solver_(nullptr)
   {
     // create the members
@@ -293,12 +294,13 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints
    */
   virtual bool useWeightedInnerProducts()
   {
-    return false;//true;
+    return use_weighted_space_;
   }
 
 private:
   int n_vars;
   MPI_Comm comm;
+  bool use_weighted_space_;
   Ex1Meshing1D* _mesh;
 
   int n_local;
diff --git a/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp b/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp
index 4da45db4..cd15a783 100644
--- a/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp
+++ b/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp
@@ -21,10 +21,11 @@ static bool self_check(size_type n, double obj_value);
 #ifdef HIOP_USE_AXOM
 static bool do_load_checkpoint_test(const size_type& mesh_size, const double& ratio, const double& obj_val_expected);
 #endif
-static bool parse_arguments(int argc, char** argv, size_type& n, double& distortion_ratio, bool& self_check)
+static bool parse_arguments(int argc, char** argv, size_type& n, double& distortion_ratio, bool& wei_vars, bool& self_check)
 {
   n = 20000;
   distortion_ratio = 1.;
+  wei_vars = false;
   self_check = false;  // default options
   switch(argc) {
     case 1:
       // no arguments
       return true;
       break;
+    case 5:  // 4 arguments - -use_L2 expected
+    {
+      if(std::string(argv[4]) == "-use_L2") {
+        wei_vars = true;
+      } else {
+        return false;
+      }
+    }
     case 4:  // 3 arguments - -selfcheck expected
     {
       if(std::string(argv[3]) == "-selfcheck") {
@@ -91,15 +100,17 @@ int main(int argc, char** argv)
   }
#endif
   bool selfCheck;
+  //flags the use of Ex1 with the mass weighted variables (L2 inner product) or without (Euclidean inner product)
+  bool wei_vars;
   size_type mesh_size;
   double ratio;
   double objective = 0.;
-  if(!parse_arguments(argc, argv, mesh_size, ratio, selfCheck)) {
+  if(!parse_arguments(argc, argv, mesh_size, ratio, wei_vars, selfCheck)) {
     usage(argv[0]);
     return 1;
   }
-  DenseConsEx1 problem(mesh_size, ratio);
+  DenseConsEx1 problem(mesh_size, ratio, wei_vars);
 
   hiop::hiopNlpDenseConstraints nlp(problem);
   hiop::hiopAlgFilterIPM solver(&nlp);
@@ -110,7 +121,9 @@ int main(int argc, char** argv)
   // this is used for testing when the driver is called with -selfcheck
   if(selfCheck) {
-    if(!self_check(mesh_size, objective)) return -1;
+    if(!self_check(mesh_size, objective)) {
+      return -1;
+    }
   } else {
     if(rank == 0) {
       printf("Optimal objective: %22.14e. Solver status: %d. Number of iterations: %d\n",
@@ -142,9 +155,9 @@ int main(int argc, char** argv)
 
 static bool self_check(size_type n, double objval)
 {
-#define num_n_saved 3  // keep this is sync with n_saved and objval_saved
-  const size_type n_saved[] = {500, 5000, 50000};
-  const double objval_saved[] = {8.6156700e-2, 8.6156106e-02, 8.6161001e-02};
+#define num_n_saved 4  // keep this in sync with n_saved and objval_saved
+  const size_type n_saved[] = {500, 5000, 50000, 25000};
+  const double objval_saved[] = {8.6156700e-2, 8.6156106e-02, 8.6161001e-02, 8.6155777e-02};
 
 #define relerr 1e-6
   bool found = false;

From 57a30671187b682924c9115fc1c322df512c797a Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Sun, 23 Mar 2025 16:18:38 -0700
Subject: [PATCH 27/27] M lumped applies in residuals

---
 src/Optimization/hiopKKTLinSys.cpp | 33 +++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/Optimization/hiopKKTLinSys.cpp b/src/Optimization/hiopKKTLinSys.cpp
index 80d11142..16b61f25 100644
--- a/src/Optimization/hiopKKTLinSys.cpp
+++ b/src/Optimization/hiopKKTLinSys.cpp
@@ -100,16 +100,20 @@ double hiopKKTLinSys::errorKKT(const hiopResidual* resid, const hiopIterate* sol
   }
 
   double derr = 1e20, aux;
-  hiopVector* RX = resid->rx->new_copy();
-  // RX = rx-H*dx-J'c*dyc-J'*dyd +dzl-dzu
+  //hiopVector* RX = resid->rx->new_copy();
+  hiopVector* RX = sol->zl->new_copy();
+  RX->axpy(-1.0, *sol->zu);
+  RX->componentMult(*nlp_->inner_prod()->M_lumped());
+  RX->axpy(1.0, *resid->rx);
+
   HessianTimesVec_noLogBarrierTerm(1.0, *RX, -1.0, *sol->x);
   RX->axzpy(-1., *delta_wx_, *sol->x);
-
   Jac_c_->transTimesVec(1.0, *RX, -1.0, *sol->yc);
   Jac_d_->transTimesVec(1.0, *RX, -1.0, *sol->yd);
-  RX->axpy(1.0, *sol->zl);
-  RX->axpy(-1.0, *sol->zu);
+
   aux = RX->twonorm();
   derr = fmax(aux, derr);
   nlp_->log->printf(hovLinAlgScalars, "  --- rx=%g\n", aux);
@@ -568,7 +572,7 @@ bool hiopKKTLinSysCompressedXYcYd::update(const hiopIterate* iter,
   Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl());
   Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu());
   nlp_->log->write("Dx in KKT", *Dx_, hovMatrices);
-  assert(false);
+
   // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu
   Dd_->setToZero();
   Dd_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl());
@@ -1150,7 +1154,7 @@ bool hiopMatVecKKTFullOpr::combine_res_to_build_vec(hiopVector& y) { return true
 
 /**
 * Full KKT matrix is
- * [ H    0  Jc^T Jd^T | -I  I  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
+ * [ H    0  Jc^T Jd^T | -M  M  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
 * [ 0    0   0   -I   |  0  0 -I  I | 0  0  0  0 ] [ dd]   [ rd  ]
 * [ Jc   0   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyc] = [ ryc ]
 * [ Jd  -I   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyd]   [ ryd ]
@@ -1217,12 +1221,15 @@ bool hiopMatVecKKTFullOpr::times_vec(hiopVector& yvec, const hiopVector& xvec)
   hiopVector* yrvu_ = &(y.getVector(11));
 
   // rx = H*dx + delta_wx*I*dx + Jc'*dyc + Jd'*dyd - dzl + dzu
-  Hess->timesVec(0.0, *yrx_, +1.0, *dx_);
+  yrx_->copyFrom(*dzu_);
+  yrx_->axpy(-1.0, *dzl_);
+  yrx_->componentMult(*kkt_->nlp_->inner_prod()->M_lumped());
+  Hess->timesVec(1.0, *yrx_, +1.0, *dx_);
   yrx_->axzpy(1., *delta_wx, *dx_);
   Jac_c->transTimesVec(1.0, *yrx_, 1.0, *dyc_);
   Jac_d->transTimesVec(1.0, *yrx_, 1.0, *dyd_);
-  yrx_->axpy(-1.0, *dzl_);
-  yrx_->axpy(1.0, *dzu_);
+  //yrx_->axpy(-1.0, *dzl_);
+  //yrx_->axpy(1.0, *dzu_);
 
   // RD = delta_wd_*dd - dyd - dvl + dvu
   yrd_->setToZero();
@@ -1285,7 +1292,7 @@ bool hiopMatVecKKTFullOpr::times_vec(hiopVector& yvec, const hiopVector& xvec)
 
 /**
 * Full KKT matrix is
- * [ H    0  Jc^T Jd^T | -I  I  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
+ * [ H    0  Jc^T Jd^T | -M  M  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
 * [ 0    0   0   -I   |  0  0 -I  I | 0  0  0  0 ] [ dd]   [ rd  ]
 * [ Jc   0   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyc] = [ ryc ]
 * [ Jd  -I   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyd]   [ ryd ]
@@ -1378,12 +1385,14 @@ bool hiopMatVecKKTFullOpr::trans_times_vec(hiopVector& yvec, const hiopVector& x
 
   // RXL = -dx + Sxl*dsxl
   yrsxl_->setToZero();
-  yrsxl_->axpy(-1.0, *dx_);
+  //yrsxl_->axpy(-1.0, *dx_);
+  yrsxl_->axzpy(-1.0, *dx_, *kkt_->nlp_->inner_prod()->M_lumped());
   yrsxl_->axzpy(1.0, *iter_->get_sxl(), *dsxl_);
   yrsxl_->selectPattern(kkt_->nlp_->get_ixl());
 
   // RXU = dx + Sxu*dsxu
   yrsxu_->copyFrom(*dx_);
+  yrsxu_->componentMult(*kkt_->nlp_->inner_prod()->M_lumped());
   yrsxu_->axzpy(1.0, *iter_->get_sxu(), *dsxu_);
   yrsxu_->selectPattern(kkt_->nlp_->get_ixu());
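  // (These are the transposes of the -M and M blocks in the first row of the full KKT
  //  matrix above: with weighted inner products the bound duals couple to dx through
  //  the lumped mass matrix M rather than through the identity.)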