From 14b2184506e45f029201592d551ec2e2894511cc Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Mon, 3 Mar 2025 15:06:46 -0800
Subject: [PATCH 01/27] interface draft

---
 src/Interface/hiopInterface.hpp | 72 +++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/src/Interface/hiopInterface.hpp b/src/Interface/hiopInterface.hpp
index 904efb9d..8195d045 100644
--- a/src/Interface/hiopInterface.hpp
+++ b/src/Interface/hiopInterface.hpp
@@ -279,6 +279,78 @@ class hiopInterfaceBase
     return false;  // defaults to serial
   }

+  /**
+   * Computes action y of the mass matrix on a vector x, namely y=M*x
+   *
+   * The mass matrix is generally the weight matrix associated with L^2 finite element
+   * discretizations. This matrix is used internally to build and maintain Riesz-like
+   * representations of the dual variables (associated with bound constraints) to ensure
+   * mesh independent performance of the algorithm.
+   *
+   * @note If the variables are not coming from L^2 discretizations, this method should
+   * return false on its first call. HiOp will use internally the identity as the mass
+   * matrix in this case. Otherwise, it should return true on its first call; subsequent
+   * errors in matrix apply can be flagged with false return values.
+   *
+   * @param[in] n the (global) number of variables
+   * @param[in] x the array to which mass action is applied
+   * @param[out] y the result of apply
+   * @return true if successful, false otherwise (also see note above).
+   */
+  virtual bool applyM(const size_type& n, const double* x, double* y)
+  {
+    //the default impl. instructs HiOp to use Euclidean/l^2 variables (M=I) internally
+    return false;
+  }
+
+  /**
+   * Computes action y of the inner product weight matrix H on a vector x, namely y=H*x
+   *
+   * The weight matrix H is generally associated with L^2 or H^1 finite element
+   * discretizations and the corresponding weighted inner products <u,v>_H = u^T H v. For L^2,
+   * H is the mass matrix, while for H^1 it is the mass plus the stiffness matrix. This matrix
+   * is applied internally to obtain termination criteria and derive Hessian representations
+   * that are mesh independent and consistent with the function space nature of the problem.
+   *
+   * Additional info: C. G. Petra et al., On the implementation of a quasi-Newton
+   * interior-point method for PDE-constrained optimization using finite element
+   * discretizations, Optimiz. Meth. and Software, Vol. 38, 2023.
+   *
+   * @note If the variables are not coming from L^2 discretizations, this method should
+   * return false on its first call. HiOp will use internally H=I in this case. Otherwise,
+   * it should return true on its first call; subsequent errors in matrix apply can be
+   * flagged with false return values.
+   *
+   * @param[in] n the (global) number of variables
+   * @param[in] x the array to which the H matrix is applied
+   * @param[out] y the result of apply
+   * @return true if successful, false otherwise (also see note above)
+   */
+  virtual bool applyH(const size_type& n, const double* x, double* y)
+  {
+    //the default impl. instructs HiOp to use Euclidean/l^2 variables (H=I) internally
+    return false;
+  }
+
+  /**
+   * Computes action y of the inverse of H on a vector x, namely y=H^{-1}*x
+   *
+   * See @applyH for a discussion of H and additional notes. The inverse of H plays an
+   * important role in computing the "dual" norms and, in turn, in ensuring mesh
+   * independent behavior of the IPM solver(s). The inverse apply is also needed by the
+   * specialized solves HiOp performs when the quasi-Newton solver is used.
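+   *
+   * For illustration only, a minimal sketch of a user-side implementation, under the
+   * assumption of a diagonal (e.g., lumped) weight matrix stored by the user code in a
+   * hypothetical array h_diag_ (not part of HiOp); the inverse apply is then elementwise:
+   * \code
+   * bool applyHinv(const size_type& n, const double* x, double* y) override
+   * {
+   *   for(size_type i = 0; i < n; ++i) {
+   *     y[i] = x[i] / h_diag_[i];  // y = H^{-1}*x for a diagonal H
+   *   }
+   *   return true;
+   * }
+   * \endcode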
+   *
+   * @param[in] n the (global) number of variables
+   * @param[in] x the array to which the inverse is applied
+   * @param[out] y the result of apply
+   * @return see return of @applyH
+   */
+  virtual bool applyHinv(const size_type& n, const double* x, double* y)
+  {
+    //the default impl. instructs HiOp to use Euclidean/l^2 variables (H=I) internally
+    return false;
+  }
+
   /**
    * Method provides a primal or starting point. This point is subject to internal adjustments.
    *

From 14a243c92d34a741e1d1eb4665ea1e2a725070b7 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Wed, 5 Mar 2025 10:55:57 -0800
Subject: [PATCH 02/27] wrapper for WeiInProd in NlpFormulation

---
 src/Interface/hiopInterface.hpp         | 32 ++++++++++-----
 src/Optimization/hiopNlpFormulation.cpp | 54 +++++++++++++++++++++++++
 src/Optimization/hiopNlpFormulation.hpp | 17 +++++++-
 src/Utils/hiopRunStats.hpp              | 21 +++++++++-
 4 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/src/Interface/hiopInterface.hpp b/src/Interface/hiopInterface.hpp
index 8195d045..e21829c7 100644
--- a/src/Interface/hiopInterface.hpp
+++ b/src/Interface/hiopInterface.hpp
@@ -287,15 +287,15 @@ class hiopInterfaceBase
    * representations of the dual variables (associated with bound constraints) to ensure
    * mesh independent performance of the algorithm.
    *
-   * @note If the variables are not coming from L^2 discretizations, this method should
-   * return false on its first call. HiOp will use internally the identity as the mass
-   * matrix in this case. Otherwise, it should return true on its first call; subsequent
-   * errors in matrix apply can be flagged with false return values.
+   * @note If the variables are not coming from discretizations (specified by a false
+   * value returned by @useWeightedInnerProducts), this method should return false. HiOp
+   * will use internally M=I. Otherwise, it should return true or false depending on
+   * whether the user code successfully applied M.
    *
    * @param[in] n the (global) number of variables
    * @param[in] x the array to which mass action is applied
    * @param[out] y the result of apply
-   * @return true if successful, false otherwise (also see note above).
+   * @return see the note above.
    */
   virtual bool applyM(const size_type& n, const double* x, double* y)
   {
@@ -316,15 +316,15 @@ class hiopInterfaceBase
    * interior-point method for PDE-constrained optimization using finite element
    * discretizations, Optimiz. Meth. and Software, Vol. 38, 2023.
    *
-   * @note If the variables are not coming from L^2 discretizations, this method should
-   * return false on its first call. HiOp will use internally H=I in this case. Otherwise,
-   * it should return true on its first call; subsequent errors in matrix apply can be
-   * flagged with false return values.
+   * @note If the variables are not coming from discretizations (specified by a false
+   * value returned by @useWeightedInnerProducts), this method should return false. HiOp
+   * will use internally H=I. Otherwise, it should return true or false depending on
+   * whether the user code successfully applied H.
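+   *
+   * For illustration only, a hedged sketch of a user-side applyH, assuming the same
+   * hypothetical diagonal weight array h_diag_ (not part of HiOp); for an L^2
+   * discretization this array would hold the (lumped) mass matrix diagonal:
+   * \code
+   * bool applyH(const size_type& n, const double* x, double* y) override
+   * {
+   *   for(size_type i = 0; i < n; ++i) {
+   *     y[i] = h_diag_[i] * x[i];  // y = H*x for a diagonal H
+   *   }
+   *   return true;
+   * }
+   * \endcode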
   *
    * @param[in] n the (global) number of variables
    * @param[in] x the array to which the H matrix is applied
    * @param[out] y the result of apply
-   * @return true if successful, false otherwise (also see note above)
+   * @return see the note above
    */
   virtual bool applyH(const size_type& n, const double* x, double* y)
   {
@@ -351,6 +351,18 @@ class hiopInterfaceBase
     return false;
   }

+  /**
+   * Enables the use of weighted inner products via @applyM, @applyH, and @applyHinv
+   *
+   * See also the notes for @applyM, @applyH, and @applyHinv.
+   *
+   * @return true if enabled, false to default to the Euclidean space with <u,v>=u^T*v
+   */
+  virtual bool useWeightedInnerProducts()
+  {
+    return false;
+  }
+
   /**
    * Method provides a primal or starting point. This point is subject to internal adjustments.
    *
diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp
index 63000fd9..89b57d5d 100644
--- a/src/Optimization/hiopNlpFormulation.cpp
+++ b/src/Optimization/hiopNlpFormulation.cpp
@@ -54,6 +54,7 @@
  */
 #include "hiopNlpFormulation.hpp"
+#include "InnerProduct.hpp"
 #include "HessianDiagPlusRowRank.hpp"
 #include "hiopVector.hpp"
 #include "LinAlgFactory.hpp"
@@ -79,6 +80,7 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char
   nlp_transformations_(this),
   interface_base(interface_)
 {
+// inner_prod_ = new InnerProduct(this);
   strFixedVars_ = "";  // uninitialized
   dFixedVarsTol_ = -1.;  // uninitialized
   bool bret;
@@ -143,6 +145,8 @@
   temp_x_ = nullptr;
   nlp_scaling_ = nullptr;
   relax_bounds_ = nullptr;
+
+  inner_prod_ = new InnerProduct(this);
 }

 hiopNlpFormulation::~hiopNlpFormulation()
@@ -184,6 +188,8 @@
   delete temp_ineq_;
   delete temp_x_;
   /// nlp_scaling_ and relax_bounds_ are deleted inside nlp_transformations_
+
+  delete inner_prod_;
 }

 bool hiopNlpFormulation::finalizeInitialization()
@@ -757,6 +763,54 @@ bool hiopNlpFormulation::eval_grad_f(hiopVector& x, bool new_x, hiopVector& grad
   return bret;
 }

+bool hiopNlpFormulation::eval_M(const hiopVector& x, hiopVector& y)
+{
+  //x is in the reduced space used by HiOp
+  //x_full is x in the full/untransformed/user space
+  hiopVector* x_full = nlp_transformations_.apply_inv_to_x(const_cast<hiopVector&>(x), true/*new_x*/);
+
+  // We need to also transform y to a y_full, pass it to the user apply, and then the reverse, y_full back to y.
+  // The following works, unless a "remove fixed variables" NLP transformation is present.
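+  // For illustration, one possible shape of that general path, sketched with a
+  // hypothetical reverse-mapping helper (not provided by the transformations yet):
+  //   hiopVector* y_full = x_full->alloc_clone();  // buffer in the full/user space
+  //   interface_base.applyM(nlp_transformations_.n_pre(), x_full->local_data_const(), y_full->local_data());
+  //   copy_full_to_reduced(*y_full, y);            // hypothetical; see TO DO below
+  //   delete y_full;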
+  // TO DO: Either add functionality in hiopFixedVarRemovers to allow apply_inv_to_x for more than one vector
+  // simultaneously, or create y_full here and use apply_inv_to_x with copying from the return to y_full
+  hiopVector* y_full = &y;
+  assert(x_full->get_size() == y.get_size() &&
+         "weighted inner products not supported when fixed variables are removed");
+
+  runStats.tm_eval_M_apply.start();
+  bool bret = interface_base.applyM(nlp_transformations_.n_pre(),
+                                    x_full->local_data_const(),
+                                    y_full->local_data());
+  runStats.tm_eval_M_apply.stop();
+  runStats.n_eval_M_apply++;
+
+  // TODO: copy back to y from y_full
+
+  return bret;
+}
+
+bool hiopNlpFormulation::eval_H(const hiopVector& x, hiopVector& y)
+{
+  //x is in the reduced space used by HiOp
+  //x_full is x in the full/untransformed/user space
+  hiopVector* x_full = nlp_transformations_.apply_inv_to_x(const_cast<hiopVector&>(x), true/*new_x*/);
+
+  // TODO: see notes from eval_M
+  hiopVector* y_full = &y;
+  assert(x_full->get_size() == y.get_size() &&
+         "weighted inner products not supported when fixed variables are removed");
+
+  runStats.tm_eval_H_apply.start();
+  bool bret = interface_base.applyH(nlp_transformations_.n_pre(),
+                                    x_full->local_data_const(),
+                                    y_full->local_data());
+  runStats.tm_eval_H_apply.stop();
+  runStats.n_eval_H_apply++;
+
+  return bret;
+}
+
+
 bool hiopNlpFormulation::get_starting_point(hiopVector& x0_for_hiop,
                                             bool& duals_avail,
                                             hiopVector& zL0_for_hiop,
diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp
index 4d0df76a..b75f54c4 100644
--- a/src/Optimization/hiopNlpFormulation.hpp
+++ b/src/Optimization/hiopNlpFormulation.hpp
@@ -80,7 +80,8 @@ namespace hiop

 // some forward decls
 class hiopDualsLsqUpdate;
-
+class InnerProduct;
+
 /** Class for a general NlpFormulation with general constraints and bounds on the variables.
  * This class also acts as a factory for linear algebra objects (derivative
  * matrices, KKT system) whose types are decided based on the hiopInterfaceXXX object passed in the
@@ -118,6 +119,17 @@ class hiopNlpFormulation
   virtual bool eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) = 0;
   virtual bool eval_Jac_c_d(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d);

+  inline bool useWeightedInnerProd()
+  {
+    return interface_base.useWeightedInnerProducts();
+  }
+  /* Wrapper over user-defined M apply that also applies the NLP transformations. */
+  bool eval_M(const hiopVector& x, hiopVector& y);
+  /* Wrapper over user-defined H apply that also applies the NLP transformations. */
+  bool eval_H(const hiopVector& x, hiopVector& y);
+  /* Wrapper over user-defined inverse of H apply that also applies the NLP transformations. */
+  bool eval_H_inv(const hiopVector& x, hiopVector& y);
+
 protected:
   // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments
   virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) = 0;
@@ -353,6 +365,9 @@ class hiopNlpFormulation
   /* User provided interface */
   hiopInterfaceBase& interface_base;

+  /* Inner product wrapper */
+  InnerProduct* inner_prod_;
+
   /**
    * Flag to indicate whether to use evaluate all constraints once or separately for equalities
    * or inequalities.
Possible values diff --git a/src/Utils/hiopRunStats.hpp b/src/Utils/hiopRunStats.hpp index 0ee87903..c1e59e42 100644 --- a/src/Utils/hiopRunStats.hpp +++ b/src/Utils/hiopRunStats.hpp @@ -320,6 +320,14 @@ class hiopRunStats int nIter; + // measure user-defined calls other than the above, for example matrices for weighted inner products + hiopTimer tm_eval_M_apply; + hiopTimer tm_eval_H_apply; + hiopTimer tm_eval_Hinv_apply; + int n_eval_M_apply; + int n_eval_H_apply; + int n_eval_Hinv_apply; + hiopRunKKTSolStats kkt; hiopLinSolStats linsolv; inline virtual void initialize() @@ -329,6 +337,12 @@ class hiopRunStats nEvalObj = nEvalGrad_f = nEvalCons_eq = nEvalCons_ineq = nEvalJac_con_eq = nEvalJac_con_ineq = 0; nEvalHessL = 0; nIter = 0; + tm_eval_M_apply = 0.; + tm_eval_H_apply = 0.; + tm_eval_Hinv_apply = 0.; + n_eval_M_apply = 0; + n_eval_H_apply = 0; + n_eval_Hinv_apply = 0; } inline std::string get_summary(int masterRank = 0) @@ -364,7 +378,9 @@ class hiopRunStats << "s " << "( obj=" << tmEvalObj.getElapsedTime() << " grad=" << tmEvalGrad_f.getElapsedTime() << " cons=" << tmEvalCons.getElapsedTime() << " Jac=" << tmEvalJac_con.getElapsedTime() - << " Hess=" << tmEvalHessL.getElapsedTime() << ") " << std::endl; + << " Hess=" << tmEvalHessL.getElapsedTime() << ") " + << " WeigInProd=(" << tm_eval_M_apply.getElapsedTime() << " " << tm_eval_H_apply.getElapsedTime() << " " + << tm_eval_Hinv_apply.getElapsedTime() << ")" << std::endl; #ifdef HIOP_USE_MPI loc = tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + @@ -385,7 +401,8 @@ class hiopRunStats #endif ss << "Fcn/deriv #: obj " << nEvalObj << " grad " << nEvalGrad_f << " eq cons " << nEvalCons_eq << " ineq cons " - << nEvalCons_ineq << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq << std::endl; + << nEvalCons_ineq << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq + << "WeigInProd (" << n_eval_M_apply << " " << n_eval_H_apply << " " << n_eval_Hinv_apply << ")" << std::endl; return ss.str(); } From 89bc337adfc9313762f98efe0ea2abb5481a9ba6 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Wed, 5 Mar 2025 10:58:10 -0800 Subject: [PATCH 03/27] draft of InnerProd class --- src/Optimization/InnerProduct.hpp | 144 ++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 src/Optimization/InnerProduct.hpp diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp new file mode 100644 index 00000000..540018e6 --- /dev/null +++ b/src/Optimization/InnerProduct.hpp @@ -0,0 +1,144 @@ +// Copyright (c) 2025, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory (LLNL). +// LLNL-CODE-742473. All rights reserved. +// +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// Please also read "Additional BSD Notice" below. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// i. Redistributions of source code must retain the above copyright notice, this list +// of conditions and the disclaimer below. +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// other materials provided with the distribution. +// iii. 
Neither the name of the LLNS/LLNL nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S. Department
+// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under
+// Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC
+// nor any of their employees, makes any warranty, express or implied, or assumes any
+// liability or responsibility for the accuracy, completeness, or usefulness of any
+// information, apparatus, product, or process disclosed, or represents that its use would
+// not infringe privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.

+/**
+ * @file InnerProduct.hpp
+ *
+ * @author Cosmin G. Petra, LLNL
+ * @author Nai-Yuan Chiang, LLNL
+ *
+ */
+
+#ifndef HIOP_NLP_INNERPROD
+#define HIOP_NLP_INNERPROD
+
+#include "hiopVector.hpp"
+
+namespace hiop
+{
+
+// some forward decls
+class hiopNlpFormulation;
+
+/**
+ * Provides functionality required for using (weighted) inner products within the IPM algorithm(s).
+ *
+ * These weighted inner products appear when optimizing over (discretizations of) function spaces,
+ * such as in PDE-constrained optimization. The class wraps around user-provided methods for
+ * computing the mass matrix M and the weight matrix H generally associated with L^2 or H^1 finite
+ * element discretizations and the corresponding weighted inner products <u,v>_H = u^T H v. For L^2,
+ * H is the mass matrix, while for H^1 it is the mass plus the stiffness matrix. These user methods
+ * are called to perform various operations associated with Hilbert spaces, such as inner products
+ * and norms.
+ *
+ * Additional info: C. G. Petra et al., On the implementation of a quasi-Newton
+ * interior-point method for PDE-constrained optimization using finite element
+ * discretizations, Optimiz. Meth. and Software, Vol. 38, 2023.
+ *
+ * This class also covers Euclidean (i.e., non-weighted) inner products, for which M=H=I.
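+ *
+ * A hedged usage sketch (assumes nlp points to a fully initialized hiopNlpFormulation
+ * and x is a primal-space hiopVector; mirrors how the solver code added later in this
+ * series calls the class):
+ * \code
+ *   InnerProduct ip(nlp);
+ *   double nrm = ip.norm_M(x);  // sqrt(x^T*M*x) in the weighted case, ||x||_2 otherwise
+ * \endcode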
+ */ +class InnerProduct +{ +public: + InnerProduct(hiopNlpFormulation* nlp) + : nlp_(nlp) + { + if(nlp->useWeightedInnerProd()) { + vec_n_ = nlp_->alloc_primal_vec(); + } else { + vec_n_ = nullptr; + } + } + + virtual ~InnerProduct() + { + delete vec_n_; + } + + // Computes ||x||_M + double norm_M(const hiopVector& x) + { + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_M(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); + return ::std::sqrt(dp); + } else { + return x.twonorm(); + } + } + // Computes H primal norm + double norm_H_primal(const hiopVector& x) + { + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_H(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); + return ::std::sqrt(dp); + } else { + return x.twonorm(); + } + } + // Computes H dual norm + double norm_H_dual(const hiopVector& x) + { + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_H_inv(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); + return ::std::sqrt(dp); + } else { + return x.twonorm(); + } + } + +private: + // Pointer to "client" NLP + hiopNlpFormulation* nlp_; + + // Working vector in the size n of the variables, allocated only when for the weighted case + hiopVector* vec_n_; +}; + +} //end namespace +#endif From a1ae282e1058a35278e2c4987568303b1b058bd3 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Thu, 6 Mar 2025 12:43:26 -0800 Subject: [PATCH 04/27] cleaning up --- src/Optimization/hiopResidual.hpp | 13 ++++++++++--- src/Utils/hiopRunStats.hpp | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Optimization/hiopResidual.hpp b/src/Optimization/hiopResidual.hpp index 4ad02969..5f0f725b 100644 --- a/src/Optimization/hiopResidual.hpp +++ b/src/Optimization/hiopResidual.hpp @@ -87,7 +87,8 @@ class hiopResidual * c_soc = c_rhs - c_body and d_soc = d - d_body * * computed in c_eval and d_eval, respectively. - * The method modifies 'this', in particular ryd,ryc, rxl,rxu, rdl, rdu in an attemp + * The method modifies 'this', in particular ryd, ryc, rxl,rxu, rdl, rdu in an attempt + * to reuse storage/buffers */ virtual void update_soc(const hiopIterate& it, const hiopVector& c_soc, @@ -115,11 +116,14 @@ class hiopResidual inline double getInfeasInfNorm() const { return nrmInf_nlp_feasib; } /* get the previously computed Infeasibility */ inline double get_theta() const { return nrmOne_nlp_feasib; } - /* evaluate the Infeasibility at the new iterate, which has eq and ineq functions + + /** + * Evaluate the infeasibility at the new iterate, which has eq and ineq functions * computed in c_eval and d_eval, respectively. * The method modifies 'this', in particular ryd,ryc, rxl,rxu, rdl, rdu in an attempt * to reuse storage/buffers, but does not update the cached nrmInf_XXX members. 
- * It computes and returns the one norm of [ryc ryd] */ + * It computes and returns the one norm of [ryc ryd] + */ double compute_nlp_infeasib_onenorm(const hiopIterate& iter, const hiopVector& c_eval, const hiopVector& d_eval); /* residual printing function - calls hiopVector::print @@ -170,6 +174,7 @@ class hiopResidual * for the nlp (\mu=0) */ double nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem; + /** storage for the norm of [rx,rd], [rxl,...,rdu,ryc,ryd], and [rszl,...,rsvu] * for the barrier subproblem */ @@ -185,6 +190,8 @@ class hiopResidual /** inf norm of constraint violation */ double nrmInf_cons_violation; + + // and associated info from problem formulation hiopNlpFormulation* nlp; diff --git a/src/Utils/hiopRunStats.hpp b/src/Utils/hiopRunStats.hpp index c1e59e42..a635f1b6 100644 --- a/src/Utils/hiopRunStats.hpp +++ b/src/Utils/hiopRunStats.hpp @@ -376,10 +376,10 @@ class hiopRunStats << (tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + tmEvalJac_con.getElapsedTime() + tmEvalHessL.getElapsedTime()) << "s " - << "( obj=" << tmEvalObj.getElapsedTime() << " grad=" << tmEvalGrad_f.getElapsedTime() + << "(obj=" << tmEvalObj.getElapsedTime() << " grad=" << tmEvalGrad_f.getElapsedTime() << " cons=" << tmEvalCons.getElapsedTime() << " Jac=" << tmEvalJac_con.getElapsedTime() << " Hess=" << tmEvalHessL.getElapsedTime() << ") " - << " WeigInProd=(" << tm_eval_M_apply.getElapsedTime() << " " << tm_eval_H_apply.getElapsedTime() << " " + << " WIProd=(" << tm_eval_M_apply.getElapsedTime() << " " << tm_eval_H_apply.getElapsedTime() << " " << tm_eval_Hinv_apply.getElapsedTime() << ")" << std::endl; #ifdef HIOP_USE_MPI @@ -402,7 +402,7 @@ class hiopRunStats #endif ss << "Fcn/deriv #: obj " << nEvalObj << " grad " << nEvalGrad_f << " eq cons " << nEvalCons_eq << " ineq cons " << nEvalCons_ineq << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq - << "WeigInProd (" << n_eval_M_apply << " " << n_eval_H_apply << " " << n_eval_Hinv_apply << ")" << std::endl; + << " WIProd (" << n_eval_M_apply << " " << n_eval_H_apply << " " << n_eval_Hinv_apply << ")" << std::endl; return ss.str(); } From 826c5d28496d0f9e756c41491ad5ac55ee50216a Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Thu, 6 Mar 2025 12:44:57 -0800 Subject: [PATCH 05/27] temp code + notes --- src/Optimization/hiopAlgFilterIPM.cpp | 88 +++++++++++++++++++++++++++ src/Optimization/hiopAlgFilterIPM.hpp | 14 +++++ 2 files changed, 102 insertions(+) diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index 9ecb9747..86088236 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -715,6 +715,94 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, return true; } +bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, + const hiopResidual& resid, + const double& mu, + double& nlpoptim, + double& nlpfeas, + double& nlpcomplem, + double& nlpoverall, + double& logoptim, + double& logfeas, + double& logcomplem, + double& logoverall, + double& cons_violation) +{ + nlp->runStats.tmSolverInternal.start(); + + size_type n = nlp->n_complem(), m = nlp->m(); + // the one norms + // double nrmDualBou=it.normOneOfBoundDuals(); + // double nrmDualEqu=it.normOneOfEqualityDuals(); + double nrmDualBou, nrmDualEqu; + it.normOneOfDuals(nrmDualEqu, nrmDualBou); + + nlp->log->printf(hovScalars, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); + if(nrmDualBou > 1e+10) 
{
+    nlp->log->printf(hovWarning,
+                     "Unusually large bound dual variables (norm1=%g) occurred, "
+                     "which may cause numerical instabilities if it persists. Convergence "
+                     " issues or inaccurate optimal solutions may be experienced. Possible causes: "
+                     " tight bounds or bad scaling of the optimization variables.\n",
+                     nrmDualBou);
+    if(nlp->options->GetString("fixed_var") == "remove") {
+      nlp->log->printf(hovWarning,
+                       "For example, increase 'fixed_var_tolerance' to remove "
+                       "additional variables.\n");
+    } else if(nlp->options->GetString("fixed_var") == "relax") {
+      nlp->log->printf(hovWarning,
+                       "For example, increase 'fixed_var_tolerance' to relax "
+                       "additional (tight) variables and/or increase 'fixed_var_perturb' "
+                       "to decrease the tightness.\n");
+    } else {
+      nlp->log->printf(hovWarning,
+                       "Potential fixes: fix or relax variables with tight bounds "
+                       "(see 'fixed_var' option) or rescale variables.\n");
+    }
+  }
+
+  // scaling factors
+  //sd = max { p_smax, ||zl||_H + ||zu||_H + (||vl||_1 + ||vu||_1)/m } /p_smax
+  //sc = max { p_smax, ||zl||_H + ||zu||_H } /p_smax
+  //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv
+  //           ||yd+vl-vu||_inf
+  //nlpfeas = ||crhs- c||_inf
+  //          ||drs- d||_inf
+  //          ||x-sxl-xl||_H
+  //          ||x-sxu+xu||_H
+  //          ||d-sdl-dl||
+  //
+  double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax;
+  double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax;
+
+  sd = fmin(sd, 1e+8);
+  sc = fmin(sc, 1e+8);
+
+  // actual nlp errors
+  resid.getNlpErrors(nlpoptim, nlpfeas, nlpcomplem, cons_violation);
+
+  // finally, the scaled nlp error
+  nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc));
+
+  nlp->log->printf(hovScalars,
+                   "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n",
+                   nlpoverall,
+                   nlpoptim,
+                   sd,
+                   nlpfeas,
+                   nlpcomplem,
+                   cons_violation,
+                   sc);
+
+  // actual log errors
+  resid.getBarrierErrors(logoptim, logfeas, logcomplem);
+
+  // finally, the scaled barrier error
+  logoverall = fmax(logoptim / sd, fmax(cons_violation, logcomplem / sc));
+  nlp->runStats.tmSolverInternal.stop();
+  return true;
+}
+
 bool hiopAlgFilterIPMBase::evalNlp_funcOnly(hiopIterate& iter, double& f, hiopVector& c, hiopVector& d)
 {
   bool new_x = true;
diff --git a/src/Optimization/hiopAlgFilterIPM.hpp b/src/Optimization/hiopAlgFilterIPM.hpp
index 624fe60e..059aaafd 100644
--- a/src/Optimization/hiopAlgFilterIPM.hpp
+++ b/src/Optimization/hiopAlgFilterIPM.hpp
@@ -183,6 +183,20 @@ class hiopAlgFilterIPMBase
                                     double& logoverall,
                                     double& cons_violation);

+  virtual bool evalNlpAndLogErrors2(const hiopIterate& it,
+                                    const hiopResidual& resid,
+                                    const double& mu,
+                                    double& nlpoptim,
+                                    double& nlpfeas,
+                                    double& nlpcomplem,
+                                    double& nlpoverall,
+                                    double& logoptim,
+                                    double& logfeas,
+                                    double& logcomplem,
+                                    double& logoverall,
+                                    double& cons_violation);
+
+
   virtual double thetaLogBarrier(const hiopIterate& it, const hiopResidual& resid, const double& mu);

   /**
From 46c055f000923f321a8bdf90e8bc826dd53523f2 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Thu, 6 Mar 2025 12:45:39 -0800
Subject: [PATCH 06/27] added additional functionality for residual norm
 (weighted)

---
 src/Optimization/CMakeLists.txt         |   4 +-
 src/Optimization/InnerProduct.cpp       | 149 ++++++++++++++++++++++++
 src/Optimization/InnerProduct.hpp       |  61 +++-------
 src/Optimization/hiopNlpFormulation.cpp |  22 +++-
 src/Optimization/hiopNlpFormulation.hpp |   7 +-
 src/Optimization/hiopResidual.cpp       |  23 +++-
 6 files changed, 213 insertions(+), 53 deletions(-)
 create mode
100644 src/Optimization/InnerProduct.cpp diff --git a/src/Optimization/CMakeLists.txt b/src/Optimization/CMakeLists.txt index 30e35d54..160ccfaf 100644 --- a/src/Optimization/CMakeLists.txt +++ b/src/Optimization/CMakeLists.txt @@ -8,7 +8,8 @@ set(hiopOptimization_SRC hiopKKTLinSys.cpp KktLinSysLowRank.cpp hiopKKTLinSysMDS.cpp - HessianDiagPlusRowRank.cpp + HessianDiagPlusRowRank.cpp + InnerProduct.cpp hiopDualsUpdater.cpp hiopNlpTransforms.cpp hiopPDPerturbation.cpp @@ -29,6 +30,7 @@ set(hiopOptimization_INTERFACE_HEADERS hiopFactAcceptor.hpp hiopFilter.hpp HessianDiagPlusRowRank.hpp + InnerProduct.hpp hiopIterate.hpp hiopKKTLinSys.hpp hiopKKTLinSysDense.hpp diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp new file mode 100644 index 00000000..71441e29 --- /dev/null +++ b/src/Optimization/InnerProduct.cpp @@ -0,0 +1,149 @@ +// Copyright (c) 2025, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory (LLNL). +// LLNL-CODE-742473. All rights reserved. +// +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// Please also read "Additional BSD Notice" below. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// i. Redistributions of source code must retain the above copyright notice, this list +// of conditions and the disclaimer below. +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// other materials provided with the distribution. +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Additional BSD Notice +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// Contract No. DE-AC52-07NA27344 with the DOE. +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any +// information, apparatus, product, or process disclosed, or represents that its use would +// not infringe privately-owned rights. +// 3. 
Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.
+
+/**
+ * @file InnerProduct.cpp
+ *
+ * @author Cosmin G. Petra, LLNL
+ *
+ */
+
+
+#include "InnerProduct.hpp"
+#include "hiopNlpFormulation.hpp"
+
+namespace hiop
+{
+
+InnerProduct::InnerProduct(hiopNlpFormulation* nlp)
+  : nlp_(nlp)
+{
+  if(nlp->useWeightedInnerProd()) {
+    vec_n_ = nlp_->alloc_primal_vec();
+    vec_n2_ = nlp_->alloc_primal_vec();
+  } else {
+    vec_n_ = nullptr;
+    vec_n2_ = nullptr;
+  }
+}
+
+InnerProduct::~InnerProduct()
+{
+  delete vec_n2_;
+  delete vec_n_;
+}
+
+bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    return nlp_->eval_M(x, y);
+  } else {
+    y.copyFrom(x);
+    return true;
+  }
+}
+
+// Computes ||x||_M
+double InnerProduct::norm_M(const hiopVector& x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    nlp_->eval_M(x, *vec_n_);
+    auto dp = x.dotProductWith(*vec_n_);
+    return ::std::sqrt(dp);
+  } else {
+    return x.twonorm();
+  }
+}
+ // Computes H primal norm
+ double InnerProduct::norm_H_primal(const hiopVector& x)
+ {
+   if(nlp_->useWeightedInnerProd()) {
+     nlp_->eval_H(x, *vec_n_);
+     auto dp = x.dotProductWith(*vec_n_);
+     return ::std::sqrt(dp);
+   } else {
+     return x.twonorm();
+   }
+ }
+// Computes H dual norm
+double InnerProduct::norm_H_dual(const hiopVector& x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    nlp_->eval_H_inv(x, *vec_n_);
+    auto dp = x.dotProductWith(*vec_n_);
+    return ::std::sqrt(dp);
+  } else {
+    return x.twonorm();
+  }
+}
+
+double InnerProduct::norm_stationarity(const hiopVector& x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    nlp_->eval_H_inv(x, *vec_n_);
+    auto dp = x.dotProductWith(*vec_n_);
+    return ::std::sqrt(dp);
+  } else {
+    return x.infnorm();
+  }
+}
+
+// Compute norm one weighted by M, i.e., 1^T*M*|x|
+double InnerProduct::norm_M_one(const hiopVector&x)
+{
+  if(nlp_->useWeightedInnerProd()) {
+    vec_n_->copyFrom(x);
+    vec_n_->component_abs();
+    nlp_->eval_M(*vec_n_, *vec_n2_);
+    vec_n_->setToConstant(1.);
+    return vec_n_->dotProductWith(*vec_n2_);
+  } else {
+    return x.onenorm();
+  }
+}
+
+} // end namespace
diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp
index 540018e6..93be0d35 100644
--- a/src/Optimization/InnerProduct.hpp
+++ b/src/Optimization/InnerProduct.hpp
@@ -49,7 +49,6 @@
  * @file InnerProduct.hpp
  *
  * @author Cosmin G. Petra, LLNL
- * @author Nai-Yuan Chiang, LLNL
  *
  */

@@ -83,61 +82,35 @@ class hiopNlpFormulation;
 class InnerProduct
 {
 public:
-  InnerProduct(hiopNlpFormulation* nlp)
-    : nlp_(nlp)
-  {
-    if(nlp->useWeightedInnerProd()) {
-      vec_n_ = nlp_->alloc_primal_vec();
-    } else {
-      vec_n_ = nullptr;
-    }
-  }
+  InnerProduct(hiopNlpFormulation* nlp);

-  virtual ~InnerProduct()
-  {
-    delete vec_n_;
-  }
+  virtual ~InnerProduct();

+  // Compute y=M*x
+  bool apply_M(const hiopVector& x, hiopVector& y);
+
   // Computes ||x||_M
-  double norm_M(const hiopVector& x)
-  {
-    if(nlp_->useWeightedInnerProd()) {
-      nlp_->eval_M(x, *vec_n_);
-      auto dp = x.dotProductWith(*vec_n_);
-      return ::std::sqrt(dp);
-    } else {
-      return x.twonorm();
-    }
-  }
+  double norm_M(const hiopVector& x);
+
   // Computes H primal norm
-  double norm_H_primal(const hiopVector& x)
-  {
-    if(nlp_->useWeightedInnerProd()) {
-      nlp_->eval_H(x, *vec_n_);
-      auto dp = x.dotProductWith(*vec_n_);
-      return ::std::sqrt(dp);
-    } else {
-      return x.twonorm();
-    }
-  }
+  double norm_H_primal(const hiopVector& x);
+
   // Computes H dual norm
-  double norm_H_dual(const hiopVector& x)
-  {
-    if(nlp_->useWeightedInnerProd()) {
-      nlp_->eval_H_inv(x, *vec_n_);
-      auto dp = x.dotProductWith(*vec_n_);
-      return ::std::sqrt(dp);
-    } else {
-      return x.twonorm();
-    }
-  }
+  double norm_H_dual(const hiopVector& x);

+  // Computes norm of stationarity residual, using inf-norm for Euclidean spaces, H-inverse norm for Hilbert spaces
+  double norm_stationarity(const hiopVector& x);
+
+  // Compute norm one weighted by M, i.e., 1^T*M*|x|
+  double norm_M_one(const hiopVector&x);
 private:
   // Pointer to "client" NLP
   hiopNlpFormulation* nlp_;

   // Working vector in the size n of the variables, allocated only when for the weighted case
   hiopVector* vec_n_;
+  // Working vector in the size n of the variables, allocated only when for the weighted case
+  hiopVector* vec_n2_;
 };

 } //end namespace
diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp
index 89b57d5d..2578c831 100644
--- a/src/Optimization/hiopNlpFormulation.cpp
+++ b/src/Optimization/hiopNlpFormulation.cpp
@@ -80,7 +80,6 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char
   nlp_transformations_(this),
   interface_base(interface_)
 {
-// inner_prod_ = new InnerProduct(this);
   strFixedVars_ = "";  // uninitialized
   dFixedVarsTol_ = -1.;  // uninitialized
   bool bret;
@@ -810,6 +809,27 @@ bool hiopNlpFormulation::eval_H(const hiopVector& x, hiopVector& y)
   return bret;
 }

+bool hiopNlpFormulation::eval_H_inv(const hiopVector& x, hiopVector& y)
+{
+  //x is in the reduced space used by HiOp
+  //x_full is x in the full/untransformed/user space
+  hiopVector* x_full = nlp_transformations_.apply_inv_to_x(const_cast<hiopVector&>(x), true/*new_x*/);
+
+  // TODO: see notes from eval_M
+  hiopVector* y_full = &y;
+  assert(x_full->get_size() == y.get_size() &&
+         "weighted inner products not supported when fixed variables are removed");
+
+  runStats.tm_eval_Hinv_apply.start();
+  bool bret = interface_base.applyHinv(nlp_transformations_.n_pre(),
+                                       x_full->local_data_const(),
+                                       y_full->local_data());
+  runStats.tm_eval_Hinv_apply.stop();
+  runStats.n_eval_Hinv_apply++;
+
+  return bret;
+}
+

 bool hiopNlpFormulation::get_starting_point(hiopVector& x0_for_hiop,
                                             bool& duals_avail,
diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp
index b75f54c4..9a511d5a 100644
--- a/src/Optimization/hiopNlpFormulation.hpp
+++ b/src/Optimization/hiopNlpFormulation.hpp
@@ -66,7 +66,7 @@
#endif #include "hiopNlpTransforms.hpp" - +#include "InnerProduct.hpp" #include "hiopRunStats.hpp" #include "hiopLogger.hpp" #include "hiopOptions.hpp" @@ -80,7 +80,6 @@ namespace hiop // some forward decls class hiopDualsLsqUpdate; -class InnerProduct; /** Class for a general NlpFormulation with general constraints and bounds on the variables. * This class also acts as a factory for linear algebra objects (derivative @@ -130,6 +129,10 @@ class hiopNlpFormulation /* Wrapper over user-defined inverse of H apply that also applies the NLP transformations. */ bool eval_H_inv(const hiopVector& x, hiopVector& y); + InnerProduct* inner_prod() + { + return inner_prod_; + } protected: // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) = 0; diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 3f44bfe8..3b4ab1ad 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -177,19 +177,32 @@ int hiopResidual::update(const hiopIterate& it, assert(it.sxu->matchesPattern(nlp->get_ixu())); #endif // rx = -grad_f - J_c^t*x - J_d^t*x+zl-zu - linear damping term in x - rx->copyFrom(grad); + //rx->copyFrom(grad); + //jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); + //jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); + //rx->axpy(-1.0, *it.zl); + //rx->axpy(1.0, *it.zu); + + //using rxl as auxiliary variable to compute -M*zl+M*zu + rxl->copyFrom(*it.zu); + rxl->axpy(-1.0, *it.zl); + nlp->inner_prod()->apply_M(*rxl, *rx); + //continue adding to rx jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); - rx->axpy(-1.0, *it.zl); - rx->axpy(1.0, *it.zu); - buf = rx->infnorm_local(); + rx->axpy(1.0, grad); + + //buf = rx->infnorm_local(); + buf = nlp->inner_prod()->norm_stationarity(*rx); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); - nrmOne_nlp_optim += rx->onenorm(); + //nrmOne_nlp_optim += rx->onenorm(); + nrmOne_nlp_optim += nlp->inner_prod()->norm_M_one(*rx); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rx=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_x(1.0, *rx); rx->negate(); nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); nrmOne_bar_optim += rx->onenorm(); + //~ done with rx // rd rd->copyFrom(*it.yd); From 1caeb3304fc1764c37eea59cab5f568b9b0c3678 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 7 Mar 2025 09:26:31 -0800 Subject: [PATCH 07/27] IPM alg nlp errors --- src/Optimization/InnerProduct.cpp | 56 ++++++++++++++++------ src/Optimization/InnerProduct.hpp | 24 ++++++---- src/Optimization/hiopAlgFilterIPM.cpp | 55 ++++++++++++--------- src/Optimization/hiopIterate.cpp | 63 ++++++++++++++++++------- src/Optimization/hiopIterate.hpp | 5 +- src/Optimization/hiopNlpFormulation.hpp | 3 +- src/Optimization/hiopResidual.cpp | 39 +++++++++++---- 7 files changed, 174 insertions(+), 71 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index 71441e29..c9409981 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -77,7 +77,7 @@ InnerProduct::~InnerProduct() delete vec_n_; } -bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) +bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) const { if(nlp_->useWeightedInnerProd()) { return nlp_->eval_M(x, y); @@ -88,7 +88,7 @@ bool InnerProduct::apply_M(const hiopVector& x, hiopVector& 
y) } // Computes ||x||_M -double InnerProduct::norm_M(const hiopVector& x) +double InnerProduct::norm_M(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { nlp_->eval_M(x, *vec_n_); @@ -98,19 +98,19 @@ double InnerProduct::norm_M(const hiopVector& x) return x.twonorm(); } } - // Computes H primal norm - double InnerProduct::norm_H_primal(const hiopVector& x) - { - if(nlp_->useWeightedInnerProd()) { - nlp_->eval_H(x, *vec_n_); - auto dp = x.dotProductWith(*vec_n_); +// Computes H primal norm +double InnerProduct::norm_H_primal(const hiopVector& x) const +{ + if(nlp_->useWeightedInnerProd()) { + nlp_->eval_H(x, *vec_n_); + auto dp = x.dotProductWith(*vec_n_); return ::std::sqrt(dp); - } else { - return x.twonorm(); - } + } else { + return x.twonorm(); } +} // Computes H dual norm -double InnerProduct::norm_H_dual(const hiopVector& x) +double InnerProduct::norm_H_dual(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { nlp_->eval_H_inv(x, *vec_n_); @@ -121,7 +121,7 @@ double InnerProduct::norm_H_dual(const hiopVector& x) } } -double InnerProduct::norm_stationarity(const hiopVector& x) +double InnerProduct::norm_stationarity(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { nlp_->eval_H_inv(x, *vec_n_); @@ -133,9 +133,10 @@ double InnerProduct::norm_stationarity(const hiopVector& x) } // Compute norm one weighted by M, i.e., 1^T*M*|x| -double InnerProduct::norm_M_one(const hiopVector&x) +double InnerProduct::norm_M_one(const hiopVector&x) const { if(nlp_->useWeightedInnerProd()) { + //opt! pre-compute M*1 vec_n_->copyFrom(x); vec_n_->component_abs(); nlp_->eval_M(*vec_n_, *vec_n2_); @@ -146,4 +147,31 @@ double InnerProduct::norm_M_one(const hiopVector&x) } } +double InnerProduct::norm_complementarity(const hiopVector& x) const +{ + if(nlp_->useWeightedInnerProd()) { + //opt! pre-compute M*1 + vec_n_->copyFrom(x); + vec_n_->component_abs(); + nlp_->eval_M(*vec_n_, *vec_n2_); + vec_n_->setToConstant(1.); + return vec_n_->dotProductWith(*vec_n2_); + } else { + return x.infnorm(); + } +} + +// Computes the "volume" of the space, 1^T M*1 +double InnerProduct::volume() const +{ + if(nlp_->useWeightedInnerProd()) { + //opt! 
pre-compute M*1 + vec_n_->setToConstant(1.); + nlp_->eval_M(*vec_n_, *vec_n2_); + return vec_n2_->dotProductWith(*vec_n_); + } else { + return nlp_->n_complem(); + } +} + } // end namespace diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 93be0d35..3c185430 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -87,30 +87,36 @@ class InnerProduct virtual ~InnerProduct(); // Compute y=M*x - bool apply_M(const hiopVector& x, hiopVector& y); + bool apply_M(const hiopVector& x, hiopVector& y) const; // Computes ||x||_M - double norm_M(const hiopVector& x); + double norm_M(const hiopVector& x) const; // Computes H primal norm - double norm_H_primal(const hiopVector& x); + double norm_H_primal(const hiopVector& x) const; // Computes H dual norm - double norm_H_dual(const hiopVector& x); + double norm_H_dual(const hiopVector& x) const; // Computes norm of stationarity residual, using inf-norm for Euclidean spaces, H-inverse norm for Hilbert spaces - double norm_stationarity(const hiopVector& x); + double norm_stationarity(const hiopVector& x) const; - // Compute norm one weighted by M, i.e., 1^T*M*|x| - double norm_M_one(const hiopVector&x); + // Computes norm of complementarity, using inf-norm for Euclidean spaces, 1M norm for Hilbert spaces + double norm_complementarity(const hiopVector& x) const; + + // Computes 1M norm, i.e., ||x|| = 1^T*M*|x| + double norm_M_one(const hiopVector&x) const; + + // Computes the "volume" of the space, norm of 1 function + double volume() const; private: // Pointer to "client" NLP hiopNlpFormulation* nlp_; // Working vector in the size n of the variables, allocated only when for the weighted case - hiopVector* vec_n_; + mutable hiopVector* vec_n_; // Working vector in the size n of the variables, allocated only when for the weighted case - hiopVector* vec_n2_; + mutable hiopVector* vec_n2_; }; } //end namespace diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index 86088236..407326d4 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -656,10 +656,15 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, // the one norms // double nrmDualBou=it.normOneOfBoundDuals(); // double nrmDualEqu=it.normOneOfEqualityDuals(); - double nrmDualBou, nrmDualEqu; - it.normOneOfDuals(nrmDualEqu, nrmDualBou); + double sc; + double sd; + double nrmDualBou; + double nrmDualEqu; + if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { + return false; + } - nlp->log->printf(hovScalars, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); + nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); if(nrmDualBou > 1e+10) { nlp->log->printf(hovWarning, "Unusually large bound dual variables (norm1=%g) occured, " @@ -684,8 +689,11 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, } // scaling factors - double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; + //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + + sd = fmax(p_smax, sd) / p_smax; + sc = n == 0 ? 
0 : fmax(p_smax, sc) / p_smax; sd = fmin(sd, 1e+8); sc = fmin(sc, 1e+8); @@ -696,7 +704,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, // finally, the scaled nlp error nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); - nlp->log->printf(hovScalars, + nlp->log->printf(hovWarning, "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", nlpoverall, nlpoptim, @@ -731,13 +739,15 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, nlp->runStats.tmSolverInternal.start(); size_type n = nlp->n_complem(), m = nlp->m(); - // the one norms - // double nrmDualBou=it.normOneOfBoundDuals(); - // double nrmDualEqu=it.normOneOfEqualityDuals(); - double nrmDualBou, nrmDualEqu; - it.normOneOfDuals(nrmDualEqu, nrmDualBou); + double sc; + double sd; + double nrmDualBou; + double nrmDualEqu; + if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { + return false; + } - nlp->log->printf(hovScalars, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); + nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); if(nrmDualBou > 1e+10) { nlp->log->printf(hovWarning, "Unusually large bound dual variables (norm1=%g) occured, " @@ -762,18 +772,21 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, } // scaling factors - //sd = max { p_smax, ||zl||_H + ||zu||_H + (||vl||_1 + ||vu||_1)/m } /p_smax - //sc = max { p_smax, ||zl||_H + ||zu||_H } /p_smax + //sd = max { p_smax, ||zl||_M + ||zu||_M + (||vl||_1 + ||vu||_1)/m } /p_smax + //sc = max { p_smax, ||zl||_M + ||zu||_M } /p_smax //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv // ||yd+vl-vu||_inf //nlpfeas = ||crhs- c||_inf // ||drs- d||_inf - // ||x-sxl-xl||_H - // ||x-sxu+xu||_H + // ||x-sxl-xl||_inf + // ||x-sxu+xu||_inf // ||d-sdl-dl|| // - double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; + //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + + sd = fmax(p_smax, sd) / p_smax; + sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; sd = fmin(sd, 1e+8); sc = fmin(sc, 1e+8); @@ -784,7 +797,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, // finally, the scaled nlp error nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); - nlp->log->printf(hovScalars, + nlp->log->printf(hovWarning, "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", nlpoverall, nlpoptim, @@ -1159,7 +1172,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() solver_status_ = NlpSolve_Pending; while(true) { - bret = evalNlpAndLogErrors(*it_curr, + bret = evalNlpAndLogErrors2(*it_curr, *resid, _mu, _err_nlp_optim, @@ -1252,7 +1265,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() //! 
should perform only a partial update since NLP didn't change resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); - bret = evalNlpAndLogErrors(*it_curr, + bret = evalNlpAndLogErrors2(*it_curr, *resid, _mu, _err_nlp_optim, diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index 81c9d688..392d38e4 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -236,26 +236,55 @@ double hiopIterate::normOneOfEqualityDuals() const return nrm1; } -void hiopIterate::normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const +// void hiopIterate::normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const +// { +// #ifdef HIOP_DEEPCHECKS +// assert(zl->matchesPattern(nlp->get_ixl())); +// assert(zu->matchesPattern(nlp->get_ixu())); +// assert(vl->matchesPattern(nlp->get_idl())); +// assert(vu->matchesPattern(nlp->get_idu())); +// #endif +// // work locally with all the vectors. This will result in only one MPI_Allreduce call +// nrm1Bnd = zl->onenorm_local() + zu->onenorm_local(); +// #ifdef HIOP_USE_MPI +// double nrm1_global; +// int ierr = MPI_Allreduce(&nrm1Bnd, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrm1Bnd = nrm1_global; +// #endif +// nrm1Bnd += vl->onenorm_local() + vu->onenorm_local(); +// nrm1Eq = yc->onenorm_local() + yd->onenorm_local(); + +// nlp->log->printf(hovWarning, +// "sc=--- sd=--- volume_n=--- nrm zl=%12.5e zu=%12.5e vl=%12.5e vu=%12.5e yc=%12.5e yd=%12.5e\n", +// zl->onenorm(), zu->onenorm(), vl->onenorm(), vu->onenorm(), yc->onenorm(), yd->onenorm()); + +// } + +/// @brief Computes scaling factors sc and sd; also returns the "norms" of duals +bool hiopIterate::compute_sc_sd(double& sc, double& sd, double& nrmDualsEq, double& nrmDualsBou) const { -#ifdef HIOP_DEEPCHECKS - assert(zl->matchesPattern(nlp->get_ixl())); - assert(zu->matchesPattern(nlp->get_ixu())); - assert(vl->matchesPattern(nlp->get_idl())); - assert(vu->matchesPattern(nlp->get_idu())); -#endif - // work locally with all the vectors. 
This will result in only one MPI_Allreduce call - nrm1Bnd = zl->onenorm_local() + zu->onenorm_local(); -#ifdef HIOP_USE_MPI - double nrm1_global; - int ierr = MPI_Allreduce(&nrm1Bnd, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrm1Bnd = nrm1_global; -#endif - nrm1Bnd += vl->onenorm_local() + vu->onenorm_local(); - nrm1Eq = yc->onenorm_local() + yd->onenorm_local(); + const auto nrmzl = nlp->inner_prod()->norm_M_one(*zl); + const auto nrmzu = nlp->inner_prod()->norm_M_one(*zu); + const auto volume_n = nlp->inner_prod()->volume(); + const auto nrmvl = vl->onenorm_local(); + const auto nrmvu = vu->onenorm_local(); + nrmDualsBou = nrmzl + nrmzu + nrmvl + nrmvu; + sc = nrmDualsBou / volume_n; + + const auto volume_m = nlp->m(); + const auto nrmyc = yc->onenorm_local(); + const auto nrmyd = yd->onenorm_local(); + nrmDualsEq = nrmyc+nrmyd; + sd = (nrmDualsBou+nrmDualsEq)/(volume_n+volume_m); + + nlp->log->printf(hovWarning, + "sc=%g sd=%g volume_n=%12.5e norms zl=%12.5e zu=%12.5e vl=%12.5e vu=%12.5e yc=%12.5e yd=%12.5e\n", + sc, sd, volume_n, nrmzl, nrmzu, nrmvl, nrmvu, nrmyc, nrmyd); + return true; } + void hiopIterate::selectPattern() { sxl->selectPattern(nlp->get_ixl()); diff --git a/src/Optimization/hiopIterate.hpp b/src/Optimization/hiopIterate.hpp index a2ad86d2..a1d66383 100644 --- a/src/Optimization/hiopIterate.hpp +++ b/src/Optimization/hiopIterate.hpp @@ -142,7 +142,10 @@ class hiopIterate virtual double normOneOfBoundDuals() const; virtual double normOneOfEqualityDuals() const; /* same as above but computed in one shot to save on communication and computation */ - virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; + //virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; + + /// @brief Computes scaling factors sc and sd; also returns the "norms" of duals + bool compute_sc_sd(double& sc, double& sd, double& nrmDualsEq, double& nrmDualsBou) const; /// @brief Entries corresponding to zeros in ix are set to zero virtual void selectPattern(); diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp index 9a511d5a..8bb64761 100644 --- a/src/Optimization/hiopNlpFormulation.hpp +++ b/src/Optimization/hiopNlpFormulation.hpp @@ -129,10 +129,11 @@ class hiopNlpFormulation /* Wrapper over user-defined inverse of H apply that also applies the NLP transformations. 
*/ bool eval_H_inv(const hiopVector& x, hiopVector& y); - InnerProduct* inner_prod() + InnerProduct const* inner_prod() const { return inner_prod_; } + protected: // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) = 0; diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 3b4ab1ad..c61d8625 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -200,8 +200,10 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rx=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_x(1.0, *rx); rx->negate(); - nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); - nrmOne_bar_optim += rx->onenorm(); + //nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); + //nrmOne_bar_optim += rx->onenorm(); + nrmInf_bar_optim = fmax(nrmInf_bar_optim, nlp->inner_prod()->norm_stationarity(*rx)); + nrmOne_bar_optim += nlp->inner_prod()->norm_M_one(*rx); //~ done with rx // rd @@ -256,7 +258,7 @@ int hiopResidual::update(const hiopIterate& it, if(nlp->n_low_local() < nx_loc) { rxl->selectPattern(nlp->get_ixl()); } - buf = rxl->infnorm_local(); + buf = rxl->infnorm_local(); // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxl=%22.17e\n", buf); } @@ -307,10 +309,13 @@ int hiopResidual::update(const hiopIterate& it, if(nlp->n_low_local() < nx_loc) { rszl->selectPattern(nlp->get_ixl()); } - nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, rszl->infnorm_local()); - + // nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, rszl->infnorm_local()); + buf = nlp->inner_prod()->norm_complementarity(*rszl); + nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); + rszl->addConstant_w_patternSelect(mu, nlp->get_ixl()); - buf = rszl->infnorm_local(); + //buf = rszl->infnorm_local(); + buf = nlp->inner_prod()->norm_complementarity(*rszl); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } @@ -322,11 +327,13 @@ int hiopResidual::update(const hiopIterate& it, rszu->selectPattern(nlp->get_ixu()); } - buf = rszu->infnorm_local(); + //buf = rszu->infnorm_local(); + buf = nlp->inner_prod()->norm_complementarity(*rszu); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); rszu->addConstant_w_patternSelect(mu, nlp->get_ixu()); - buf = rszu->infnorm_local(); + //buf = rszu->infnorm_local(); + buf = nlp->inner_prod()->norm_complementarity(*rszu); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszu=%22.17e\n", buf); } @@ -364,6 +371,21 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvu=%22.17e\n", buf); } +// #ifdef HIOP_USE_MPI +// // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles +// // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double +// double aux[6] = +// {nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem, nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem}; +// double aux_g[6]; +// int ierr = MPI_Allreduce(aux, aux_g, 6, MPI_DOUBLE, MPI_MAX, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrmInf_nlp_optim = aux_g[0]; +// nrmInf_nlp_feasib = aux_g[1]; +// nrmInf_nlp_complem = 
aux_g[2]; +// nrmInf_bar_optim = aux_g[3]; +// nrmInf_bar_feasib = aux_g[4]; +// nrmInf_bar_complem = aux_g[5]; +// #endif #ifdef HIOP_USE_MPI // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double @@ -379,6 +401,7 @@ int hiopResidual::update(const hiopIterate& it, nrmInf_bar_feasib = aux_g[4]; nrmInf_bar_complem = aux_g[5]; #endif + nlp->runStats.tmSolverInternal.stop(); return true; } From e4edea2597f2ed9ef2f51938e28a0b61e427621b Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 7 Mar 2025 09:30:09 -0800 Subject: [PATCH 08/27] clean up, fixed warnings --- src/Optimization/hiopAlgFilterIPM.cpp | 7 +-- src/Optimization/hiopIterate.cpp | 76 +++++++++++++-------------- src/Optimization/hiopIterate.hpp | 4 +- 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index 407326d4..dc2e7aa6 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -652,10 +652,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, { nlp->runStats.tmSolverInternal.start(); - size_type n = nlp->n_complem(), m = nlp->m(); - // the one norms - // double nrmDualBou=it.normOneOfBoundDuals(); - // double nrmDualEqu=it.normOneOfEqualityDuals(); + size_type n = nlp->n_complem(); double sc; double sd; double nrmDualBou; @@ -738,7 +735,7 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, { nlp->runStats.tmSolverInternal.start(); - size_type n = nlp->n_complem(), m = nlp->m(); + size_type n = nlp->n_complem(); double sc; double sd; double nrmDualBou; diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index 392d38e4..e133ab94 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -196,45 +196,45 @@ void hiopIterate::setEqualityDualsToConstant(const double& v) yd->setToConstant(v); } -double hiopIterate::normOneOfBoundDuals() const -{ -#ifdef HIOP_DEEPCHECKS - assert(zl->matchesPattern(nlp->get_ixl())); - assert(zu->matchesPattern(nlp->get_ixu())); - assert(vl->matchesPattern(nlp->get_idl())); - assert(vu->matchesPattern(nlp->get_idu())); -#endif - // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. - double nrm1 = zl->onenorm_local() + zu->onenorm_local(); -#ifdef HIOP_USE_MPI - double nrm1_global; - int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrm1 = nrm1_global; -#endif - nrm1 += vl->onenorm_local() + vu->onenorm_local(); - return nrm1; -} +// double hiopIterate::normOneOfBoundDuals() const +// { +// #ifdef HIOP_DEEPCHECKS +// assert(zl->matchesPattern(nlp->get_ixl())); +// assert(zu->matchesPattern(nlp->get_ixu())); +// assert(vl->matchesPattern(nlp->get_idl())); +// assert(vu->matchesPattern(nlp->get_idu())); +// #endif +// // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. 
+// double nrm1 = zl->onenorm_local() + zu->onenorm_local(); +// #ifdef HIOP_USE_MPI +// double nrm1_global; +// int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrm1 = nrm1_global; +// #endif +// nrm1 += vl->onenorm_local() + vu->onenorm_local(); +// return nrm1; +// } -double hiopIterate::normOneOfEqualityDuals() const -{ -#ifdef HIOP_DEEPCHECKS - assert(zl->matchesPattern(nlp->get_ixl())); - assert(zu->matchesPattern(nlp->get_ixu())); - assert(vl->matchesPattern(nlp->get_idl())); - assert(vu->matchesPattern(nlp->get_idu())); -#endif - // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. - double nrm1 = zl->onenorm_local() + zu->onenorm_local(); -#ifdef HIOP_USE_MPI - double nrm1_global; - int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrm1 = nrm1_global; -#endif - nrm1 += vl->onenorm_local() + vu->onenorm_local() + yc->onenorm_local() + yd->onenorm_local(); - return nrm1; -} +// double hiopIterate::normOneOfEqualityDuals() const +// { +// #ifdef HIOP_DEEPCHECKS +// assert(zl->matchesPattern(nlp->get_ixl())); +// assert(zu->matchesPattern(nlp->get_ixu())); +// assert(vl->matchesPattern(nlp->get_idl())); +// assert(vu->matchesPattern(nlp->get_idu())); +// #endif +// // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. +// double nrm1 = zl->onenorm_local() + zu->onenorm_local(); +// #ifdef HIOP_USE_MPI +// double nrm1_global; +// int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); +// assert(MPI_SUCCESS == ierr); +// nrm1 = nrm1_global; +// #endif +// nrm1 += vl->onenorm_local() + vu->onenorm_local() + yc->onenorm_local() + yd->onenorm_local(); +// return nrm1; +// } // void hiopIterate::normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const // { diff --git a/src/Optimization/hiopIterate.hpp b/src/Optimization/hiopIterate.hpp index a1d66383..00ce9cd1 100644 --- a/src/Optimization/hiopIterate.hpp +++ b/src/Optimization/hiopIterate.hpp @@ -139,8 +139,8 @@ class hiopIterate hiopVector& grad_d) const; /** norms for individual parts of the iterate (on demand computation) */ - virtual double normOneOfBoundDuals() const; - virtual double normOneOfEqualityDuals() const; + //virtual double normOneOfBoundDuals() const; + //virtual double normOneOfEqualityDuals() const; /* same as above but computed in one shot to save on communication and computation */ //virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; From cab209f9f0d7abd4deae3449b47601437c29776f Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 7 Mar 2025 10:06:05 -0800 Subject: [PATCH 09/27] addressed MPI issues --- src/Drivers/Dense/NlpDenseConsEx1.hpp | 35 +++++ src/Optimization/InnerProduct.cpp | 3 + src/Optimization/hiopAlgFilterIPM.cpp | 193 ++++++++++++------------ src/Optimization/hiopAlgFilterIPM.hpp | 14 -- src/Optimization/hiopNlpFormulation.cpp | 9 +- src/Optimization/hiopResidual.cpp | 34 ++--- 6 files changed, 152 insertions(+), 136 deletions(-) diff --git a/src/Drivers/Dense/NlpDenseConsEx1.hpp b/src/Drivers/Dense/NlpDenseConsEx1.hpp index 33a3178d..8de2f80e 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.hpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.hpp @@ -257,6 +257,41 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints double alpha_du, double alpha_pr, int ls_trials); + + bool applyM(const size_type& n, const 
double* x_in, double* y_out) + { + x->copyFrom(x_in); + x->copyTo(y_out); + return true; + } + + /** + * Computes action y of the inner product weight matrix H on a vector x, namely y=H*x + */ + virtual bool applyH(const size_type& n, const double* x_in, double* y_out) + { + x->copyFrom(x_in); + x->copyTo(y_out); + return true; + } + + /** + * Computes action y of the inverse of H on a vector x, namely y=H^{-1}*x + */ + virtual bool applyHinv(const size_type& n, const double* x_in, double* y_out) + { + x->copyFrom(x_in); + x->copyTo(y_out); + return true; + } + + /** + * Enables the use of weighted inner products via @applyM, @applyH, and @applyHinv + */ + virtual bool useWeightedInnerProducts() + { + return false; + } private: int n_vars; diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index c9409981..95978da3 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -62,6 +62,8 @@ namespace hiop InnerProduct::InnerProduct(hiopNlpFormulation* nlp) : nlp_(nlp) { + printf("InnerProduct::InnerProduct begin\n"); fflush(stdout); + assert(nlp); if(nlp->useWeightedInnerProd()) { vec_n_ = nlp_->alloc_primal_vec(); vec_n2_ = nlp_->alloc_primal_vec(); @@ -69,6 +71,7 @@ InnerProduct::InnerProduct(hiopNlpFormulation* nlp) vec_n_ = nullptr; vec_n2_ = nullptr; } + printf("InnerProduct::InnerProduct end\n"); fflush(stdout); } InnerProduct::~InnerProduct() diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index dc2e7aa6..f479bb74 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -686,9 +686,8 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, } // scaling factors - //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; - + //c double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; + //c double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; sd = fmax(p_smax, sd) / p_smax; sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; @@ -720,98 +719,98 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, return true; } -bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, - const hiopResidual& resid, - const double& mu, - double& nlpoptim, - double& nlpfeas, - double& nlpcomplem, - double& nlpoverall, - double& logoptim, - double& logfeas, - double& logcomplem, - double& logoverall, - double& cons_violation) -{ - nlp->runStats.tmSolverInternal.start(); - - size_type n = nlp->n_complem(); - double sc; - double sd; - double nrmDualBou; - double nrmDualEqu; - if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { - return false; - } - - nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); - if(nrmDualBou > 1e+10) { - nlp->log->printf(hovWarning, - "Unusually large bound dual variables (norm1=%g) occured, " - "which may cause numerical instabilities if it persists. Convergence " - " issues or inacurate optimal solutions may be experienced. 
Possible causes: " - " tight bounds or bad scaling of the optimization variables.\n", - nrmDualBou); - if(nlp->options->GetString("fixed_var") == "remove") { - nlp->log->printf(hovWarning, - "For example, increase 'fixed_var_tolerance' to remove " - "additional variables.\n"); - } else if(nlp->options->GetString("fixed_var") == "relax") { - nlp->log->printf(hovWarning, - "For example, increase 'fixed_var_tolerance' to relax " - "aditional (tight) variables and/or increase 'fixed_var_perturb' " - "to decrease the tightness.\n"); - } else { - nlp->log->printf(hovWarning, - "Potential fixes: fix or relax variables with tight bounds " - "(see 'fixed_var' option) or rescale variables.\n"); - } - } - - // scaling factors - //sd = max { p_smax, ||zl||_M + ||zu||_M + (||vl||_1 + ||vu||_1)/m } /p_smax - //sc = max { p_smax, ||zl||_M + ||zu||_M } /p_smax - //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv - // ||yd+vl-vu||_inf - //nlpfeas = ||crhs- c||_inf - // ||drs- d||_inf - // ||x-sxl-xl||_inf - // ||x-sxu+xu||_inf - // ||d-sdl-dl|| - // - //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; - //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; - - sd = fmax(p_smax, sd) / p_smax; - sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; - - sd = fmin(sd, 1e+8); - sc = fmin(sc, 1e+8); - - // actual nlp errors - resid.getNlpErrors(nlpoptim, nlpfeas, nlpcomplem, cons_violation); - - // finally, the scaled nlp error - nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); - - nlp->log->printf(hovWarning, - "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", - nlpoverall, - nlpoptim, - sd, - nlpfeas, - nlpcomplem, - cons_violation, - sc); - - // actual log errors - resid.getBarrierErrors(logoptim, logfeas, logcomplem); - - // finally, the scaled barrier error - logoverall = fmax(logoptim / sd, fmax(cons_violation, logcomplem / sc)); - nlp->runStats.tmSolverInternal.stop(); - return true; -} +// bool hiopAlgFilterIPMBase::evalNlpAndLogErrors2(const hiopIterate& it, +// const hiopResidual& resid, +// const double& mu, +// double& nlpoptim, +// double& nlpfeas, +// double& nlpcomplem, +// double& nlpoverall, +// double& logoptim, +// double& logfeas, +// double& logcomplem, +// double& logoverall, +// double& cons_violation) +// { +// nlp->runStats.tmSolverInternal.start(); + +// size_type n = nlp->n_complem(); +// double sc; +// double sd; +// double nrmDualBou; +// double nrmDualEqu; +// if(!it.compute_sc_sd(sc, sd, nrmDualEqu, nrmDualBou)) { +// return false; +// } + +// nlp->log->printf(hovWarning, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); +// if(nrmDualBou > 1e+10) { +// nlp->log->printf(hovWarning, +// "Unusually large bound dual variables (norm1=%g) occured, " +// "which may cause numerical instabilities if it persists. Convergence " +// " issues or inacurate optimal solutions may be experienced. 
Possible causes: " +// " tight bounds or bad scaling of the optimization variables.\n", +// nrmDualBou); +// if(nlp->options->GetString("fixed_var") == "remove") { +// nlp->log->printf(hovWarning, +// "For example, increase 'fixed_var_tolerance' to remove " +// "additional variables.\n"); +// } else if(nlp->options->GetString("fixed_var") == "relax") { +// nlp->log->printf(hovWarning, +// "For example, increase 'fixed_var_tolerance' to relax " +// "aditional (tight) variables and/or increase 'fixed_var_perturb' " +// "to decrease the tightness.\n"); +// } else { +// nlp->log->printf(hovWarning, +// "Potential fixes: fix or relax variables with tight bounds " +// "(see 'fixed_var' option) or rescale variables.\n"); +// } +// } + +// // scaling factors +// //sd = max { p_smax, ||zl||_M + ||zu||_M + (||vl||_1 + ||vu||_1)/m } /p_smax +// //sc = max { p_smax, ||zl||_M + ||zu||_M } /p_smax +// //nlpoptim = ||gradf + Jc'*yc + Jd'*Jd - M*zl - M*zu||_Hinv +// // ||yd+vl-vu||_inf +// //nlpfeas = ||crhs- c||_inf +// // ||drs- d||_inf +// // ||x-sxl-xl||_inf +// // ||x-sxu+xu||_inf +// // ||d-sdl-dl|| +// // +// //double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; +// //double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; + +// sd = fmax(p_smax, sd) / p_smax; +// sc = n == 0 ? 0 : fmax(p_smax, sc) / p_smax; + +// sd = fmin(sd, 1e+8); +// sc = fmin(sc, 1e+8); + +// // actual nlp errors +// resid.getNlpErrors(nlpoptim, nlpfeas, nlpcomplem, cons_violation); + +// // finally, the scaled nlp error +// nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); + +// nlp->log->printf(hovWarning, +// "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", +// nlpoverall, +// nlpoptim, +// sd, +// nlpfeas, +// nlpcomplem, +// cons_violation, +// sc); + +// // actual log errors +// resid.getBarrierErrors(logoptim, logfeas, logcomplem); + +// // finally, the scaled barrier error +// logoverall = fmax(logoptim / sd, fmax(cons_violation, logcomplem / sc)); +// nlp->runStats.tmSolverInternal.stop(); +// return true; +// } bool hiopAlgFilterIPMBase::evalNlp_funcOnly(hiopIterate& iter, double& f, hiopVector& c, hiopVector& d) { @@ -1169,7 +1168,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() solver_status_ = NlpSolve_Pending; while(true) { - bret = evalNlpAndLogErrors2(*it_curr, + bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, _err_nlp_optim, @@ -1262,7 +1261,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() //! 
should perform only a partial update since NLP didn't change resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); - bret = evalNlpAndLogErrors2(*it_curr, + bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, _err_nlp_optim, diff --git a/src/Optimization/hiopAlgFilterIPM.hpp b/src/Optimization/hiopAlgFilterIPM.hpp index 059aaafd..c7707b43 100644 --- a/src/Optimization/hiopAlgFilterIPM.hpp +++ b/src/Optimization/hiopAlgFilterIPM.hpp @@ -182,20 +182,6 @@ class hiopAlgFilterIPMBase double& logcomplem, double& logoverall, double& cons_violation); - - virtual bool evalNlpAndLogErrors2(const hiopIterate& it, - const hiopResidual& resid, - const double& mu, - double& nlpoptim, - double& nlpfeas, - double& nlpcomplem, - double& nlpoverall, - double& logoptim, - double& logfeas, - double& logcomplem, - double& logoverall, - double& cons_violation); - virtual double thetaLogBarrier(const hiopIterate& it, const hiopResidual& resid, const double& mu); diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index 2578c831..e90146f1 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -78,7 +78,8 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char prob_type_(hiopInterfaceBase::hiopNonlinear), nlp_evaluated_(false), nlp_transformations_(this), - interface_base(interface_) + interface_base(interface_), + inner_prod_(nullptr) { strFixedVars_ = ""; // uninitialized dFixedVarsTol_ = -1.; // uninitialized @@ -144,8 +145,6 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char temp_x_ = nullptr; nlp_scaling_ = nullptr; relax_bounds_ = nullptr; - - inner_prod_ = new InnerProduct(this); } hiopNlpFormulation::~hiopNlpFormulation() @@ -259,6 +258,10 @@ bool hiopNlpFormulation::finalizeInitialization() ixl_ = xu_->alloc_clone(); ixu_ = xu_->alloc_clone(); + // create NLP space (H-weighted or Euclidean) + delete inner_prod_; + inner_prod_ = new InnerProduct(this); + // // preprocess variables bounds - this is curently done on the CPU // diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index c61d8625..2cb0791d 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -210,17 +210,20 @@ int hiopResidual::update(const hiopIterate& it, rd->copyFrom(*it.yd); rd->axpy(1.0, *it.vl); rd->axpy(-1.0, *it.vu); - buf = rd->infnorm_local(); + //buf = rd->infnorm_local(); + buf = rd->infnorm(); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); nrmOne_nlp_optim += rd->onenorm(); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rd=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_d(-1.0, *rd); - nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm_local()); + //nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm_local()); + nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm()); nrmOne_bar_optim += rd->onenorm(); // ryc ryc->copyFrom(nlp->get_crhs()); ryc->axpy(-1.0, c); - buf = ryc->infnorm_local(); + //buf = ryc->infnorm_local(); + buf = ryc->infnorm(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryc->onenorm(); nrmInf_cons_violation = fmax(nrmInf_cons_violation, buf); @@ -244,7 +247,8 @@ int hiopResidual::update(const hiopIterate& it, // ryd ryd->copyFrom(*it.d); ryd->axpy(-1.0, d); - buf = ryd->infnorm_local(); + //buf = ryd->infnorm_local(); + buf = ryd->infnorm(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryd->onenorm(); 
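
A note on the infnorm_local() -> infnorm() switches in this file: the batched
MPI_Allreduce over aux[6] at the end of update() is removed by the last hunk of this
diff, so each residual norm now carries its own global reduction. A minimal sketch of
the semantics assumed of hiopVector::infnorm(), written as a hypothetical free function
over a raw array (the real method lives on the vector classes and follows the same
HIOP_USE_MPI pattern used throughout these patches):

    #include <mpi.h>
    #include <cassert>
    #include <cmath>

    // max_i |x[i]| across all MPI ranks; without MPI only the local loop remains
    double infnorm_sketch(const double* x, int n_local, MPI_Comm comm)
    {
      double nrm = 0.0;
      for(int i = 0; i < n_local; i++) {
        nrm = std::fmax(nrm, std::fabs(x[i]));   // local max
      }
      double nrm_glob;
      int ierr = MPI_Allreduce(&nrm, &nrm_glob, 1, MPI_DOUBLE, MPI_MAX, comm);
      assert(MPI_SUCCESS == ierr);
      return nrm_glob;                           // global max
    }
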
nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryd=%22.17e\n", buf); @@ -303,7 +307,7 @@ int hiopResidual::update(const hiopIterate& it, nrmOne_bar_feasib = nrmOne_nlp_feasib; // rszl = \mu e - sxl * zl - if(nlp->n_low_local() > 0) { + if(nlp->n_low_local() > 0) { //! n_low() rszl->setToZero(); rszl->axzpy(-1.0, *it.sxl, *it.zl); if(nlp->n_low_local() < nx_loc) { @@ -320,7 +324,7 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } // rszu = \mu e - sxu * zu - if(nlp->n_upp_local() > 0) { + if(nlp->n_upp_local() > 0) { //! rszu->setToZero(); rszu->axzpy(-1.0, *it.sxu, *it.zu); if(nlp->n_upp_local() < nx_loc) { @@ -366,7 +370,8 @@ int hiopResidual::update(const hiopIterate& it, // add mu rsvu->addConstant_w_patternSelect(mu, nlp->get_idu()); - buf = rsvu->infnorm_local(); + //buf = rsvu->infnorm_local(); + buf = rsvu->infnorm(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvu=%22.17e\n", buf); } @@ -386,21 +391,6 @@ int hiopResidual::update(const hiopIterate& it, // nrmInf_bar_feasib = aux_g[4]; // nrmInf_bar_complem = aux_g[5]; // #endif -#ifdef HIOP_USE_MPI - // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles - // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double - double aux[6] = - {nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem, nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem}; - double aux_g[6]; - int ierr = MPI_Allreduce(aux, aux_g, 6, MPI_DOUBLE, MPI_MAX, nlp->get_comm()); - assert(MPI_SUCCESS == ierr); - nrmInf_nlp_optim = aux_g[0]; - nrmInf_nlp_feasib = aux_g[1]; - nrmInf_nlp_complem = aux_g[2]; - nrmInf_bar_optim = aux_g[3]; - nrmInf_bar_feasib = aux_g[4]; - nrmInf_bar_complem = aux_g[5]; -#endif nlp->runStats.tmSolverInternal.stop(); return true; From bf79f0d93f9d5ab424b47adca5d611285c17f57b Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 14 Mar 2025 14:08:34 -0700 Subject: [PATCH 10/27] exposing mass matrix in the example --- src/Drivers/Dense/NlpDenseConsEx1.cpp | 10 +++++++++- src/Drivers/Dense/NlpDenseConsEx1.hpp | 9 ++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Drivers/Dense/NlpDenseConsEx1.cpp b/src/Drivers/Dense/NlpDenseConsEx1.cpp index ea5fcdfd..02883642 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.cpp @@ -76,7 +76,15 @@ bool Ex1Meshing1D::get_vecdistrib_info(size_type global_n, index_type* cols) for(int i = 0; i <= comm_size; i++) cols[i] = col_partition[i]; return true; } -void Ex1Meshing1D::applyM(DiscretizedFunction& f) { f.componentMult(*this->_mass); } +void Ex1Meshing1D::applyM(DiscretizedFunction& f) +{ + f.componentMult(*this->_mass); +} + +void Ex1Meshing1D::applyMinv(DiscretizedFunction& f) +{ + f.componentDiv(*this->_mass); +} // converts the local indexes to global indexes index_type Ex1Meshing1D::getGlobalIndex(index_type i_local) const diff --git a/src/Drivers/Dense/NlpDenseConsEx1.hpp b/src/Drivers/Dense/NlpDenseConsEx1.hpp index 8de2f80e..7c9be216 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.hpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.hpp @@ -68,8 +68,8 @@ class Ex1Meshing1D index_type* get_col_partition() const { return col_partition; } MPI_Comm get_comm() const { return comm; } - virtual void applyM(DiscretizedFunction& f); - + void applyM(DiscretizedFunction& f); + void applyMinv(DiscretizedFunction& f); 
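
For orientation: on this 1D mesh the mass matrix is kept diagonal, with _mass holding
the element lengths, so applyM and applyMinv above reduce to componentwise multiply and
divide (see the NlpDenseConsEx1.cpp hunk earlier in this patch). A minimal sketch of
the action on raw arrays, with hypothetical names:

    // y = M*x for a diagonal (lumped) mass matrix M = diag(mass)
    void apply_diag_mass(int n, const double* mass, const double* x, double* y)
    {
      for(int i = 0; i < n; i++) {
        y[i] = mass[i] * x[i];  // applyMinv instead does y[i] = x[i] / mass[i]
      }
    }
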
protected: hiop::hiopVector* _mass; // the length or the mass of the elements double _a, _b; // end points @@ -261,6 +261,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints bool applyM(const size_type& n, const double* x_in, double* y_out) { x->copyFrom(x_in); + _mesh->applyM(*x); x->copyTo(y_out); return true; } @@ -271,6 +272,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints virtual bool applyH(const size_type& n, const double* x_in, double* y_out) { x->copyFrom(x_in); + _mesh->applyM(*x); x->copyTo(y_out); return true; } @@ -281,6 +283,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints virtual bool applyHinv(const size_type& n, const double* x_in, double* y_out) { x->copyFrom(x_in); + _mesh->applyMinv(*x); x->copyTo(y_out); return true; } @@ -290,7 +293,7 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints */ virtual bool useWeightedInnerProducts() { - return false; + return false;//true; } private: From 5887c7539f68e6d560b90ecb4c12bdca194def2f Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 14 Mar 2025 14:09:36 -0700 Subject: [PATCH 11/27] added volume and lumped matrix --- src/Optimization/InnerProduct.cpp | 64 ++++++++++++++++++++++++++----- src/Optimization/InnerProduct.hpp | 12 ++++++ 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index 95978da3..1183088e 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -64,6 +64,7 @@ InnerProduct::InnerProduct(hiopNlpFormulation* nlp) { printf("InnerProduct::InnerProduct begin\n"); fflush(stdout); assert(nlp); + M_lump_ = nullptr; if(nlp->useWeightedInnerProd()) { vec_n_ = nlp_->alloc_primal_vec(); vec_n2_ = nlp_->alloc_primal_vec(); @@ -71,6 +72,7 @@ InnerProduct::InnerProduct(hiopNlpFormulation* nlp) vec_n_ = nullptr; vec_n2_ = nullptr; } + printf("InnerProduct::InnerProduct end\n"); fflush(stdout); } @@ -78,6 +80,7 @@ InnerProduct::~InnerProduct() { delete vec_n2_; delete vec_n_; + delete M_lump_; } bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) const @@ -153,12 +156,13 @@ double InnerProduct::norm_M_one(const hiopVector&x) const double InnerProduct::norm_complementarity(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { - //opt! pre-compute M*1 - vec_n_->copyFrom(x); - vec_n_->component_abs(); - nlp_->eval_M(*vec_n_, *vec_n2_); - vec_n_->setToConstant(1.); - return vec_n_->dotProductWith(*vec_n2_); + // //opt! pre-compute M*1 + // vec_n_->copyFrom(x); + // vec_n_->component_abs(); + // nlp_->eval_M(*vec_n_, *vec_n2_); + // vec_n_->setToConstant(1.); + // return vec_n_->dotProductWith(*vec_n2_); + return x.infnorm(); } else { return x.infnorm(); } @@ -168,13 +172,53 @@ double InnerProduct::norm_complementarity(const hiopVector& x) const double InnerProduct::volume() const { if(nlp_->useWeightedInnerProd()) { - //opt! 
pre-compute M*1 - vec_n_->setToConstant(1.); - nlp_->eval_M(*vec_n_, *vec_n2_); - return vec_n2_->dotProductWith(*vec_n_); + double vol_total = nlp_->m_ineq_low() + nlp_->m_ineq_upp(); + if(nlp_->n_low() > 0 || nlp_->n_upp() > 0) { + //compute ||1||_M + //vec_n_->setToConstant(1.); + const double vol_mult_bnds = M_lumped()->onenorm(); + if(nlp_->n_low() > 0) { + //For weighted Hilbert spaces we assume that if lower bounds are present, they are for all vars + vol_total += vol_mult_bnds; + } + if(nlp_->n_upp() > 0) { + //For weighted Hilbert spaces we assume that if lower bounds are present, they are for all vars + vol_total += vol_mult_bnds; + } + } + return vol_total; } else { return nlp_->n_complem(); } } +// Return vector containing the diagonals of the lumped mass matrix, possibly creating the internal object +const hiopVector* InnerProduct::M_lumped() const +{ + if(M_lump_ == nullptr) { + M_lump_ = nlp_->alloc_primal_vec(); + if(nlp_->useWeightedInnerProd()) { + vec_n_->setToConstant(1.); + apply_M(*vec_n_, *M_lump_); + } else { + M_lump_->setToConstant(1.); + } + } + return M_lump_; +} + +void InnerProduct:: +add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const +{ + if(nlp_->useWeightedInnerProd()) { + vec_n_->copyFrom(ixl); + vec_n_->axpy(-1.0, ixu); + vec_n_->componentMult(*M_lumped()); + x.axpy(ct, *vec_n_); + } else { + x.addLinearDampingTerm(ixl, ixu, 1.0, ct); + } +} + + } // end namespace diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 3c185430..13493ec6 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -109,6 +109,16 @@ class InnerProduct // Computes the "volume" of the space, norm of 1 function double volume() const; + + // Return vector containing the diagonals of the lumped mass matrix, possibly creating the internal object + const hiopVector* M_lumped() const; + + /* + * @brief add linear damping term + * Performs `this[i] = alpha*this[i] + sign[i]*ct*M_lumped[i]` where sign[i]=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. 
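+   * Per the InnerProduct.cpp hunk above, the net effect is x += ct * M_lumped .* (ixl - ixu):
+   * entries with only a lower bound receive +ct*M_lumped[i], entries with only an upper
+   * bound receive -ct*M_lumped[i], and entries with both or neither bound are unchanged.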
+ */ + void add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const; private: // Pointer to "client" NLP hiopNlpFormulation* nlp_; @@ -117,6 +127,8 @@ class InnerProduct mutable hiopVector* vec_n_; // Working vector in the size n of the variables, allocated only when for the weighted case mutable hiopVector* vec_n2_; + // Lumped mass matrix + mutable hiopVector* M_lump_; }; } //end namespace From f0de791941abe250f5cd43b2aaeee251f93d7f90 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Fri, 14 Mar 2025 14:17:18 -0700 Subject: [PATCH 12/27] adding weights through the code (linear system and resids) --- src/Optimization/KktLinSysLowRank.cpp | 3 +- src/Optimization/hiopIterate.cpp | 3 +- src/Optimization/hiopKKTLinSys.cpp | 45 +++++++++++++++++++++------ src/Optimization/hiopResidual.cpp | 8 +++-- 4 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/Optimization/KktLinSysLowRank.cpp b/src/Optimization/KktLinSysLowRank.cpp index 5bc5b49b..36291e9b 100644 --- a/src/Optimization/KktLinSysLowRank.cpp +++ b/src/Optimization/KktLinSysLowRank.cpp @@ -100,6 +100,7 @@ bool KktLinSysLowRank::update(const hiopIterate* iter, Dx_->setToZero(); Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl()); Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu()); + Dx_->componentMult(*nlp_->inner_prod()->M_lumped()); nlp_->log->write("Dx in KKT", *Dx_, hovMatrices); hess_low_rank->update_logbar_diag(*Dx_); @@ -218,7 +219,7 @@ bool KktLinSysLowRank::solveCompressed(hiopVector& rx, // some outputing nlp_->log->write("KKT Low rank: solve compressed SOL", hovIteration); nlp_->log->write(" dx: ", dx, hovIteration); - nlp_->log->write(" dyc: ", dyc, hovIteration); + nlp_->log->write(" dyc: ", dyc, hovWarning); nlp_->log->write(" dyd: ", dyd, hovIteration); delete r; #endif diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index e133ab94..aaffc04b 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -621,7 +621,8 @@ void hiopIterate::addLinearDampingTermToGrad_x(const double& mu, assert(x->get_local_size() == grad_x.get_local_size()); const double ct = kappa_d * mu * beta; - grad_x.addLinearDampingTerm(nlp->get_ixl(), nlp->get_ixu(), 1.0, ct); + //grad_x.addLinearDampingTerm(nlp->get_ixl(), nlp->get_ixu(), 1.0, ct); + nlp->inner_prod()->add_linear_damping_term(nlp->get_ixl(), nlp->get_ixu(), ct, grad_x); } void hiopIterate::addLinearDampingTermToGrad_d(const double& mu, diff --git a/src/Optimization/hiopKKTLinSys.cpp b/src/Optimization/hiopKKTLinSys.cpp index d388d1f1..80d11142 100644 --- a/src/Optimization/hiopKKTLinSys.cpp +++ b/src/Optimization/hiopKKTLinSys.cpp @@ -568,7 +568,7 @@ bool hiopKKTLinSysCompressedXYcYd::update(const hiopIterate* iter, Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl()); Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu()); nlp_->log->write("Dx in KKT", *Dx_, hovMatrices); - + assert(false); // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu Dd_->setToZero(); Dd_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); @@ -595,7 +595,7 @@ bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, /*********************************************************************** * perform the reduction to the compressed linear system - * rx_tilde = rx+Sxl^{-1}*[rszl-Zl*rxl] - Sxu^{-1}*(rszu-Zu*rxu) + * rx_tilde = rx+ M_lumped* Sxl^{-1}*[rszl-Zl*rxl] - M_lumped* Sxu^{-1}*(rszu-Zu*rxu) * ryd_tilde 
= ryd + [(Sdl^{-1}Vl+Sdu^{-1}Vu)]^{-1}* * [rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu)] * rd_tilde = rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu) @@ -603,23 +603,50 @@ bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, * yd_tilde = ryd + Dd_inv*rd_tilde */ rx_tilde_->copyFrom(*r.rx); + + // if(nlp_->n_low_local() > 0) { + // // rl:=rszl-Zl*rxl (using dir->x as working buffer) + // hiopVector& rl = *(dir->x); // temporary working buffer + // rl.copyFrom(*r.rszl); + // rl.axzpy(-1.0, *iter_->zl, *r.rxl); + // // rx_tilde = rx+Sxl^{-1}*rl + // rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); + // } + // if(nlp_->n_upp_local() > 0) { + // // ru:=rszu-Zu*rxu (using dir->x as working buffer) + // hiopVector& ru = *(dir->x); // temporary working buffer + // ru.copyFrom(*r.rszu); + // ru.axzpy(-1.0, *iter_->zu, *r.rxu); + // // rx_tilde = rx_tilde - Sxu^{-1}*ru + // rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + // } + + hiopVector& rx2 = *(dir->sxl); // temporary working buffer + rx2.setToZero(); if(nlp_->n_low_local() > 0) { - // rl:=rszl-Zl*rxl (using dir->x as working buffer) hiopVector& rl = *(dir->x); // temporary working buffer rl.copyFrom(*r.rszl); rl.axzpy(-1.0, *iter_->zl, *r.rxl); - // rx_tilde = rx+Sxl^{-1}*rl - rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); + //// rx_tilde = rx+Sxl^{-1}*rl + //rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); + rx2.axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); } + if(nlp_->n_upp_local() > 0) { - // ru:=rszu-Zu*rxu (using dir->x as working buffer) - hiopVector& ru = *(dir->x); // temporary working buffer + // ru:=rszu-Zu*rxu + hiopVector& ru = *(dir->x); ru.copyFrom(*r.rszu); ru.axzpy(-1.0, *iter_->zu, *r.rxu); - // rx_tilde = rx_tilde - Sxu^{-1}*ru - rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + //// rx_tilde = rx_tilde - Sxu^{-1}*ru + ////rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + rx2.axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); + } + if(nlp_->n_low_local() > 0 || nlp_->n_upp_local() > 0) { + rx2.componentMult(*nlp_->inner_prod()->M_lumped()); + rx_tilde_->axpy(1.0, rx2); } + // for ryd_tilde: ryd_tilde_->copyFrom(*r.ryd); // 1. the diag (Sdl^{-1}Vl+Sdu^{-1}Vu)^{-1} has already computed in Dd_inv in 'update' diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 2cb0791d..4090def5 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -146,7 +146,7 @@ double hiopResidual::compute_nlp_infeasib_onenorm(const hiopIterate& it, const h rdu->axpy(-1.0, *it.d); rdu->selectPattern(nlp->get_idu()); } - + //! 
the ifs above seem to do nothing for the return value nlp->runStats.tmSolverInternal.stop(); return nrmOne_infeasib; } @@ -192,6 +192,8 @@ int hiopResidual::update(const hiopIterate& it, jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); rx->axpy(1.0, grad); + //nlp->log->write("rx w/o log terms:", *rx, hovWarning); + //buf = rx->infnorm_local(); buf = nlp->inner_prod()->norm_stationarity(*rx); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); @@ -205,6 +207,8 @@ int hiopResidual::update(const hiopIterate& it, nrmInf_bar_optim = fmax(nrmInf_bar_optim, nlp->inner_prod()->norm_stationarity(*rx)); nrmOne_bar_optim += nlp->inner_prod()->norm_M_one(*rx); + //nlp->log->write("rx with log terms:", *rx, hovWarning); + //~ done with rx // rd rd->copyFrom(*it.yd); @@ -470,7 +474,7 @@ void hiopResidual::update_soc(const hiopIterate& it, nrmInf_bar_optim = nrmInf_bar_feasib = nrmInf_bar_complem = 0; nrmOne_nlp_feasib = nrmOne_bar_feasib = 0.; nrmOne_nlp_optim = nrmOne_bar_optim = 0.; - + assert(false); size_type nx_loc = rx->get_local_size(); const double& mu = logprob.mu; double buf; From 67f98d9c197946652b6bf3a60fe27431e0c33490 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Sun, 16 Mar 2025 16:41:49 -0700 Subject: [PATCH 13/27] weighted log barrier reduce/sum --- src/LinAlg/hiopVector.hpp | 15 +++++++++++++++ src/LinAlg/hiopVectorCompoundPD.cpp | 12 ++++++++++++ src/LinAlg/hiopVectorCompoundPD.hpp | 1 + src/LinAlg/hiopVectorPar.cpp | 22 ++++++++++++++++++++++ src/LinAlg/hiopVectorPar.hpp | 1 + 5 files changed, 51 insertions(+) diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp index a2743be5..f5d2292b 100644 --- a/src/LinAlg/hiopVector.hpp +++ b/src/LinAlg/hiopVector.hpp @@ -643,6 +643,21 @@ class hiopVector */ virtual double logBarrier_local(const hiopVector& select) const = 0; + /** + * @brief Weighted sum of selected log(this[i]) + * + * @param[in] weights - weights vector + * @param[in] select - pattern selection + * + * @pre `this`, `weights`, and `select` have same partitioning. + * @pre Selected elements of `this` are > 0. + * @post `this`, `weights`, and `select` are not modified + * + * @warning This is local method only! 
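+   *
+   * (i.e., returns sum{ weights[i]*ln(this[i]) : select[i]==1 }; the MPI reduction of
+   * the per-rank partial sums is the caller's job, as with logBarrier_local above)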
+ */ + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const = 0; + + /** * @brief adds the gradient of the log barrier, namely this[i]=this[i]+alpha*1/select(x[i]) * diff --git a/src/LinAlg/hiopVectorCompoundPD.cpp b/src/LinAlg/hiopVectorCompoundPD.cpp index fda099cf..782d8b8d 100644 --- a/src/LinAlg/hiopVectorCompoundPD.cpp +++ b/src/LinAlg/hiopVectorCompoundPD.cpp @@ -712,6 +712,18 @@ double hiopVectorCompoundPD::logBarrier_local(const hiopVector& select) const return sum; } +double hiopVectorCompoundPD::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + double sum = 0.0; + const hiopVectorCompoundPD& ix = dynamic_cast(select); + const hiopVectorCompoundPD& w = dynamic_cast(weights); + assert(this->get_num_parts() == ix.get_num_parts()); + for(index_type i = 0; i < n_parts_; i++) { + sum += vectors_[i]->logBarrierWeighted_local(ix.getVector(i), w.getVector(i)); + } + return sum; +} + double hiopVectorCompoundPD::sum_local() const { double sum = 0.0; diff --git a/src/LinAlg/hiopVectorCompoundPD.hpp b/src/LinAlg/hiopVectorCompoundPD.hpp index 4b58b6e1..570a8cf1 100644 --- a/src/LinAlg/hiopVectorCompoundPD.hpp +++ b/src/LinAlg/hiopVectorCompoundPD.hpp @@ -241,6 +241,7 @@ class hiopVectorCompoundPD : public hiopVector virtual void negate(); virtual void invert(); virtual double logBarrier_local(const hiopVector& select) const; + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; virtual double sum_local() const; virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index a7ad806b..2686e094 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -875,6 +875,28 @@ double hiopVectorPar::logBarrier_local(const hiopVector& select) const return sum; } +double hiopVectorPar::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + double sum = 0.0; + double comp = 0.0; + const hiopVectorPar& ix = dynamic_cast(select); + const hiopVectorPar& w = dynamic_cast(weights); + assert(this->n_local_ == ix.n_local_); + assert(this->n_local_ == w.n_local_); + + const double* ix_vec = ix.data_; + const double* w_vec = w.data_; + for(int i = 0; i < n_local_; i++) { + if(ix_vec[i] == 1.) 
{ + double y = w_vec[i]*log(data_[i]) - comp; + double t = sum + y; + comp = (t - sum) - y; + sum = t; + } + } + return sum; +} + double hiopVectorPar::sum_local() const { double sum = 0.0; diff --git a/src/LinAlg/hiopVectorPar.hpp b/src/LinAlg/hiopVectorPar.hpp index fc7ca599..0063ea5f 100644 --- a/src/LinAlg/hiopVectorPar.hpp +++ b/src/LinAlg/hiopVectorPar.hpp @@ -227,6 +227,7 @@ class hiopVectorPar : public hiopVector virtual void negate(); virtual void invert(); virtual double logBarrier_local(const hiopVector& select) const; + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; virtual double sum_local() const; virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); From df028e9d9232b9d1d55c8e3c60074e0e1df66264 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Sun, 16 Mar 2025 17:15:48 -0700 Subject: [PATCH 14/27] log bar weighting functionality in Iterate --- src/Optimization/InnerProduct.cpp | 38 +++++++++++++++++++++++++++++++ src/Optimization/InnerProduct.hpp | 22 +++++++++++++++--- src/Optimization/hiopIterate.cpp | 20 ++++++++++------ 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp b/src/Optimization/InnerProduct.cpp index 1183088e..8c5a497d 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -220,5 +220,43 @@ add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const doub } } +double InnerProduct::log_barrier_eval_local(const hiopVector& x, const hiopVector& ix) const +{ + if(nlp_->useWeightedInnerProd()) { + return x.logBarrierWeighted_local(ix, *M_lumped()); + } else { + return x.logBarrier_local(ix); + } +} + +// Adds (to `gradx`) the gradient of the weighted log, namely gradx = gradx - mu * M_lumped * ix/s +void InnerProduct::log_barrier_grad_add(const double& mu, const hiopVector& s, const hiopVector& ix, hiopVector& gradx) const +{ + if(nlp_->useWeightedInnerProd()) { + vec_n_->copyFrom(ix); + vec_n_->componentDiv(s); + vec_n_->componentMult(*M_lumped()); + gradx.axpy(mu, *vec_n_); + } else { + gradx.addLogBarrierGrad(mu, s, ix); + } +} + +double InnerProduct::linear_damping_term_local(const hiopVector& s, + const hiopVector& ixl, + const hiopVector& ixr, + const double& mu, + const double& kappa_d) const +{ + if(nlp_->useWeightedInnerProd()) { + vec_n_->copyFrom(s); + vec_n_->componentMult(*M_lumped()); + return vec_n_->linearDampingTerm_local(ixl, ixr, mu, kappa_d); + } else { + return s.linearDampingTerm_local(ixl, ixr, mu, kappa_d); + } +} + + } // end namespace diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 13493ec6..90d739ed 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -113,12 +113,28 @@ class InnerProduct // Return vector containing the diagonals of the lumped mass matrix, possibly creating the internal object const hiopVector* M_lumped() const; - /* - * @brief add linear damping term + /** + * Compute linear damping terms required by the weighted log barrier subproblem + * + * Essentially computes kappa_d*mu* \sum { this[i] | ixl[i]==1 and ixr[i]==0 } + */ + double linear_damping_term_local(const hiopVector& s, + const hiopVector& ixl, + const hiopVector& ixr, + const double& mu, + const double& kappa_d) const; + /** + * @brief Add linear damping term * Performs `this[i] = alpha*this[i] + sign[i]*ct*M_lumped[i]` where sign[i]=1 when EXACTLY one of * ixleft[i] and ixright[i] is 1.0 and 
sign=0 otherwise. */ - void add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const; + void add_linear_damping_term(const hiopVector& ixl, const hiopVector& ixu, const double& ct, hiopVector& x) const; + + // Computes weighted (by the lumped matrix) sum of log of selected (by `ix`) entries of `x` + double log_barrier_eval_local(const hiopVector& x, const hiopVector& ix) const; + + // Adds (to `gradx`) the gradient of the weighted log, namely gradx = gradx - mu * M_lumped * ix/s + void log_barrier_grad_add(const double& mu, const hiopVector& s, const hiopVector& ix, hiopVector& gradx) const; private: // Pointer to "client" NLP hiopNlpFormulation* nlp_; diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index aaffc04b..aa1a97f5 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -569,8 +569,10 @@ bool hiopIterate::adjustDuals_primalLogHessian(const double& mu, const double& k double hiopIterate::evalLogBarrier() const { double barrier; - barrier = sxl->logBarrier_local(nlp->get_ixl()); - barrier += sxu->logBarrier_local(nlp->get_ixu()); + //barrier = sxl->logBarrier_local(nlp->get_ixl()); + //barrier += sxu->logBarrier_local(nlp->get_ixu()); + barrier = nlp->inner_prod()->log_barrier_eval_local(*sxl, nlp->get_ixl()); + barrier += nlp->inner_prod()->log_barrier_eval_local(*sxu, nlp->get_ixu()); #ifdef HIOP_USE_MPI double res; int ierr = MPI_Allreduce(&barrier, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); @@ -586,8 +588,10 @@ double hiopIterate::evalLogBarrier() const void hiopIterate::addLogBarGrad_x(const double& mu, hiopVector& gradx) const { // gradx = grad - mu / sxl = grad - mu * select/sxl - gradx.addLogBarrierGrad(-mu, *sxl, nlp->get_ixl()); - gradx.addLogBarrierGrad(mu, *sxu, nlp->get_ixu()); + //gradx.addLogBarrierGrad(-mu, *sxl, nlp->get_ixl()); + //gradx.addLogBarrierGrad(mu, *sxu, nlp->get_ixu()); + nlp->inner_prod()->log_barrier_grad_add(-mu, *sxl, nlp->get_ixl(), gradx); + nlp->inner_prod()->log_barrier_grad_add( mu, *sxu, nlp->get_ixu(), gradx); } void hiopIterate::addLogBarGrad_d(const double& mu, hiopVector& gradd) const @@ -598,9 +602,11 @@ void hiopIterate::addLogBarGrad_d(const double& mu, hiopVector& gradd) const double hiopIterate::linearDampingTerm(const double& mu, const double& kappa_d) const { - double term; - term = sxl->linearDampingTerm_local(nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); - term += sxu->linearDampingTerm_local(nlp->get_ixu(), nlp->get_ixl(), mu, kappa_d); + //double term; + //term = sxl->linearDampingTerm_local(nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); + //term += sxu->linearDampingTerm_local(nlp->get_ixu(), nlp->get_ixl(), mu, kappa_d); + double term = nlp->inner_prod()->linear_damping_term_local(*sxl, nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); + term += nlp->inner_prod()->linear_damping_term_local(*sxu, nlp->get_ixu(), nlp->get_ixl(), mu, kappa_d); #ifdef HIOP_USE_MPI double res; int ierr = MPI_Allreduce(&term, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); From a44d355d3e8c05b0ee5f75e8c15b20bc8093f6c3 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Mon, 17 Mar 2025 14:30:00 -0700 Subject: [PATCH 15/27] fixing ci runtime errors --- src/Optimization/InnerProduct.cpp | 19 +++++++++++++++---- src/Optimization/InnerProduct.hpp | 3 +++ src/Optimization/hiopResidual.cpp | 16 ++++++++-------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/Optimization/InnerProduct.cpp 
b/src/Optimization/InnerProduct.cpp index 8c5a497d..493bb764 100644 --- a/src/Optimization/InnerProduct.cpp +++ b/src/Optimization/InnerProduct.cpp @@ -92,7 +92,15 @@ bool InnerProduct::apply_M(const hiopVector& x, hiopVector& y) const return true; } } - + +// Applies lumped mass matrix; +bool InnerProduct::apply_M_lumped(const hiopVector& x, hiopVector& y) const +{ + y.copyFrom(x); + y.componentMult(*M_lumped()); + return true; +} + // Computes ||x||_M double InnerProduct::norm_M(const hiopVector& x) const { @@ -130,9 +138,12 @@ double InnerProduct::norm_H_dual(const hiopVector& x) const double InnerProduct::norm_stationarity(const hiopVector& x) const { if(nlp_->useWeightedInnerProd()) { - nlp_->eval_H_inv(x, *vec_n_); - auto dp = x.dotProductWith(*vec_n_); - return ::std::sqrt(dp); + //nlp_->eval_H_inv(x, *vec_n_); + //auto dp = x.dotProductWith(*vec_n_); + //return ::std::sqrt(dp); + vec_n_->copyFrom(x); + vec_n_->componentDiv(*M_lumped()); + return vec_n_->infnorm(); } else { return x.infnorm(); } diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp index 90d739ed..03cdad43 100644 --- a/src/Optimization/InnerProduct.hpp +++ b/src/Optimization/InnerProduct.hpp @@ -88,6 +88,9 @@ class InnerProduct // Compute y=M*x bool apply_M(const hiopVector& x, hiopVector& y) const; + + // Appply lumped mass matrix, y=M_lumped*x + bool apply_M_lumped(const hiopVector& x, hiopVector& y) const; // Computes ||x||_M double norm_M(const hiopVector& x) const; diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index 4090def5..8ae309fb 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -186,7 +186,7 @@ int hiopResidual::update(const hiopIterate& it, //using rxl as auxiliary variable to compute -M*zl+M*zu rxl->copyFrom(*it.zu); rxl->axpy(-1.0, *it.zl); - nlp->inner_prod()->apply_M(*rxl, *rx); + nlp->inner_prod()->apply_M_lumped(*rxl, *rx); //continue adding to rx jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); @@ -258,7 +258,7 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryd=%22.17e\n", buf); // rxl=x-sxl-xl - if(nlp->n_low_local() > 0) { + if(nlp->n_low() > 0) { rxl->copyFrom(*it.x); rxl->axpy(-1.0, *it.sxl); rxl->axpy(-1.0, nlp->get_xl()); @@ -266,20 +266,20 @@ int hiopResidual::update(const hiopIterate& it, if(nlp->n_low_local() < nx_loc) { rxl->selectPattern(nlp->get_ixl()); } - buf = rxl->infnorm_local(); + buf = rxl->infnorm(); // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxl=%22.17e\n", buf); } // printf(" %10.4e (xl)", nrmInf_nlp_feasib); // rxu=-x-sxu+xu - if(nlp->n_upp_local() > 0) { + if(nlp->n_upp() > 0) { rxu->copyFrom(nlp->get_xu()); rxu->axpy(-1.0, *it.x); rxu->axpy(-1.0, *it.sxu); if(nlp->n_upp_local() < nx_loc) { rxu->selectPattern(nlp->get_ixu()); } - buf = rxu->infnorm_local(); + buf = rxu->infnorm(); // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxu=%22.17e\n", buf); } @@ -311,7 +311,7 @@ int hiopResidual::update(const hiopIterate& it, nrmOne_bar_feasib = nrmOne_nlp_feasib; // rszl = \mu e - sxl * zl - if(nlp->n_low_local() > 0) { //! n_low() + if(nlp->n_low() > 0) { //! 
n_low() rszl->setToZero(); rszl->axzpy(-1.0, *it.sxl, *it.zl); if(nlp->n_low_local() < nx_loc) { @@ -328,7 +328,7 @@ int hiopResidual::update(const hiopIterate& it, nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } // rszu = \mu e - sxu * zu - if(nlp->n_upp_local() > 0) { //! + if(nlp->n_upp() > 0) { rszu->setToZero(); rszu->axzpy(-1.0, *it.sxu, *it.zu); if(nlp->n_upp_local() < nx_loc) { @@ -474,7 +474,7 @@ void hiopResidual::update_soc(const hiopIterate& it, nrmInf_bar_optim = nrmInf_bar_feasib = nrmInf_bar_complem = 0; nrmOne_nlp_feasib = nrmOne_bar_feasib = 0.; nrmOne_nlp_optim = nrmOne_bar_optim = 0.; - assert(false); + size_type nx_loc = rx->get_local_size(); const double& mu = logprob.mu; double buf; From 82c02d6477e2c3916465ecf8d4b5a3883c38c570 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Mon, 17 Mar 2025 15:21:43 -0700 Subject: [PATCH 16/27] cuda and raja impl; also added tests --- src/LinAlg/VectorCudaKernels.cu | 29 ++++++++++++++++++ src/LinAlg/VectorCudaKernels.hpp | 2 ++ src/LinAlg/hiopVectorCuda.cpp | 10 +++++++ src/LinAlg/hiopVectorCuda.hpp | 2 ++ src/LinAlg/hiopVectorRaja.hpp | 1 + src/LinAlg/hiopVectorRajaImpl.hpp | 24 +++++++++++++++ tests/LinAlg/vectorTests.hpp | 49 +++++++++++++++++++++++++++++++ tests/testVector.cpp | 1 + 8 files changed, 118 insertions(+) diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu index b1c9a591..edff95f5 100644 --- a/src/LinAlg/VectorCudaKernels.cu +++ b/src/LinAlg/VectorCudaKernels.cu @@ -1042,6 +1042,35 @@ double log_barr_obj_kernel(int n, double* d1, const double* id) return sum; } +/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/ +double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w) +{ + thrust::device_ptr dev_v = thrust::device_pointer_cast(d1); + thrust::device_ptr id_v = thrust::device_pointer_cast(id); + thrust::device_ptr wei_v = thrust::device_pointer_cast(w); + + // wrap raw pointer with a device_ptr + thrust_log_select log_select_op; + thrust::plus plus_op; + thrust::multiplies mult_op; + + // TODO: how to avoid this temp vec? 
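+  // (one option, assuming the standard thrust API, would be a single fused
+  //  transform_reduce over a zip of (d1, id, w), e.g.
+  //    auto it = thrust::make_zip_iterator(thrust::make_tuple(dev_v, id_v, wei_v));
+  //    sum = thrust::transform_reduce(thrust::device, it, it+n, wlog_op(), 0.0, plus_op);
+  //  with a hypothetical functor wlog_op returning w*log(d) when id==1 and 0 otherwise;
+  //  that would avoid the temporary allocation/free below)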
+ thrust::device_ptr v_temp = thrust::device_malloc(n*sizeof(double)); + + // compute v=x*id + thrust::transform(thrust::device, dev_v, dev_v+n, id_v, v_temp, mult_op); + // compute v[i] = ( log(v[i]) if v[i]>0, otherwise 0 ) + thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, log_select_op); + // compute v[i] = w[i]*v[i] + thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, mult_op); + // sum up + const double sum = thrust::reduce(thrust::device, v_temp, v_temp+n, 0.0, plus_op); + + thrust::device_free(v_temp); + + return sum; +} + /** @brief compute sum(d1[i]) */ double thrust_sum_kernel(int n, double* d1) { diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 1a7e478b..06a674e2 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -187,6 +187,8 @@ void thrust_component_sqrt_kernel(int n, double* d1); void thrust_negate_kernel(int n, double* d1); /** @brief compute sum(log(d1[i])) forall i where id[i]=1*/ double log_barr_obj_kernel(int n, double* d1, const double* id); +/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/ +double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w); /** @brief compute sum(d1[i]) */ double thrust_sum_kernel(int n, double* d1); /** @brief Linear damping term */ diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index 1a81bc78..36d9e9ee 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -642,6 +642,16 @@ double hiopVectorCuda::logBarrier_local(const hiopVector& select) const return hiop::cuda::log_barr_obj_kernel(n_local_, data_, id); } +double hiopVectorCuda::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + assert(n_local_ == select.get_local_size()); + assert(n_local_ == weights.get_local_size()); + const double* id = select.local_data_const(); + const double* w = weights.local_data_const(); + + return hiop::cuda::log_barr_wei_obj_kernel(n_local_, data_, id, w); +} + /* @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x) */ void hiopVectorCuda::addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select) { diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index 5c719705..b4b750d9 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -233,6 +233,8 @@ class hiopVectorCuda : public hiopVector virtual void invert(); /// @brief compute log barrier term, that is sum{ln(x_i):i=1,..,n} virtual double logBarrier_local(const hiopVector& select) const; + /// @brief compute log barrier term, that is sum{weights[i]*ln(x_i):select[i]==1} + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; /// @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x) virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); /// @brief compute sum{(x_i):i=1,..,n} diff --git a/src/LinAlg/hiopVectorRaja.hpp b/src/LinAlg/hiopVectorRaja.hpp index 8541c0e0..1f68a735 100644 --- a/src/LinAlg/hiopVectorRaja.hpp +++ b/src/LinAlg/hiopVectorRaja.hpp @@ -224,6 +224,7 @@ class hiopVectorRaja : public hiopVector virtual void negate(); virtual void invert(); virtual double logBarrier_local(const hiopVector& select) const; + virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const; virtual double sum_local() const; virtual void 
addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index bfe1e67f..ca13545c 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -1388,6 +1388,30 @@ double hiopVectorRaja::logBarrier_local(const hiopVector& select) cons return sum.get(); } +template +double hiopVectorRaja::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const +{ + const hiopVectorRaja& sel = dynamic_cast&>(select); + const hiopVectorRaja& wei = dynamic_cast&>(weights); + assert(this->n_local_ == sel.n_local_); + assert(this->n_local_ == wei.n_local_);` + + double* data = data_dev_; + const double* id = sel.local_data_const(); + const double* w = wei.local_data_const(); + RAJA::ReduceSum sum(0.0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { +#ifdef HIOP_DEEPCHECKS + assert(id[i] == one || id[i] == zero); +#endif + sum += id[i]*w[i]*std::log(data[i]); + }); + + return sum.get(); +} + /** * @brief Sum all elements */ diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index 1a0b10d0..2baaacbb 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -1211,6 +1211,55 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } + /** + * @brief Test: + * sum{w[i]*ln(x[i]): pattern[i] = 1} + */ + bool vectorLogBarrierWeighted(hiop::hiopVector& x, + hiop::hiopVector& pattern, + hiop::hiopVector& weight, + const int rank) + { + const local_ordinal_type N = getLocalSize(&x); + assert(N == getLocalSize(&pattern)); + assert(N == getLocalSize(&weight)); + + // Ensure that only N-1 elements of x are + // used in the log calculation + pattern.setToConstant(one); + setLocalElement(&pattern, N - 1, zero); + + const real_type x_val = three; + x.setToConstant(x_val); + // Make sure pattern eliminates the correct element + setLocalElement(&x, N - 1, 1000 * three); + + const real_type w_val = 1./N; + weight.setToConstant(w_val); + // Make sure pattern eliminates the correct element + setLocalElement(&weight, N-1, 100); + + real_type expected = (N - 1) * std::log(x_val) / N; + real_type result = x.logBarrier_local(pattern); + + int fail = !isEqual(result, expected); + + // Make sure pattern eliminates the correct elements + pattern.setToConstant(zero); + setLocalElement(&pattern, N - 1, one); + x.setToConstant(zero); + setLocalElement(&x, N - 1, x_val); + weight.setToConstant(zero); + setLocalElement(&weight, N - 1, w_val); + + expected = std::log(x_val)*w_val; + result = x.logBarrier_local(pattern); + fail += !isEqual(result, expected); + + printMessage(fail, __func__, rank); + return reduceReturn(fail, &x); + } + /** * @brief Test: * sum{x[i]} diff --git a/tests/testVector.cpp b/tests/testVector.cpp index f0a362a7..2e264939 100644 --- a/tests/testVector.cpp +++ b/tests/testVector.cpp @@ -296,6 +296,7 @@ int runTests(const char* mem_space, MPI_Comm comm) fail += test.vectorNegate(*x, rank); fail += test.vectorInvert(*x, rank); fail += test.vectorLogBarrier(*x, *y, rank); + fail += test.vectorLogBarrierWeighted(*x, *y, *z, rank); fail += test.vector_sum_local(*x, rank); fail += test.vectorAddLogBarrierGrad(*x, *y, *z, rank); fail += test.vectorLinearDampingTerm(*x, *y, *z, rank); From 758ee139178528fa2d298bcdb1911a52d1703884 Mon Sep 17 00:00:00 2001 From: Cosmin G Petra Date: Mon, 17 Mar 2025 16:02:53 -0700 Subject: [PATCH 17/27] fixed bug in test and syntax for 
---
 src/LinAlg/VectorCudaKernels.cu | 2 +-
 tests/LinAlg/vectorTests.hpp    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu
index edff95f5..e1ee3cd6 100644
--- a/src/LinAlg/VectorCudaKernels.cu
+++ b/src/LinAlg/VectorCudaKernels.cu
@@ -1062,7 +1062,7 @@ double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double
   // compute v[i] = ( log(v[i]) if v[i]>0, otherwise 0 )
   thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, log_select_op);
   // compute v[i] = w[i]*v[i]
-  thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, mult_op);
+  thrust::transform(thrust::device, v_temp, v_temp+n, wei_v, v_temp, mult_op);
   // sum up
   const double sum = thrust::reduce(thrust::device, v_temp, v_temp+n, 0.0, plus_op);
 
diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp
index 2baaacbb..215b3fdd 100644
--- a/tests/LinAlg/vectorTests.hpp
+++ b/tests/LinAlg/vectorTests.hpp
@@ -1240,8 +1240,8 @@
     setLocalElement(&weight, N-1, 100);
 
     real_type expected = (N - 1) * std::log(x_val) / N;
-    real_type result = x.logBarrier_local(pattern);
-
+    real_type result = x.logBarrierWeighted_local(pattern, weight);
+
     int fail = !isEqual(result, expected);
 
     // Make sure pattern eliminates the correct elements
@@ -1253,7 +1253,7 @@
     setLocalElement(&weight, N - 1, w_val);
 
     expected = std::log(x_val)*w_val;
-    result = x.logBarrier_local(pattern);
+    result = x.logBarrierWeighted_local(pattern, weight);
     fail += !isEqual(result, expected);
 
     printMessage(fail, __func__, rank);

From ecb755427ce95f672e6230432cf07b6e2d97a95e Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Mon, 17 Mar 2025 16:16:20 -0700
Subject: [PATCH 18/27] fixed typo in Raja Vec Impl

---
 src/LinAlg/hiopVectorRajaImpl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp
index ca13545c..70f0a74a 100644
--- a/src/LinAlg/hiopVectorRajaImpl.hpp
+++ b/src/LinAlg/hiopVectorRajaImpl.hpp
@@ -1394,7 +1394,7 @@ double hiopVectorRaja<MEMBACKEND, EXECPOLICYRAJA>::logBarrierWeighted_local(const hiopVector& sele
   const hiopVectorRaja& sel = dynamic_cast<const hiopVectorRaja&>(select);
   const hiopVectorRaja& wei = dynamic_cast<const hiopVectorRaja&>(weights);
   assert(this->n_local_ == sel.n_local_);
-  assert(this->n_local_ == wei.n_local_);`
+  assert(this->n_local_ == wei.n_local_);
 
   double* data = data_dev_;
   const double* id = sel.local_data_const();

From fc2b25dd0dcea1dc6f0989002fa1c6301c929f0d Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Tue, 18 Mar 2025 09:10:41 -0700
Subject: [PATCH 19/27] fixing logBarWeighted in RAJA vector

---
 src/LinAlg/hiopVectorRajaImpl.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp
index 70f0a74a..bf4387a5 100644
--- a/src/LinAlg/hiopVectorRajaImpl.hpp
+++ b/src/LinAlg/hiopVectorRajaImpl.hpp
@@ -1406,7 +1406,9 @@ double hiopVectorRaja<MEMBACKEND, EXECPOLICYRAJA>::logBarrierWeighted_local(const hiopVector& sele
 #ifdef HIOP_DEEPCHECKS
       assert(id[i] == one || id[i] == zero);
 #endif
-      sum += id[i]*w[i]*std::log(data[i]);
+      if(one == id[i]) {
+        sum += w[i]*std::log(data[i]);
+      }
     });
 
   return sum.get();

From 86c437e69315e3d051c13fb69366dc5dd9154e13 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Tue, 18 Mar 2025 09:22:39 -0700
Subject: [PATCH 20/27] strengthen test for logBarrierWeighted

---
 tests/LinAlg/vectorTests.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp
index 215b3fdd..5be67f8c 100644
--- a/tests/LinAlg/vectorTests.hpp
+++ b/tests/LinAlg/vectorTests.hpp
@@ -1247,9 +1247,9 @@
     // Make sure pattern eliminates the correct elements
     pattern.setToConstant(zero);
     setLocalElement(&pattern, N - 1, one);
-    x.setToConstant(zero);
+    x.setToConstant(three);
     setLocalElement(&x, N - 1, x_val);
-    weight.setToConstant(zero);
+    weight.setToConstant(three);
     setLocalElement(&weight, N - 1, w_val);
 
     expected = std::log(x_val)*w_val;

From 038229f94b51f9e8639170019ac5a22b25473881 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Tue, 18 Mar 2025 09:23:18 -0700
Subject: [PATCH 21/27] HIP implementation of logBarrierWeighted

---
 src/LinAlg/VectorHipKernels.cpp | 30 ++++++++++++++++++++++++++++++
 src/LinAlg/VectorHipKernels.hpp |  9 ++++++++-
 src/LinAlg/hiopVectorHip.cpp    | 11 +++++++++++
 src/LinAlg/hiopVectorHip.hpp    |  2 ++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp
index 55db4899..356b18ce 100644
--- a/src/LinAlg/VectorHipKernels.cpp
+++ b/src/LinAlg/VectorHipKernels.cpp
@@ -937,6 +937,36 @@ double log_barr_obj_kernel(int n, double* d1, const double* id)
   return sum;
 }
 
+/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/
+double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w)
+{
+  thrust::device_ptr<double> dev_v = thrust::device_pointer_cast(d1);
+  thrust::device_ptr<const double> id_v = thrust::device_pointer_cast(id);
+  thrust::device_ptr<const double> wei_v = thrust::device_pointer_cast(w);
+
+  // wrap raw pointer with a device_ptr
+  thrust_log_select log_select_op;
+  thrust::plus<double> plus_op;
+  thrust::multiplies<double> mult_op;
+
+  // TODO: how to avoid this temp vec?
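+  // One possible answer, sketched here but not tested: fuse the select/log/weight
+  // passes into a single thrust::transform_reduce over a zip iterator, which needs
+  // no temporary. The functor wei_log_select below is hypothetical (not part of
+  // this patch); everything else uses standard thrust calls:
+  //   struct wei_log_select {
+  //     __host__ __device__ double operator()(const thrust::tuple<double, double, double>& t) const
+  //     {
+  //       const double xi = thrust::get<0>(t) * thrust::get<1>(t);  // x[i]*id[i]
+  //       return xi > 0 ? thrust::get<2>(t) * log(xi) : 0.0;        // w[i]*log(x[i]) where id[i]=1
+  //     }
+  //   };
+  //   const double sum = thrust::transform_reduce(
+  //       thrust::device,
+  //       thrust::make_zip_iterator(thrust::make_tuple(dev_v, id_v, wei_v)),
+  //       thrust::make_zip_iterator(thrust::make_tuple(dev_v+n, id_v+n, wei_v+n)),
+  //       wei_log_select(), 0.0, thrust::plus<double>());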
+  thrust::device_ptr<double> v_temp = thrust::device_malloc<double>(n);
+
+  // compute v=x*id
+  thrust::transform(thrust::device, dev_v, dev_v+n, id_v, v_temp, mult_op);
+  // compute v[i] = ( log(v[i]) if v[i]>0, otherwise 0 )
+  thrust::transform(thrust::device, v_temp, v_temp+n, v_temp, log_select_op);
+  // compute v[i] = w[i]*v[i]
+  thrust::transform(thrust::device, v_temp, v_temp+n, wei_v, v_temp, mult_op);
+  // sum up
+  const double sum = thrust::reduce(thrust::device, v_temp, v_temp+n, 0.0, plus_op);
+
+  thrust::device_free(v_temp);
+
+  return sum;
+}
+
+
 /** @brief compute sum(d1[i]) */
 double thrust_sum_kernel(int n, double* d1)
 {
diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp
index 2766d76b..10b3cb25 100644
--- a/src/LinAlg/VectorHipKernels.hpp
+++ b/src/LinAlg/VectorHipKernels.hpp
@@ -187,10 +187,17 @@ void thrust_component_sqrt_kernel(int n, double* d1);
 void thrust_negate_kernel(int n, double* d1);
 /** @brief compute sum(log(d1[i])) forall i where id[i]=1*/
 double log_barr_obj_kernel(int n, double* d1, const double* id);
+/** @brief compute sum(w[i]*log(d1[i])) forall i where id[i]=1*/
+double log_barr_wei_obj_kernel(int n, double* d1, const double* id, const double* w);
 /** @brief compute sum(d1[i]) */
 double thrust_sum_kernel(int n, double* d1);
 /** @brief Linear damping term */
-double linear_damping_term_kernel(int n, const double* vd, const double* ld, const double* rd, double mu, double kappa_d);
+double linear_damping_term_kernel(int n,
+                                  const double* vd,
+                                  const double* ld,
+                                  const double* rd,
+                                  double mu,
+                                  double kappa_d);
 /** @brief compute min(d1) */
 double min_local_kernel(int n, double* d1);
 /** @brief Checks if selected elements of `d1` are positive */
diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp
index b7ce752c..815002b6 100644
--- a/src/LinAlg/hiopVectorHip.cpp
+++ b/src/LinAlg/hiopVectorHip.cpp
@@ -642,6 +642,17 @@ void hiopVectorHip::negate() { hiop::hip::thrust_negate_kernel(n_local_, data_);
 /// @brief Invert (1/x) the elements of this
 void hiopVectorHip::invert() { hiop::hip::invert_kernel(n_local_, data_); }
 
+/** @brief Sum all selected weights[i]*log(this[i]) */
+double hiopVectorHip::logBarrierWeighted_local(const hiopVector& select, const hiopVector& weights) const
+{
+  assert(n_local_ == select.get_local_size());
+  assert(n_local_ == weights.get_local_size());
+  const double* id = select.local_data_const();
+  const double* w = weights.local_data_const();
+
+  return hiop::hip::log_barr_wei_obj_kernel(n_local_, data_, id, w);
+}
+
 /** @brief Sum all selected log(this[i]) */
 double hiopVectorHip::logBarrier_local(const hiopVector& select) const
 {
diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp
index 36922c1c..958f7d14 100644
--- a/src/LinAlg/hiopVectorHip.hpp
+++ b/src/LinAlg/hiopVectorHip.hpp
@@ -237,6 +237,8 @@ class hiopVectorHip : public hiopVector
   virtual void invert();
   /// @brief compute log barrier term, that is sum{ln(x_i):i=1,..,n}
   virtual double logBarrier_local(const hiopVector& select) const;
+  /// @brief compute weighted log barrier term, that is sum{w[i]*ln(x_i): select[i]==1}
+  virtual double logBarrierWeighted_local(const hiopVector& select, const hiopVector& w) const;
   /// @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x)
   virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select);
   /// @brief compute sum{(x_i):i=1,..,n}

From 01495bf913be224b7a12ac92a4f06c723bc17155 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Wed, 19 Mar 2025 10:42:43 -0700
Subject: [PATCH 22/27] draft of weighted B0 in the LowRank quasi Newton solves

---
 src/Optimization/HessianDiagPlusRowRank.cpp | 44 +++++++++++++++------
 src/Optimization/HessianDiagPlusRowRank.hpp | 14 +++++--
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp
index 9446f6de..3a6174c6 100644
--- a/src/Optimization/HessianDiagPlusRowRank.cpp
+++ b/src/Optimization/HessianDiagPlusRowRank.cpp
@@ -160,6 +160,7 @@ HessianDiagPlusRowRank::HessianDiagPlusRowRank(hiopNlpDenseConstraints* nlp_in,
                     sigma_strategy.c_str());
 
   Dx_ = DhInv_->alloc_clone();
+  //B0_ = Dx_->alloc_clone();
 #ifdef HIOP_DEEPCHECKS
   Vmat_ = V_->alloc_clone();
 #endif
@@ -172,6 +173,7 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank()
 {
   delete DhInv_;
   delete Dx_;
+  //delete B0_;
 
   delete St_;
   delete Yt_;
@@ -238,7 +240,11 @@ void HessianDiagPlusRowRank::alloc_for_limited_mem(const size_type& mem_length)
 
 bool HessianDiagPlusRowRank::update_logbar_diag(const hiopVector& Dx)
{
-  DhInv_->setToConstant(sigma_);
+  // DhInv = (B0+Dx)^{-1}
+  //DhInv_->setToConstant(sigma_);
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
+  DhInv_->copyFrom(B0);
+  DhInv_->scale(sigma_);
   DhInv_->axpy(1.0, Dx);
   Dx_->copyFrom(Dx);
 #ifdef HIOP_DEEPCHECKS
@@ -256,8 +262,6 @@ void HessianDiagPlusRowRank::print(FILE* f, hiopOutVerbosity v, const char* msg)
   fprintf(f, "%s\n", msg);
 #ifdef HIOP_DEEPCHECKS
   nlp_->log->write("Dx", *Dx_, v);
-#else
-  fprintf(f, "Dx is not stored in this class, but it can be computed from Dx=DhInv^(1)-sigma");
 #endif
   nlp_->log->printf(v, "sigma=%22.16f;\n", sigma_);
   nlp_->log->write("DhInv", *DhInv_, v);
@@ -269,7 +273,7 @@ void HessianDiagPlusRowRank::print(FILE* f, hiopOutVerbosity v, const char* msg)
   nlp_->log->write("V", *Vmat_, v);
 #else
   fprintf(f,
-          "V matrix is available at this point (only its LAPACK factorization). Print it in "
+          "V matrix is not available at this point (only its LAPACK factorization). Print it in "
          "updateInternalBFGSRepresentation() instead, before factorizeV()\n");
 #endif
   nlp_->log->write("L", *L_, v);
@@ -383,9 +387,9 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
     }  // else of the switch
     // safe guard it
     sigma_ = fmax(fmin(sigma_safe_max_, sigma_), sigma_safe_min_);
-    nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: sigma was updated to %22.16e\n", sigma_);
+    nlp_->log->printf(hovScalars, "HessianDiagPlusRowRank: sigma was updated to %22.16e\n", sigma_);
   } else {  // sTy is too small or negative -> skip
-    nlp_->log->printf(hovLinAlgScalars,
+    nlp_->log->printf(hovScalars,
                       "HessianDiagPlusRowRank: s^T*y=%12.6e not positive enough... skipping the Hessian update\n",
                       sTy);
   }
@@ -422,7 +426,7 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
 * Namely it computes V, a symmetric 2lx2l given by
 *   V = [S'*B0*(DhInv*B0-I)*S    -L+S'*B0*DhInv*Y ]
 *       [-L'+Y'*Dhinv*B0*S       +D+Y'*Dhinv*Y    ]
- * In this function V is factorized and it will hold the factors at the end of the function
+ * Also, the method factorizes V. V will contain the factors at the end of this method.
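+ * (Here B0 denotes the initial Hessian approximation: with the weighted inner products
+ * B0 = sigma*M, where M is the lumped mass matrix from nlp_->inner_prod()->M_lumped();
+ * in the Euclidean case this reduces to the usual B0 = sigma*I.)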
 * Note that L, D, S, and Y are from the BFGS secant representation and are updated/computed in 'update'
 */
void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
@@ -457,9 +461,11 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
 
   //-- block (1,2)
   hiopMatrixDense& StB0DhInvYmL = DpYtDhInvY;  // just a rename
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
   hiopVector& B0DhInv = new_n_vec1(n);
   B0DhInv.copyFrom(*DhInv_);
-  B0DhInv.scale(sigma_);
+  //B0DhInv.scale(sigma_);
+  B0DhInv.axpy(sigma_, B0);
   mat_times_diag_times_mattrans_local(StB0DhInvYmL, *St_, B0DhInv, *Yt_);
 #ifdef HIOP_USE_MPI
   memcpy(buff1_lxlx3_ + l * l, StB0DhInvYmL.local_data(), buffsize);
@@ -470,10 +476,14 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
   V_->copyBlockFromMatrix(0, l, StB0DhInvYmL);
 #endif
 
-  //-- block (2,2)
+  //-- block (1,1)
   hiopVector& theDiag = B0DhInv;  // just a rename, also reuses values
   theDiag.addConstant(-1.0);      // at this point theDiag=DhInv*B0-I
+
+  //multiply with sigma*I
   theDiag.scale(sigma_);
+  theDiag.componentMult(B0);
+
   hiopMatrixDense& StDS = DpYtDhInvY;  // a rename
   sym_mat_times_diag_times_mattrans_local(0.0, StDS, 1.0, *St_, theDiag);
 #ifdef HIOP_USE_MPI
@@ -550,6 +560,9 @@ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x)
   hiopVector& B0DhInvx = new_n_vec1(n);
   B0DhInvx.copyFrom(x);    // it contains DhInv*res
   B0DhInvx.scale(sigma_);  // B0*(DhInv*res)
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
+  B0DhInvx.componentMult(B0);
+
   St_->timesVec(0.0, stx, 1.0, B0DhInvx);
 
   // 3. solve with V
@@ -562,6 +575,7 @@ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x)
   hiopVector& result = new_n_vec1(n);
   St_->transTimesVec(0.0, result, 1.0, spart);
   result.scale(sigma_);
+  result.componentMult(B0);
   Yt_->transTimesVec(1.0, result, 1.0, ypart);
   result.componentMult(*DhInv_);
 
@@ -613,7 +627,9 @@ void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double beta,
   auto& Y1 = new_Y1(X, *Yt_);  // both are kxl
   hiopVector& B0DhInv = new_n_vec1(n);
   B0DhInv.copyFrom(*DhInv_);
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
   B0DhInv.scale(sigma_);
+  B0DhInv.componentMult(B0);
   mat_times_diag_times_mattrans_local(S1, X, B0DhInv, *St_);
   mat_times_diag_times_mattrans_local(Y1, X, *DhInv_, *Yt_);
 
@@ -1060,7 +1076,8 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
   // we have B+=B-B*s*B*s'/(s'*B*s)+yy'/(y'*s)
   // B0 is sigma*I. There is an additional diagonal log-barrier term Dx_
-
+  const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
+
   bool print = false;
   if(print) {
     nlp_->log->printf(hovMatrices, "---HessianDiagPlusRowRank::times_vec \n");
@@ -1106,6 +1123,7 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
       // compute ak by an inner loop
       a[k]->copyFrom(*sk);
       a[k]->scale(sigma_);
+      a[k]->componentMult(B0);
 
       for(int i = 0; i < k; i++) {
         double biTsk = b[i]->dotProductWith(*sk);
@@ -1125,8 +1143,12 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
     y.axzpy(alpha, x, *Dx_);
   }
 
-  y.axpy(alpha * sigma_, x);
+  hiopVector* aux = B0.new_copy();
+  aux->componentMult(x);
+  y.axpy(alpha * sigma_, *aux);
+  delete aux;
+
   for(int k = 0; k < l_curr_; k++) {
     double bkTx = b[k]->dotProductWith(x);
     double akTx = a[k]->dotProductWith(x);
diff --git a/src/Optimization/HessianDiagPlusRowRank.hpp b/src/Optimization/HessianDiagPlusRowRank.hpp
index 92d4ed6a..bec4d9a0 100644
--- a/src/Optimization/HessianDiagPlusRowRank.hpp
+++ b/src/Optimization/HessianDiagPlusRowRank.hpp
@@ -161,10 +161,18 @@ class HessianDiagPlusRowRank : public hiopMatrix
 private:
   // Vector for (B0+Dk)^{-1}
   hiopVector* DhInv_;
-  // Dx_ is needed in times_vec (for residual checking in solveCompressed). Can be recomputed from DhInv, but I decided to
-  // store it instead to avoid round-off errors
+
+  /** @brief Log-barrier diagonal term
+   *
+   * `Dx_` is needed in times_vec (for residual checking in solveCompressed). While it can be recomputed on the fly,
+   * for example from DhInv, I decided to store it to avoid round-off errors.
+   */
   hiopVector* Dx_;
+
+  //// The initial B0 Hessian approximation
+  //hiopVector* B0_;
+
+  // Flags recomputation of the internal inverse representation
   bool matrix_changed_;
 
   // These are matrices from the compact representation; they are updated at each iteration.
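  // (For reference, in the compact representation D = diag(s_k^T y_k) and L is strictly
  //  lower triangular with L_ij = s_i^T y_j for i > j, following Byrd, Nocedal & Schnabel.)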
@@ -178,7 +186,7 @@ class HessianDiagPlusRowRank : public hiopMatrix
   hiopMatrixDense* L_;
   /// Diagonal matrix from the compact representation
   hiopVector* D_;
-  // Matrix V from the representation of the inverse
+  // Matrix V from the representation of the inverse, or its factors
   hiopMatrixDense* V_;
 #ifdef HIOP_DEEPCHECKS
   // copy of the V matrix - needed to check the residual

From b283b07fd66df77c4d2c3955ed64f9860be5cf08 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Wed, 19 Mar 2025 13:40:28 -0700
Subject: [PATCH 23/27] fixed bug in new B0 code

---
 src/Optimization/HessianDiagPlusRowRank.cpp | 17 +++++++++--------
 src/Optimization/HessianDiagPlusRowRank.hpp |  5 +----
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp
index 3a6174c6..c0fea832 100644
--- a/src/Optimization/HessianDiagPlusRowRank.cpp
+++ b/src/Optimization/HessianDiagPlusRowRank.cpp
@@ -160,7 +160,7 @@ HessianDiagPlusRowRank::HessianDiagPlusRowRank(hiopNlpDenseConstraints* nlp_in,
                     sigma_strategy.c_str());
 
   Dx_ = DhInv_->alloc_clone();
-  //B0_ = Dx_->alloc_clone();
+
 #ifdef HIOP_DEEPCHECKS
   Vmat_ = V_->alloc_clone();
 #endif
@@ -173,7 +173,6 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank()
 {
   delete DhInv_;
   delete Dx_;
-  //delete B0_;
 
   delete St_;
   delete Yt_;
@@ -464,8 +463,8 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation()
   const hiopVector& B0 = *nlp_->inner_prod()->M_lumped();
   hiopVector& B0DhInv = new_n_vec1(n);
   B0DhInv.copyFrom(*DhInv_);
-  //B0DhInv.scale(sigma_);
-  B0DhInv.axpy(sigma_, B0);
+  B0DhInv.scale(sigma_);
+  B0DhInv.componentMult(B0);
   mat_times_diag_times_mattrans_local(StB0DhInvYmL, *St_, B0DhInv, *Yt_);
 #ifdef HIOP_USE_MPI
   memcpy(buff1_lxlx3_ + l * l, StB0DhInvYmL.local_data(), buffsize);
@@ -1143,11 +1142,13 @@ void HessianDiagPlusRowRank::times_vec_common(double beta,
     y.axzpy(alpha, x, *Dx_);
   }
 
-  hiopVector* aux = B0.new_copy();
-  aux->componentMult(x);
+  //y.axpy(alpha * sigma_, x);
+  assert(n_vec1_ != nullptr);
-  y.axpy(alpha * sigma_, *aux);
-  delete aux;
+  hiopVector& aux = *n_vec1_;
+  aux.copyFrom(B0);
+  aux.componentMult(x);
+  y.axpy(alpha*sigma_, aux);
 
   for(int k = 0; k < l_curr_; k++) {
     double bkTx = b[k]->dotProductWith(x);
diff --git a/src/Optimization/HessianDiagPlusRowRank.hpp b/src/Optimization/HessianDiagPlusRowRank.hpp
index bec4d9a0..887030aa 100644
--- a/src/Optimization/HessianDiagPlusRowRank.hpp
+++ b/src/Optimization/HessianDiagPlusRowRank.hpp
@@ -168,9 +168,6 @@ class HessianDiagPlusRowRank : public hiopMatrix
   * for example from DhInv, I decided to store it to avoid round-off errors.
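  * Specifically, Dx_ stores the log-barrier diagonal Zl*Sxl^{-1} + Zu*Sxu^{-1} that is
  * passed in through update_logbar_diag().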
  */
   hiopVector* Dx_;
-
-  //// The initial B0 Hessian approximation
-  //hiopVector* B0_;
 
   // Flags recomputation of the internal inverse representation
   bool matrix_changed_;
@@ -244,7 +241,7 @@ class HessianDiagPlusRowRank : public hiopMatrix
   hiopVector* l_vec1_;
   hiopVector* l_vec2_;
-  hiopVector* n_vec1_;
+  mutable hiopVector* n_vec1_;
   hiopVector* n_vec2_;
   hiopVector* twol_vec1_;
   hiopVector& new_l_vec1(int l);

From 1d54e3f0cfe40b01ea6ced0cbded38141cc9f547 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Fri, 21 Mar 2025 11:39:43 -0700
Subject: [PATCH 24/27] duals lsq computation

---
 src/Optimization/InnerProduct.hpp     |  2 +-
 src/Optimization/hiopDualsUpdater.cpp | 20 ++++++++++++--------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/Optimization/InnerProduct.hpp b/src/Optimization/InnerProduct.hpp
index 03cdad43..b7490777 100644
--- a/src/Optimization/InnerProduct.hpp
+++ b/src/Optimization/InnerProduct.hpp
@@ -108,7 +108,7 @@ class InnerProduct
   double norm_complementarity(const hiopVector& x) const;
 
   // Computes 1M norm, i.e., ||x|| = 1^T*M*|x|
-  double norm_M_one(const hiopVector&x) const;
+  double norm_M_one(const hiopVector& x) const;
 
   // Computes the "volume" of the space, norm of 1 function
   double volume() const;
diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp
index bd19c2a2..51c5a137 100644
--- a/src/Optimization/hiopDualsUpdater.cpp
+++ b/src/Optimization/hiopDualsUpdater.cpp
@@ -188,18 +188,18 @@ hiopDualsLsqUpdateLinsysRedDense::~hiopDualsLsqUpdateLinsysRedDense()
 
 /** Given xk, zk_l, zk_u, vk_l, and vk_u (contained in 'iter'), this method solves an LSQ problem
  * corresponding to dual infeasibility equation
- *    min_{y_c,y_d} || \nabla f(xk) + J^T_c(xk) y_c + J_d^T(xk) y_d - zk_l+zk_u ||^2
- *                  ||          - y_d - vk_l + vk_u                             ||_2,
+ *    min_{y_c,y_d} || \nabla f(xk) + J^T_c(xk) y_c + J_d^T(xk) y_d - M*(zk_l-zk_u) ||^2
+ *                  ||          - y_d - vk_l + vk_u                                 ||_2,
  * which is
- *    min_{y_c, y_d} || [ J_c^T  J_d^T ] [ y_c ]  -  [ -\nabla f(xk) + zk_l-zk_u ] ||^2
- *                   || [  0      I    ] [ y_d ]     [       - vk_l + vk_u       ] ||_2
+ *    min_{y_c, y_d} || [ J_c^T  J_d^T ] [ y_c ]  -  [ -\nabla f(xk) + M*(zk_l-zk_u) ] ||^2
+ *                   || [  0      I    ] [ y_d ]     [       - vk_l + vk_u           ] ||_2
 *
 * ******************************
 * NLPs with dense constraints
 * ******************************
 * For NLPs with dense constraints, the above LSQ problem is solved by solving the linear
 * system in y_c and y_d:
- *    [ J_c J_c^T   J_c J_d^T     ] [ y_c ]  =  [ J_c  0 ] [ -\nabla f(xk) + zk_l-zk_u ]
- *    [ J_d J_c^T   J_d J_d^T + I ] [ y_d ]     [ J_d  I ] [       - vk_l + vk_u       ]
+ *    [ J_c J_c^T   J_c J_d^T     ] [ y_c ]  =  [ J_c  0 ] [ -\nabla f(xk) + M*(zk_l-zk_u) ]
+ *    [ J_d J_c^T   J_d J_d^T + I ] [ y_d ]     [ J_d  I ] [       - vk_l + vk_u           ]
 * This linear system is small (of size m=m_E+m_I) (so it is replicated for all MPI ranks).
 *
 * The matrix of the above system is stored in the member variable M_ of this class and the
@@ -275,9 +275,13 @@ bool hiopDualsLsqUpdateLinsysRedDense::do_lsq_update(hiopIterate& iter,
   //  [ rhsd_ ]   [ J_d  I ] [ vecd ]
   // [vecx,vecd] = - [ -\nabla f(xk) + zk_l-zk_u, - vk_l + vk_u].
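+  // (with the weighted inner products, the first block becomes -\nabla f(xk) + M*(zk_l-zk_u);
+  //  the lumped mass M is applied below via componentMult)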
   hiopVector& vecx = *vec_n_;
-  vecx.copyFrom(grad_f);
+  //vecx.copyFrom(grad_f);
+  //vecx.axpy(-1.0, *iter.get_zl());
+  //vecx.axpy(1.0, *iter.get_zu());
+  vecx.copyFrom(*iter.get_zu());
   vecx.axpy(-1.0, *iter.get_zl());
-  vecx.axpy(1.0, *iter.get_zu());
+  vecx.componentMult(*nlp_->inner_prod()->M_lumped());
+  vecx.axpy(1.0, grad_f);
   hiopVector& vecd = *vec_mi_;
   vecd.copyFrom(*iter.get_vl());
   vecd.axpy(-1.0, *iter.get_vu());

From 63d8f88def6c50af5514bbd051b66fa066118861 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Fri, 21 Mar 2025 11:40:40 -0700
Subject: [PATCH 25/27] draft of Hessian approx - code works mesh independently

---
 src/Optimization/HessianDiagPlusRowRank.cpp | 23 ++++++++++++---------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp
index c0fea832..58459c11 100644
--- a/src/Optimization/HessianDiagPlusRowRank.cpp
+++ b/src/Optimization/HessianDiagPlusRowRank.cpp
@@ -305,12 +305,12 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
   hiopVector& s_new = new_n_vec1(n);
   s_new.copyFrom(*it_curr.x);
   s_new.axpy(-1., *it_prev_->x);
-  double s_infnorm = s_new.infnorm();
+  const double s_infnorm = s_new.infnorm();
   if(s_infnorm >= 100 * std::numeric_limits<double>::epsilon()) {  // norm of s not too small
     //  compute y_new = \grad J(x_curr,\lambda_curr) - \grad J(x_prev, \lambda_curr) (yes, J(x_prev, \lambda_curr))
     //    = graf_f_curr-grad_f_prev + (Jac_c_curr-Jac_c_prev)yc_curr+ (Jac_d_curr-Jac_c_prev)yd_curr -
-    //    zl_curr*s_new + zu_curr*s_new
+    //    M_lumped*zl_curr*s_new + M_lumped*zu_curr*s_new
     hiopVector& y_new = new_n_vec2(n);
     y_new.copyFrom(grad_f_curr);
     y_new.axpy(-1., *grad_f_prev_);
@@ -321,17 +321,20 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
     Jac_d_curr.transTimesVec(1.0, y_new, 1.0, *it_curr.yd);
     Jac_d_prev_->transTimesVec(1.0, y_new, -1.0, *it_curr.yd);
 
-    double sTy = s_new.dotProductWith(y_new), s_nrm2 = s_new.twonorm(), y_nrm2 = y_new.twonorm();
+    const double sTy = s_new.dotProductWith(y_new);
+    const double s_nrm2 = nlp_->inner_prod()->norm_M(s_new);//s_new.twonorm();
+    const double y_nrm2 = nlp_->inner_prod()->norm_H_dual(y_new);//y_new.twonorm();
 
-#ifdef HIOP_DEEPCHECKS
-    nlp_->log->printf(hovLinAlgScalarsVerb,
-                      "HessianDiagPlusRowRank: s^T*y=%20.14e ||s||=%20.14e ||y||=%20.14e\n",
+    nlp_->log->printf(hovWarning, //hovLinAlgScalarsVerb,
+                      "sigma HessianDiagPlusRowRank: s^T*y=%20.14e ||s||=%20.14e ||y||=%20.14e ||s||_inf=%20.14e\n",
                       sTy,
                       s_nrm2,
-                      y_nrm2);
+                      y_nrm2,
+                      s_infnorm);
+    nlp_->log->printf(hovWarning, "sigma y_new twonrm=%20.14e infnrm=%20.14e\n", y_nrm2, y_new.infnorm());
     nlp_->log->write("HessianDiagPlusRowRank s_new", s_new, hovIteration);
     nlp_->log->write("HessianDiagPlusRowRank y_new", y_new, hovIteration);
-#endif
+
     if(sTy > s_nrm2 * y_nrm2 * sqrt(std::numeric_limits<double>::epsilon())) {  // sTy far away from zero
       if(l_max_ > 0) {
@@ -357,12 +360,12 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr,
         l_curr_ = l_max_;
       }
     }  // end of l_max_>0
-#ifdef HIOP_DEEPCHECKS
+
     nlp_->log->printf(hovMatrices, "\nHessianDiagPlusRowRank: these are L and D from the BFGS compact representation\n");
     nlp_->log->write("L", *L_, hovMatrices);
     nlp_->log->write("D", *D_, hovMatrices);
    nlp_->log->printf(hovMatrices, "\n");
-#endif
+
     // update B0 (i.e., sigma)
     switch(sigma_update_strategy_) {
       case SIGMA_STRATEGY1:

From 3295382c9d873ad8db0f19d781f3223657ec1c91 Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Fri, 21 Mar 2025 13:54:40 -0700
Subject: [PATCH 26/27] added test for Ex1 with L2

---
 src/Drivers/Dense/CMakeLists.txt            |  1 +
 src/Drivers/Dense/NlpDenseConsEx1.hpp       |  6 +++--
 src/Drivers/Dense/NlpDenseConsEx1Driver.cpp | 27 +++++++++++++++------
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/Drivers/Dense/CMakeLists.txt b/src/Drivers/Dense/CMakeLists.txt
index 76249621..24228d09 100644
--- a/src/Drivers/Dense/CMakeLists.txt
+++ b/src/Drivers/Dense/CMakeLists.txt
@@ -32,6 +32,7 @@ install(
 ##########################################################
 add_test(NAME NlpDenseCons1_5H COMMAND ${RUNCMD} "$" "500" "1.0" "-selfcheck")
 add_test(NAME NlpDenseCons1_5K COMMAND ${RUNCMD} "$" "5000" "1.0" "-selfcheck")
+add_test(NAME NlpDenseCons1_25K_InfDim COMMAND ${RUNCMD} "$" "25000" "1.0" "-selfcheck" "-use_L2")
 add_test(NAME NlpDenseCons1_50K COMMAND ${RUNCMD} "$" "50000" "1.0" "-selfcheck")
 if(HIOP_USE_MPI)
   add_test(NAME NlpDenseCons1_50K_mpi COMMAND ${MPICMD} -n 2 "$" "50000" "1.0" "-selfcheck")
diff --git a/src/Drivers/Dense/NlpDenseConsEx1.hpp b/src/Drivers/Dense/NlpDenseConsEx1.hpp
index 7c9be216..7101ec6e 100644
--- a/src/Drivers/Dense/NlpDenseConsEx1.hpp
+++ b/src/Drivers/Dense/NlpDenseConsEx1.hpp
@@ -112,9 +112,10 @@ class DiscretizedFunction : public hiop::hiopVectorPar
 class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints
 {
 public:
-  DenseConsEx1(int n_mesh_elem = 100, double mesh_ratio = 1.0)
+  DenseConsEx1(int n_mesh_elem, double mesh_ratio=1.0, bool use_weighted_vars=false)
     : n_vars(n_mesh_elem),
       comm(MPI_COMM_WORLD),
+      use_weighted_space_(use_weighted_vars),
       solver_(nullptr)
   {
     // create the members
@@ -293,12 +294,13 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints
    */
   virtual bool useWeightedInnerProducts()
   {
-    return false;//true;
+    return use_weighted_space_;
   }
 
 private:
   int n_vars;
   MPI_Comm comm;
+  bool use_weighted_space_;
   Ex1Meshing1D* _mesh;
 
   int n_local;
diff --git a/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp b/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp
index 4da45db4..cd15a783 100644
--- a/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp
+++ b/src/Drivers/Dense/NlpDenseConsEx1Driver.cpp
@@ -21,10 +21,11 @@ static bool self_check(size_type n, double obj_value);
 #ifdef HIOP_USE_AXOM
 static bool do_load_checkpoint_test(const size_type& mesh_size, const double& ratio, const double& obj_val_expected);
 #endif
-static bool parse_arguments(int argc, char** argv, size_type& n, double& distortion_ratio, bool& self_check)
+static bool parse_arguments(int argc, char** argv, size_type& n, double& distortion_ratio, bool& wei_vars, bool& self_check)
 {
   n = 20000;
   distortion_ratio = 1.;
+  wei_vars = false;
   self_check = false;  // default options
   switch(argc) {
     case 1:
       // no arguments
       return true;
       break;
+    case 5:  // 4 arguments - -use_L2 expected
+    {
+      if(std::string(argv[4]) == "-use_L2") {
+        wei_vars = true;
+      } else {
+        return false;
+      }
+    }
     case 4:  // 3 arguments - -selfcheck expected
     {
       if(std::string(argv[3]) == "-selfcheck") {
@@ -91,15 +100,17 @@ int main(int argc, char** argv)
   }
#endif
   bool selfCheck;
+  //flags the use of Ex1 with the mass weighted variables (L2 inner product) or without (Euclidean inner product)
+  bool wei_vars;
   size_type mesh_size;
   double ratio;
   double objective = 0.;
-  if(!parse_arguments(argc, argv, mesh_size, ratio, selfCheck)) {
+  if(!parse_arguments(argc, argv, mesh_size, ratio, wei_vars, selfCheck)) {
     usage(argv[0]);
     return 1;
   }
-  DenseConsEx1 problem(mesh_size, ratio);
+  DenseConsEx1 problem(mesh_size, ratio, wei_vars);
 
   hiop::hiopNlpDenseConstraints nlp(problem);
   hiop::hiopAlgFilterIPM solver(&nlp);
@@ -110,7 +121,9 @@ int main(int argc, char** argv)
   // this is used for testing when the driver is called with -selfcheck
   if(selfCheck) {
-    if(!self_check(mesh_size, objective)) return -1;
+    if(!self_check(mesh_size, objective)) {
+      return -1;
+    }
   } else {
     if(rank == 0) {
       printf("Optimal objective: %22.14e. Solver status: %d. Number of iterations: %d\n",
@@ -142,9 +155,9 @@ int main(int argc, char** argv)
 
 static bool self_check(size_type n, double objval)
 {
-#define num_n_saved 3  // keep this is sync with n_saved and objval_saved
-  const size_type n_saved[] = {500, 5000, 50000};
-  const double objval_saved[] = {8.6156700e-2, 8.6156106e-02, 8.6161001e-02};
+#define num_n_saved 4  // keep this in sync with n_saved and objval_saved
+  const size_type n_saved[] = {500, 5000, 50000, 25000};
+  const double objval_saved[] = {8.6156700e-2, 8.6156106e-02, 8.6161001e-02, 8.6155777e-02};
 
 #define relerr 1e-6
   bool found = false;

From 57a30671187b682924c9115fc1c322df512c797a Mon Sep 17 00:00:00 2001
From: Cosmin G Petra
Date: Sun, 23 Mar 2025 16:18:38 -0700
Subject: [PATCH 27/27] M lumped applies in residuals

---
 src/Optimization/hiopKKTLinSys.cpp | 33 +++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/Optimization/hiopKKTLinSys.cpp b/src/Optimization/hiopKKTLinSys.cpp
index 80d11142..16b61f25 100644
--- a/src/Optimization/hiopKKTLinSys.cpp
+++ b/src/Optimization/hiopKKTLinSys.cpp
@@ -100,16 +100,20 @@ double hiopKKTLinSys::errorKKT(const hiopResidual* resid, const hiopIterate* sol
   }
 
   double derr = 1e20, aux;
-  hiopVector* RX = resid->rx->new_copy();
-  // RX = rx-H*dx-J'c*dyc-J'*dyd +dzl-dzu
+  //hiopVector* RX = resid->rx->new_copy();
+  hiopVector* RX = sol->zl->new_copy();
+  RX->axpy(-1.0, *sol->zu);
+  RX->componentMult(*nlp_->inner_prod()->M_lumped());
+  RX->axpy(1.0, *resid->rx);
+
   HessianTimesVec_noLogBarrierTerm(1.0, *RX, -1.0, *sol->x);
   RX->axzpy(-1., *delta_wx_, *sol->x);
-
   Jac_c_->transTimesVec(1.0, *RX, -1.0, *sol->yc);
   Jac_d_->transTimesVec(1.0, *RX, -1.0, *sol->yd);
-  RX->axpy(1.0, *sol->zl);
-  RX->axpy(-1.0, *sol->zu);
+
   aux = RX->twonorm();
   derr = fmax(aux, derr);
   nlp_->log->printf(hovLinAlgScalars, "  --- rx=%g\n", aux);
@@ -568,7 +572,7 @@ bool hiopKKTLinSysCompressedXYcYd::update(const hiopIterate* iter,
   Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl());
   Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu());
   nlp_->log->write("Dx in KKT", *Dx_, hovMatrices);
-  assert(false);
+
   // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu
   Dd_->setToZero();
   Dd_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl());
@@ -1150,7 +1154,7 @@ bool hiopMatVecKKTFullOpr::combine_res_to_build_vec(hiopVector& y) { return true
 
 /**
 * Full KKT matrix is
- * [ H    0  Jc^T Jd^T | -I  I  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
+ * [ H    0  Jc^T Jd^T | -M  M  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
 * [ 0    0   0   -I   |  0  0 -I  I | 0  0  0  0 ] [ dd]   [ rd  ]
 * [ Jc   0   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyc] = [ ryc ]
 * [ Jd  -I   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyd]   [ ryd ]
@@ -1217,12 +1221,15 @@ bool hiopMatVecKKTFullOpr::times_vec(hiopVector& yvec, const hiopVector& xvec)
   hiopVector* yrvu_ = &(y.getVector(11));
 
   // rx = H*dx + delta_wx*I*dx + Jc'*dyc + Jd'*dyd - dzl + dzu
-  Hess->timesVec(0.0, *yrx_, +1.0, *dx_);
+  yrx_->copyFrom(*dzu_);
+  yrx_->axpy(-1.0, *dzl_);
+  yrx_->componentMult(*kkt_->nlp_->inner_prod()->M_lumped());
+  Hess->timesVec(1.0, *yrx_, +1.0, *dx_);
   yrx_->axzpy(1., *delta_wx, *dx_);
   Jac_c->transTimesVec(1.0, *yrx_, 1.0, *dyc_);
   Jac_d->transTimesVec(1.0, *yrx_, 1.0, *dyd_);
-  yrx_->axpy(-1.0, *dzl_);
-  yrx_->axpy(1.0, *dzu_);
+  //yrx_->axpy(-1.0, *dzl_);
+  //yrx_->axpy(1.0, *dzu_);
 
   // RD = delta_wd_*dd - dyd - dvl + dvu
   yrd_->setToZero();
@@ -1285,7 +1292,7 @@ bool hiopMatVecKKTFullOpr::times_vec(hiopVector& yvec, const hiopVector& xvec)
 
 /**
 * Full KKT matrix is
- * [ H    0  Jc^T Jd^T | -I  I  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
+ * [ H    0  Jc^T Jd^T | -M  M  0  0 | 0  0  0  0 ] [ dx]   [ rx  ]
 * [ 0    0   0   -I   |  0  0 -I  I | 0  0  0  0 ] [ dd]   [ rd  ]
 * [ Jc   0   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyc] = [ ryc ]
 * [ Jd  -I   0    0   |  0  0  0  0 | 0  0  0  0 ] [ dyd]   [ ryd ]
@@ -1378,12 +1385,14 @@ bool hiopMatVecKKTFullOpr::trans_times_vec(hiopVector& yvec, const hiopVector& x
 
   // RXL = -dx + Sxl*dsxl
   yrsxl_->setToZero();
-  yrsxl_->axpy(-1.0, *dx_);
+  //yrsxl_->axpy(-1.0, *dx_);
+  yrsxl_->axzpy(-1.0, *dx_, *kkt_->nlp_->inner_prod()->M_lumped());
   yrsxl_->axzpy(1.0, *iter_->get_sxl(), *dsxl_);
   yrsxl_->selectPattern(kkt_->nlp_->get_ixl());
 
   // RXU = dx + Sxu*dsxu
   yrsxu_->copyFrom(*dx_);
+  yrsxu_->componentMult(*kkt_->nlp_->inner_prod()->M_lumped());
   yrsxu_->axzpy(1.0, *iter_->get_sxu(), *dsxu_);
   yrsxu_->selectPattern(kkt_->nlp_->get_ixu());
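  // (These are the transposes of the -M and M blocks in the first row of the full KKT
  //  matrix above: with weighted inner products the bound duals couple to dx through
  //  the lumped mass matrix M rather than through the identity.)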