@@ -58,13 +58,17 @@ def __init__(
        solver: str = "GN",
        max_iters: int = 100,
        step_tol: float = 1e-7,
+        ftol: float = None,
+        gradient_tol: float = None,
        tau: float = 1e-11,
        verbose: bool = True,
    ):
        # Set solver parameters
        self.solver = solver
        self.max_iters = max_iters
        self.step_tol = step_tol
+        self.ftol = ftol
+        self.gradient_tol = gradient_tol
        self.tau = tau
        self.verbose = verbose

@@ -91,6 +95,27 @@ def __init__(
        # Inverse of information matrix
        self._covariance_matrix: np.ndarray = None

+    def is_converged(self, delta_cost, cost, dx, grad_norm) -> bool:
+        converged = False
+        if delta_cost is not None:
+            rel_cost_change = 0.0
+            if cost != 0:
+                rel_cost_change = delta_cost / cost
+
+        if self.step_tol is not None and dx < self.step_tol:
+            converged = True
+        if self.ftol is not None and delta_cost is not None:
+            if rel_cost_change < self.ftol:
+                converged = True
+        if cost == 0.0:
+            converged = True
+        if dx == 0.0:
+            converged = True
+        if self.gradient_tol is not None and grad_norm is not None:
+            if grad_norm < self.gradient_tol:
+                converged = True
+        return converged
+
    def add_residual(self, residual: Residual, loss: LossFunction = L2Loss()):
        """Adds a residual to the problem, along with a robust loss
        function to use. Default loss function is the standard L2Loss.
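
Note: the three optional tests combined in is_converged can be summarized in a standalone sketch. Nothing below is part of the change itself; the names J, e, and delta_x are placeholders for the stacked Jacobian, the stacked error, and the last step:

import numpy as np

def converged_sketch(delta_x, cost_prev, cost_curr, J, e,
                     step_tol=1e-7, ftol=1e-8, gradient_tol=1e-10):
    # Step test: the last update was negligibly small.
    step_small = np.linalg.norm(delta_x) < step_tol
    # Cost test: the relative decrease in cost between iterations has stalled.
    rel_change = abs(cost_prev - cost_curr) / cost_curr if cost_curr != 0 else 0.0
    cost_flat = rel_change < ftol
    # Gradient test: the infinity norm of the cost gradient J^T e is near zero.
    grad_small = np.max(np.abs(J.T @ e)) < gradient_tol
    return step_small or cost_flat or grad_small
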
@@ -182,6 +207,10 @@ def _solve_gauss_newton(self) -> Dict[Hashable, State]:
        """

        dx = 10
+        delta_cost = None
+        rel_cost_decrease = None
+        grad_norm = None
+
        iter_idx = 0
        cost_list = []

@@ -193,7 +222,11 @@ def _solve_gauss_newton(self) -> Dict[Hashable, State]:
            header = "Initial cost: " + str(cost)
            print(header)

-        while (iter_idx < self.max_iters) and (dx > self.step_tol):
+        while iter_idx < self.max_iters:
+
+            if self.is_converged(delta_cost, cost_list[-1], dx, grad_norm):
+                break
+
            H_spr = sparse.csr_matrix(H)

            A = H_spr.T @ H_spr
@@ -208,8 +241,23 @@ def _solve_gauss_newton(self) -> Dict[Hashable, State]:
            cost_list.append(cost)

            dx = np.linalg.norm(delta_x)
+            if len(cost_list) >= 2:
+                delta_cost = np.abs(cost_list[-1] - cost_list[-2])
+                if cost_list[-1] != 0:
+                    rel_cost_decrease = delta_cost / cost_list[-1]
+                else:
+                    rel_cost_decrease = 0
+            grad_norm = np.max(np.abs((e.T @ H).squeeze()))
+
            if self.verbose:
-                self._display_header(iter_idx, cost, dx)
+                self._display_header(
+                    iter_idx,
+                    cost,
+                    dx,
+                    delta_cost,
+                    rel_cost_decrease,
+                    grad_norm,
+                )

            iter_idx += 1

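
For reference, (e.T @ H).squeeze() is the transpose of H.T @ e, the gradient of the cost 0.5 * e.T @ e with respect to the stacked state, so grad_norm above is the infinity norm of that gradient. An equivalent form, assuming e has shape (m, 1) and H has shape (m, n):

# Matches np.max(np.abs((e.T @ H).squeeze())) exactly.
grad_norm = np.max(np.abs(H.T @ e))
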
@@ -232,6 +280,10 @@ def _solve_LM(self) -> Dict[Hashable, State]:
        """

        e, H, cost = self.compute_error_jac_cost()
+
+        delta_cost = None
+        rel_cost_decrease = None
+        grad_norm = None
        cost_list = [cost]

        H_spr = sparse.csr_matrix(H)
@@ -250,9 +302,12 @@ def _solve_LM(self) -> Dict[Hashable, State]:
            print(header)

        # Main LM loop
-        while (iter_idx < self.max_iters) and (dx > self.step_tol):
+        while iter_idx < self.max_iters:
            A_solve = A + mu * sparse.identity(A.shape[0])
            delta_x = sparse.linalg.spsolve(A_solve, -b).reshape((-1, 1))
+            dx = np.linalg.norm(delta_x)
+            if self.is_converged(delta_cost, cost_list[-1], dx, grad_norm):
+                break

            variables_test = {k: v.copy() for k, v in self.variables.items()}

@@ -287,10 +342,26 @@ def _solve_LM(self) -> Dict[Hashable, State]:
                nu = 2 * nu
                status = "Rejected."

-            dx = np.linalg.norm(delta_x)
+
+
+            if len(cost_list) >= 2:
+                delta_cost = np.abs(cost_list[-1] - cost_list[-2])
+                if cost_list[-1] != 0:
+                    rel_cost_decrease = delta_cost / cost_list[-1]
+                else:
+                    rel_cost_decrease = 0
+            grad_norm = np.max(np.abs((e.T @ H).squeeze()))

            if self.verbose:
-                self._display_header(iter_idx + 1, cost, dx, status=status)
+                self._display_header(
+                    iter_idx,
+                    cost,
+                    dx,
+                    delta_cost,
+                    rel_cost_decrease,
+                    grad_norm,
+                    status=status,
+                )

            iter_idx += 1

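
The metric-update block above is repeated verbatim in _solve_gauss_newton and _solve_LM. If the duplication is a concern, a small helper along these lines would cover both call sites; this is a suggestion only, not part of the change:

def _update_convergence_metrics(self, cost_list, e, H):
    # Returns (delta_cost, rel_cost_decrease, grad_norm) for the latest iterate;
    # the first two stay None until at least two costs have been recorded.
    delta_cost = None
    rel_cost_decrease = None
    if len(cost_list) >= 2:
        delta_cost = abs(cost_list[-1] - cost_list[-2])
        rel_cost_decrease = delta_cost / cost_list[-1] if cost_list[-1] != 0 else 0
    grad_norm = np.max(np.abs((e.T @ H).squeeze()))
    return delta_cost, rel_cost_decrease, grad_norm
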
@@ -479,7 +550,14 @@ def compute_covariance(self):
            return None

    def _display_header(
-        self, iter_idx: int, current_cost: float, dx: float, status: str = None
+        self,
+        iter_idx: int,
+        current_cost: float,
+        dx: float,
+        delta_cost: float = None,
+        delta_cost_rel: float = None,
+        grad_norm: float = None,
+        status: str = None,
    ):
        """Displays the optimization progress.

@@ -497,7 +575,12 @@ def _display_header(
        header = ("Iter: {0} || Cost: {1:.4e} || Step size: {2:.4e}").format(
            iter_idx, current_cost, dx
        )
-
+        if delta_cost is not None:
+            header += " || dC: {0:.4e}".format(delta_cost)
+        if delta_cost_rel is not None:
+            header += " || dC/C: {0:.4e}".format(delta_cost_rel)
+        if grad_norm is not None:
+            header += " || |grad|_inf: {0:.4e}".format(grad_norm)
        if status is not None:
            header += " || Status: " + status

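
Putting the pieces together, the new tolerances would be supplied at construction time roughly as below. "Problem" is a stand-in for the enclosing class (its name is not visible in this diff) and the tolerance values are illustrative; any of the three can be left at None to disable that particular stopping test:

problem = Problem(
    solver="LM",
    max_iters=100,
    step_tol=1e-7,       # stop once the step norm ||dx|| falls below this
    ftol=1e-8,           # stop once the relative cost change |dC|/C falls below this
    gradient_tol=1e-10,  # stop once the gradient infinity norm falls below this
)
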