Skip to content

Commit a3ec0aa

Browse files
authored
Merge pull request #156 from dolfin-adjoint/dham/abstract_reduced_functional
2 parents 7d03a3c + c5aa4e8 commit a3ec0aa

9 files changed

Lines changed: 417 additions & 206 deletions

File tree

pyadjoint/adjfloat.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,12 @@ def __rsub__(self, other):
9393
def __pow__(self, power):
9494
return PowBlock(self, power)
9595

96-
def _ad_convert_type(self, value, options={}):
96+
def _ad_init_zero(self, dual=False):
97+
return type(self)(0.)
98+
99+
def _ad_convert_riesz(self, value, riesz_map=None):
100+
if riesz_map is not None:
101+
raise ValueError(f"Unexpected Riesz map for Adjfloat: {riesz_map}")
97102
return AdjFloat(value)
98103

99104
def _ad_create_checkpoint(self):
@@ -343,7 +348,7 @@ def __init__(self, *args):
343348

344349
def recompute_component(self, inputs, block_variable, idx, prepared):
345350
output = self.operator(*(term.saved_output for term in self.terms))
346-
return self._outputs[0].saved_output._ad_convert_type(output)
351+
return type(self._outputs[0].saved_output)(output)
347352

348353
def __str__(self):
349354
return f"{self.terms[0]} {self.symbol} {self.terms[1]}"

pyadjoint/control.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1+
from typing import Any
12
from .overloaded_type import OverloadedType, create_overloaded_object
23
import logging
34

45

56
class Control(object):
67
"""Defines a control variable from an OverloadedType.
78
8-
The control object references a specific node on the Tape.
9-
For mutable OverloadedType instances the Control only represents
10-
the value at the time of initialization.
9+
The control object references a specific node on the Tape. For mutable
10+
OverloadedType instances the Control only represents the value at the time
11+
of initialization.
1112
1213
Example:
1314
Given a mutable OverloadedType instance u.
@@ -25,18 +26,22 @@ class Control(object):
2526
>>> c2.data()
2627
3.0
2728
28-
Now c1 represents the node prior to the add_in_place Block,
29-
while c2 represents the node after the add_in_place Block.
30-
Creating a `ReducedFunctional` with c2 as Control results in
31-
a reduced problem without the add_in_place Block, while a ReducedFunctional
32-
with c1 as Control results in a forward model including the add_in_place.
29+
Now c1 represents the node prior to the add_in_place Block, while c2
30+
represents the node after the add_in_place Block. Creating a
31+
`ReducedFunctional` with c2 as Control results in a reduced problem
32+
without the add_in_place Block, while a ReducedFunctional with c1 as
33+
Control results in a forward model including the add_in_place.
3334
3435
Args:
35-
control (OverloadedType): The OverloadedType instance to define this control from.
36+
control: The OverloadedType instance to define this control from.
37+
riesz_map: Parameters controlling how to find the Riesz representer of
38+
a dual (adjoint) variable to this control. The permitted values are
39+
type-dependent.
3640
3741
"""
38-
def __init__(self, control):
42+
def __init__(self, control: OverloadedType, riesz_map: Any = None):
3943
self.control = control
44+
self.riesz_map = riesz_map
4045
self.block_variable = control.block_variable
4146

4247
def data(self):
@@ -45,17 +50,27 @@ def data(self):
4550
def tape_value(self):
4651
return create_overloaded_object(self.block_variable.saved_output)
4752

48-
def get_derivative(self, options={}):
53+
def get_derivative(self, apply_riesz=False):
4954
if self.block_variable.adj_value is None:
5055
logging.warning("Adjoint value is None, is the functional independent of the control variable?")
51-
return self.control._ad_convert_type(0., options=options)
52-
return self.control._ad_convert_type(self.block_variable.adj_value, options=options)
56+
return self.control._ad_init_zero(dual=not apply_riesz)
57+
elif apply_riesz:
58+
return self.control._ad_convert_riesz(
59+
self.block_variable.adj_value, riesz_map=self.riesz_map)
60+
else:
61+
return self.control._ad_init_object(self.block_variable.adj_value)
5362

54-
def get_hessian(self, options={}):
63+
def get_hessian(self, apply_riesz=False):
5564
if self.block_variable.hessian_value is None:
5665
logging.warning("Hessian value is None, is the functional independent of the control variable?")
57-
return self.control._ad_convert_type(0., options=options)
58-
return self.control._ad_convert_type(self.block_variable.hessian_value, options=options)
66+
return self.control._ad_init_zero(dual=not apply_riesz)
67+
elif apply_riesz:
68+
return self.control._ad_convert_riesz(
69+
self.block_variable.hessian_value, riesz_map=self.riesz_map)
70+
else:
71+
return self.control._ad_init_object(
72+
self.block_variable.hessian_value
73+
)
5974

6075
def update(self, value):
6176
# In the future we might want to call a static method

pyadjoint/drivers.py

Lines changed: 114 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,35 @@
1+
try:
2+
from warnings import deprecated
3+
except ImportError:
4+
from warnings import warn
5+
deprecated = None
6+
17
from .enlisting import Enlist
28
from .tape import get_working_tape, stop_annotating
39

410

5-
def compute_gradient(J, m, options=None, tape=None, adj_value=1.0):
11+
def compute_derivative(J, m, tape=None, adj_value=1.0, apply_riesz=False):
612
"""
7-
Compute the gradient of J with respect to the initialisation value of m,
13+
Compute the derivative of J with respect to the initialisation value of m,
814
that is the value of m at its creation.
915
1016
Args:
11-
J (AdjFloat): The objective functional.
17+
J (OverloadedType): The objective functional.
1218
m (list or instance of Control): The (list of) controls.
13-
options (dict): A dictionary of options. To find a list of available options
14-
have a look at the specific control type.
1519
tape: The tape to use. Default is the current tape.
20+
adj_value: The adjoint value to the result. Required if the functional
21+
is not scalar-valued, or if the functional is not the final stage
22+
in the computation of an outer functional.
23+
apply_riesz: If True, apply the Riesz map of each control in order
24+
to return a primal gradient rather than a derivative in the
25+
dual space.
1626
1727
Returns:
18-
OverloadedType: The derivative with respect to the control. Should be an instance of the same type as
19-
the control.
28+
OverloadedType: The derivative with respect to the control.
29+
If apply_riesz is False, should be an instance of the type dual
30+
to that of the control. If apply_riesz is True should have the
31+
same type as the control.
2032
"""
21-
options = options or {}
2233
tape = tape or get_working_tape()
2334
tape.reset_variables()
2435
J.block_variable.adj_value = adj_value
@@ -30,51 +41,126 @@ def compute_gradient(J, m, options=None, tape=None, adj_value=1.0):
3041
with marked_controls(m):
3142
tape.evaluate_adj(markings=True)
3243

33-
grads = [i.get_derivative(options=options) for i in m]
44+
grads = [i.get_derivative(apply_riesz=apply_riesz) for i in m]
3445
return m.delist(grads)
3546

3647

37-
def compute_hessian(J, m, m_dot, options=None, tape=None):
48+
def compute_gradient(J, m, tape=None, adj_value=1.0, apply_riesz=True):
49+
"""
50+
Compute the gradient of J with respect to the initialisation value of m,
51+
that is the value of m at its creation.
52+
53+
This function is deprecated in favour of :func:`compute_derivative`.
54+
55+
Args:
56+
J (OverloadedType): The objective functional.
57+
m (list or instance of Control): The (list of) controls.
58+
tape: The tape to use. Default is the current tape.
59+
adj_value: The adjoint value to the result. Required if the functional
60+
is not scalar-valued, or if the functional is not the final stage
61+
in the computation of an outer functional.
62+
apply_riesz: If True, apply the Riesz map of each control in order
63+
to return a primal gradient rather than a derivative in the
64+
dual space.
65+
66+
Returns:
67+
OverloadedType: The gradient with respect to the control.
68+
If apply_riesz is False, should be an instance of the type dual
69+
to that of the control. If apply_riesz is True should have the
70+
same type as the control.
71+
"""
72+
if deprecated is None:
73+
warn("compute_gradient is deprecated in favour of compute_derivative.",
74+
FutureWarning)
75+
76+
return compute_derivative(J, m, tape, adj_value, apply_riesz)
77+
78+
79+
if deprecated is not None:
80+
compute_gradient = deprecated(
81+
"compute_gradient is deprecated in favour of compute_derivative."
82+
)(compute_gradient)
83+
84+
85+
def compute_hessian(J, m, m_dot, hessian_input=None, tape=None, evaluate_tlm=True, apply_riesz=False):
3886
"""
3987
Compute the Hessian of J in a direction m_dot at the current value of m
4088
4189
Args:
4290
J (AdjFloat): The objective functional.
4391
m (list or instance of Control): The (list of) controls.
44-
m_dot (list or instance of the control type): The direction in which to compute the Hessian.
45-
options (dict): A dictionary of options. To find a list of available options
46-
have a look at the specific control type.
92+
m_dot (list or instance of the control type): The direction in which to
93+
compute the Hessian.
94+
hessian_input (OverloadedType): The value to start the Hessian accumulation
95+
from after the TLM calculation. Uses zero initialised value if None.
4796
tape: The tape to use. Default is the current tape.
97+
apply_riesz: If True, apply the Riesz map of each control in order
98+
to return the (primal) Riesz representer of the Hessian
99+
action.
100+
evaluate_tlm (bool): Whether or not to compute the forward (TLM) part of
101+
the Hessian calculation. If False, assumes that the tape has already
102+
been populated with the required TLM values.
48103
49104
Returns:
50-
OverloadedType: The second derivative with respect to the control in direction m_dot. Should be an instance of
51-
the same type as the control.
105+
OverloadedType: The action of the Hessian in the direction m_dot.
106+
If apply_riesz is False, should be an instance of the type dual
107+
to that of the control. If apply_riesz is true should have the
108+
same type as the control.
52109
"""
53110
tape = tape or get_working_tape()
54-
options = options or {}
55111

56-
tape.reset_tlm_values()
112+
# fill the relevant tlm values on the tape
113+
if evaluate_tlm:
114+
compute_tlm(J, m, m_dot, tape)
115+
57116
tape.reset_hessian_values()
58117

59-
m = Enlist(m)
60-
m_dot = Enlist(m_dot)
61-
for i, value in enumerate(m_dot):
62-
m[i].tlm_value = m_dot[i]
118+
if hessian_input is None:
119+
J.block_variable.hessian_value = (
120+
J.block_variable.output._ad_init_zero(dual=True))
121+
else:
122+
J.block_variable.hessian_value = (
123+
J.block_variable.output._ad_init_object(hessian_input))
63124

125+
m = Enlist(m)
64126
with stop_annotating():
65127
with tape.marked_control_dependents(m):
66-
tape.evaluate_tlm(markings=True)
128+
with tape.marked_functional_dependencies(J):
129+
tape.evaluate_hessian(markings=True)
130+
131+
r = [v.get_hessian(apply_riesz=apply_riesz) for v in m]
132+
return m.delist(r)
133+
134+
135+
def compute_tlm(J, m, m_dot, tape=None):
136+
"""
137+
Compute the tangent linear model of J in a direction m_dot at the current value of m
138+
139+
Args:
140+
J (OverloadedType): The objective functional.
141+
m (list or instance of Control): The (list of) controls.
142+
m_dot (list or instance of the control type): The direction in which to
143+
compute the tangent linear model.
144+
tape: The tape to use. Default is the current tape.
67145
68-
J.block_variable.hessian_value = J.block_variable.output._ad_convert_type(
69-
0., options={'riesz_representation': None})
146+
Returns:
147+
OverloadedType: The action of the tangent linear model with respect to the control
148+
in direction m_dot. Should be an instance of the same type as the functional.
149+
"""
150+
tape = tape or get_working_tape()
151+
tape.reset_tlm_values()
152+
153+
m = Enlist(m)
154+
m_dot = Enlist(m_dot)
155+
156+
for mi, mdi in zip(m, m_dot):
157+
mi.tlm_value = mdi
70158

71159
with stop_annotating():
72160
with tape.marked_control_dependents(m):
73-
with tape.marked_functional_dependencies(J):
74-
tape.evaluate_hessian(markings=True)
161+
tape.evaluate_tlm(markings=True)
75162

76-
r = [v.get_hessian(options=options) for v in m]
77-
return m.delist(r)
163+
return J._ad_init_object(J.block_variable.tlm_value)
78164

79165

80166
def solve_adjoint(J, tape=None, adj_value=1.0):

pyadjoint/optimization/optimization.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def serialise_bounds(rf_np, bounds):
3838
return np.array(bounds_arr).T
3939

4040

41-
def minimize_scipy_generic(rf_np, method, bounds=None, derivative_options=None, **kwargs):
41+
def minimize_scipy_generic(rf_np, method, bounds=None, **kwargs):
4242
"""Interface to the generic minimize method in scipy
4343
4444
"""
@@ -56,18 +56,11 @@ def minimize_scipy_generic(rf_np, method, bounds=None, derivative_options=None,
5656

5757
raise
5858

59-
if method in ["Newton-CG"]:
60-
forget = None
61-
else:
62-
forget = False
63-
64-
project = kwargs.pop("project", False)
65-
6659
m = [p.tape_value() for p in rf_np.controls]
6760
m_global = rf_np.obj_to_array(m)
6861
J = rf_np.__call__
69-
dJ = lambda m: rf_np.derivative(m, forget=forget, project=project, options=derivative_options)
70-
H = rf_np.hessian
62+
dJ = lambda m: rf_np.derivative(apply_riesz=True)
63+
H = lambda x, p: rf_np.hessian(p)
7164

7265
if "options" not in kwargs:
7366
kwargs["options"] = {}
@@ -144,7 +137,7 @@ def jac(x):
144137
return m
145138

146139

147-
def minimize_custom(rf_np, bounds=None, derivative_options=None, **kwargs):
140+
def minimize_custom(rf_np, bounds=None, **kwargs):
148141
""" Interface to the user-provided minimisation method """
149142

150143
try:
@@ -160,7 +153,7 @@ def minimize_custom(rf_np, bounds=None, derivative_options=None, **kwargs):
160153
m_global = rf_np.obj_to_array(m)
161154
J = rf_np.__call__
162155

163-
dJ = lambda m: rf_np.derivative(m, forget=None, options=derivative_options)
156+
dJ = lambda m: rf_np.derivative(m, apply_riesz=True)
164157
H = rf_np.hessian
165158

166159
if bounds is not None:
@@ -263,7 +256,7 @@ def minimize(rf, method='L-BFGS-B', scale=1.0, **kwargs):
263256
return opt
264257

265258

266-
def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs):
259+
def maximize(rf, method='L-BFGS-B', scale=1.0, **kwargs):
267260
""" Solves the maximisation problem with PDE constraint:
268261
269262
max_m func(u, m)
@@ -282,7 +275,6 @@ def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs
282275
* 'method' specifies the optimization method to be used to solve the problem.
283276
The available methods can be listed with the print_optimization_methods function.
284277
* 'scale' is a factor to scale to problem (default: 1.0).
285-
* 'derivative_options' is a dictionary of options that will be passed to the `rf.derivative`.
286278
* 'bounds' is an optional keyword parameter to support control constraints: bounds = (lb, ub).
287279
lb and ub must be of the same type than the parameters m.
288280
@@ -291,7 +283,7 @@ def maximize(rf, method='L-BFGS-B', scale=1.0, derivative_options=None, **kwargs
291283
For detailed information about which arguments are supported for each optimization method,
292284
please refer to the documentation of the optimization algorithm.
293285
"""
294-
return minimize(rf, method, scale=-scale, derivative_options=derivative_options, **kwargs)
286+
return minimize(rf, method, scale=-scale, **kwargs)
295287

296288

297289
minimise = minimize

0 commit comments

Comments
 (0)