diff --git a/common/amount.c b/common/amount.c
index beeaa14b4a65..1eae69cefc8c 100644
--- a/common/amount.c
+++ b/common/amount.c
@@ -532,6 +532,13 @@ struct amount_msat amount_msat_div(struct amount_msat msat, u64 div)
 	return msat;
 }
 
+struct amount_msat amount_msat_div_ceil(struct amount_msat msat, u64 div)
+{
+	const bool inexact = msat.millisatoshis % div != 0; /* remainder? */
+	msat.millisatoshis = msat.millisatoshis / div + (inexact ? 1 : 0);
+	return msat;
+}
+
 struct amount_sat amount_sat_div(struct amount_sat sat, u64 div)
 {
 	sat.satoshis /= div;
diff --git a/common/amount.h b/common/amount.h
index dd6ad61bb262..b1cdbac1d570 100644
--- a/common/amount.h
+++ b/common/amount.h
@@ -104,7 +104,13 @@ WARN_UNUSED_RESULT bool amount_sat_add_sat_s64(struct amount_sat *val,
 WARN_UNUSED_RESULT bool amount_msat_accumulate(struct amount_msat *a,
 					       struct amount_msat b);
 
+/* returns floor(msat/div) */
 struct amount_msat amount_msat_div(struct amount_msat msat, u64 div);
+
+/* returns ceil(msat/div) */
+struct amount_msat amount_msat_div_ceil(struct amount_msat msat, u64 div);
+
+/* returns floor(sat/div) */
 struct amount_sat amount_sat_div(struct amount_sat sat, u64 div);
 
 bool amount_sat_mul(struct amount_sat *res, struct amount_sat sat, u64 mul);
diff --git a/common/test/run-amount.c b/common/test/run-amount.c
index 5f8a96a0b0bc..0e9f295dc0c2 100644
--- a/common/test/run-amount.c
+++ b/common/test/run-amount.c
@@ -163,6 +163,75 @@ static void test_amount_with_fee(void)
 			    2100000001234567890ULL);
 }
 
+static void test_case_amount_div(u64 input, u64 div, u64 expected)
+{
+	/* input msat divided by div, rounded down, must equal expected msat */
+	const struct amount_msat quotient =
+	    amount_msat_div(amount_msat(input), div);
+	assert(amount_msat_eq(quotient, amount_msat(expected)));
+}
+
+static void test_case_amount_div_ceil(u64 input, u64 div, u64 expected)
+{
+	/* input msat divided by div, rounded up, must equal expected msat */
+	const struct amount_msat quotient =
+	    amount_msat_div_ceil(amount_msat(input), div);
+	assert(amount_msat_eq(quotient, amount_msat(expected)));
+}
+
+static void test_amount_div(void)
+{
+	/* Each row: dividend, divisor, then the quotient expected when rounding
+	 * down (amount_msat_div) and when rounding up (amount_msat_div_ceil). */
+	static const struct {
+		u64 input, div, down, up;
+	} cases[] = {
+		/* dividend 1 */
+		{1, 1, 1, 1},
+		{1, 2, 0, 1},
+		{1, 3, 0, 1},
+
+		/* dividend 2 */
+		{2, 1, 2, 2},
+		{2, 2, 1, 1},
+		{2, 3, 0, 1},
+
+		/* dividend 3 */
+		{3, 1, 3, 3},
+		{3, 2, 1, 2},
+		{3, 3, 1, 1},
+		{3, 4, 0, 1},
+
+		/* dividend 10 */
+		{10, 1, 10, 10},
+		{10, 2, 5, 5},
+		{10, 3, 3, 4},
+		{10, 4, 2, 3},
+		{10, 5, 2, 2},
+		{10, 6, 1, 2},
+		{10, 7, 1, 2},
+		{10, 8, 1, 2},
+		{10, 9, 1, 2},
+		{10, 10, 1, 1},
+		{10, 11, 0, 1},
+	};
+	const size_t num_cases = sizeof(cases) / sizeof(cases[0]);
+
+	/* all the rounding-down checks run first, in table order ... */
+	for (size_t i = 0; i < num_cases; i++) {
+		test_case_amount_div(cases[i].input, cases[i].div,
+				     cases[i].down);
+	}
+
+	/* ... followed by all the rounding-up checks */
+	for (size_t i = 0; i < num_cases; i++) {
+		test_case_amount_div_ceil(cases[i].input, cases[i].div,
+					  cases[i].up);
+	}
+}
+
 #define FAIL_MSAT(msatp, str)					\
 	assert(!parse_amount_msat((msatp), (str), strlen(str)))
 #define PASS_MSAT(msatp, str, val)					\
@@ -330,5 +399,6 @@ int main(int argc, char *argv[])
 	}
 
 	test_amount_with_fee();
+	test_amount_div();
 	common_shutdown();
 }
diff --git a/contrib/msggen/msggen/schema.json b/contrib/msggen/msggen/schema.json
index b00e60f9ea7b..dff1dc5dc86e 100644
--- a/contrib/msggen/msggen/schema.json
+++ b/contrib/msggen/msggen/schema.json
@@ -16125,7 +16125,7 @@
         "",
         "Layers are generally maintained by plugins, either to contain persistent information about capacities which have been discovered, or to contain transient information for this particular payment (such as blinded paths or routehints).",
         "",
-        "There are three automatic layers: *auto.localchans* contains information on local channels from this node (including non-public ones), and their exact current spendable capacities. *auto.sourcefree* overrides all channels (including those from previous layers) leading out of the *source* to be zero fee and zero delay.  These are both useful in the case where the source is the current node.  And *auto.no_mpp_support* forces getroutes to return a single flow, though only basic checks are done that the result is useful."
+        "There are three automatic layers: *auto.localchans* contains information on local channels from this node (including non-public ones), and their exact current spendable capacities. *auto.sourcefree* overrides all channels (including those from previous layers) leading out of the *source* to be zero fee and zero delay.  These are both useful in the case where the source is the current node.  And *auto.no_mpp_support* forces getroutes to return a single path solution which is useful for payments for which MPP is not supported."
       ],
       "categories": [
         "readonly"
diff --git a/doc/schemas/getroutes.json b/doc/schemas/getroutes.json
index cfa2ac3a3b90..942ef99de0c6 100644
--- a/doc/schemas/getroutes.json
+++ b/doc/schemas/getroutes.json
@@ -11,7 +11,7 @@
     "",
     "Layers are generally maintained by plugins, either to contain persistent information about capacities which have been discovered, or to contain transient information for this particular payment (such as blinded paths or routehints).",
     "",
-    "There are three automatic layers: *auto.localchans* contains information on local channels from this node (including non-public ones), and their exact current spendable capacities. *auto.sourcefree* overrides all channels (including those from previous layers) leading out of the *source* to be zero fee and zero delay.  These are both useful in the case where the source is the current node.  And *auto.no_mpp_support* forces getroutes to return a single flow, though only basic checks are done that the result is useful."
+    "There are three automatic layers: *auto.localchans* contains information on local channels from this node (including non-public ones), and their exact current spendable capacities. *auto.sourcefree* overrides all channels (including those from previous layers) leading out of the *source* to be zero fee and zero delay.  These are both useful in the case where the source is the current node.  And *auto.no_mpp_support* forces getroutes to return a single path solution which is useful for payments for which MPP is not supported."
   ],
   "categories": [
     "readonly"
diff --git a/plugins/askrene/Makefile b/plugins/askrene/Makefile
index a34442df4b27..aebb98935153 100644
--- a/plugins/askrene/Makefile
+++ b/plugins/askrene/Makefile
@@ -22,6 +22,7 @@ PLUGIN_ASKRENE_HEADER := \
 	plugins/askrene/explain_failure.h \
 	plugins/askrene/graph.h \
 	plugins/askrene/priorityqueue.h \
+	plugins/askrene/queue.h \
 	plugins/askrene/algorithm.h
 
 PLUGIN_ASKRENE_OBJS := $(PLUGIN_ASKRENE_SRC:.c=.o)
diff --git a/plugins/askrene/algorithm.c b/plugins/askrene/algorithm.c
index d253325a8b23..54abfeac01c7 100644
--- a/plugins/askrene/algorithm.c
+++ b/plugins/askrene/algorithm.c
@@ -3,6 +3,7 @@
 #include <ccan/tal/tal.h>
 #include <plugins/askrene/algorithm.h>
 #include <plugins/askrene/priorityqueue.h>
+#include <plugins/askrene/queue.h>
 
 static const s64 INFINITE = INT64_MAX;
 
@@ -661,10 +662,552 @@ s64 flow_cost(const struct graph *graph, const s64 *capacity, const s64 *cost)
 		struct arc arc = {.idx = i};
 		struct arc dual = arc_dual(graph, arc);
 
-		if (arc_is_dual(graph, arc))
+		if (arc_is_dual(graph, arc) || !arc_enabled(graph, arc))
 			continue;
 
 		total_cost += capacity[dual.idx] * cost[arc.idx];
 	}
 	return total_cost;
 }
+
+/* Optional heuristic improvements to the Goldberg-Tarjan implementation.
+ * See Goldberg, "An Efficient Implementation of a Scaling Minimum-Cost Flow
+ * Algorithm", 1992. */
+
+/* Set-relabel is applied to the set of nodes that cannot reach any sink by
+ * admissible paths.
+ * This heuristic alone can significantly reduce the running time by reducing
+ * the number of relabeling operations. */
+#define GOLDBERG_PRICE_UPDATE
+/* FIXME: Price refinement as proposed by Goldberg 1992 and Bunnagel-Korte-Vygen
+ * seeks the minimum value of epsilon and the corresponding potential for which
+ * the current state is epsilon-optimal then epsilon is reduced by a factor and
+ * refine is called. Right now we simply assume the current state is
+ * epsilon-optimal. */
+#define GOLDBERG_PRICE_REFINEMENT 8
+/* Relabel a node to its maximum extent. */
+#define GOLDBERG_MAX_RELABEL
+/* Relabel neighboring nodes that do not have admissible arcs before pushing
+ * flow to them. GOLDBERG_LOOKAHEAD alone does not bring much performance gain.
+ * But combined with GOLDBERG_MAX_RELABEL produces a substantial increase in
+ * performance. */
+#define GOLDBERG_LOOKAHEAD
+
+
+/* FIXME: the original paper (Goldberg-Tarjan 1990) suggests using a
+ * "first-active" container, but a follow up paper (Goldberg 1992) uses a queue
+ * for its implementation and it should be just fine. or-tools library uses a
+ * stack and still obtains very good performances. We should investigate if the
+ * first-active container would improve our performance.
+ * Another alternative is the use of "dynamic trees". */
+QUEUE_DEFINE_TYPE(u32, queue_of_u32);
+QUEUE_DEFINE_TYPE(u32, gt_active);
+
+struct goldberg_tarjan_network {
+	const struct graph *graph;
+	s64 *residual_capacity; /* indexed by arc */
+	struct arc *current_arc; /* per node: where the arc scan resumes */
+	s64 *excess; /* indexed by node */
+	s64 *potential; /* indexed by node: label or price */
+	s64 *cost; /* indexed by arc, NULL for the feasible-flow search */
+};
+
+/* Goldberg-Tarjan's push/relabel, auxiliary routine: sends flow along arc. */
+static void gt_push(struct goldberg_tarjan_network *gt, struct arc arc,
+		    s64 flow)
+{
+	const struct node tail = arc_tail(gt->graph, arc);
+	const struct node head = arc_head(gt->graph, arc);
+	const struct arc reverse = arc_dual(gt->graph, arc);
+
+	gt->excess[tail.idx] -= flow;
+	gt->excess[head.idx] += flow;
+	gt->residual_capacity[arc.idx] -= flow;
+	gt->residual_capacity[reverse.idx] += flow;
+}
+
+/* Goldberg-Tarjan's push/relabel, auxiliary routine.
+ * note: excess is written to the same array that provides supply/demand values. */
+static void gt_discharge(u32 nodeidx, struct goldberg_tarjan_network *gt,
+			 struct queue_of_u32 *active, const s64 max_label)
+{
+	const struct node node = {.idx = nodeidx};
+	struct arc arc;
+
+	/* push/relabel while node is active: excess > 0, label < max_label */
+	while (gt->potential[nodeidx] < max_label && gt->excess[nodeidx] > 0) {
+		/* smallest label among the non-admissible residual neighbors */
+		s64 min_label = INT64_MAX;
+
+		/* try pushing out flow */
+		for (arc = gt->current_arc[nodeidx];
+		     !node_adjacency_end(arc) && gt->excess[nodeidx] > 0;
+		     arc = node_adjacency_next(gt->graph, arc)) {
+			const struct node next = arc_head(gt->graph, arc);
+
+			/* applies only to residual arcs */
+			if (gt->residual_capacity[arc.idx] <= 0)
+				continue;
+
+			if (gt->potential[nodeidx] > gt->potential[next.idx]) {
+				const s64 flow =
+				    MIN(gt->excess[nodeidx],
+					gt->residual_capacity[arc.idx]);
+				const s64 old_excess = gt->excess[next.idx];
+				gt_push(gt, arc, flow);
+
+				if (gt->excess[next.idx] > 0 &&
+				    old_excess <= 0 &&
+				    gt->potential[next.idx] < max_label)
+					queue_of_u32_insert(active, next.idx);
+			} else {
+				min_label =
+				    MIN(min_label, gt->potential[next.idx]);
+			}
+
+			/* the excess is exhausted: break before advancing, so
+			 * that this arc, which may still be admissible, is the
+			 * one stored as the current arc below. */
+			if (gt->excess[nodeidx] == 0)
+				break;
+		}
+		gt->current_arc[nodeidx] = arc;
+
+		/* still have excess: relabel */
+		if (gt->excess[nodeidx] > 0) {
+			if (min_label < INT64_MAX &&
+			    min_label >= gt->potential[nodeidx])
+				gt->potential[nodeidx] = min_label + 1;
+			else
+				gt->potential[nodeidx]++;
+			gt->current_arc[nodeidx] =
+			    node_adjacency_begin(gt->graph, node);
+		}
+	}
+}
+
+/* A variation of Maximum-Flow "push/relabel" to find a feasible flow.
+ *
+ * See Goldberg-Tarjan "A New Approach to the Maximum-Flow Problem", JACM, Vol.
+ * 35, No. 4, October 1988, pp. 921--940
+ *
+ * @ctx: allocator.
+ * @graph: graph, assumes the existence of reverse (dual) arcs.
+ * @supply: supply/demand encoding, supply[i]>0 for source nodes and supply[i]<0
+ * for sinks. It is modified by the algorithm execution. When a feasible
+ * solution is found supply[i] = 0 for every node.
+ * @residual_capacity: residual capacity on arcs, here the final solution is
+ * encoded.
+ *
+ * Returns true if every supply[i] ended up at zero, false otherwise.
+ *
+ * The original algorithm has an O(N^3) complexity, where N is the number of
+ * nodes. Capping the "label" at 10 excludes all solutions that need more than
+ * 10 hops from source to sink and reduces the theoretical complexity to O(N^2).
+ * */
+static bool UNNEEDED goldberg_tarjan_feasible(const tal_t *ctx,
+					      const struct graph *graph,
+					      s64 *supply,
+					      s64 *residual_capacity)
+{
+	const tal_t *this_ctx = tal(ctx, tal_t);
+	const size_t max_num_nodes = graph_max_num_nodes(graph);
+
+	/* re-use/abuse the same struct for MCF and Feasible Flow */
+	struct goldberg_tarjan_network *gt =
+	    tal(this_ctx, struct goldberg_tarjan_network);
+
+	gt->graph = graph;
+	/* we work with the residual_capacity in-place */
+	gt->residual_capacity = residual_capacity;
+	gt->current_arc = tal_arr(gt, struct arc, max_num_nodes);
+	gt->excess = supply;
+	gt->potential = tal_arrz(gt, s64, max_num_nodes);
+	gt->cost = NULL;
+
+	struct queue_of_u32 active;
+	queue_of_u32_init(&active, this_ctx);
+
+	/* `max_label = max_num_nodes` would exhaust every possible path from
+	 * the source to the sink, this is the correct MaxFlow implementation.
+	 * However we use `max_label = 10`, meaning that we discard any solution
+	 * that uses more than 10 hops from source to sink. */
+	const s64 max_label = MIN(10, max_num_nodes);
+	for (u32 node_id = 0; node_id < max_num_nodes; node_id++) {
+		if (gt->excess[node_id] > 0) {
+			gt->potential[node_id] = 1;
+			queue_of_u32_insert(&active, node_id);
+		}
+		gt->current_arc[node_id] =
+		    node_adjacency_begin(gt->graph, node_obj(node_id));
+	}
+
+	while (!queue_of_u32_empty(&active)) {
+		u32 node = queue_of_u32_pop(&active);
+		gt_discharge(node, gt, &active, max_label);
+	}
+
+	/* did we find a feasible solution? ie. is there no excess left? */
+	bool solved = true;
+	for (u32 node_id = 0; node_id < max_num_nodes; node_id++)
+		if (gt->excess[node_id] != 0) {
+			solved = false;
+			break;
+		}
+	tal_free(this_ctx);
+	return solved;
+}
+
+/* Reduced cost of arc `arcidx`, which runs from node `from` to node `to`. */
+static s64 gt_reduced_cost(const struct goldberg_tarjan_network *gt, u32 arcidx,
+			   u32 from, u32 to)
+{
+	return gt->cost[arcidx] + gt->potential[to] - gt->potential[from];
+}
+
+#ifdef GOLDBERG_LOOKAHEAD
+/* Moves current_arc[nodeidx] forward to the first admissible arc, if any. */
+static bool gt_has_admissible_arcs(struct goldberg_tarjan_network *gt,
+				   const u32 nodeidx)
+{
+	for (struct arc a = gt->current_arc[nodeidx]; !node_adjacency_end(a);
+	     a = node_adjacency_next(gt->graph, a)) {
+		const u32 head = arc_head(gt->graph, a).idx;
+		if (gt->residual_capacity[a.idx] <= 0 ||
+		    gt_reduced_cost(gt, a.idx, nodeidx, head) >= 0)
+			continue;
+		gt->current_arc[nodeidx] = a;
+		return true;
+	}
+	return false;
+}
+#endif // GOLDBERG_LOOKAHEAD
+
+/* Relabel: updates the potential of nodeidx and resets its current arc. */
+static void gt_mcf_relabel(struct goldberg_tarjan_network *gt,
+			   const u32 nodeidx, const s64 epsilon)
+{
+	/* a conservative relabel, just add epsilon */
+	struct node node = {.idx = nodeidx};
+	gt->potential[nodeidx] += epsilon;
+	gt->current_arc[nodeidx] = node_adjacency_begin(gt->graph, node);
+
+/* highest value relabel we can perform while keeping epsilon-optimality */
+#ifdef GOLDBERG_MAX_RELABEL
+	s64 smallest_cost = INT64_MAX;
+	struct arc first_residual_arc;
+	for (struct arc arc = node_adjacency_begin(gt->graph, node);
+	     !node_adjacency_end(arc);
+	     arc = node_adjacency_next(gt->graph, arc)) {
+		if (gt->residual_capacity[arc.idx] <= 0)
+			continue;
+
+		struct node next = arc_head(gt->graph, arc);
+		s64 rcost = gt->cost[arc.idx] + gt->potential[next.idx];
+
+		/* remember the first residual arc to use as current_arc */
+		if (smallest_cost == INT64_MAX)
+			first_residual_arc = arc;
+
+		if (rcost < gt->potential[nodeidx]) {
+			// at least one arc is admissible, we exit early
+			gt->current_arc[nodeidx] = arc;
+			return;
+		}
+
+		smallest_cost = MIN(smallest_cost, rcost);
+	}
+
+	if (smallest_cost < INT64_MAX) {
+		gt->potential[nodeidx] = smallest_cost + epsilon;
+		gt->current_arc[nodeidx] = first_residual_arc;
+	}
+#endif // GOLDBERG_MAX_RELABEL
+}
+
+/* Goldberg-Tarjan's push/relabel, auxiliary routine for the min-cost problem.
+ * Returns the number of relabel operations it performed. */
+static unsigned int gt_mcf_discharge(struct goldberg_tarjan_network *gt,
+				     struct gt_active *active,
+				     const s64 epsilon, const u32 nodeidx)
+{
+	unsigned int num_relabels = 0;
+
+	while (gt->excess[nodeidx] > 0) {
+		struct arc arc;
+		/* try pushing out flow */
+		for (arc = gt->current_arc[nodeidx];
+		     !node_adjacency_end(arc) && gt->excess[nodeidx] > 0;
+		     arc = node_adjacency_next(gt->graph, arc)) {
+			const struct node next = arc_head(gt->graph, arc);
+
+			/* applies only to residual arcs */
+			if (gt->residual_capacity[arc.idx] <= 0)
+				continue;
+
+			/* applies only to admissible arcs */
+			s64 rcost =
+			    gt_reduced_cost(gt, arc.idx, nodeidx, next.idx);
+			if (rcost >= 0)
+				continue;
+
+			const s64 flow = MIN(gt->excess[nodeidx],
+					     gt->residual_capacity[arc.idx]);
+			assert(flow > 0);
+
+			const s64 old_excess = gt->excess[next.idx];
+
+#ifdef GOLDBERG_LOOKAHEAD
+			if (old_excess >= 0 &&
+			    !gt_has_admissible_arcs(gt, next.idx)) {
+				num_relabels++;
+				gt_mcf_relabel(gt, next.idx, epsilon);
+
+				/* the arc might not be admissible after the
+				 * next node relabel, we check */
+				rcost = gt_reduced_cost(gt, arc.idx, nodeidx,
+							next.idx);
+				if (rcost >= 0)
+					continue;
+			}
+#endif // GOLDBERG_LOOKAHEAD
+
+			gt_push(gt, arc, flow);
+			if (gt->excess[next.idx] > 0 && old_excess <= 0)
+				gt_active_insert(active, next.idx);
+
+			/* break right away, skip moving to the next arc */
+			if (gt->excess[nodeidx] == 0)
+				break;
+		}
+
+		/* next time we loop over arcs starting where we ended-up now */
+		gt->current_arc[nodeidx] = arc;
+
+		/* still have excess: relabel */
+		if (gt->excess[nodeidx] > 0) {
+			num_relabels++;
+			gt_mcf_relabel(gt, nodeidx, epsilon);
+		}
+	}
+	return num_relabels;
+}
+
+#ifdef GOLDBERG_PRICE_UPDATE
+/* Price update (set-relabel): floods backwards from the nodes with negative
+ * excess, giving every reached node a distance in units of epsilon, then adds
+ * epsilon times that distance (capped) to the potential of each node. */
+static void gt_set_relabel(struct goldberg_tarjan_network *gt,
+			   const s64 epsilon)
+{
+	const tal_t *this_ctx = tal(gt, tal_t);
+	const size_t max_num_nodes = graph_max_num_nodes(gt->graph);
+
+	struct priorityqueue *pending;
+	pending = priorityqueue_new(this_ctx, max_num_nodes);
+	priorityqueue_init(pending);
+	const s64 *distance = priorityqueue_value(pending);
+	s64 maximum_distance = 0;
+	s64 set_excess = 0;
+
+	/* negative excess nodes is where we start flooding */
+	for (u32 nodeidx = 0; nodeidx < max_num_nodes; nodeidx++) {
+		if (gt->excess[nodeidx] < 0) {
+			set_excess += gt->excess[nodeidx];
+			priorityqueue_update(pending, nodeidx, 0);
+		}
+	}
+
+	while (!priorityqueue_empty(pending)) {
+		const u32 nodeidx = priorityqueue_top(pending);
+		const struct node node = {.idx = nodeidx};
+		priorityqueue_pop(pending);
+		if (gt->excess[nodeidx] > 0)
+			set_excess += gt->excess[nodeidx];
+		maximum_distance = distance[nodeidx];
+
+		/* once we have scanned all active nodes we exit */
+		if (set_excess == 0)
+			break;
+
+		for (struct arc arc = node_adjacency_begin(gt->graph, node);
+		     !node_adjacency_end(arc);
+		     arc = node_adjacency_next(gt->graph, arc)) {
+			const struct arc dual = arc_dual(gt->graph, arc);
+			const struct node next = arc_head(gt->graph, arc);
+			/* traverse residual arcs only */
+			if (gt->residual_capacity[dual.idx] <= 0)
+				continue;
+
+			const s64 rcost =
+			    gt_reduced_cost(gt, dual.idx, next.idx, nodeidx);
+			/* (node) <--- (next)
+			 * distance[next] must be the least such that
+			 *
+			 * cost[dual] + (potential[node]+distance[node]*epsilon)
+			 *      - (potential[next]+distance[next]*epsilon) < 0
+			 * */
+			s64 delta = 1 + rcost / epsilon;
+			if (rcost < 0)
+				delta = 0;
+			if (distance[next.idx] <= delta + distance[nodeidx])
+				continue;
+
+			priorityqueue_update(pending, next.idx,
+					     distance[nodeidx] + delta);
+		}
+	}
+
+	for (u32 nodeidx = 0; nodeidx < max_num_nodes; nodeidx++) {
+		s64 d = MIN(distance[nodeidx], maximum_distance);
+		if (d > 0) {
+			gt->potential[nodeidx] += epsilon * d;
+			gt->current_arc[nodeidx] =
+			    node_adjacency_begin(gt->graph, node_obj(nodeidx));
+		}
+	}
+
+	/* gt outlives this call: free the queue now instead of piling them up */
+	tal_free(this_ctx);
+}
+#endif // GOLDBERG_PRICE_UPDATE
+
+/* Refine operation for Goldberg-Tarjan's push/relabel
+ * min-cost-circulation. */
+static void gt_refine(struct goldberg_tarjan_network *gt, s64 epsilon)
+{
+	const tal_t *this_ctx = tal(gt, tal_t);
+
+	struct gt_active active;
+	gt_active_init(&active, this_ctx);
+
+	const size_t max_num_arcs = graph_max_num_arcs(gt->graph);
+	const size_t max_num_nodes = graph_max_num_nodes(gt->graph);
+
+	/* reset current arc for every node */
+	for (u32 nodeidx = 0; nodeidx < max_num_nodes; nodeidx++) {
+		struct node node = {.idx = nodeidx};
+		gt->current_arc[nodeidx] =
+		    node_adjacency_begin(gt->graph, node);
+	}
+
+	/* saturate all residual arcs with negative reduced cost */
+	for (u32 i = 0; i < max_num_arcs; i++) {
+		struct arc arc = {.idx = i};
+		if (arc_enabled(gt->graph, arc)) {
+			struct node to = arc_head(gt->graph, arc);
+			struct node from = arc_tail(gt->graph, arc);
+			const s64 rcost =
+			    gt_reduced_cost(gt, i, from.idx, to.idx);
+			const s64 flow = gt->residual_capacity[arc.idx];
+			if (rcost < 0 && flow > 0)
+				gt_push(gt, arc, flow);
+		}
+	}
+
+	/* enqueue all active nodes, ie. those with positive excess */
+	for (u32 nodeidx = 0; nodeidx < max_num_nodes; nodeidx++) {
+		if (gt->excess[nodeidx] > 0) {
+			gt_active_insert(&active, nodeidx);
+		}
+	}
+
+	unsigned int num_relabels = 0;
+	/* push/relabel until there are no more active nodes */
+	while (!gt_active_empty(&active)) {
+#ifdef GOLDBERG_PRICE_UPDATE
+		if (num_relabels >= max_num_nodes) {
+			num_relabels = 0;
+			gt_set_relabel(gt, epsilon);
+		}
+#endif // GOLDBERG_PRICE_UPDATE
+
+		u32 nodeidx = gt_active_pop(&active);
+		num_relabels += gt_mcf_discharge(gt, &active, epsilon, nodeidx);
+	}
+	tal_free(this_ctx);
+}
+
+/* Minimum-Cost Circulation: shrinks epsilon by a constant factor, refining the
+ * flow each time, until epsilon is down to 1.
+ * note: supply/demand is already satisfied here, so it always succeeds. */
+static void goldberg_tarjan_circulation(struct goldberg_tarjan_network *gt,
+					s64 epsilon)
+{
+#ifdef GOLDBERG_PRICE_REFINEMENT
+	const s64 shrink = GOLDBERG_PRICE_REFINEMENT;
+#else
+	const s64 shrink = 2;
+#endif // GOLDBERG_PRICE_REFINEMENT
+	while (epsilon > 1) {
+		epsilon /= shrink;
+		if (epsilon < 1)
+			epsilon = 1;
+		gt_refine(gt, epsilon);
+	}
+}
+
+/* Returns true when x * y <= bound, ie. when the product does NOT overflow. */
+static bool check_overflow(double x, double y, double bound)
+{
+	return x * y <= bound;
+}
+
+/* Minimum-Cost Flow "cost scaling, push/relabel"
+ *
+ * see Goldberg-Tarjan "Finding Minimum-Cost Circulations by Successive
+ * Approximation" Math. of Op. Research, Vol. 15, No. 3 (Aug. 1990), pp.
+ * 430--466.
+ *
+ * @ctx: allocator.
+ * @graph: graph, assumes the existence of reverse (dual) arcs.
+ * @supply: supply/demand encoding, supply[i]>0 for source nodes and supply[i]<0
+ * for sinks. It is modified by the algorithm execution. When a feasible
+ * solution is found supply[i] = 0 for every node.
+ * @residual_capacity: residual capacity on arcs, here the final solution is
+ * encoded.
+ * @cost: cost per unit of flow on arcs. It is assumed that dual arcs have the
+ * opposite cost of its twin: cost[i] = -cost[dual(i)].
+ * Returns false if no feasible flow could be found, true otherwise.
+ * */
+bool goldberg_tarjan_mcf(const tal_t *ctx, const struct graph *graph,
+			 s64 *supply, s64 *residual_capacity, const s64 *cost)
+{
+	const tal_t *this_ctx = tal(ctx, tal_t);
+	bool solved = false;
+
+	if (!goldberg_tarjan_feasible(this_ctx, graph, supply,
+				      residual_capacity))
+		goto done;
+
+	const size_t max_num_arcs = graph_max_num_arcs(graph);
+	const size_t max_num_nodes = graph_max_num_nodes(graph);
+	const s64 scale_factor = max_num_nodes;
+	struct goldberg_tarjan_network *gt =
+	    tal(this_ctx, struct goldberg_tarjan_network);
+
+	gt->graph = graph;
+	/* we work with the residual_capacity in-place */
+	gt->residual_capacity = residual_capacity;
+	gt->current_arc = tal_arr(gt, struct arc, max_num_nodes);
+	/* assumed to be zero at this point */
+	gt->excess = supply;
+	gt->potential = tal_arrz(gt, s64, max_num_nodes);
+	gt->cost = tal_arrz(gt, s64, max_num_arcs);
+
+	/* FIXME: advantage of knowing the minimum non-zero cost? */
+	s64 max_epsilon = 0;
+	for (u32 i = 0; i < max_num_arcs; i++)
+		if (arc_enabled(gt->graph, arc_obj(i)))
+			max_epsilon = MAX(cost[i], max_epsilon);
+	/* rule out overflow BEFORE scaling: signed overflow is undefined */
+	assert(check_overflow(max_epsilon, scale_factor, 9e18));
+	for (u32 i = 0; i < max_num_arcs; i++)
+		if (arc_enabled(gt->graph, arc_obj(i)))
+			gt->cost[i] = cost[i] * scale_factor;
+	goldberg_tarjan_circulation(gt, max_epsilon * scale_factor);
+	solved = true;
+done:
+	tal_free(this_ctx);
+	return solved;
+}
diff --git a/plugins/askrene/algorithm.h b/plugins/askrene/algorithm.h
index 40010eb5cfd9..0ccf2044eece 100644
--- a/plugins/askrene/algorithm.h
+++ b/plugins/askrene/algorithm.h
@@ -176,4 +176,23 @@ bool mcf_refinement(const tal_t *ctx,
 		    const s64 *cost,
 		    s64 *potential);
 
+/* Minimum-Cost Flow "cost scaling, push/relabel"
+ *
+ * see Goldberg-Tarjan "Finding Minimum-Cost Circulations by Successive
+ * Approximation" Math. of Op. Research, Vol. 15, No. 3 (Aug. 1990), pp.
+ * 430--466.
+ *
+ * @ctx: allocator.
+ * @graph: graph, assumes the existence of reverse (dual) arcs.
+ * @supply: supply/demand encoding, supply[i]>0 for source nodes and supply[i]<0
+ * for sinks. It is modified by the algorithm execution. When a feasible
+ * solution is found supply[i] = 0 for every node.
+ * @residual_capacity: residual capacity on arcs, here the final solution is
+ * encoded.
+ * @cost: cost per unit of flow on arcs. It is assumed that dual arcs have the
+ * opposite cost of its twin: cost[i] = -cost[dual(i)].
+ * */
+bool goldberg_tarjan_mcf(const tal_t *ctx, const struct graph *graph,
+			 s64 *supply, s64 *residual_capacity, const s64 *cost);
+
 #endif /* LIGHTNING_PLUGINS_ASKRENE_ALGORITHM_H */
diff --git a/plugins/askrene/askrene.c b/plugins/askrene/askrene.c
index 3429391084a2..8335f3f98ed1 100644
--- a/plugins/askrene/askrene.c
+++ b/plugins/askrene/askrene.c
@@ -9,6 +9,7 @@
 #include "config.h"
 #include <ccan/array_size/array_size.h>
 #include <ccan/tal/str/str.h>
+#include <ccan/time/time.h>
 #include <common/dijkstra.h>
 #include <common/gossmap.h>
 #include <common/gossmods_listpeerchannels.h>
@@ -18,11 +19,9 @@
 #include <errno.h>
 #include <math.h>
 #include <plugins/askrene/askrene.h>
-#include <plugins/askrene/explain_failure.h>
 #include <plugins/askrene/flow.h>
 #include <plugins/askrene/layer.h>
 #include <plugins/askrene/mcf.h>
-#include <plugins/askrene/refine.h>
 #include <plugins/askrene/reserve.h>
 
 /* "spendable" for a channel assumes a single HTLC: for additional HTLCs,
@@ -332,76 +331,65 @@ const char *fmt_flow_full(const tal_t *ctx,
 	return str;
 }
 
-static struct amount_msat linear_flows_cost(struct flow **flows,
-					    struct amount_msat total_amount,
-					    double delay_feefactor)
-{
-	struct amount_msat total = AMOUNT_MSAT(0);
-
-	for (size_t i = 0; i < tal_count(flows); i++) {
-		if (!amount_msat_accumulate(&total,
-					    linear_flow_cost(flows[i],
-							     total_amount,
-							     delay_feefactor)))
-			abort();
-	}
-	return total;
-}
+enum algorithm {
+	/* Min. Cost Flow by successive shortest paths. */
+	ALGO_DEFAULT,
+	/* Algorithm that finds the optimal routing solution constrained to a
+	 * single path. */
+	ALGO_SINGLE_PATH,
+	/* Min. Cost Flow by Successive Approximations, aka. Cost Scaling. */
+	ALGO_GOLDBERG_TARJAN,
+};
 
-/* Returns an error message, or sets *routes */
-static const char *get_routes(const tal_t *ctx,
-			      struct command *cmd,
-			      const struct node_id *source,
-			      const struct node_id *dest,
-			      struct amount_msat amount,
-			      struct amount_msat maxfee,
-			      u32 finalcltv,
-			      u32 maxdelay,
-			      const char **layers,
-			      struct gossmap_localmods *localmods,
-			      const struct layer *local_layer,
-			      bool single_path,
-			      struct route ***routes,
-			      struct amount_msat **amounts,
-			      const struct additional_cost_htable *additional_costs,
-			      double *probability)
+static struct command_result *
+param_algorithm(struct command *cmd, const char *name, const char *buffer,
+		const jsmntok_t *tok, enum algorithm **algo)
 {
-	struct askrene *askrene = get_askrene(cmd->plugin);
-	struct route_query *rq = tal(ctx, struct route_query);
-	struct flow **flows;
-	const struct gossmap_node *srcnode, *dstnode;
-	double delay_feefactor;
-	u32 mu;
-	const char *ret;
-
-	if (gossmap_refresh(askrene->gossmap)) {
-		/* FIXME: gossmap_refresh callbacks to we can update in place */
-		tal_free(askrene->capacities);
-		askrene->capacities = get_capacities(askrene, askrene->plugin, askrene->gossmap);
-	}
+	const char *algo_str = json_strdup(cmd, buffer, tok);
+	*algo = tal(cmd, enum algorithm);
+	if (streq(algo_str, "default"))
+		**algo = ALGO_DEFAULT;
+	else if (streq(algo_str, "single-path"))
+		**algo = ALGO_SINGLE_PATH;
+	else if (streq(algo_str, "goldberg-tarjan"))
+		**algo = ALGO_GOLDBERG_TARJAN;
+	else
+		return command_fail_badparam(cmd, name, buffer, tok,
+					     "unknown algorithm");
+	return NULL;
+}
 
-	rq->cmd = cmd;
-	rq->plugin = cmd->plugin;
-	rq->gossmap = askrene->gossmap;
-	rq->reserved = askrene->reserved;
-	rq->layers = tal_arr(rq, const struct layer *, 0);
-	rq->capacities = tal_dup_talarr(rq, fp16_t, askrene->capacities);
-	rq->additional_costs = additional_costs;
+struct getroutes_info {
+	struct command *cmd;
+	struct node_id *source, *dest;
+	struct amount_msat *amount, *maxfee;
+	u32 *finalcltv, *maxdelay;
+	const char **layers; /* layer names, looked up by apply_layers() */
+	struct additional_cost_htable *additional_costs;
+	/* Non-NULL if we are told to use "auto.localchans" */
+	struct layer *local_layer;
+	/* algorithm selection (dev only) */
+	enum algorithm *dev_algo;
+};
 
+static void apply_layers(struct askrene *askrene, struct route_query *rq,
+			 struct gossmap_localmods *localmods,
+			 const struct getroutes_info *info)
+{
 	/* Layers must exist, but might be special ones! */
-	for (size_t i = 0; i < tal_count(layers); i++) {
-		const struct layer *l = find_layer(askrene, layers[i]);
+	for (size_t i = 0; i < tal_count(info->layers); i++) {
+		const struct layer *l = find_layer(askrene, info->layers[i]);
 		if (!l) {
-			if (streq(layers[i], "auto.localchans")) {
+			if (streq(info->layers[i], "auto.localchans")) {
 				plugin_log(rq->plugin, LOG_DBG, "Adding auto.localchans");
-				l = local_layer;
-			} else if (streq(layers[i], "auto.no_mpp_support")) {
+				l = info->local_layer;
+			} else if (streq(info->layers[i], "auto.no_mpp_support")) {
 				plugin_log(rq->plugin, LOG_DBG, "Adding auto.no_mpp_support, sorry");
-				l = remove_small_channel_layer(layers, askrene, amount, localmods);
+				l = remove_small_channel_layer(info->layers, askrene, *info->amount, localmods);
 			} else {
-				assert(streq(layers[i], "auto.sourcefree"));
+				assert(streq(info->layers[i], "auto.sourcefree"));
 				plugin_log(rq->plugin, LOG_DBG, "Adding auto.sourcefree");
-				l = source_free_layer(layers, askrene, source, localmods);
+				l = source_free_layer(info->layers, askrene, info->source, localmods);
 			}
 		}
 
@@ -413,140 +401,14 @@ static const char *get_routes(const tal_t *ctx,
 		 * override them (incl local channels) */
 		layer_clear_overridden_capacities(l, askrene->gossmap, rq->capacities);
 	}
+}
 
-	/* Clear scids with reservations, too, so we don't have to look up
-	 * all the time! */
-	reserves_clear_capacities(askrene->reserved, askrene->gossmap, rq->capacities);
-
-	gossmap_apply_localmods(askrene->gossmap, localmods);
-
-	/* localmods can add channels, so we need to allocate biases array *afterwards* */
-	rq->biases = tal_arrz(rq, s8, gossmap_max_chan_idx(askrene->gossmap) * 2);
-
-	/* Note any channel biases */
-	for (size_t i = 0; i < tal_count(rq->layers); i++)
-		layer_apply_biases(rq->layers[i], askrene->gossmap, rq->biases);
-
-	srcnode = gossmap_find_node(askrene->gossmap, source);
-	if (!srcnode) {
-		ret = rq_log(ctx, rq, LOG_INFORM,
-			     "Unknown source node %s",
-			     fmt_node_id(tmpctx, source));
-		goto fail;
-	}
-
-	dstnode = gossmap_find_node(askrene->gossmap, dest);
-	if (!dstnode) {
-		ret = rq_log(ctx, rq, LOG_INFORM,
-			     "Unknown destination node %s",
-			     fmt_node_id(tmpctx, dest));
-		goto fail;
-	}
-
-	delay_feefactor = 1.0/1000000;
-
-	/* First up, don't care about fees (well, just enough to tiebreak!) */
-	mu = 1;
-	flows = minflow(rq, rq, srcnode, dstnode, amount,
-			mu, delay_feefactor, single_path);
-	if (!flows) {
-		ret = explain_failure(ctx, rq, srcnode, dstnode, amount);
-		goto fail;
-	}
-
-	/* Too much delay? */
-	while (finalcltv + flows_worst_delay(flows) > maxdelay) {
-		delay_feefactor *= 2;
-		rq_log(tmpctx, rq, LOG_UNUSUAL,
-		       "The worst flow delay is %"PRIu64" (> %i), retrying with delay_feefactor %f...",
-		       flows_worst_delay(flows), maxdelay - finalcltv, delay_feefactor);
-		flows = minflow(rq, rq, srcnode, dstnode, amount,
-				mu, delay_feefactor, single_path);
-		if (!flows || delay_feefactor > 10) {
-			ret = rq_log(ctx, rq, LOG_UNUSUAL,
-				     "Could not find route without excessive delays");
-			goto fail;
-		}
-	}
-
-	/* Too expensive? */
-too_expensive:
-	while (amount_msat_greater(flowset_fee(rq->plugin, flows), maxfee)) {
-		struct flow **new_flows;
-
-		if (mu == 1)
-			mu = 10;
-		else
-			mu += 10;
-		rq_log(tmpctx, rq, LOG_UNUSUAL,
-		       "The flows had a fee of %s, greater than max of %s, retrying with mu of %u%%...",
-		       fmt_amount_msat(tmpctx, flowset_fee(rq->plugin, flows)),
-		       fmt_amount_msat(tmpctx, maxfee),
-		       mu);
-		new_flows = minflow(rq, rq, srcnode, dstnode, amount,
-				    mu > 100 ? 100 : mu, delay_feefactor, single_path);
-		if (!flows || mu >= 100) {
-			ret = rq_log(ctx, rq, LOG_UNUSUAL,
-				     "Could not find route without excessive cost");
-			goto fail;
-		}
-
-		/* This is possible, because MCF's linear fees are not the same. */
-		if (amount_msat_greater(flowset_fee(rq->plugin, new_flows),
-					flowset_fee(rq->plugin, flows))) {
-			struct amount_msat old_cost = linear_flows_cost(flows, amount, delay_feefactor);
-			struct amount_msat new_cost = linear_flows_cost(new_flows, amount, delay_feefactor);
-			if (amount_msat_greater_eq(new_cost, old_cost)) {
-				rq_log(tmpctx, rq, LOG_BROKEN, "Old flows cost %s:",
-				       fmt_amount_msat(tmpctx, old_cost));
-				for (size_t i = 0; i < tal_count(flows); i++) {
-					rq_log(tmpctx, rq, LOG_BROKEN,
-					       "Flow %zu/%zu: %s (linear cost %s)", i, tal_count(flows),
-					       fmt_flow_full(tmpctx, rq, flows[i]),
-					       fmt_amount_msat(tmpctx, linear_flow_cost(flows[i],
-											amount,
-											delay_feefactor)));
-				}
-				rq_log(tmpctx, rq, LOG_BROKEN, "Old flows cost %s:",
-				       fmt_amount_msat(tmpctx, new_cost));
-				for (size_t i = 0; i < tal_count(new_flows); i++) {
-					rq_log(tmpctx, rq, LOG_BROKEN,
-					       "Flow %zu/%zu: %s (linear cost %s)", i, tal_count(new_flows),
-					       fmt_flow_full(tmpctx, rq, new_flows[i]),
-					       fmt_amount_msat(tmpctx, linear_flow_cost(new_flows[i],
-											amount,
-											delay_feefactor)));
-				}
-			}
-		}
-		tal_free(flows);
-		flows = new_flows;
-	}
-
-	if (finalcltv + flows_worst_delay(flows) > maxdelay) {
-		ret = rq_log(ctx, rq, LOG_UNUSUAL,
-			     "Could not find route without excessive cost or delays");
-		goto fail;
-	}
-
-	/* The above did not take into account the extra funds to pay
-	 * fees, so we try to adjust now.  We could re-run MCF if this
-	 * fails, but failure basically never happens where payment is
-	 * still possible */
-	ret = refine_with_fees_and_limits(ctx, rq, amount, &flows, probability);
-	if (ret)
-		goto fail;
-
-	/* Again, a tiny corner case: refine step can make us exceed maxfee */
-	if (amount_msat_greater(flowset_fee(rq->plugin, flows), maxfee)) {
-		rq_log(tmpctx, rq, LOG_UNUSUAL,
-		       "After final refinement, fee was excessive: retrying");
-		goto too_expensive;
-	}
-
-	rq_log(tmpctx, rq, LOG_DBG, "Final answer has %zu flows with mu=%u",
-	       tal_count(flows), mu);
-
+static void convert_flows_to_routes(const tal_t *ctx, struct route_query *rq,
+				    struct route ***routes,
+				    struct amount_msat **amounts,
+				    u32 finalcltv,
+				    struct flow **flows)
+{
 	/* Convert back into routes, with delay and other information fixed */
 	*routes = tal_arr(ctx, struct route *, tal_count(flows));
 	*amounts = tal_arr(ctx, struct amount_msat, tal_count(flows));
@@ -584,17 +446,6 @@ static const char *get_routes(const tal_t *ctx,
 		       i, tal_count(flows),
 		       fmt_route(tmpctx, r, (*amounts)[i], finalcltv));
 	}
-
-	gossmap_remove_localmods(askrene->gossmap, localmods);
-
-	return NULL;
-
-	/* Explicit failure path keeps the compiler (gcc version 12.3.0 -O3) from
-	 * warning about uninitialized variables in the caller */
-fail:
-	assert(ret != NULL);
-	gossmap_remove_localmods(askrene->gossmap, localmods);
-	return ret;
 }
 
 void get_constraints(const struct route_query *rq,
@@ -633,16 +484,40 @@ void get_constraints(const struct route_query *rq,
 	reserve_sub(rq->reserved, &scidd, max);
 }
 
-struct getroutes_info {
-	struct command *cmd;
-	struct node_id *source, *dest;
-	struct amount_msat *amount, *maxfee;
-	u32 *finalcltv, *maxdelay;
-	const char **layers;
-	struct additional_cost_htable *additional_costs;
-	/* Non-NULL if we are told to use "auto.localchans" */
-	struct layer *local_layer;
-};
+static void json_add_getroutes(
+    struct json_stream *js,
+    struct route **routes,
+    const struct amount_msat *amounts,
+    double probability,
+    u32 final_cltv)
+{
+	json_add_u64(js, "probability_ppm", (u64)(probability * 1000000));
+	json_array_start(js, "routes");
+	for (size_t i = 0; i < tal_count(routes); i++) {
+		json_object_start(js, NULL);
+		json_add_u64(js, "probability_ppm",
+			     (u64)(routes[i]->success_prob * 1000000));
+		json_add_amount_msat(js, "amount_msat", amounts[i]);
+		json_add_u32(js, "final_cltv", final_cltv);
+		json_array_start(js, "path");
+		for (size_t j = 0; j < tal_count(routes[i]->hops); j++) {
+			struct short_channel_id_dir scidd;
+			const struct route_hop *r = &routes[i]->hops[j];
+			json_object_start(js, NULL);
+			scidd.scid = r->scid;
+			scidd.dir = r->direction;
+			json_add_short_channel_id_dir(
+			    js, "short_channel_id_dir", scidd);
+			json_add_node_id(js, "next_node_id", &r->node_id);
+			json_add_amount_msat(js, "amount_msat", r->amount);
+			json_add_u32(js, "delay", r->delay);
+			json_object_end(js);
+		}
+		json_array_end(js);
+		json_object_end(js);
+	}
+	json_array_end(js);
+}
 
 static struct command_result *do_getroutes(struct command *cmd,
 					   struct gossmap_localmods *localmods,
@@ -652,43 +527,130 @@ static struct command_result *do_getroutes(struct command *cmd,
 	double probability;
 	struct amount_msat *amounts;
 	struct route **routes;
+	struct flow **flows;
 	struct json_stream *response;
 
-	err = get_routes(cmd, cmd,
-			 info->source, info->dest,
-			 *info->amount, *info->maxfee, *info->finalcltv,
-			 *info->maxdelay, info->layers, localmods, info->local_layer,
-			 have_layer(info->layers, "auto.no_mpp_support"),
-			 &routes, &amounts, info->additional_costs, &probability);
+	/* get me the global state structure */
+	struct askrene *askrene = get_askrene(cmd->plugin);
+
+	/* update the gossmap */
+	if (gossmap_refresh(askrene->gossmap)) {
+		/* FIXME: gossmap_refresh callbacks so we can update in place */
+		tal_free(askrene->capacities);
+		askrene->capacities =
+		    get_capacities(askrene, askrene->plugin, askrene->gossmap);
+	}
+
+	/* build this request structure */
+	struct route_query *rq = tal(cmd, struct route_query);
+	rq->cmd = cmd;
+	rq->plugin = cmd->plugin;
+	rq->gossmap = askrene->gossmap;
+	rq->reserved = askrene->reserved;
+	rq->layers = tal_arr(rq, const struct layer *, 0);
+	rq->capacities = tal_dup_talarr(rq, fp16_t, askrene->capacities);
+	/* FIXME: we still need to do something useful with these */
+	rq->additional_costs = info->additional_costs;
+
+	/* apply selected layers to the localmods */
+	apply_layers(askrene, rq, localmods, info);
+
+	/* Clear scids with reservations, too, so we don't have to look up
+	 * all the time! */
+	reserves_clear_capacities(askrene->reserved, askrene->gossmap,
+				  rq->capacities);
+
+	/* we temporarily apply localmods */
+	gossmap_apply_localmods(askrene->gossmap, localmods);
+
+	/* localmods can add channels, so we need to allocate biases array
+	 * *afterwards* */
+	rq->biases =
+	    tal_arrz(rq, s8, gossmap_max_chan_idx(askrene->gossmap) * 2);
+
+	/* Note any channel biases */
+	for (size_t i = 0; i < tal_count(rq->layers); i++)
+		layer_apply_biases(rq->layers[i], askrene->gossmap, rq->biases);
+
+	/* look up the source node */
+	const struct gossmap_node *srcnode =
+	    gossmap_find_node(askrene->gossmap, info->source);
+	if (!srcnode) {
+		err = rq_log(tmpctx, rq, LOG_INFORM, "Unknown source node %s",
+			     fmt_node_id(tmpctx, info->source));
+		goto fail;
+	}
+
+	/* look up the destination node */
+	const struct gossmap_node *dstnode =
+	    gossmap_find_node(askrene->gossmap, info->dest);
+	if (!dstnode) {
+		err = rq_log(tmpctx, rq, LOG_INFORM,
+			     "Unknown destination node %s",
+			     fmt_node_id(tmpctx, info->dest));
+		goto fail;
+	}
+
+	/* auto.no_mpp_support layer overrides any choice of algorithm. */
+	if (have_layer(info->layers, "auto.no_mpp_support") &&
+	    *info->dev_algo != ALGO_SINGLE_PATH) {
+		*info->dev_algo = ALGO_SINGLE_PATH;
+		rq_log(tmpctx, rq, LOG_DBG,
+		       "Layer no_mpp_support is active we switch to a "
+		       "single path algorithm.");
+	}
+
+	/* Compute the routes, dispatching on the algorithm selected through
+	 * dev_algorithm: single path, Goldberg-Tarjan or the default. */
+	struct timemono time_start = time_mono();
+	if (*info->dev_algo == ALGO_SINGLE_PATH){
+		err = single_path_routes(
+		    rq, rq, srcnode, dstnode, *info->amount,
+		    *info->maxfee, *info->finalcltv, *info->maxdelay, &flows,
+		    &probability);
+	} else if (*info->dev_algo == ALGO_GOLDBERG_TARJAN) {
+		err = goldberg_tarjan_routes(
+		    rq, rq, srcnode, dstnode, *info->amount, *info->maxfee,
+		    *info->finalcltv, *info->maxdelay, &flows, &probability);
+	} else {
+		assert(*info->dev_algo == ALGO_DEFAULT);
+		err = default_routes(rq, rq, srcnode, dstnode, *info->amount,
+				     *info->maxfee, *info->finalcltv,
+				     *info->maxdelay, &flows, &probability);
+	}
+	struct timerel time_delta = timemono_between(time_mono(), time_start);
+
+	/* log the time of computation */
+	rq_log(tmpctx, rq, LOG_DBG, "get_routes %s %" PRIu64 " ms",
+	       err ? "failed after" : "completed in",
+	       time_to_msec(time_delta));
 	if (err)
-		return command_fail(cmd, PAY_ROUTE_NOT_FOUND, "%s", err);
+		goto fail;
+
+	/* otherwise we continue */
+	assert(tal_count(flows) > 0);
+	rq_log(tmpctx, rq, LOG_DBG, "Final answer has %zu flows",
+	       tal_count(flows));
 
+	/* convert flows to routes */
+	convert_flows_to_routes(rq, rq, &routes, &amounts, *info->finalcltv,
+				flows);
+	assert(tal_count(routes) == tal_count(flows));
+	assert(tal_count(amounts) == tal_count(flows));
+
+	/* At last we remove the localmods from the gossmap. */
+	gossmap_remove_localmods(askrene->gossmap, localmods);
+
+	/* output the results */
 	response = jsonrpc_stream_success(cmd);
-	json_add_u64(response, "probability_ppm", (u64)(probability * 1000000));
-	json_array_start(response, "routes");
-	for (size_t i = 0; i < tal_count(routes); i++) {
-		json_object_start(response, NULL);
-		json_add_u64(response, "probability_ppm", (u64)(routes[i]->success_prob * 1000000));
-		json_add_amount_msat(response, "amount_msat", amounts[i]);
-		json_add_u32(response, "final_cltv", *info->finalcltv);
-		json_array_start(response, "path");
-		for (size_t j = 0; j < tal_count(routes[i]->hops); j++) {
-			struct short_channel_id_dir scidd;
-			const struct route_hop *r = &routes[i]->hops[j];
-			json_object_start(response, NULL);
-			scidd.scid = r->scid;
-			scidd.dir = r->direction;
-			json_add_short_channel_id_dir(response, "short_channel_id_dir", scidd);
-			json_add_node_id(response, "next_node_id", &r->node_id);
-			json_add_amount_msat(response, "amount_msat", r->amount);
-			json_add_u32(response, "delay", r->delay);
-			json_object_end(response);
-		}
-		json_array_end(response);
-		json_object_end(response);
-	}
-	json_array_end(response);
+	json_add_getroutes(response, routes, amounts, probability,
+			   *info->finalcltv);
 	return command_finished(cmd, response);
+
+fail:
+	assert(err);
+	gossmap_remove_localmods(askrene->gossmap, localmods);
+	return command_fail(cmd, PAY_ROUTE_NOT_FOUND, "%s", err);
 }
 
 static void add_localchan(struct gossmap_localmods *mods,
@@ -810,6 +772,8 @@ static struct command_result *json_getroutes(struct command *cmd,
 			 p_req("final_cltv", param_u32, &info->finalcltv),
 			 p_opt_def("maxdelay", param_u32, &info->maxdelay,
 				   maxdelay_allowed),
+			 p_opt_dev("dev_algorithm", param_algorithm,
+				   &info->dev_algo, ALGO_DEFAULT),
 			 NULL))
 		return command_param_failed();
 	plugin_log(cmd->plugin, LOG_TRACE, "%s called: %.*s", __func__,
diff --git a/plugins/askrene/mcf.c b/plugins/askrene/mcf.c
index 6339510a5ce9..29b7dcc23615 100644
--- a/plugins/askrene/mcf.c
+++ b/plugins/askrene/mcf.c
@@ -2,6 +2,7 @@
 #include <assert.h>
 #include <ccan/asort/asort.h>
 #include <ccan/bitmap/bitmap.h>
+#include <ccan/err/err.h>
 #include <ccan/list/list.h>
 #include <ccan/tal/str/str.h>
 #include <ccan/tal/tal.h>
@@ -11,9 +12,11 @@
 #include <plugins/askrene/algorithm.h>
 #include <plugins/askrene/askrene.h>
 #include <plugins/askrene/dijkstra.h>
+#include <plugins/askrene/explain_failure.h>
 #include <plugins/askrene/flow.h>
 #include <plugins/askrene/graph.h>
 #include <plugins/askrene/mcf.h>
+#include <plugins/askrene/refine.h>
 #include <plugins/libplugin.h>
 #include <stdint.h>
 
@@ -157,6 +160,10 @@
  *
  * */
 
+#define PANIC(message)                                                         \
+	errx(1, "Panic in function %s line %d: %s", __func__, __LINE__,        \
+	     (message)) /* no trailing ';': PANIC(x); must be one statement */
+
 #define PARTS_BITS 2
 #define CHANNEL_PARTS (1 << PARTS_BITS)
 
@@ -297,48 +304,17 @@ struct pay_parameters {
 	double base_fee_penalty;
 };
 
-/* Representation of the linear MCF network.
- * This contains the topology of the extended network (after linearization and
- * addition of arc duality).
- * This contains also the arc probability and linear fee cost, as well as
- * capacity; these quantities remain constant during MCF execution. */
-struct linear_network
-{
-	struct graph *graph;
-
-	// probability and fee cost associated to an arc
-	double *arc_prob_cost;
-	s64 *arc_fee_cost;
-	s64 *capacity;
-};
-
-/* This is the structure that keeps track of the network properties while we
- * seek for a solution. */
-struct residual_network {
-	/* residual capacity on arcs */
-	s64 *cap;
-
-	/* some combination of prob. cost and fee cost on arcs */
-	s64 *cost;
-
-	/* potential function on nodes */
-	s64 *potential;
-
-	/* auxiliary data, the excess of flow on nodes */
-	s64 *excess;
-};
-
 /* Helper function.
  * Given an arc of the network (not residual) give me the flow. */
 static s64 get_arc_flow(
-		const struct residual_network *network,
+		const s64 *arc_residual_capacity,
 		const struct graph *graph,
 		const struct arc arc)
 {
 	assert(!arc_is_dual(graph, arc));
 	struct arc dual = arc_dual(graph, arc);
-	assert(dual.idx < tal_count(network->cap));
-	return network->cap[dual.idx];
+	assert(dual.idx < tal_count(arc_residual_capacity));
+	return arc_residual_capacity[dual.idx];
 }
 
 /* Set *capacity to value, up to *cap_on_capacity.  Reduce cap_on_capacity */
@@ -348,6 +324,28 @@ static void set_capacity(s64 *capacity, u64 value, u64 *cap_on_capacity)
 	*cap_on_capacity -= *capacity;
 }
 
+/* FIXME: unit test this */
+/* The probability of forwarding a payment amount given high and low liquidity
+ * bounds.
+ * @low: the liquidity is known to be greater than or equal to "low"
+ * @high: the liquidity is known to be less than "high"
+ * @amount: how much is required to forward */
+static double pickhardt_richter_probability(struct amount_msat low,
+					    struct amount_msat high,
+					    struct amount_msat amount)
+{
+	struct amount_msat all_states, good_states;
+	if (amount_msat_greater_eq(amount, high))
+		return 0.0;
+	if (!amount_msat_sub(&amount, amount, low))
+		return 1.0;
+	if (!amount_msat_sub(&all_states, high, low))
+		PANIC("we expect high > low");
+	if (!amount_msat_sub(&good_states, all_states, amount))
+		PANIC("we expect high > amount");
+	return amount_msat_ratio(good_states, all_states);
+}
+
 // TODO(eduardo): unit test this
 /* Split a directed channel into parts with linear cost function. */
 static void linearize_channel(const struct pay_parameters *params,
@@ -367,9 +365,13 @@ static void linearize_channel(const struct pay_parameters *params,
 	    b = 1 + amount_msat_ratio_floor(maxcap, params->accuracy);
 
 	/* An extra bound on capacity, here we use it to reduce the flow such
-	 * that it does not exceed htlcmax. */
+	 * that it does not exceed htlcmax.
+	 * Also, there is no need to keep track of more capacity than the payment
+	 * amount; this can help us prune some arcs. */
 	u64 cap_on_capacity =
-	    amount_msat_ratio_floor(gossmap_chan_htlc_max(c, dir), params->accuracy);
+	    MIN(amount_msat_ratio_floor(gossmap_chan_htlc_max(c, dir),
+					params->accuracy),
+		amount_msat_ratio_ceil(params->amount, params->accuracy));
 
 	set_capacity(&capacity[0], a, &cap_on_capacity);
 	cost[0]=0;
@@ -383,49 +385,6 @@ static void linearize_channel(const struct pay_parameters *params,
 	}
 }
 
-static struct residual_network *
-alloc_residual_network(const tal_t *ctx, const size_t max_num_nodes,
-		      const size_t max_num_arcs)
-{
-	struct residual_network *residual_network =
-	    tal(ctx, struct residual_network);
-
-	residual_network->cap = tal_arrz(residual_network, s64, max_num_arcs);
-	residual_network->cost = tal_arrz(residual_network, s64, max_num_arcs);
-	residual_network->potential =
-	    tal_arrz(residual_network, s64, max_num_nodes);
-	residual_network->excess =
-	    tal_arrz(residual_network, s64, max_num_nodes);
-
-	return residual_network;
-}
-
-static void init_residual_network(
-		const struct linear_network * linear_network,
-		struct residual_network* residual_network)
-{
-	const struct graph *graph = linear_network->graph;
-	const size_t max_num_arcs = graph_max_num_arcs(graph);
-	const size_t max_num_nodes = graph_max_num_nodes(graph);
-
-	for (struct arc arc = {.idx = 0}; arc.idx < max_num_arcs; ++arc.idx) {
-		if (arc_is_dual(graph, arc) || !arc_enabled(graph, arc))
-			continue;
-
-		struct arc dual = arc_dual(graph, arc);
-		residual_network->cap[arc.idx] =
-		    linear_network->capacity[arc.idx];
-		residual_network->cap[dual.idx] = 0;
-
-		residual_network->cost[arc.idx] =
-		    residual_network->cost[dual.idx] = 0;
-	}
-	for (u32 i = 0; i < max_num_nodes; ++i) {
-		residual_network->potential[i] = 0;
-		residual_network->excess[i] = 0;
-	}
-}
-
 static int cmp_u64(const u64 *a, const u64 *b, void *unused)
 {
 	if (*a < *b)
@@ -445,9 +404,10 @@ static int cmp_double(const double *a, const double *b, void *unused)
 }
 
 static double get_median_ratio(const tal_t *working_ctx,
-			       const struct linear_network* linear_network)
+			       const struct graph *graph,
+			       const double *arc_prob_cost,
+			       const s64 *arc_fee_cost)
 {
-	const struct graph *graph = linear_network->graph;
 	const size_t max_num_arcs = graph_max_num_arcs(graph);
 	u64 *u64_arr = tal_arr(working_ctx, u64, max_num_arcs);
 	double *double_arr = tal_arr(working_ctx, double, max_num_arcs);
@@ -458,8 +418,8 @@ static double get_median_ratio(const tal_t *working_ctx,
 		if (arc_is_dual(graph, arc) || !arc_enabled(graph, arc))
 			continue;
 		assert(n < max_num_arcs/2);
-		u64_arr[n] = linear_network->arc_fee_cost[arc.idx];
-		double_arr[n] = linear_network->arc_prob_cost[arc.idx];
+		u64_arr[n] = arc_fee_cost[arc.idx];
+		double_arr[n] = arc_prob_cost[arc.idx];
 		n++;
 	}
 	asort(u64_arr, n, cmp_u64, NULL);
@@ -473,18 +433,17 @@ static double get_median_ratio(const tal_t *working_ctx,
 	return u64_arr[n/2] / double_arr[n/2];
 }
 
-static void combine_cost_function(
-		const tal_t *working_ctx,
-		const struct linear_network* linear_network,
-		struct residual_network *residual_network,
-		const s8 *biases,
-		s64 mu)
+static void combine_cost_function(const tal_t *working_ctx,
+				  const struct graph *graph,
+				  const double *arc_prob_cost,
+				  const s64 *arc_fee_cost, const s8 *biases,
+				  s64 mu, s64 *arc_cost)
 {
 	/* probabilty and fee costs are not directly comparable!
 	 * Scale by ratio of (positive) medians. */
-	const double k = get_median_ratio(working_ctx, linear_network);
+	const double k =
+	    get_median_ratio(working_ctx, graph, arc_prob_cost, arc_fee_cost);
 	const double ln_30 = log(30);
-	const struct graph *graph = linear_network->graph;
 	const size_t max_num_arcs = graph_max_num_arcs(graph);
 
 	for(struct arc arc = {.idx=0};arc.idx < max_num_arcs; ++arc.idx)
@@ -492,8 +451,8 @@ static void combine_cost_function(
 		if (arc_is_dual(graph, arc) || !arc_enabled(graph, arc))
 			continue;
 
-		const double pcost = linear_network->arc_prob_cost[arc.idx];
-		const s64 fcost = linear_network->arc_fee_cost[arc.idx];
+		const double pcost = arc_prob_cost[arc.idx];
+		const s64 fcost = arc_fee_cost[arc.idx];
 		double combined;
 		u32 chanidx;
 		int chandir;
@@ -513,13 +472,13 @@ static void combine_cost_function(
 			 *    e^(-bias / (100/ln(30)))
 			 */
 			double bias_factor = exp(-bias / (100 / ln_30));
-			residual_network->cost[arc.idx] = combined * bias_factor;
+			arc_cost[arc.idx] = combined * bias_factor;
 		} else {
-			residual_network->cost[arc.idx] = combined;
+			arc_cost[arc.idx] = combined;
 		}
 		/* and the respective dual */
 		struct arc dual = arc_dual(graph, arc);
-		residual_network->cost[dual.idx] = -combined;
+		arc_cost[dual.idx] = -combined;
 	}
 }
 
@@ -576,31 +535,26 @@ struct amount_msat linear_flow_cost(const struct flow *flow,
 	return msat_cost;
 }
 
-/* FIXME: Instead of mapping one-to-one the indexes in the gossmap, try to
- * reduce the number of nodes and arcs used by taking only those that are
- * enabled. We might save some cpu if the work with a pruned network. */
-static struct linear_network *
-init_linear_network(const tal_t *ctx, const struct pay_parameters *params)
+static void init_linear_network(const tal_t *ctx,
+				const struct pay_parameters *params,
+				struct graph **graph, double **arc_prob_cost,
+				s64 **arc_fee_cost, s64 **arc_capacity)
 {
-	struct linear_network * linear_network = tal(ctx, struct linear_network);
 	const struct gossmap *gossmap = params->rq->gossmap;
-
 	const size_t max_num_chans = gossmap_max_chan_idx(gossmap);
 	const size_t max_num_arcs = max_num_chans * ARCS_PER_CHANNEL;
 	const size_t max_num_nodes = gossmap_max_node_idx(gossmap);
 
-	linear_network->graph =
-	    graph_new(ctx, max_num_nodes, max_num_arcs, ARC_DUAL_BITOFF);
+	*graph = graph_new(ctx, max_num_nodes, max_num_arcs, ARC_DUAL_BITOFF);
+	*arc_prob_cost = tal_arr(ctx, double, max_num_arcs);
+	for (size_t i = 0; i < max_num_arcs; ++i)
+		(*arc_prob_cost)[i] = DBL_MAX;
 
-	linear_network->arc_prob_cost = tal_arr(linear_network,double,max_num_arcs);
-	for(size_t i=0;i<max_num_arcs;++i)
-		linear_network->arc_prob_cost[i]=DBL_MAX;
+	*arc_fee_cost = tal_arr(ctx, s64, max_num_arcs);
+	for (size_t i = 0; i < max_num_arcs; ++i)
+		(*arc_fee_cost)[i] = INT64_MAX;
 
-	linear_network->arc_fee_cost = tal_arr(linear_network,s64,max_num_arcs);
-	for(size_t i=0;i<max_num_arcs;++i)
-		linear_network->arc_fee_cost[i]=INFINITE;
-
-	linear_network->capacity = tal_arrz(linear_network,s64,max_num_arcs);
+	*arc_capacity = tal_arrz(ctx, s64, max_num_arcs);
 
 	for(struct gossmap_node *node = gossmap_first_node(gossmap);
 	    node;
@@ -653,30 +607,29 @@ init_linear_network(const tal_t *ctx, const struct pay_parameters *params)
 			// when the `i` hits the `next` node.
 			for(size_t k=0;k<CHANNEL_PARTS;++k)
 			{
-				/* FIXME: Can we prune arcs with 0 capacity?
-				 * if(capacity[k]==0)continue; */
+				/* prune arcs with 0 capacity */
+				if (capacity[k] == 0)
+					continue;
 
 				struct arc arc = arc_from_parts(chan_id, half, k, false);
 
-				graph_add_arc(linear_network->graph, arc,
+				graph_add_arc(*graph, arc,
 					      node_obj(node_id),
 					      node_obj(next_id));
 
-				linear_network->capacity[arc.idx] = capacity[k];
-				linear_network->arc_prob_cost[arc.idx] = prob_cost[k];
-				linear_network->arc_fee_cost[arc.idx] = fee_cost;
+				(*arc_capacity)[arc.idx] = capacity[k];
+				(*arc_prob_cost)[arc.idx] = prob_cost[k];
+				(*arc_fee_cost)[arc.idx] = fee_cost;
 
 				// + the respective dual
-				struct arc dual = arc_dual(linear_network->graph, arc);
+				struct arc dual = arc_dual(*graph, arc);
 
-				linear_network->capacity[dual.idx] = 0;
-				linear_network->arc_prob_cost[dual.idx] = -prob_cost[k];
-				linear_network->arc_fee_cost[dual.idx] = -fee_cost;
+				(*arc_capacity)[dual.idx] = 0;
+				(*arc_prob_cost)[dual.idx] = -prob_cost[k];
+				(*arc_fee_cost)[dual.idx] = -fee_cost;
 			}
 		}
 	}
-
-	return linear_network;
 }
 
 // flow on directed channels
@@ -871,8 +824,8 @@ static struct flow **
 get_flow_paths(const tal_t *ctx,
 	       const tal_t *working_ctx,
 	       const struct pay_parameters *params,
-	       const struct linear_network *linear_network,
-	       const struct residual_network *residual_network)
+	       const struct graph *graph,
+	       const s64 *arc_residual_capacity)
 {
 	struct flow **flows = tal_arr(ctx,struct flow*,0);
 
@@ -895,7 +848,6 @@ get_flow_paths(const tal_t *ctx,
 	// Convert the arc based residual network flow into a flow in the
 	// directed channel network.
 	// Compute balance on the nodes.
-	const struct graph *graph = linear_network->graph;
 	for (struct node n = {.idx = 0}; n.idx < max_num_nodes; n.idx++) {
 		for(struct arc arc = node_adjacency_begin(graph,n);
 		        !node_adjacency_end(arc);
@@ -904,7 +856,7 @@ get_flow_paths(const tal_t *ctx,
 			if(arc_is_dual(graph, arc))
 				continue;
 			struct node m = arc_head(graph,arc);
-			s64 flow = get_arc_flow(residual_network,
+			s64 flow = get_arc_flow(arc_residual_capacity,
 						graph, arc);
 			u32 chanidx;
 			int chandir;
@@ -947,6 +899,46 @@ get_flow_paths(const tal_t *ctx,
 	return flows;
 }
 
+/* Given a single path build a flow set. */
+static struct flow **
+get_flow_singlepath(const tal_t *ctx, const struct pay_parameters *params,
+		    const struct graph *graph, const struct gossmap *gossmap,
+		    const struct node source, const struct node destination,
+		    const u64 pay_amount, const struct arc *prev)
+{
+	struct flow **flows = tal_arr(ctx, struct flow *, 0);
+
+	size_t length = 0;
+
+	for (u32 cur_idx = destination.idx; cur_idx != source.idx;) {
+		assert(cur_idx != INVALID_INDEX);
+		length++;
+		struct arc arc = prev[cur_idx];
+		struct node next = arc_tail(graph, arc);
+		cur_idx = next.idx;
+	}
+	struct flow *f = tal(ctx, struct flow);
+	f->path = tal_arr(f, const struct gossmap_chan *, length);
+	f->dirs = tal_arr(f, int, length);
+
+	for (u32 cur_idx = destination.idx; cur_idx != source.idx;) {
+		int chandir;
+		u32 chanidx;
+		struct arc arc = prev[cur_idx];
+		arc_to_parts(arc, &chanidx, &chandir, NULL, NULL);
+
+		length--;
+		f->path[length] = gossmap_chan_byidx(gossmap, chanidx);
+		f->dirs[length] = chandir;
+
+		struct node next = arc_tail(graph, arc);
+		cur_idx = next.idx;
+	}
+	f->delivers = params->amount;
+	tal_arr_expand(&flows, f);
+	return flows;
+}
+
 // TODO(eduardo): choose some default values for the minflow parameters
 /* eduardo: I think it should be clear that this module deals with linear
  * flows, ie. base fees are not considered. Hence a flow along a path is
@@ -965,8 +957,7 @@ struct flow **minflow(const tal_t *ctx,
 		      const struct gossmap_node *target,
 		      struct amount_msat amount,
 		      u32 mu,
-		      double delay_feefactor,
-		      bool single_part)
+		      double delay_feefactor)
 {
 	struct flow **flow_paths;
 	/* We allocate everything off this, and free it at the end,
@@ -978,10 +969,14 @@ struct flow **minflow(const tal_t *ctx,
 	params->source = source;
 	params->target = target;
 	params->amount = amount;
-	params->accuracy = AMOUNT_MSAT(1000);
-	/* FIXME: params->accuracy = amount_msat_max(amount_msat_div(amount,
-	 * 1000), AMOUNT_MSAT(1));
+	/* -> At most 1M units of flow are allowed, which reduces the
+	 * computational burden for algorithms that depend on it, e.g. "capacity
+	 * scaling" and "successive shortest path".
+	 * -> Using Ceil operation instead of Floor so that
+	 *      accuracy x 1M >= amount
 	 * */
+	params->accuracy = amount_msat_max(
+	    AMOUNT_MSAT(1), amount_msat_div_ceil(amount, 1000000));
 
 	// template the channel partition into linear arcs
 	params->cap_fraction[0]=0;
@@ -998,17 +993,25 @@ struct flow **minflow(const tal_t *ctx,
 	params->base_fee_penalty = base_fee_penalty_estimate(amount);
 
 	// build the uncertainty network with linearization and residual arcs
-	struct linear_network *linear_network= init_linear_network(working_ctx, params);
-	const struct graph *graph = linear_network->graph;
+	struct graph *graph;
+	double *arc_prob_cost;
+	s64 *arc_fee_cost;
+	s64 *arc_capacity;
+	init_linear_network(working_ctx, params, &graph, &arc_prob_cost,
+			    &arc_fee_cost, &arc_capacity);
+
 	const size_t max_num_arcs = graph_max_num_arcs(graph);
 	const size_t max_num_nodes = graph_max_num_nodes(graph);
-	struct residual_network *residual_network =
-	    alloc_residual_network(working_ctx, max_num_nodes, max_num_arcs);
+	s64 *arc_cost;
+	s64 *node_potential;
+	s64 *node_excess;
+	arc_cost = tal_arrz(working_ctx, s64, max_num_arcs);
+	node_potential = tal_arrz(working_ctx, s64, max_num_nodes);
+	node_excess = tal_arrz(working_ctx, s64, max_num_nodes);
 
 	const struct node dst = {.idx = gossmap_node_idx(rq->gossmap, target)};
 	const struct node src = {.idx = gossmap_node_idx(rq->gossmap, source)};
 
-	init_residual_network(linear_network,residual_network);
 
 	/* Since we have constraint accuracy, ask to find a payment solution
 	 * that can pay a bit more than the actual value rathen than undershoot it.
@@ -1016,22 +1019,22 @@ struct flow **minflow(const tal_t *ctx,
 	const u64 pay_amount =
 	    amount_msat_ratio_ceil(params->amount, params->accuracy);
 
-	if (!simple_feasibleflow(working_ctx, linear_network->graph, src, dst,
-				 residual_network->cap, pay_amount)) {
+	if (!simple_feasibleflow(working_ctx, graph, src, dst,
+				 arc_capacity, pay_amount)) {
 		rq_log(tmpctx, rq, LOG_INFORM,
 		       "%s failed: unable to find a feasible flow.", __func__);
 		goto fail;
 	}
-	combine_cost_function(working_ctx, linear_network, residual_network,
-			      rq->biases, mu);
+	combine_cost_function(working_ctx, graph, arc_prob_cost, arc_fee_cost,
+			      rq->biases, mu, arc_cost);
 
 	/* We solve a linear MCF problem. */
 	if (!mcf_refinement(working_ctx,
-			    linear_network->graph,
-			    residual_network->excess,
-			    residual_network->cap,
-			    residual_network->cost,
-			    residual_network->potential)) {
+			    graph,
+			    node_excess,
+			    arc_capacity,
+			    arc_cost,
+			    node_potential)) {
 		rq_log(tmpctx, rq, LOG_BROKEN,
 		       "%s: MCF optimization step failed", __func__);
 		goto fail;
@@ -1041,7 +1044,7 @@ struct flow **minflow(const tal_t *ctx,
 	 * Actual amounts considering fees are computed for every
 	 * channel in the routes. */
 	flow_paths = get_flow_paths(ctx, working_ctx, params,
-				    linear_network, residual_network);
+				    graph, arc_capacity);
 	if(!flow_paths){
 		rq_log(tmpctx, rq, LOG_BROKEN,
 		       "%s: failed to extract flow paths from the MCF solution",
@@ -1049,34 +1052,486 @@ struct flow **minflow(const tal_t *ctx,
 		goto fail;
 	}
 	tal_free(working_ctx);
+	return flow_paths;
 
-	/* This is dumb, but if you don't support MPP you don't deserve any
-	 * better.  Pile it into the largest part if not already. */
-	if (single_part) {
-		struct flow *best = flow_paths[0];
-		for (size_t i = 1; i < tal_count(flow_paths); i++) {
-			if (amount_msat_greater(flow_paths[i]->delivers, best->delivers))
-				best = flow_paths[i];
-		}
-		for (size_t i = 0; i < tal_count(flow_paths); i++) {
-			if (flow_paths[i] == best)
+fail:
+	tal_free(working_ctx);
+	return NULL;
+}
+
+/* Sum of linear_flow_cost() over every flow in @flows; aborts on overflow. */
+static struct amount_msat linear_flows_cost(struct flow **flows,
+					    struct amount_msat total_amount,
+					    double delay_feefactor)
+{
+	struct amount_msat sum = AMOUNT_MSAT(0);
+
+	for (size_t k = 0; k < tal_count(flows); k++) {
+		struct amount_msat part =
+		    linear_flow_cost(flows[k], total_amount, delay_feefactor);
+		if (!amount_msat_accumulate(&sum, part))
+			abort();
+	}
+	return sum;
+}
+
+/* Build (off @ctx) the graph and per-arc arrays for the single-path solver. */
+static void init_linear_network_single_path(
+    const tal_t *ctx, const struct pay_parameters *params, struct graph **graph,
+    double **arc_prob_cost, s64 **arc_fee_cost, s64 **arc_capacity)
+{
+	const size_t max_num_chans = gossmap_max_chan_idx(params->rq->gossmap);
+	const size_t max_num_arcs = max_num_chans * ARCS_PER_CHANNEL;
+	const size_t max_num_nodes = gossmap_max_node_idx(params->rq->gossmap);
+
+	*graph = graph_new(ctx, max_num_nodes, max_num_arcs, ARC_DUAL_BITOFF);
+	*arc_prob_cost = tal_arr(ctx, double, max_num_arcs);
+	for (size_t i = 0; i < max_num_arcs; ++i)
+		(*arc_prob_cost)[i] = DBL_MAX;
+
+	*arc_fee_cost = tal_arr(ctx, s64, max_num_arcs);
+	for (size_t i = 0; i < max_num_arcs; ++i)
+		(*arc_fee_cost)[i] = INT64_MAX;
+	*arc_capacity = tal_arrz(ctx, s64, max_num_arcs);
+
+	const struct gossmap *gossmap = params->rq->gossmap;
+
+	for (struct gossmap_node *node = gossmap_first_node(gossmap); node;
+	     node = gossmap_next_node(gossmap, node)) {
+		const u32 node_id = gossmap_node_idx(gossmap, node);
+
+		for (size_t j = 0; j < node->num_chans; ++j) {
+			int half;
+			const struct gossmap_chan *c =
+			    gossmap_nth_chan(gossmap, node, j, &half);
+                        struct amount_msat mincap, maxcap;
+
+			if (!gossmap_chan_set(c, half) ||
+			    !c->half[half].enabled)
 				continue;
-			if (!amount_msat_accumulate(&best->delivers,
-						    flow_paths[i]->delivers)) {
-				rq_log(tmpctx, rq, LOG_BROKEN,
-				       "%s: failed to extract accumulate flow paths %s+%s",
-				       __func__,
-				       fmt_amount_msat(tmpctx, best->delivers),
-				       fmt_amount_msat(tmpctx, flow_paths[i]->delivers));
-				goto fail;
-			}
+
+			/* Skip channels whose htlc_min/htlc_max exclude the
+			 * total amount. */
+			if (amount_msat_less(params->amount,
+					     gossmap_chan_htlc_min(c, half)) ||
+			    amount_msat_greater(params->amount,
+						gossmap_chan_htlc_max(c, half)))
+				continue;
+
+			get_constraints(params->rq, c, half, &mincap, &maxcap);
+			/* Assume if min > max, min is wrong */
+			if (amount_msat_greater(mincap, maxcap))
+				mincap = maxcap;
+			/* It is preferable to work on 1msat past the known
+			 * bound. */
+			if (!amount_msat_accumulate(&maxcap, amount_msat(1)))
+				PANIC("maxcap + 1msat overflows");
+
+			/* Amount at or above the bumped upper bound: skip the
+			 * arc, its probability cost would be infinite. */
+			if (amount_msat_greater_eq(params->amount, maxcap))
+				continue;
+
+			const u32 chan_id = gossmap_chan_idx(gossmap, c);
+
+			const struct gossmap_node *next =
+			    gossmap_nth_node(gossmap, c, !half);
+
+			const u32 next_id = gossmap_node_idx(gossmap, next);
+
+			/* channel to self? */
+			if (node_id == next_id)
+				continue;
+
+			struct arc arc =
+			    arc_from_parts(chan_id, half, 0, false);
+
+			graph_add_arc(*graph, arc, node_obj(node_id),
+				      node_obj(next_id));
+
+			(*arc_capacity)[arc.idx] = 1;
+			(*arc_prob_cost)[arc.idx] =
+			    (-1.0) * log(pickhardt_richter_probability(
+					 mincap, maxcap, params->amount));
+
+			struct amount_msat fee;
+			if (!amount_msat_fee(&fee, params->amount,
+					     c->half[half].base_fee,
+					     c->half[half].proportional_fee))
+				PANIC("fee overflow");
+			u32 fee_msat;
+			if (!amount_msat_to_u32(fee, &fee_msat))
+				PANIC("fee does not fit in u32");
+			(*arc_fee_cost)[arc.idx] =
+			    fee_msat +
+			    params->delay_feefactor * c->half[half].delay;
 		}
-		flow_paths[0] = best;
-		tal_resize(&flow_paths, 1);
 	}
+}
+
+/* Like minflow but single-path: returns one flow (off @ctx) or NULL. */
+struct flow **single_path_flow(const tal_t *ctx, const struct route_query *rq,
+			       const struct gossmap_node *source,
+			       const struct gossmap_node *target,
+			       struct amount_msat amount, u32 mu,
+			       double delay_feefactor)
+{
+	struct flow **flow_paths;
+	/* We allocate everything off this, and free it at the end,
+	 * as we can be called multiple times without cleaning tmpctx! */
+	tal_t *working_ctx = tal(NULL, char);
+	struct pay_parameters *params = tal(working_ctx, struct pay_parameters);
+
+	params->rq = rq;
+	params->source = source;
+	params->target = target;
+	params->amount = amount;
+	/* For the single-path solver the accuracy does not affect
+	 * performance, so the whole amount is used as the flow unit. */
+	params->accuracy = amount;
+	params->delay_feefactor = delay_feefactor;
+	params->base_fee_penalty = base_fee_penalty_estimate(amount);
+
+	struct graph *graph;
+	double *arc_prob_cost;
+	s64 *arc_fee_cost;
+	s64 *arc_capacity;
+
+	init_linear_network_single_path(working_ctx, params, &graph,
+					&arc_prob_cost, &arc_fee_cost,
+					&arc_capacity);
+
+	const struct node dst = {.idx = gossmap_node_idx(rq->gossmap, target)};
+	const struct node src = {.idx = gossmap_node_idx(rq->gossmap, source)};
+
+	const size_t max_num_nodes = graph_max_num_nodes(graph);
+	const size_t max_num_arcs = graph_max_num_arcs(graph);
+
+	s64 *potential = tal_arrz(working_ctx, s64, max_num_nodes);
+	s64 *distance = tal_arrz(working_ctx, s64, max_num_nodes);
+	s64 *arc_cost = tal_arrz(working_ctx, s64, max_num_arcs);
+	struct arc *prev = tal_arrz(working_ctx, struct arc, max_num_nodes);
+
+	combine_cost_function(working_ctx, graph, arc_prob_cost, arc_fee_cost,
+			      rq->biases, mu, arc_cost);
+
+	/* We solve a linear cost flow problem. */
+	if (!dijkstra_path(working_ctx, graph, src, dst,
+			   /* prune = */ true, arc_capacity,
+			   /*threshold = */ 1, arc_cost, potential, prev,
+			   distance)) {
+		/* Not finding a suitable route does not mean the plugin is
+		 * broken, that's why we LOG_INFORM. */
+		rq_log(tmpctx, rq, LOG_INFORM,
+		       "%s: could not find a feasible single path", __func__);
+		goto fail;
+	}
+	const u64 pay_amount =
+	    amount_msat_ratio_ceil(params->amount, params->accuracy);
+
+	/* We dissect the flow into payment routes.  Actual amounts considering
+	 * fees are computed for every channel in the routes. */
+	flow_paths = get_flow_singlepath(ctx, params, graph, rq->gossmap,
+					 src, dst, pay_amount, prev);
+	if (!flow_paths) {
+		rq_log(tmpctx, rq, LOG_BROKEN,
+		       "%s: failed to extract flow paths from the single-path "
+		       "solution",
+		       __func__);
+		goto fail;
+	}
+	if (tal_count(flow_paths) != 1) {
+		rq_log(
+		    tmpctx, rq, LOG_BROKEN,
+		    "%s: single-path solution returned a multi route solution",
+		    __func__);
+		/* Don't leave the rejected result hanging off the caller's ctx. */
+		tal_free(flow_paths);
+		goto fail;
+	}
+	tal_free(working_ctx);
 	return flow_paths;
 
 fail:
 	tal_free(working_ctx);
 	return NULL;
 }
+
+/* Same interface as minflow, but the MCF step uses goldberg_tarjan_mcf. */
+static struct flow **goldberg_tarjan_mincostflow(
+    const tal_t *ctx, const struct route_query *rq,
+    const struct gossmap_node *source, const struct gossmap_node *target,
+    struct amount_msat amount, u32 mu, double delay_feefactor)
+{
+	struct flow **flow_paths;
+	/* We allocate everything off this, and free it at the end,
+	 * as we can be called multiple times without cleaning tmpctx! */
+	tal_t *working_ctx = tal(NULL, char);
+	struct pay_parameters *params = tal(working_ctx, struct pay_parameters);
+
+	params->rq = rq;
+	params->source = source;
+	params->target = target;
+	params->amount = amount;
+	/* -> At most 1M units of flow are allowed, that reduces the
+	 * computational burden for algorithms that depend on it, eg. "capacity
+	 * scaling" and "successive shortest path".
+	 * -> Using Ceil operation instead of Floor so that
+	 *      accuracy x 1M >= amount */
+	params->accuracy = amount_msat_max(
+	    AMOUNT_MSAT(1), amount_msat_div_ceil(amount, 1000000));
+
+	// template the channel partition into linear arcs
+	params->cap_fraction[0] = 0;
+	params->cost_fraction[0] = 0;
+	for (size_t i = 1; i < CHANNEL_PARTS; ++i) {
+		params->cap_fraction[i] =
+		    CHANNEL_PIVOTS[i] - CHANNEL_PIVOTS[i - 1];
+		params->cost_fraction[i] =
+		    log((1 - CHANNEL_PIVOTS[i - 1]) / (1 - CHANNEL_PIVOTS[i])) /
+		    params->cap_fraction[i];
+	}
+
+	params->delay_feefactor = delay_feefactor;
+	params->base_fee_penalty = base_fee_penalty_estimate(amount);
+
+	// build the uncertainty network with linearization and residual arcs
+	struct graph *graph;
+	double *arc_prob_cost;
+	s64 *arc_fee_cost;
+	s64 *arc_capacity;
+	/* FIXME: with cost scaling it might be a good idea to put also an upper
+	 * bound on the cost per unit flow on arcs. */
+	init_linear_network(working_ctx, params, &graph, &arc_prob_cost,
+			    &arc_fee_cost, &arc_capacity);
+
+	const size_t max_num_arcs = graph_max_num_arcs(graph);
+	const size_t max_num_nodes = graph_max_num_nodes(graph);
+	s64 *arc_cost = tal_arrz(working_ctx, s64, max_num_arcs);
+	s64 *node_excess = tal_arrz(working_ctx, s64, max_num_nodes);
+
+	const struct node dst = {.idx = gossmap_node_idx(rq->gossmap, target)};
+	const struct node src = {.idx = gossmap_node_idx(rq->gossmap, source)};
+
+	/* Since we have constrained accuracy, ask to find a payment solution
+	 * that can pay a bit more than the actual value rather than undershoot
+	 * it. That's why we use the ceil function here. */
+	const u64 pay_amount =
+	    amount_msat_ratio_ceil(params->amount, params->accuracy);
+
+	/* FIXME: review this combine cost function, with cost scaling we want
+	 * the cost to be limited so it might be a good idea to combine fee and
+	 * probability costs with a max preserving transformation, eg. a
+	 * rotation. */
+	combine_cost_function(working_ctx, graph, arc_prob_cost, arc_fee_cost,
+			      rq->biases, mu, arc_cost);
+	node_excess[src.idx] = (s64)pay_amount;
+	node_excess[dst.idx] = -(s64)pay_amount;
+
+	/* We solve a linear MCF problem. */
+	/* FIXME: return to the caller the value of the optimal cost function */
+	/* FIXME: given the fact that this algorithm is based on successive
+	 * approximations, we might add a parameter here to limit the accuracy
+	 * of the final solution, so that we can exchange accuracy for runtime
+	 * performance. */
+	if (!goldberg_tarjan_mcf(working_ctx, graph, node_excess, arc_capacity,
+				 arc_cost)) {
+		rq_log(tmpctx, rq, LOG_INFORM,
+		       "%s: unable to find a feasible flow.", __func__);
+		goto fail;
+	}
+
+	/* We dissect the solution of the MCF into payment routes.
+	 * Actual amounts considering fees are computed for every
+	 * channel in the routes. */
+	flow_paths =
+	    get_flow_paths(ctx, working_ctx, params, graph, arc_capacity);
+	if (!flow_paths) {
+		rq_log(tmpctx, rq, LOG_BROKEN,
+		       "%s: failed to extract flow paths from the MCF solution",
+		       __func__);
+		goto fail;
+	}
+	tal_free(working_ctx);
+	return flow_paths;
+
+fail:
+	tal_free(working_ctx);
+	return NULL;
+}
+
+/* Runs @solver, retrying with larger delay_feefactor and mu until the flows
+ * respect @maxdelay and @maxfee.  Returns NULL on success, else an error. */
+static const char *
+linear_routes(const tal_t *ctx, struct route_query *rq,
+	      const struct gossmap_node *srcnode,
+	      const struct gossmap_node *dstnode, struct amount_msat amount,
+	      struct amount_msat maxfee, u32 finalcltv, u32 maxdelay,
+	      struct flow ***flows, double *probability,
+	      struct flow **(*solver)(const tal_t *, const struct route_query *,
+				      const struct gossmap_node *,
+				      const struct gossmap_node *,
+				      struct amount_msat, u32, double))
+{
+	*flows = NULL;
+	const char *ret;
+	double delay_feefactor = 1.0 / 1000000;
+
+	/* First up, don't care about fees (well, just enough to tiebreak!) */
+	u32 mu = 1;
+	*flows = solver(ctx, rq, srcnode, dstnode, amount, mu, delay_feefactor);
+	if (!*flows) {
+		ret = explain_failure(ctx, rq, srcnode, dstnode, amount);
+		goto fail;
+	}
+
+	/* Too much delay? */
+	while (finalcltv + flows_worst_delay(*flows) > maxdelay) {
+		delay_feefactor *= 2;
+		rq_log(tmpctx, rq, LOG_UNUSUAL,
+		       "The worst flow delay is %" PRIu64
+		       " (> %i), retrying with delay_feefactor %f...",
+		       flows_worst_delay(*flows), maxdelay - finalcltv,
+		       delay_feefactor);
+		tal_free(*flows);
+		*flows = solver(ctx, rq, srcnode, dstnode, amount, mu,
+				delay_feefactor);
+		if (!*flows || delay_feefactor > 10) {
+			ret = rq_log(
+			    ctx, rq, LOG_UNUSUAL,
+			    "Could not find route without excessive delays");
+			goto fail;
+		}
+	}
+
+	/* Too expensive? */
+too_expensive:
+	while (amount_msat_greater(flowset_fee(rq->plugin, *flows), maxfee)) {
+		struct flow **new_flows;
+
+		if (mu == 1)
+			mu = 10;
+		else
+			mu += 10;
+		rq_log(tmpctx, rq, LOG_UNUSUAL,
+		       "The flows had a fee of %s, greater than max of %s, "
+		       "retrying with mu of %u%%...",
+		       fmt_amount_msat(tmpctx, flowset_fee(rq->plugin, *flows)),
+		       fmt_amount_msat(tmpctx, maxfee), mu);
+		new_flows = solver(ctx, rq, srcnode, dstnode, amount,
+				   mu > 100 ? 100 : mu, delay_feefactor);
+		if (!new_flows || mu >= 100) {
+			ret = rq_log(
+			    ctx, rq, LOG_UNUSUAL,
+			    "Could not find route without excessive cost");
+			goto fail;
+		}
+
+		/* Possible, because MCF's linear fees are not the same. */
+		if (amount_msat_greater(flowset_fee(rq->plugin, new_flows),
+					flowset_fee(rq->plugin, *flows))) {
+			struct amount_msat old_cost =
+			    linear_flows_cost(*flows, amount, delay_feefactor);
+			struct amount_msat new_cost = linear_flows_cost(
+			    new_flows, amount, delay_feefactor);
+			if (amount_msat_greater_eq(new_cost, old_cost)) {
+				rq_log(tmpctx, rq, LOG_BROKEN,
+				       "Old flows cost %s:",
+				       fmt_amount_msat(tmpctx, old_cost));
+				for (size_t i = 0; i < tal_count(*flows); i++) {
+					rq_log(
+					    tmpctx, rq, LOG_BROKEN,
+					    "Flow %zu/%zu: %s (linear cost %s)",
+					    i, tal_count(*flows),
+					    fmt_flow_full(tmpctx, rq, (*flows)[i]),
+					    fmt_amount_msat(
+						tmpctx, linear_flow_cost(
+							    (*flows)[i], amount,
+							    delay_feefactor)));
+				}
+				rq_log(tmpctx, rq, LOG_BROKEN,
+				       "New flows cost %s:",
+				       fmt_amount_msat(tmpctx, new_cost));
+				for (size_t i = 0; i < tal_count(new_flows);
+				     i++) {
+					rq_log(
+					    tmpctx, rq, LOG_BROKEN,
+					    "Flow %zu/%zu: %s (linear cost %s)",
+					    i, tal_count(new_flows),
+					    fmt_flow_full(tmpctx, rq,
+							  new_flows[i]),
+					    fmt_amount_msat(
+						tmpctx,
+						linear_flow_cost(
+						    new_flows[i], amount,
+						    delay_feefactor)));
+				}
+			}
+		}
+		tal_free(*flows);
+		*flows = new_flows;
+	}
+
+	if (finalcltv + flows_worst_delay(*flows) > maxdelay) {
+		ret = rq_log(
+		    ctx, rq, LOG_UNUSUAL,
+		    "Could not find route without excessive cost or delays");
+		goto fail;
+	}
+
+	/* The above did not take into account the extra funds to pay
+	 * fees, so we try to adjust now.  We could re-run MCF if this
+	 * fails, but failure basically never happens where payment is
+	 * still possible */
+	ret = refine_with_fees_and_limits(ctx, rq, amount, flows, probability);
+	if (ret)
+		goto fail;
+
+	/* Again, a tiny corner case: refine step can make us exceed maxfee */
+	if (amount_msat_greater(flowset_fee(rq->plugin, *flows), maxfee)) {
+		rq_log(tmpctx, rq, LOG_UNUSUAL,
+		       "After final refinement, fee was excessive: retrying");
+		goto too_expensive;
+	}
+
+	return NULL;
+fail:
+	assert(ret != NULL);
+	return ret;
+}
+
+/* Route search through linear_routes() using the minflow solver. */
+const char *default_routes(
+    const tal_t *ctx, struct route_query *rq,
+    const struct gossmap_node *srcnode, const struct gossmap_node *dstnode,
+    struct amount_msat amount, struct amount_msat maxfee, u32 finalcltv,
+    u32 maxdelay, struct flow ***flows, double *probability)
+{
+	return linear_routes(ctx, rq, srcnode, dstnode, amount, maxfee,
+			     finalcltv, maxdelay, flows, probability, minflow);
+}
+
+/* Route search through linear_routes() using single_path_flow as the solver,
+ * i.e. the single-path variant of default_routes. */
+const char *single_path_routes(
+    const tal_t *ctx, struct route_query *rq,
+    const struct gossmap_node *srcnode, const struct gossmap_node *dstnode,
+    struct amount_msat amount, struct amount_msat maxfee, u32 finalcltv,
+    u32 maxdelay, struct flow ***flows, double *probability)
+{
+	return linear_routes(ctx, rq, srcnode, dstnode, amount, maxfee,
+			     finalcltv, maxdelay, flows, probability,
+			     single_path_flow);
+}
+
+/* Route search through linear_routes() using goldberg_tarjan_mincostflow as
+ * the solver. */
+const char *goldberg_tarjan_routes(
+    const tal_t *ctx, struct route_query *rq,
+    const struct gossmap_node *srcnode, const struct gossmap_node *dstnode,
+    struct amount_msat amount, struct amount_msat maxfee, u32 finalcltv,
+    u32 maxdelay, struct flow ***flows, double *probability)
+{
+	return linear_routes(ctx, rq, srcnode, dstnode, amount, maxfee,
+			     finalcltv, maxdelay, flows, probability,
+			     goldberg_tarjan_mincostflow);
+}
diff --git a/plugins/askrene/mcf.h b/plugins/askrene/mcf.h
index f8100e766dd6..c5ddf6c7816d 100644
--- a/plugins/askrene/mcf.h
+++ b/plugins/askrene/mcf.h
@@ -31,8 +31,29 @@ struct flow **minflow(const tal_t *ctx,
 		      const struct gossmap_node *target,
 		      struct amount_msat amount,
 		      u32 mu,
-		      double delay_feefactor,
-		      bool single_part);
+		      double delay_feefactor);
+
+/**
+ * API for min cost single path.
+ * @ctx: context to allocate returned flows from
+ * @rq: the route_query we're processing (for logging)
+ * @source: the source to start from
+ * @target: the target to pay
+ * @amount: the amount we want to reach @target
+ * @mu: 0 corresponds to only probabilities, 100 corresponds to only fee.
+ * @delay_feefactor: convert 1 block delay into msat.
+ *
+ * @delay_feefactor converts 1 block delay into msat, as if it were an additional
+ * fee.  So if a CLTV delay on a node is 5 blocks, that's treated as if it
+ * were a fee of 5 * @delay_feefactor.
+ *
+ * Returns an array with one flow which delivers @amount to @target, or NULL.
+ */
+struct flow **single_path_flow(const tal_t *ctx, const struct route_query *rq,
+			       const struct gossmap_node *source,
+			       const struct gossmap_node *target,
+			       struct amount_msat amount, u32 mu,
+			       double delay_feefactor);
 
 /* To sanity check: this is the approximation mcf uses for the cost
  * of each channel. */
@@ -40,4 +61,32 @@ struct amount_msat linear_flow_cost(const struct flow *flow,
 				    struct amount_msat total_amount,
 				    double delay_feefactor);
 
+/* A wrapper to the min. cost flow solver that also accounts for the extra
+ * msats per channel needed to pay for fees. */
+const char *default_routes(const tal_t *ctx, struct route_query *rq,
+			   const struct gossmap_node *srcnode,
+			   const struct gossmap_node *dstnode,
+			   struct amount_msat amount,
+			   struct amount_msat maxfee, u32 finalcltv,
+			   u32 maxdelay, struct flow ***flows,
+			   double *probability);
+
+/* Same as default_routes, but using the single-path constrained solver. */
+const char *single_path_routes(const tal_t *ctx, struct route_query *rq,
+			       const struct gossmap_node *srcnode,
+			       const struct gossmap_node *dstnode,
+			       struct amount_msat amount,
+			       struct amount_msat maxfee, u32 finalcltv,
+			       u32 maxdelay, struct flow ***flows,
+			       double *probability);
+
+/* Same as default_routes, but using the Goldberg-Tarjan MCF solver. */
+const char *goldberg_tarjan_routes(const tal_t *ctx, struct route_query *rq,
+				   const struct gossmap_node *srcnode,
+				   const struct gossmap_node *dstnode,
+				   struct amount_msat amount,
+				   struct amount_msat maxfee, u32 finalcltv,
+				   u32 maxdelay, struct flow ***flows,
+				   double *probability);
+
 #endif /* LIGHTNING_PLUGINS_ASKRENE_MCF_H */
diff --git a/plugins/askrene/queue.h b/plugins/askrene/queue.h
new file mode 100644
index 000000000000..be5de99559c4
--- /dev/null
+++ b/plugins/askrene/queue.h
@@ -0,0 +1,108 @@
+#ifndef LIGHTNING_PLUGINS_ASKRENE_QUEUE_H
+#define LIGHTNING_PLUGINS_ASKRENE_QUEUE_H
+
+#include "config.h"
+#include <ccan/compiler/compiler.h>
+#include <ccan/lqueue/lqueue.h>
+#include <ccan/tal/tal.h>
+
+/* Generic and efficient queue based on ccan/lqueue for primitive data.
+ * The cache size of 64 is the smallest power of two for which I obtain a
+ * significant time improvement over directly using lqueue, ie. one lqueue
+ * element for each item in the queue. For small problem sizes (~10) the
+ * speed-up is 3x, for large problem sizes
+ * (>1000) the speed-up is 7x.
+ * ~0.5 operations/nsec */
+
+#define QUEUE_CACHE_SIZE 64
+
+#define QUEUE_DEFINE_TYPE(type, name)                                          \
+	struct name##_qcache_ {                                                \
+		struct lqueue_link qlink;                                      \
+		int begin, end;                                                \
+		type data[QUEUE_CACHE_SIZE];                                   \
+	};                                                                     \
+	static inline UNNEEDED bool name##_qcache_empty_(                      \
+	    const struct name##_qcache_ *qc)                                   \
+	{                                                                      \
+		return qc->begin == qc->end;                                   \
+	}                                                                      \
+	/* UB if _qcache is empty */                                           \
+	static inline UNNEEDED type name##_qcache_front_(                      \
+	    const struct name##_qcache_ *qc)                                   \
+	{                                                                      \
+		return qc->data[qc->begin];                                    \
+	}                                                                      \
+	static inline UNNEEDED type name##_qcache_pop_(                        \
+	    struct name##_qcache_ *qc)                                         \
+	{                                                                      \
+		type r = name##_qcache_front_(qc);                             \
+		qc->begin++;                                                   \
+		if (qc->begin >= qc->end) {                                    \
+			qc->begin = qc->end = 0;                               \
+		}                                                              \
+		return r;                                                      \
+	}                                                                      \
+	static inline UNNEEDED bool name##_qcache_insert_(                     \
+	    struct name##_qcache_ *qc, type element)                           \
+	{                                                                      \
+		if (qc->end == QUEUE_CACHE_SIZE) {                             \
+			return false;                                          \
+		}                                                              \
+		qc->data[qc->end++] = element;                                 \
+		return true;                                                   \
+	}                                                                      \
+	static inline UNNEEDED void name##_qcache_init_(                       \
+	    struct name##_qcache_ *qc)                                         \
+	{                                                                      \
+		qc->begin = qc->end = 0;                                       \
+	}                                                                      \
+	struct name {                                                          \
+		const tal_t *ctx;                                              \
+		struct lqueue_ lq;                                             \
+	};                                                                     \
+	static inline UNNEEDED bool name##_empty(const struct name *q)         \
+	{                                                                      \
+		return lqueue_empty_(&q->lq);                                  \
+	}                                                                      \
+	/* UB if the queue is empty */                                         \
+	static inline UNNEEDED type name##_front(const struct name *q)         \
+	{                                                                      \
+		const struct name##_qcache_ *qc =                              \
+		    (const struct name##_qcache_ *)lqueue_front_(&q->lq);      \
+		return name##_qcache_front_(qc);                               \
+	}                                                                      \
+	/* UB if the queue is empty */                                         \
+	static inline UNNEEDED type name##_pop(struct name *q)                 \
+	{                                                                      \
+		type r;                                                        \
+		struct name##_qcache_ *qc =                                    \
+		    (struct name##_qcache_ *)lqueue_front_(&q->lq);            \
+		r = name##_qcache_pop_(qc);                                    \
+		if (name##_qcache_empty_(qc)) {                                \
+			lqueue_dequeue_(&q->lq);                               \
+			tal_free(qc);                                          \
+		}                                                              \
+		return r;                                                      \
+	}                                                                      \
+	static inline UNNEEDED void name##_init(struct name *q,                \
+						const tal_t *ctx)              \
+	{                                                                      \
+		q->ctx = ctx;                                                  \
+		lqueue_init_(&q->lq, NULL);                                    \
+	}                                                                      \
+	static inline UNNEEDED void name##_insert(struct name *q,              \
+						  type element)                \
+	{                                                                      \
+		struct name##_qcache_ *qc =                                    \
+		    (struct name##_qcache_ *)lqueue_back_(&q->lq);             \
+		if (qc && name##_qcache_insert_(qc, element))                  \
+			return;                                                \
+		qc = tal(q->ctx, struct name##_qcache_);                       \
+		name##_qcache_init_(qc);                                       \
+		name##_qcache_insert_(qc, element);                            \
+		lqueue_enqueue_(&q->lq, (struct lqueue_link *)qc);             \
+	}                                                                      \
+	/* QUEUE_DEFINE_TYPE */
+
+#endif /* LIGHTNING_PLUGINS_ASKRENE_QUEUE_H */
diff --git a/plugins/askrene/test/run-queue.c b/plugins/askrene/test/run-queue.c
new file mode 100644
index 000000000000..cf8edef339c2
--- /dev/null
+++ b/plugins/askrene/test/run-queue.c
@@ -0,0 +1,78 @@
+#include "config.h"
+#include <common/setup.h>
+#include <stdlib.h>
+
+#include "../queue.h"
+
+/* a queue for int */
+QUEUE_DEFINE_TYPE(int, iqueue);
+
+/* Exercises the generated iqueue: values must pop in insertion order for
+ * small, bulk and randomly interleaved workloads. */
+int main(int argc, char *argv[])
+{
+	int got;
+	struct iqueue queue;
+
+	common_setup(argv[0]);
+	iqueue_init(&queue, NULL);
+
+	/* A single element comes straight back out, twice in a row. */
+	iqueue_insert(&queue, 1);
+	got = iqueue_pop(&queue);
+	assert(got == 1);
+
+	iqueue_insert(&queue, 2);
+	got = iqueue_pop(&queue);
+	assert(got == 2);
+
+	/* Two queued elements are popped in insertion order. */
+	iqueue_insert(&queue, 3);
+	iqueue_insert(&queue, 4);
+	got = iqueue_pop(&queue);
+	assert(got == 3);
+	got = iqueue_pop(&queue);
+	assert(got == 4);
+
+	/* An insert between pops does not disturb the order. */
+	iqueue_insert(&queue, 5);
+	iqueue_insert(&queue, 6);
+	got = iqueue_pop(&queue);
+	assert(got == 5);
+	iqueue_insert(&queue, 7);
+	got = iqueue_pop(&queue);
+	assert(got == 6);
+	got = iqueue_pop(&queue);
+	assert(got == 7);
+
+	/* Fill with 10000 items, then drain them all in order. */
+	for (int i = 1; i <= 10000; i++)
+		iqueue_insert(&queue, i);
+	for (int i = 1; i <= 10000; i++) {
+		got = iqueue_pop(&queue);
+		assert(got == i);
+	}
+
+	/* Randomly interleave inserts and pops of 1..MAX_ITEM; every pop must
+	 * return the next value in increasing order. */
+	const int MAX_ITEM = 1000000;
+	int expected_front = 1, next_insert = 1;
+
+	/* Stop once everything was inserted and the queue is empty again. */
+	while (!iqueue_empty(&queue) || next_insert <= MAX_ITEM) {
+		const bool can_pop = !iqueue_empty(&queue);
+		const bool can_insert = next_insert <= MAX_ITEM;
+
+		/* Forced move if only one is possible, else throw a coin. */
+		if (!can_pop || (can_insert && rand() % 2)) {
+			iqueue_insert(&queue, next_insert++);
+		} else {
+			got = iqueue_pop(&queue);
+			assert(got == expected_front);
+			expected_front++;
+		}
+	}
+
+	common_shutdown();
+	return 0;
+}
diff --git a/tests/test_askrene.py b/tests/test_askrene.py
index 8944fc262001..21a194ba065b 100644
--- a/tests/test_askrene.py
+++ b/tests/test_askrene.py
@@ -568,25 +568,88 @@ def test_getroutes(node_factory):
                             'amount_msat': 5500005,
                             'delay': 99 + 6}]])
 
-    # We realize that this is impossible in a single path:
-    with pytest.raises(RpcError, match="The shortest path is 0x2x1, but 0x2x1/1 marked disabled by layer auto.no_mpp_support."):
-        l1.rpc.getroutes(source=nodemap[0],
-                         destination=nodemap[2],
-                         amount_msat=10000000,
-                         layers=['auto.no_mpp_support'],
-                         maxfee_msat=1000,
-                         final_cltv=99)
 
-    # But this will work.
-    check_getroute_paths(l1,
-                         nodemap[0],
-                         nodemap[2],
-                         9000000,
-                         [[{'short_channel_id_dir': '0x2x3/1',
-                            'next_node_id': nodemap[2],
-                            'amount_msat': 9000009,
-                            'delay': 99 + 6}]],
-                         layers=['auto.no_mpp_support'])
+def test_getroutes_single_path(node_factory):
+    """Test getroutes single-path results (layer auto.no_mpp_support)"""
+    gsfile, nodemap = generate_gossip_store(
+        [
+            GenChannel(0, 1),
+            GenChannel(1, 2, capacity_sats=9000),
+            GenChannel(1, 2, capacity_sats=10000),
+        ]
+    )
+    # Set up l1 with this as the gossip_store
+    l1 = node_factory.get_node(gossip_store_file=gsfile.name)
+
+    # 10000001 msat is more than the larger 1->2 channel (10000 sat) holds, so
+    # two parts would be needed; a single-path search must therefore fail.
+    # FIXME: the explanation for the failure is wrong; no message is matched
+    with pytest.raises(RpcError):
+        l1.rpc.getroutes(
+            source=nodemap[1],
+            destination=nodemap[2],
+            amount_msat=10000001,
+            layers=["auto.no_mpp_support"],
+            maxfee_msat=1000,
+            final_cltv=99,
+        )
+
+    # 10000000 msat is too much for the 9000 sat channel: only one solution
+    check_getroute_paths(
+        l1,
+        nodemap[1],
+        nodemap[2],
+        10000000,
+        [
+            [
+                {
+                    "short_channel_id_dir": "1x2x2/1",
+                    "next_node_id": nodemap[2],
+                    "amount_msat": 10000010,
+                    "delay": 99 + 6,
+                }
+            ]
+        ],
+        layers=["auto.no_mpp_support"],
+    )
+
+    # Same oversized amount, now starting one hop earlier at node 0: the 1->2
+    # hop would still need two parts, so a single-path search must fail.
+    # FIXME: the explanation for the failure is wrong; no message is matched
+    with pytest.raises(RpcError):
+        l1.rpc.getroutes(
+            source=nodemap[0],
+            destination=nodemap[2],
+            amount_msat=10000001,
+            layers=["auto.no_mpp_support"],
+            maxfee_msat=1000,
+            final_cltv=99,
+        )
+
+    # For this amount, only one solution is possible: 0->1, then 1->2
+    check_getroute_paths(
+        l1,
+        nodemap[0],
+        nodemap[2],
+        10000000,
+        [
+            [
+                {
+                    "short_channel_id_dir": "0x1x0/1",
+                    "next_node_id": nodemap[1],
+                    "amount_msat": 10000020,
+                    "delay": 99 + 6 + 6,
+                },
+                {
+                    "short_channel_id_dir": "1x2x2/1",
+                    "next_node_id": nodemap[2],
+                    "amount_msat": 10000010,
+                    "delay": 99 + 6,
+                },
+            ]
+        ],
+        layers=["auto.no_mpp_support"],
+    )
 
 
 def test_getroutes_fee_fallback(node_factory):
@@ -1141,7 +1204,7 @@ def test_real_data(node_factory, bitcoind):
     # CI, it's slow.
     if SLOW_MACHINE:
         limit = 25
-        expected = (6, 25, 1544756, 142986, 91)
+        expected = (5, 25, 1567536, 142772, 91)
     else:
         limit = 100
         expected = (9, 95, 6347877, 566288, 92)
@@ -1258,7 +1321,7 @@ def test_real_biases(node_factory, bitcoind):
     # CI, it's slow.
     if SLOW_MACHINE:
         limit = 25
-        expected = ({1: 5, 2: 7, 4: 7, 8: 11, 16: 14, 32: 19, 64: 25, 100: 25}, 0)
+        expected = ({1: 6, 2: 6, 4: 7, 8: 12, 16: 14, 32: 19, 64: 25, 100: 25}, 0)
     else:
         limit = 100
         expected = ({1: 23, 2: 31, 4: 40, 8: 53, 16: 70, 32: 82, 64: 96, 100: 96}, 0)