lightningnetwork · calvinrzachman · Jul 30, 2025 · Jul 14, 2025 · Jan 2, 2026 · Jan 2, 2026
diff --git a/docs/release-notes/release-notes-0.21.0.md b/docs/release-notes/release-notes-0.21.0.md
@@ -86,6 +86,16 @@
   potential loss of funds. The compilation of the server is hidden behind the
   non-default `switchrpc` build tag.
 
+* Added a new [`CleanStore` RPC](https://github.com/lightningnetwork/lnd/pull/10484)
+  to the `switchrpc` sub-system. This new endpoint allows remote entities
+  responsible for payment lifecycle management to perform cleanup of HTLC
+  attempt information. The `CleanStore` method deletes all stored attempts
+  except for a specified `keepSet` of IDs. **IMPORTANT**: As with other
+  `switchrpc` endpoints, it is currently only safe to allow a *single* entity to
+  dispatch attempts and manage the store via the Switch at any given time. Using
+  `CleanStore` in an uncoordinated, multi-controller environment can lead to
+  lead to stuck payment attempts, failures, and potential loss of funds.
+
 ## lncli Additions
 
 # Improvements

diff --git a/htlcswitch/payment_result.go b/htlcswitch/payment_result.go
@@ -122,6 +122,12 @@ type networkResultStore struct {
 	// its read-then-write sequence from concurrent calls, and it maintains
 	// consistency between the database state and result subscribers.
 	attemptIDMtx *multimutex.Mutex[uint64]
+
+	// storeMtx is a read-write mutex that protects the entire store during
+	// global operations, such as a full cleanup. A read-lock should be
+	// held by all per-attempt operations, while a full write-lock should
+	// be held by CleanStore.
+	storeMtx sync.RWMutex
 }
 
 func newNetworkResultStore(db kvdb.Backend) *networkResultStore {
@@ -147,6 +153,11 @@ func newNetworkResultStore(db kvdb.Backend) *networkResultStore {
 // NOTE: This is part of the AttemptStore interface. Subscribed clients do not
 // receive notice of this initialization.
 func (store *networkResultStore) InitAttempt(attemptID uint64) error {
+	// Acquire a lock to protect this fine-grained operation against a
+	// concurrent, store-wide CleanStore operation.
+	store.storeMtx.Lock()
+	defer store.storeMtx.Unlock()
+
 	// We get a mutex for this attempt ID to serialize init, store, and
 	// subscribe operations. This is needed to ensure consistency between
 	// the database state and the subscribers in case of concurrent calls.
@@ -239,6 +250,11 @@ func (store *networkResultStore) InitAttempt(attemptID uint64) error {
 func (store *networkResultStore) StoreResult(attemptID uint64,
 	result *networkResult) error {
 
+	// Acquire a lock to protect this fine-grained operation against a
+	// concurrent, store-wide CleanStore operation.
+	store.storeMtx.Lock()
+	defer store.storeMtx.Unlock()
+
 	// We get a mutex for this attempt ID. This is needed to ensure
 	// consistency between the database state and the subscribers in case
 	// of concurrent calls.
@@ -299,6 +315,11 @@ func (store *networkResultStore) notifySubscribers(attemptID uint64,
 func (store *networkResultStore) SubscribeResult(attemptID uint64) (
 	<-chan *networkResult, error) {
 
+	// Acquire a read lock to protect this fine-grained operation against
+	// a concurrent, store-wide CleanStore operation.
+	store.storeMtx.RLock()
+	defer store.storeMtx.RUnlock()
+
 	// We get a mutex for this payment ID. This is needed to ensure
 	// consistency between the database state and the subscribers in case
 	// of concurrent calls.
@@ -374,6 +395,11 @@ func (store *networkResultStore) SubscribeResult(attemptID uint64) (
 func (store *networkResultStore) GetResult(pid uint64) (
 	*networkResult, error) {
 
+	// Acquire a read lock to protect this fine-grained operation against
+	// a concurrent, store-wide CleanStore operation.
+	store.storeMtx.RLock()
+	defer store.storeMtx.RUnlock()
+
 	var result *networkResult
 	err := kvdb.View(store.backend, func(tx kvdb.RTx) error {
 		var err error
@@ -427,6 +453,14 @@ func fetchResult(tx kvdb.RTx, pid uint64) (*networkResult, error) {
 // concurrently while this process is ongoing, as its result might end up being
 // deleted.
 func (store *networkResultStore) CleanStore(keep map[uint64]struct{}) error {
+	// The CleanStore operation is coarse-grained ("snapshot-and-delete")
+	// and must acquire a full write lock to serialize it against all
+	// fine-grained, per-attempt operations, preventing race conditions.
+	// NOTE: An alternative DeleteAttempts API would allow for more
+	// fine-grained locking.
+	store.storeMtx.Lock()
+	defer store.storeMtx.Unlock()
+
 	return kvdb.Update(store.backend, func(tx kvdb.RwTx) error {
 		networkResults, err := tx.CreateTopLevelBucket(
 			networkResultStoreBucketKey,
@@ -471,6 +505,11 @@ func (store *networkResultStore) CleanStore(keep map[uint64]struct{}) error {
 //
 // NOTE: This function is NOT safe for concurrent access.
 func (store *networkResultStore) FetchPendingAttempts() ([]uint64, error) {
+	// Acquire a read lock to protect this fine-grained operation against
+	// a concurrent, store-wide CleanStore operation.
+	store.storeMtx.RLock()
+	defer store.storeMtx.RUnlock()
+
 	var pending []uint64
 	err := kvdb.View(store.backend, func(tx kvdb.RTx) error {
 		bucket := tx.ReadBucket(networkResultStoreBucketKey)
@@ -533,6 +572,11 @@ func (store *networkResultStore) FetchPendingAttempts() ([]uint64, error) {
 func (store *networkResultStore) FailPendingAttempt(attemptID uint64,
 	linkErr *LinkError) error {
 
+	// Acquire a lock to protect this fine-grained operation against a
+	// concurrent, store-wide CleanStore operation.
+	store.storeMtx.Lock()
+	defer store.storeMtx.Unlock()
+
 	// We get a mutex for this attempt ID to ensure consistency between the
 	// database state and the subscribers in case of concurrent calls.
 	store.attemptIDMtx.Lock(attemptID)

diff --git a/itest/list_on_test.go b/itest/list_on_test.go
@@ -764,8 +764,8 @@ var allTestCases = []*lntest.TestCase{
 		TestFunc: testSendOnion,
 	},
 	{
-		Name:     "send onion twice",
-		TestFunc: testSendOnionTwice,
+		Name:     "send onion lifecycle",
+		TestFunc: testSendOnionIdempotencyLifecycle,
 	},
 	{
 		Name:     "track onion",

diff --git a/itest/lnd_sendonion_test.go b/itest/lnd_sendonion_test.go
@@ -99,13 +99,18 @@ func testSendOnion(ht *lntest.HarnessTest) {
 	ht.AssertInvoiceSettled(dave, invoices[0].PaymentAddr)
 }
 
-// testSendOnionTwice tests that the switch correctly rejects a duplicate
-// payment attempt for an HTLC that is already in-flight. It sends an onion,
-// then immediately sends the exact same onion with the same attempt ID. The
-// test asserts that the second attempt is rejected with a DUPLICATE_HTLC
-// error. It also verifies that sending again after the original HTLC has
-// settled is also rejected.
-func testSendOnionTwice(ht *lntest.HarnessTest) {
+// testSendOnionIdempotencyLifecycle verifies the full lifecycle of SendOnion's
+// idempotency guarantees. It tests that the switch correctly rejects duplicate
+// payment attempts across various states:
+//  1. While the original HTLC is still in-flight.
+//  2. After the original HTLC has definitively settled.
+//  3. After the attempt record has been explicitly preserved via CleanStore.
+//  4. After the attempt record has been explicitly deleted via CleanStore,
+//     proving the idempotency key can be reused.
+//
+// This test ensures the SendOnion RPC provides a stable and trustworthy
+// contract for clients managing payment attempts.
+func testSendOnionIdempotencyLifecycle(ht *lntest.HarnessTest) {
 	// Create a four-node context consisting of Alice, Bob, Carol, and
 	// Dave with the following topology:
 	//     Alice -> Bob -> Carol -> Dave
@@ -203,11 +208,40 @@ func testSendOnionTwice(ht *lntest.HarnessTest) {
 	require.Equal(ht, preimage[:], trackResp.Preimage)
 
 	// Now that the original HTLC attempt has settled, we'll send the same
-	// onion again with the same attempt ID.
-	//
-	// NOTE: Currently, this does not error. When we make SendOnion fully
-	// duplicate safe, this should be updated to assert an error is
-	// returned.
+	// onion again with the same attempt ID to prove the idempotency key
+	// persists across the full lifecycle.
+	resp = alice.RPC.SendOnion(sendReq)
+	require.False(ht, resp.Success, "expected failure on onion send")
+	require.Equal(ht, resp.ErrorCode,
+		switchrpc.ErrorCode_DUPLICATE_HTLC,
+		"unexpected error code")
+	require.Equal(ht, resp.ErrorMessage, htlcswitch.ErrDuplicateAdd.Error())
+
+	// Now we'll test the CleanStore RPC. First, we'll call it with the
+	// attempt ID in the keep list.
+	cleanReq := &switchrpc.CleanStoreRequest{
+		KeepAttemptIds: []uint64{sendReq.AttemptId},
+	}
+	alice.RPC.CleanStore(cleanReq)
+
+	// Since we specified that the ID should be kept, calling SendOnion
+	// again should still result in a duplicate error.
+	resp = alice.RPC.SendOnion(sendReq)
+	require.False(ht, resp.Success, "expected failure on onion send")
+	require.Equal(ht, resp.ErrorCode,
+		switchrpc.ErrorCode_DUPLICATE_HTLC,
+		"unexpected error code")
+	require.Equal(ht, resp.ErrorMessage, htlcswitch.ErrDuplicateAdd.Error())
+
+	// Now, we'll call CleanStore with an empty keep list, which should
+	// delete our record.
+	cleanReq.KeepAttemptIds = []uint64{}
+	alice.RPC.CleanStore(cleanReq)
+
+	// Finally, we'll send the onion one last time. Since the idempotency
+	// record has been deleted, this should be accepted by the switch. It
+	// will ultimately fail at the final hop since the invoice is already
+	// paid, but the initial dispatch will succeed.
 	resp = alice.RPC.SendOnion(sendReq)
 	require.True(ht, resp.Success, "expected successful onion send")
 	require.Empty(ht, resp.ErrorMessage, "unexpected failure to send onion")

diff --git a/lnrpc/switchrpc/mock.go b/lnrpc/switchrpc/mock.go
@@ -17,6 +17,12 @@ type mockPayer struct {
 	getResultResult *htlcswitch.PaymentResult
 	getResultErr    error
 	resultChan      chan *htlcswitch.PaymentResult
+
+	// cleanStoreErr is the error to return from CleanStore.
+	cleanStoreErr error
+
+	// keptPids stores the IDs passed to CleanStore.
+	keptPids map[uint64]struct{}
 }
 
 // SendHTLC is a mock implementation of the SendHTLC method.
@@ -56,7 +62,9 @@ func (m *mockPayer) HasAttemptResult(attemptID uint64) (bool, error) {
 
 // CleanStore is a mock implementation of the CleanStore method.
 func (m *mockPayer) CleanStore(keepPids map[uint64]struct{}) error {
-	return nil
+	m.keptPids = keepPids
+
+	return m.cleanStoreErr
 }
 
 // mockRouteProcessor is a mock implementation of the routing.RouteProcessor