Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/release-notes/release-notes-0.21.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@
potential loss of funds. The compilation of the server is hidden behind the
non-default `switchrpc` build tag.

* Added a new [`CleanStore` RPC](https://github.com/lightningnetwork/lnd/pull/10484)
to the `switchrpc` sub-system. This new endpoint allows remote entities
responsible for payment lifecycle management to perform cleanup of HTLC
attempt information. The `CleanStore` method deletes all stored attempts
except for a specified `keepSet` of IDs. **IMPORTANT**: As with other
`switchrpc` endpoints, it is currently only safe to allow a *single* entity to
dispatch attempts and manage the store via the Switch at any given time. Using
`CleanStore` in an uncoordinated, multi-controller environment can lead to
lead to stuck payment attempts, failures, and potential loss of funds.

## lncli Additions

# Improvements
Expand Down
44 changes: 44 additions & 0 deletions htlcswitch/payment_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@ type networkResultStore struct {
// its read-then-write sequence from concurrent calls, and it maintains
// consistency between the database state and result subscribers.
attemptIDMtx *multimutex.Mutex[uint64]

// storeMtx is a read-write mutex that protects the entire store during
// global operations, such as a full cleanup. A read-lock should be
// held by all per-attempt operations, while a full write-lock should
// be held by CleanStore.
storeMtx sync.RWMutex
}

func newNetworkResultStore(db kvdb.Backend) *networkResultStore {
Expand All @@ -147,6 +153,11 @@ func newNetworkResultStore(db kvdb.Backend) *networkResultStore {
// NOTE: This is part of the AttemptStore interface. Subscribed clients do not
// receive notice of this initialization.
func (store *networkResultStore) InitAttempt(attemptID uint64) error {
// Acquire a lock to protect this fine-grained operation against a
// concurrent, store-wide CleanStore operation.
store.storeMtx.Lock()
defer store.storeMtx.Unlock()

// We get a mutex for this attempt ID to serialize init, store, and
// subscribe operations. This is needed to ensure consistency between
// the database state and the subscribers in case of concurrent calls.
Expand Down Expand Up @@ -239,6 +250,11 @@ func (store *networkResultStore) InitAttempt(attemptID uint64) error {
func (store *networkResultStore) StoreResult(attemptID uint64,
result *networkResult) error {

// Acquire a lock to protect this fine-grained operation against a
// concurrent, store-wide CleanStore operation.
store.storeMtx.Lock()
defer store.storeMtx.Unlock()

// We get a mutex for this attempt ID. This is needed to ensure
// consistency between the database state and the subscribers in case
// of concurrent calls.
Expand Down Expand Up @@ -299,6 +315,11 @@ func (store *networkResultStore) notifySubscribers(attemptID uint64,
func (store *networkResultStore) SubscribeResult(attemptID uint64) (
<-chan *networkResult, error) {

// Acquire a read lock to protect this fine-grained operation against
// a concurrent, store-wide CleanStore operation.
store.storeMtx.RLock()
defer store.storeMtx.RUnlock()

// We get a mutex for this payment ID. This is needed to ensure
// consistency between the database state and the subscribers in case
// of concurrent calls.
Expand Down Expand Up @@ -374,6 +395,11 @@ func (store *networkResultStore) SubscribeResult(attemptID uint64) (
func (store *networkResultStore) GetResult(pid uint64) (
*networkResult, error) {

// Acquire a read lock to protect this fine-grained operation against
// a concurrent, store-wide CleanStore operation.
store.storeMtx.RLock()
defer store.storeMtx.RUnlock()

var result *networkResult
err := kvdb.View(store.backend, func(tx kvdb.RTx) error {
var err error
Expand Down Expand Up @@ -427,6 +453,14 @@ func fetchResult(tx kvdb.RTx, pid uint64) (*networkResult, error) {
// concurrently while this process is ongoing, as its result might end up being
// deleted.
func (store *networkResultStore) CleanStore(keep map[uint64]struct{}) error {
// The CleanStore operation is coarse-grained ("snapshot-and-delete")
// and must acquire a full write lock to serialize it against all
// fine-grained, per-attempt operations, preventing race conditions.
// NOTE: An alternative DeleteAttempts API would allow for more
// fine-grained locking.
store.storeMtx.Lock()
defer store.storeMtx.Unlock()

return kvdb.Update(store.backend, func(tx kvdb.RwTx) error {
networkResults, err := tx.CreateTopLevelBucket(
networkResultStoreBucketKey,
Expand Down Expand Up @@ -471,6 +505,11 @@ func (store *networkResultStore) CleanStore(keep map[uint64]struct{}) error {
//
// NOTE: This function is NOT safe for concurrent access.
func (store *networkResultStore) FetchPendingAttempts() ([]uint64, error) {
// Acquire a read lock to protect this fine-grained operation against
// a concurrent, store-wide CleanStore operation.
store.storeMtx.RLock()
defer store.storeMtx.RUnlock()

var pending []uint64
err := kvdb.View(store.backend, func(tx kvdb.RTx) error {
bucket := tx.ReadBucket(networkResultStoreBucketKey)
Expand Down Expand Up @@ -533,6 +572,11 @@ func (store *networkResultStore) FetchPendingAttempts() ([]uint64, error) {
func (store *networkResultStore) FailPendingAttempt(attemptID uint64,
linkErr *LinkError) error {

// Acquire a lock to protect this fine-grained operation against a
// concurrent, store-wide CleanStore operation.
store.storeMtx.Lock()
defer store.storeMtx.Unlock()

// We get a mutex for this attempt ID to ensure consistency between the
// database state and the subscribers in case of concurrent calls.
store.attemptIDMtx.Lock(attemptID)
Expand Down
4 changes: 2 additions & 2 deletions itest/list_on_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -764,8 +764,8 @@ var allTestCases = []*lntest.TestCase{
TestFunc: testSendOnion,
},
{
Name: "send onion twice",
TestFunc: testSendOnionTwice,
Name: "send onion lifecycle",
TestFunc: testSendOnionIdempotencyLifecycle,
},
{
Name: "track onion",
Expand Down
58 changes: 46 additions & 12 deletions itest/lnd_sendonion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,18 @@ func testSendOnion(ht *lntest.HarnessTest) {
ht.AssertInvoiceSettled(dave, invoices[0].PaymentAddr)
}

// testSendOnionTwice tests that the switch correctly rejects a duplicate
// payment attempt for an HTLC that is already in-flight. It sends an onion,
// then immediately sends the exact same onion with the same attempt ID. The
// test asserts that the second attempt is rejected with a DUPLICATE_HTLC
// error. It also verifies that sending again after the original HTLC has
// settled is also rejected.
func testSendOnionTwice(ht *lntest.HarnessTest) {
// testSendOnionIdempotencyLifecycle verifies the full lifecycle of SendOnion's
// idempotency guarantees. It tests that the switch correctly rejects duplicate
// payment attempts across various states:
// 1. While the original HTLC is still in-flight.
// 2. After the original HTLC has definitively settled.
// 3. After the attempt record has been explicitly preserved via CleanStore.
// 4. After the attempt record has been explicitly deleted via CleanStore,
// proving the idempotency key can be reused.
//
// This test ensures the SendOnion RPC provides a stable and trustworthy
// contract for clients managing payment attempts.
func testSendOnionIdempotencyLifecycle(ht *lntest.HarnessTest) {
// Create a four-node context consisting of Alice, Bob, Carol, and
// Dave with the following topology:
// Alice -> Bob -> Carol -> Dave
Expand Down Expand Up @@ -203,11 +208,40 @@ func testSendOnionTwice(ht *lntest.HarnessTest) {
require.Equal(ht, preimage[:], trackResp.Preimage)

// Now that the original HTLC attempt has settled, we'll send the same
// onion again with the same attempt ID.
//
// NOTE: Currently, this does not error. When we make SendOnion fully
// duplicate safe, this should be updated to assert an error is
// returned.
// onion again with the same attempt ID to prove the idempotency key
// persists across the full lifecycle.
resp = alice.RPC.SendOnion(sendReq)
require.False(ht, resp.Success, "expected failure on onion send")
require.Equal(ht, resp.ErrorCode,
switchrpc.ErrorCode_DUPLICATE_HTLC,
"unexpected error code")
require.Equal(ht, resp.ErrorMessage, htlcswitch.ErrDuplicateAdd.Error())

// Now we'll test the CleanStore RPC. First, we'll call it with the
// attempt ID in the keep list.
cleanReq := &switchrpc.CleanStoreRequest{
KeepAttemptIds: []uint64{sendReq.AttemptId},
}
alice.RPC.CleanStore(cleanReq)

// Since we specified that the ID should be kept, calling SendOnion
// again should still result in a duplicate error.
resp = alice.RPC.SendOnion(sendReq)
require.False(ht, resp.Success, "expected failure on onion send")
require.Equal(ht, resp.ErrorCode,
switchrpc.ErrorCode_DUPLICATE_HTLC,
"unexpected error code")
require.Equal(ht, resp.ErrorMessage, htlcswitch.ErrDuplicateAdd.Error())

// Now, we'll call CleanStore with an empty keep list, which should
// delete our record.
cleanReq.KeepAttemptIds = []uint64{}
alice.RPC.CleanStore(cleanReq)

// Finally, we'll send the onion one last time. Since the idempotency
// record has been deleted, this should be accepted by the switch. It
// will ultimately fail at the final hop since the invoice is already
// paid, but the initial dispatch will succeed.
resp = alice.RPC.SendOnion(sendReq)
require.True(ht, resp.Success, "expected successful onion send")
require.Empty(ht, resp.ErrorMessage, "unexpected failure to send onion")
Expand Down
10 changes: 9 additions & 1 deletion lnrpc/switchrpc/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ type mockPayer struct {
getResultResult *htlcswitch.PaymentResult
getResultErr error
resultChan chan *htlcswitch.PaymentResult

// cleanStoreErr is the error to return from CleanStore.
cleanStoreErr error

// keptPids stores the IDs passed to CleanStore.
keptPids map[uint64]struct{}
}

// SendHTLC is a mock implementation of the SendHTLC method.
Expand Down Expand Up @@ -56,7 +62,9 @@ func (m *mockPayer) HasAttemptResult(attemptID uint64) (bool, error) {

// CleanStore is a mock implementation of the CleanStore method.
func (m *mockPayer) CleanStore(keepPids map[uint64]struct{}) error {
return nil
m.keptPids = keepPids

return m.cleanStoreErr
}

// mockRouteProcessor is a mock implementation of the routing.RouteProcessor
Expand Down
Loading
Loading