Skip to content

Commit 50cad71

Browse files
committed
channeldb: fix race condition in link node pruning
This commit fixes a critical race condition in MarkChanFullyClosed and pruneLinkNode where link nodes could be incorrectly deleted despite having pending or open channels. The race occurred because the check for open channels and the link node deletion happened in separate database transactions: Thread A: TX1 checks open channels → [] (empty) Thread A: TX1 commits Thread B: Opens new channel with same peer Thread A: TX2 deletes link node (using stale data) Result: Link node deleted despite pending channel existing This creates a TOCTOU (time-of-check to time-of-use) vulnerability where database state changes between reading the channel count and deleting the node. Fix for MarkChanFullyClosed: - Move link node deletion into the same transaction as the channel closing check, making the check-and-delete operation atomic Fix for pruneLinkNode: - Add double-check within the write transaction to verify no channels were opened since the caller's initial check - Maintains performance by keeping early return for common case - Prevents deletion if channels exist at delete time This ensures the invariant: "link node exists iff channels exist" is never violated, preventing database corruption and potential connection issues.
1 parent 91423ee commit 50cad71

1 file changed

Lines changed: 57 additions & 26 deletions

File tree

channeldb/db.go

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,11 +1363,7 @@ func (c *ChannelStateDB) FetchClosedChannelForID(cid lnwire.ChannelID) (
13631363
// the pending funds in a channel that has been forcibly closed have been
13641364
// swept.
13651365
func (c *ChannelStateDB) MarkChanFullyClosed(chanPoint *wire.OutPoint) error {
1366-
var (
1367-
openChannels []*OpenChannel
1368-
pruneLinkNode *btcec.PublicKey
1369-
)
1370-
err := kvdb.Update(c.backend, func(tx kvdb.RwTx) error {
1366+
return kvdb.Update(c.backend, func(tx kvdb.RwTx) error {
13711367
var b bytes.Buffer
13721368
if err := graphdb.WriteOutpoint(&b, chanPoint); err != nil {
13731369
return err
@@ -1413,44 +1409,79 @@ func (c *ChannelStateDB) MarkChanFullyClosed(chanPoint *wire.OutPoint) error {
14131409
// other open channels with this peer. If we don't we'll
14141410
// garbage collect it to ensure we don't establish persistent
14151411
// connections to peers without open channels.
1416-
pruneLinkNode = chanSummary.RemotePub
1417-
openChannels, err = c.fetchOpenChannels(
1418-
tx, pruneLinkNode,
1419-
)
1412+
remotePub := chanSummary.RemotePub
1413+
openChannels, err := c.fetchOpenChannels(tx, remotePub)
14201414
if err != nil {
14211415
return fmt.Errorf("unable to fetch open channels for "+
14221416
"peer %x: %v",
1423-
pruneLinkNode.SerializeCompressed(), err)
1417+
remotePub.SerializeCompressed(), err)
14241418
}
14251419

1426-
return nil
1427-
}, func() {
1428-
openChannels = nil
1429-
pruneLinkNode = nil
1430-
})
1431-
if err != nil {
1432-
return err
1433-
}
1420+
if len(openChannels) > 0 {
1421+
return nil
1422+
}
1423+
1424+
// If there are no open channels with this peer, prune the
1425+
// link node. We do this within the same transaction to avoid
1426+
// a race condition where a new channel could be opened
1427+
// between this check and the deletion.
1428+
log.Infof("Pruning link node %x with zero open "+
1429+
"channels from database",
1430+
remotePub.SerializeCompressed())
1431+
1432+
err = deleteLinkNode(tx, remotePub)
1433+
if err != nil {
1434+
return fmt.Errorf("unable to delete link "+
1435+
"node: %w", err)
1436+
}
14341437

1435-
// Decide whether we want to remove the link node, based upon the number
1436-
// of still open channels.
1437-
return c.pruneLinkNode(openChannels, pruneLinkNode)
1438+
return nil
1439+
}, func() {})
14381440
}
14391441

14401442
// pruneLinkNode determines whether we should garbage collect a link node from
1441-
// the database due to no longer having any open channels with it. If there are
1442-
// any left, then this acts as a no-op.
1443+
// the database due to no longer having any open channels with it.
1444+
//
1445+
// NOTE: This function should be called after an initial check shows no open
1446+
// channels exist. It will double-check within a write transaction to avoid a
1447+
// race condition where a channel could be opened between the initial check
1448+
// and the deletion.
14431449
func (c *ChannelStateDB) pruneLinkNode(openChannels []*OpenChannel,
14441450
remotePub *btcec.PublicKey) error {
14451451

1452+
// If there are open channels, don't prune.
14461453
if len(openChannels) > 0 {
14471454
return nil
14481455
}
14491456

1450-
log.Infof("Pruning link node %x with zero open channels from database",
1451-
remotePub.SerializeCompressed())
1457+
return kvdb.Update(c.backend, func(tx kvdb.RwTx) error {
1458+
// Double-check for open channels to avoid deleting a link node
1459+
// if a channel was opened since the caller's initial check.
1460+
//
1461+
// NOTE: This avoids a race condition where a channel could be
1462+
// opened between the initial check and the deletion.
1463+
openChannels, err := c.fetchOpenChannels(tx, remotePub)
1464+
if err != nil {
1465+
return err
1466+
}
1467+
1468+
// If channels exist now, don't prune.
1469+
if len(openChannels) > 0 {
1470+
return nil
1471+
}
1472+
1473+
// No open channels, safe to prune the link node.
1474+
log.Infof("Pruning link node %x with zero open channels "+
1475+
"from database",
1476+
remotePub.SerializeCompressed())
1477+
1478+
err = deleteLinkNode(tx, remotePub)
1479+
if err != nil {
1480+
return fmt.Errorf("unable to prune link node: %w", err)
1481+
}
14521482

1453-
return c.linkNodeDB.DeleteLinkNode(remotePub)
1483+
return nil
1484+
}, func() {})
14541485
}
14551486

14561487
// PruneLinkNodes attempts to prune all link nodes found within the database

0 commit comments

Comments
 (0)