From d8421b029082cf7f27b852a7187bbd97e7b0a34d Mon Sep 17 00:00:00 2001 From: Nikolay Karadzhov Date: Fri, 27 Feb 2026 13:00:26 +0200 Subject: [PATCH] fix(sentinel): preserve root seeds for outage recovery fixes #3127 --- packages/client/lib/sentinel/index.spec.ts | 78 ++++++++++++++++++++++ packages/client/lib/sentinel/index.ts | 25 +++++-- 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/packages/client/lib/sentinel/index.spec.ts b/packages/client/lib/sentinel/index.spec.ts index e87a516694..d85ae4a854 100644 --- a/packages/client/lib/sentinel/index.spec.ts +++ b/packages/client/lib/sentinel/index.spec.ts @@ -979,6 +979,84 @@ describe('legacy tests', () => { assert.notEqual(sentinelNode!.port, newSentinel.port); }); + it('Should recover after full outage', async function () { + this.timeout(120000); + + const allSentinelPorts = frame.getAllSentinelsPort(); + const primarySentinelPort = allSentinelPorts[0]; + const extraSentinelPorts = allSentinelPorts.slice(1); + + // Keep only one sentinel reachable for the test. + await Promise.all(extraSentinelPorts.map(port => frame.stopSentinel(port.toString()))); + await setTimeout(1500); + + sentinel = RedisSentinel.create({ + name: config.sentinelName, + sentinelRootNodes: [{ host: '127.0.0.1', port: primarySentinelPort }], + RESP: 3, + scanInterval: 250 + }); + sentinel.setTracer(tracer); + sentinel.on("error", () => { }); + await sentinel.connect(); + + await sentinel.set('some-key', 'value'); + assert.equal(await sentinel.get('some-key'), 'value'); + + const allNodePorts = frame.getAllNodesPort(); + // Simulate full outage (all Redis nodes + the single configured sentinel). + await Promise.all(allNodePorts.map(port => frame.stopNode(port.toString()))); + await frame.stopSentinel(primarySentinelPort.toString()); + + const timedGet = async () => { + const getPromise = sentinel!.get('some-key'); + void getPromise.catch(() => undefined); // Promise.race may timeout first. + + return Promise.race([ + getPromise, + setTimeout(1000).then(() => { + throw new Error('1s Timeout'); + }) + ]); + }; + + const pollResults: Array<{ phase: 'outage' | 'recovery'; status: 'success' | 'timeout' | 'error' }> = []; + const pollLoop = async (phase: 'outage' | 'recovery', rounds: number) => { + for (let i = 0; i < rounds; i++) { + try { + await timedGet(); + pollResults.push({ phase, status: 'success' }); + } catch (err: any) { + pollResults.push({ + phase, + status: err?.message === '1s Timeout' ? 'timeout' : 'error' + }); + } + await setTimeout(3000); + } + }; + + // Match the issue's periodic GET calls while outage is active. + await pollLoop('outage', 3); + + // Bring only the single configured sentinel back; keep extra sentinels down. + await Promise.all(allNodePorts.map(port => frame.restartNode(port.toString()))); + await frame.restartSentinel(primarySentinelPort.toString()); + + // Continue periodic GET loop and assert recovery. + await pollLoop('recovery', 5); + + const sawOutageFailure = pollResults.some(result => + result.phase === 'outage' && result.status !== 'success' + ); + assert.equal(sawOutageFailure, true, 'expected GET failures during outage'); + + const sawRecoverySuccess = pollResults.some(result => + result.phase === 'recovery' && result.status === 'success' + ); + assert.equal(sawRecoverySuccess, true, 'expected periodic GET to recover after restart'); + }); + it('timer works, and updates sentinel list', async function () { this.timeout(60000); diff --git a/packages/client/lib/sentinel/index.ts b/packages/client/lib/sentinel/index.ts index b31e67dc48..51085f8d2a 100644 --- a/packages/client/lib/sentinel/index.ts +++ b/packages/client/lib/sentinel/index.ts @@ -652,6 +652,7 @@ class RedisSentinelInternal< #configEpoch: number = 0; + readonly #sentinelSeedNodes: Array; #sentinelRootNodes: Array; #sentinelClient?: RedisClientType; @@ -696,7 +697,8 @@ class RedisSentinelInternal< this.#name = options.name; this.#RESP = options.RESP; - this.#sentinelRootNodes = Array.from(options.sentinelRootNodes); + this.#sentinelSeedNodes = Array.from(options.sentinelRootNodes); + this.#sentinelRootNodes = Array.from(this.#sentinelSeedNodes); this.#maxCommandRediscovers = options.maxCommandRediscovers ?? 16; this.#masterPoolSize = options.masterPoolSize ?? 1; this.#replicaPoolSize = options.replicaPoolSize ?? 0; @@ -951,6 +953,19 @@ class RedisSentinelInternal< } } + #sentinelNodeListKey(nodes: Array) { + return nodes.map(node => `${node.host}:${node.port}`).sort().join('|'); + } + + #restoreSentinelRootNodesIfEmpty() { + if (this.#sentinelRootNodes.length !== 0) { + return; + } + + this.#trace("restoring sentinel roots from seed nodes"); + this.#sentinelRootNodes = Array.from(this.#sentinelSeedNodes); + } + #handleSentinelFailure(node: RedisNode) { const found = this.#sentinelRootNodes.findIndex( (rootNode) => rootNode.host === node.host && rootNode.port === node.port @@ -958,6 +973,7 @@ class RedisSentinelInternal< if (found !== -1) { this.#sentinelRootNodes.splice(found, 1); } + this.#restoreSentinelRootNodesIfEmpty(); this.#reset(); } @@ -1104,6 +1120,8 @@ class RedisSentinelInternal< // observe/analyze/transform remediation functions async observe() { + this.#restoreSentinelRootNodesIfEmpty(); + for (const node of this.#sentinelRootNodes) { let client: RedisClientType | undefined; try { @@ -1247,8 +1265,7 @@ class RedisSentinelInternal< }; this.emit('client-error', event); this.#handleSentinelFailure(node); - }) - .on('end', () => this.#handleSentinelFailure(node)); + }); this.#sentinelClient = client; this.#trace(`transform: adding sentinel client connect() to promise list`); @@ -1383,7 +1400,7 @@ class RedisSentinelInternal< } } - if (analyzed.sentinelList.length != this.#sentinelRootNodes.length) { + if (this.#sentinelNodeListKey(analyzed.sentinelList) !== this.#sentinelNodeListKey(this.#sentinelRootNodes)) { this.#sentinelRootNodes = analyzed.sentinelList; const event: RedisSentinelEvent = { type: "SENTINE_LIST_CHANGE",