@@ -20,7 +20,7 @@ public static async Task<NodeWatcherResult> WatchThatNodeProcessingTheRequestIsS
2020 TimeSpan maxTimeBetweenHeartBeetsBeforeProcessingNodeIsAssumedToBeOffline ,
2121 CancellationToken watchCancellationToken )
2222 {
23- log = log . ForContext < NodeHeartBeatSender > ( ) ;
23+ log = log . ForContext < NodeHeartBeatWatcher > ( ) ;
2424 // Once the pending's CT has been cancelled we no longer care to keep observing
2525 await using var cts = new CancelOnDisposeCancellationToken ( watchCancellationToken , redisPending . PendingRequestCancellationToken ) ;
2626 try
@@ -74,49 +74,53 @@ static async Task<NodeWatcherResult> WatchForPulsesFromNode(
7474 log . Write ( EventType . Diagnostic , "Starting to watch for pulses from {0} node, request {1}, endpoint {2}" , watchingForPulsesFrom , requestActivityId , endpoint ) ;
7575
7676 DateTimeOffset ? lastHeartBeat = DateTimeOffset . Now ;
77-
77+
78+ await using var subscriptionCts = new CancelOnDisposeCancellationToken ( watchCancellationToken ) ;
79+ // Non-blocking subscription for heart beats.
80+ var subscriptionTask = Task . Run ( async ( ) => await halibutRedisTransport . SubscribeToNodeHeartBeatChannel (
81+ endpoint ,
82+ requestActivityId ,
83+ watchingForPulsesFrom ,
84+ async ( ) =>
85+ {
86+ await Task . CompletedTask ;
87+ lastHeartBeat = DateTimeOffset . Now ;
88+ log . Write ( EventType . Diagnostic , "Received heartbeat from {0} node, request {1}" , watchingForPulsesFrom , requestActivityId ) ;
89+ } , subscriptionCts . Token ) ) ;
7890 try
7991 {
80- // Currently we will wait until the CT is cancelled to get a subscription,
81- // instead it would be better if we either
82- // - waited for maxTimeBetweenHeartBeetsBeforeNodeIsAssumedToBeOffline to get a subscription.
83- // - SubscribeToNodeHeartBeatChannel returned immediately even if it doesn't have a subscription, and instead it works
84- // in the background to get one unless the CT is triggered, or it is disposed.
85- // https://whimsical.com/subscribetonodeheartbeatchannel-should-timeout-while-waiting-to--NFWwmPkE7pTBdm2PRUC8Tf
86- await using var subscription = await halibutRedisTransport . SubscribeToNodeHeartBeatChannel (
87- endpoint ,
88- requestActivityId ,
89- watchingForPulsesFrom ,
90- async ( ) =>
91- {
92- await Task . CompletedTask ;
93- lastHeartBeat = DateTimeOffset . Now ;
94- log . Write ( EventType . Diagnostic , "Received heartbeat from {0} node, request {1}" , watchingForPulsesFrom , requestActivityId ) ;
95- } , watchCancellationToken ) ;
96-
97- while ( ! watchCancellationToken . IsCancellationRequested )
92+ try
9893 {
99- var timeSinceLastHeartBeat = DateTimeOffset . Now - lastHeartBeat . Value ;
100- if ( timeSinceLastHeartBeat > maxTimeBetweenHeartBeetsBeforeNodeIsAssumedToBeOffline )
94+
95+ while ( ! watchCancellationToken . IsCancellationRequested )
10196 {
102- log . Write ( EventType . Diagnostic , "{0} node appears disconnected, request {1}, last heartbeat was {2} seconds ago" , watchingForPulsesFrom , requestActivityId , timeSinceLastHeartBeat . TotalSeconds ) ;
103- return NodeWatcherResult . NodeMayHaveDisconnected ;
97+ var timeSinceLastHeartBeat = DateTimeOffset . Now - lastHeartBeat . Value ;
98+ if ( timeSinceLastHeartBeat > maxTimeBetweenHeartBeetsBeforeNodeIsAssumedToBeOffline )
99+ {
100+ log . Write ( EventType . Diagnostic , "{0} node appears disconnected, request {1}, last heartbeat was {2} seconds ago" , watchingForPulsesFrom , requestActivityId , timeSinceLastHeartBeat . TotalSeconds ) ;
101+ return NodeWatcherResult . NodeMayHaveDisconnected ;
102+ }
103+
104+ var timeToWait = TimeSpanHelper . Min (
105+ TimeSpan . FromSeconds ( 30 ) ,
106+ maxTimeBetweenHeartBeetsBeforeNodeIsAssumedToBeOffline - timeSinceLastHeartBeat + TimeSpan . FromSeconds ( 1 ) ) ;
107+
108+ await Try . IgnoringError ( async ( ) => await Task . Delay ( timeToWait , watchCancellationToken ) ) ;
104109 }
105110
106- var timeToWait = TimeSpanHelper . Min (
107- TimeSpan . FromSeconds ( 30 ) ,
108- maxTimeBetweenHeartBeetsBeforeNodeIsAssumedToBeOffline - timeSinceLastHeartBeat + TimeSpan . FromSeconds ( 1 ) ) ;
109-
110- await Try . IgnoringError ( async ( ) => await Task . Delay ( timeToWait , watchCancellationToken ) ) ;
111+ log . Write ( EventType . Diagnostic , "{0} node watcher cancelled, request {1}" , watchingForPulsesFrom , requestActivityId ) ;
112+ return NodeWatcherResult . NoDisconnectSeen ;
113+ }
114+ catch ( Exception ex ) when ( ! watchCancellationToken . IsCancellationRequested )
115+ {
116+ log . WriteException ( EventType . Diagnostic , "Error while watching {0} node, request {1}" , ex , watchingForPulsesFrom , requestActivityId ) ;
117+ throw ;
111118 }
112-
113- log . Write ( EventType . Diagnostic , "{0} node watcher cancelled, request {1}" , watchingForPulsesFrom , requestActivityId ) ;
114- return NodeWatcherResult . NoDisconnectSeen ;
115119 }
116- catch ( Exception ex ) when ( ! watchCancellationToken . IsCancellationRequested )
120+ finally
117121 {
118- log . WriteException ( EventType . Diagnostic , "Error while watching {0} node, request {1}" , ex , watchingForPulsesFrom , requestActivityId ) ;
119- throw ;
122+ await Try . IgnoringError ( async ( ) => await subscriptionCts . CancelAsync ( ) ) ;
123+ await Try . IgnoringError ( async ( ) => await ( await subscriptionTask ) . DisposeAsync ( ) ) ;
120124 }
121125 }
122126
0 commit comments