From b425402e5c1e52bca3cf4fa162a59b37718c4800 Mon Sep 17 00:00:00 2001
From: Alex
Date: Thu, 4 Jun 2026 04:52:12 +0200
Subject: [PATCH] fix: recover the streamed reply after Android background/resume

On Android the OS suspends the isolate when the app is backgrounded and
tears down the TCP connection, but socket.io never processes the
disconnect. On resume the socket is often a zombie (connected == true but
dead): nothing fires 'disconnect', nothing auto-reconnects, and the
connectivity-edge force-reconnect never triggers because the network never
actually went offline. Since OpenWebUI streams completions out-of-band over
the socket and does not replay missed events, the in-flight reply is lost
until a full app restart.

Two parts:

- socket_service: only a real background->foreground transition
  (paused/hidden/detached -> resumed) forces a fresh socket and emits a
  reconnect so streaming recovery polls the server for the missed
  completion. The transient `inactive` state (notification shade, app
  switcher, permission/system dialog) is treated as foreground so it no
  longer tears down a healthy socket on every shade peek. A re-entrancy
  guard prevents overlapping resume reconnects from orphaning sockets.

- app_startup_providers: on resume, re-fetch the open conversation from the
  server (the source of truth) so a reply that finished in the background
  appears immediately, with a stale-write guard so a slow fetch cannot
  overwrite a conversation the user switched to in the meantime.

Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/core/providers/app_startup_providers.dart | 20 ++++- lib/core/services/socket_service.dart | 76 ++++++++++++++++++- 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/lib/core/providers/app_startup_providers.dart b/lib/core/providers/app_startup_providers.dart index c0e538e86..98f3dbda5 100644 --- a/lib/core/providers/app_startup_providers.dart +++ b/lib/core/providers/app_startup_providers.dart @@ -1115,10 +1115,28 @@ class _ForegroundRefreshObserver extends WidgetsBindingObserver { void didChangeAppLifecycleState(AppLifecycleState state) { if (state == AppLifecycleState.resumed) { // Schedule to avoid side-effects during build frames - Future.microtask(() { + Future.microtask(() async { try { refreshConversationsCache(_ref); _resetConversationWarmup(_ref); + // Re-fetch the OPEN conversation from the server on resume so a reply + // that finished while the app was backgrounded appears immediately — + // independent of the socket (which is often dead/zombie after an Android + // background; OpenWebUI does not replay missed events). The server is the + // source of truth. Without this the user had to pull-to-refresh or restart + // the app to see the reply. + final api = _ref.read(apiServiceProvider); + final active = _ref.read(activeConversationProvider); + if (api != null && active != null) { + final requestedId = active.id; + final full = await api.getConversation(requestedId); + // Stale-write guard: only apply if the user hasn't switched to a + // different conversation during the network round-trip. + final stillActive = _ref.read(activeConversationProvider); + if (stillActive != null && stillActive.id == requestedId) { + _ref.read(activeConversationProvider.notifier).set(full); + } + } } catch (_) {} // Resume already kicked off a forced conversations refresh above; only // finish the warmup work that should run alongside it. 
diff --git a/lib/core/services/socket_service.dart b/lib/core/services/socket_service.dart index 419fff82b..5a53528ce 100644 --- a/lib/core/services/socket_service.dart +++ b/lib/core/services/socket_service.dart @@ -28,6 +28,17 @@ class SocketService with WidgetsBindingObserver { String? _authToken; bool _isConnecting = false; bool _isAppForeground = true; + // Tracks a REAL backgrounding (paused/hidden/detached), distinct from the + // transient `inactive` focus loss, so resume logic only fires on a genuine + // background→foreground return. + bool _wasBackgrounded = false; + // Re-entrancy guard so overlapping resume bounces cannot spawn concurrent + // forced reconnects (which would orphan socket.io engines). + bool _resumeReconnectInFlight = false; + // Set while a resume-triggered forced reconnect is pending, so the reconnect + // signal is emitted from _handleConnect (after the session id is available) + // rather than before the handshake completes. + bool _signalReconnectOnConnect = false; Timer? _heartbeatTimer; bool _forcePollingFallback = false; @@ -231,7 +242,60 @@ class SocketService with WidgetsBindingObserver { @override void didChangeAppLifecycleState(AppLifecycleState state) { - _isAppForeground = state == AppLifecycleState.resumed; + // Only a REAL background→foreground transition should force-reconnect. The + // `inactive` state is a transient focus loss (notification shade, app + // switcher, permission/system dialog, PiP) — NOT a backgrounding — and must + // not tear down a healthy socket, or it churns the connection on every shade + // peek (and loops on some Samsung permission flows). Treat only + // paused/hidden/detached as background; keep `inactive` foreground for both + // reconnect and delivery semantics. 
+ switch (state) { + case AppLifecycleState.paused: + case AppLifecycleState.hidden: + case AppLifecycleState.detached: + _isAppForeground = false; + _wasBackgrounded = true; + case AppLifecycleState.inactive: + break; // transient focus loss; keep foreground semantics + case AppLifecycleState.resumed: + _isAppForeground = true; + if (_wasBackgrounded) { + _wasBackgrounded = false; + // While backgrounded the OS suspends this isolate and tears down the + // TCP connection; socket.io never sees the disconnect, so the socket + // is a ZOMBIE on resume (connected==true but dead) and the server + // already emitted the streamed completion to it (OpenWebUI does not + // replay). Force a fresh socket + signal reconnect so streaming_helper + // polls the server for the missed completion. + unawaited(_reconnectAfterResume()); + } + } + } + + /// Force-recreates the socket after returning from background and notifies + /// reconnect listeners so any in-flight stream re-syncs the missed completion. + /// Re-entrancy-guarded so overlapping resume bounces cannot spawn multiple + /// concurrent forced connects. + Future _reconnectAfterResume() async { + if (_resumeReconnectInFlight) return; + _resumeReconnectInFlight = true; + // A forced fresh connect fires 'connect', not 'reconnect', so onReconnect + // listeners (streaming_helper's missed-completion recovery) would not run on + // their own. Have _handleConnect emit the signal once the handshake has + // completed and the new session id is available; emitting here (right after + // connect() returns) would fire before the 'connect' event while sessionId is + // still null, so the recovery would skip the session-id update. + _signalReconnectOnConnect = true; + try { + // force: true disposes the (possibly zombie) socket and opens a fresh one. + await connect(force: true); + } catch (_) { + // Connection setup failed outright; clear the pending signal so it cannot + // later fire on an unrelated connect. 
+ _signalReconnectOnConnect = false; + } finally { + _resumeReconnectInFlight = false; + } } String? get sessionId => _socket?.id; @@ -631,6 +695,16 @@ class SocketService with WidgetsBindingObserver { // Emit health update _emitHealthUpdate(); + + // If this connect was triggered by a background→foreground resume, signal + // recovery now that the session id is available, so listeners refresh their + // handler session ids AND poll the server for a missed completion. + if (_signalReconnectOnConnect) { + _signalReconnectOnConnect = false; + if (!_reconnectController.isClosed) { + _reconnectController.add(null); + } + } } void _handleReconnectAttempt(dynamic attempt) {