@@ -543,11 +543,27 @@ async fn process_orders_keep_alive(
543543 ) ;
544544 } ,
545545 Ok ( None ) => {
546- // Peer responded but returned None for a diff request; classify as InvalidOrIncomplete.
547- // This is unexpected for SyncPubkeyOrderbookState.
548- had_response = true ;
546+ // Peer responded but returned None for a diff request; treat as Unavailable (no-ban).
547+ // Rationale: None means the responder currently has no pubkey_state for this pubkey.
548+ // This avoids misclassifying forwarders that lost state due to GC/restart/null-root cleanup races.
549+ //
550+ // TODO(sync-none):
551+ // Investigate and harden all code paths that can make a responder unable to serve Sync (Ok(None)/Err):
552+ // 1) Null‑root keep‑alive: today we clear per‑pair history immediately (remove_pubkey_pair_orders).
553+ // - Proposal: preserve history and let it expire via TRIE_STATE_HISTORY_TIMEOUT instead of dropping it.
554+ // - Verify we can still serve exact from→to deltas (incl. to null) during the TTL window.
555+ // 2) Cancel‑order races: KA → Cancel shortly after.
556+ // - Confirm cancel paths keep per‑pair history (they should) and do not trigger pubkey GC too soon.
557+ // - Consider refreshing liveness (pair_last_seen_local) on cancel and null‑root KA so a pubkey with empty pairs isn’t GC’d immediately.
558+ // 3) GC interaction: pubkey_state removed when all pairs are stale.
559+ // - Consider a pubkey‑level last_seen updated on any maker message (KA/Cancel/Update) and include it in the GC decision.
560+ // 4) Serving empty roots: if pubkey_state is missing and all expected_roots are null, serve FullTrie([]) instead of returning None.
561+ // 5) Instrumentation to confirm root causes:
562+ // - On Ok(None): log pubkey, whether pubkey_state exists, known pairs/roots for that pubkey, and expected_roots summary.
563+ // - Around null‑root KA and cancel: log pair, old/new root, history length after the operation, and liveness timestamps.
564+ // - Add counters: sync_none, sync_unavailable, from_history_hits, fulltrie_fallbacks, memdb_purge_errors.
549565 warn ! (
550- "SyncPubkeyOrderbookState request to {} returned None; treating as InvalidOrIncomplete " ,
566+ "SyncPubkeyOrderbookState request to {} returned None; treating as Unavailable (no-ban) " ,
551567 propagated_from
552568 ) ;
553569 } ,
0 commit comments