From 49fb6c36ce973262eb1b7340e7ff4c24670dff0f Mon Sep 17 00:00:00 2001 From: Dasooul03 Date: Sun, 7 Jun 2026 11:14:56 +0800 Subject: [PATCH] fix(trace): LLM_TTFT parent node pointing to wrong span + missing ERROR/CANCELLED status colors - Add detach() to StreamCancellationHandle interface for span lifecycle control - Defer span detach in AbstractOpenAIStyleChatClient from finally block to returned handle - Call handle.detach() after awaitFirstPacket in RoutingLLMService, ensuring llm-first-packet (LLM_TTFT) node correctly nests under the provider stream node - Use try-finally per candidate to guarantee detach on both success and failover paths - Frontend: add error/cancelled mappings to STATUS_COLORS, statusLabel, and statusBadgeVariant - Add fallback ?? STATUS_COLORS.default in getStatusColors to prevent crash on unmapped statuses Co-Authored-By: Claude Opus 4.6 --- .../pages/admin/traces/RagTraceDetailPage.tsx | 6 +++-- frontend/src/pages/admin/traces/traceUtils.ts | 5 +++- .../chat/AbstractOpenAIStyleChatClient.java | 26 ++++++++++++++----- .../ragent/infra/chat/RoutingLLMService.java | 23 ++++++++++------ .../infra/chat/StreamCancellationHandle.java | 9 +++++++ 5 files changed, 51 insertions(+), 18 deletions(-) diff --git a/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx b/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx index 1450b1622..f2462e691 100644 --- a/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx +++ b/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx @@ -59,18 +59,20 @@ const copyToClipboard = (text: string, label: string) => { // ============ 状态颜色 ============ -type StatusType = "success" | "failed" | "running" | "default"; +type StatusType = "success" | "failed" | "running" | "error" | "cancelled" | "default"; const STATUS_COLORS: Record = { success: { dot: "bg-emerald-500", bar: "bg-emerald-400" }, failed: { dot: "bg-red-500", bar: "bg-red-400" }, running: { dot: "bg-amber-500", bar: "bg-amber-400" }, + error: { dot: 
"bg-red-500", bar: "bg-red-400" }, + cancelled: { dot: "bg-slate-400", bar: "bg-slate-300" }, default: { dot: "bg-slate-300", bar: "bg-slate-300" } }; const getStatusColors = (status?: string | null) => { const normalized = normalizeStatus(status) as StatusType | null; - return STATUS_COLORS[normalized || "default"]; + return STATUS_COLORS[normalized ?? "default"] ?? STATUS_COLORS.default; }; // ============ 子组件 ============ diff --git a/frontend/src/pages/admin/traces/traceUtils.ts b/frontend/src/pages/admin/traces/traceUtils.ts index 14786b399..be2c153ab 100644 --- a/frontend/src/pages/admin/traces/traceUtils.ts +++ b/frontend/src/pages/admin/traces/traceUtils.ts @@ -44,14 +44,17 @@ export const statusLabel = (status?: string | null): string => { if (normalized === "failed") return "FAILED"; if (normalized === "running") return "RUNNING"; if (normalized === "timeout") return "TIMEOUT"; + if (normalized === "error") return "ERROR"; + if (normalized === "cancelled") return "CANCELLED"; return normalized.toUpperCase(); }; export const statusBadgeVariant = (status?: string | null): BadgeVariant => { const normalized = normalizeStatus(status); - if (normalized === "failed" || normalized === "timeout") return "destructive"; + if (normalized === "failed" || normalized === "error" || normalized === "timeout") return "destructive"; if (normalized === "running") return "secondary"; if (normalized === "success") return "default"; + if (normalized === "cancelled") return "outline"; return "outline"; }; diff --git a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java index 22945a868..4c1dce224 100644 --- a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java +++ b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java @@ -154,16 +154,28 @@ protected 
StreamCancellationHandle doStreamChat(ChatRequest request, StreamCallb wrappedCallback, cancelled -> doStream(call, wrappedCallback, cancelled, reasoningEnabled) ); - return () -> { - try { - inner.cancel(); - } finally { - wrappedCallback.onCancel(); + // 不在此处 detach span,而是将 detach 封装到返回的 handle 中, + // 由 RoutingLLMService 在 awaitFirstPacket 之后调用,确保 llm-first-packet + // 节点能正确归属到 provider 流式节点之下 + return new StreamCancellationHandle() { + @Override + public void cancel() { + try { + inner.cancel(); + } finally { + wrappedCallback.onCancel(); + } + } + + @Override + public void detach() { + span.detach(); } }; - } finally { - // 同步部分结束:把节点从当前线程的 NODE_STACK 弹出,避免污染兄弟节点的父节点链 + } catch (Exception e) { + // 异常路径:确保 span 在退出前从 NODE_STACK 弹出,避免污染后续调用 span.detach(); + throw e; } } diff --git a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java index aa1ae2299..87bffd729 100644 --- a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java +++ b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java @@ -140,16 +140,23 @@ public StreamCancellationHandle streamChat(ChatRequest request, StreamCallback c ProbeStreamBridge.ProbeResult result = awaitFirstPacket(bridge, handle, callback); - if (result.isSuccess()) { - healthStore.markSuccess(target.id()); - return handle; - } + // 无论成功或失败,都必须 detach span,确保 provider 节点从 NODE_STACK 弹出; + // 成功时 detach 在 TTFT 记录之后,保证 llm-first-packet 正确归属到 provider 节点之下; + // 失败时 detach 避免影响下一个 provider 的父节点链 + try { + if (result.isSuccess()) { + healthStore.markSuccess(target.id()); + return handle; + } - // 失败处理 - healthStore.markFailure(target.id()); - handle.cancel(); + // 失败处理 + healthStore.markFailure(target.id()); + handle.cancel(); - lastError = buildLastErrorAndLog(result, target, label); + lastError = buildLastErrorAndLog(result, target, label); + } finally { + handle.detach(); 
+ } } // 所有模型都失败了,通知客户端错误 diff --git a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java index 809830775..7ad6ccfb1 100644 --- a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java +++ b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java @@ -45,4 +45,13 @@ public interface StreamCancellationHandle { * - 调用后应该不会再继续产生 onContent() 回调 */ void cancel(); + + /** + * 释放调用线程上的 trace 资源(从 NODE_STACK 弹出节点) + * <p>
+ * 默认实现为空,向后兼容不需要 trace 清理的实现。 + * 需要 trace 支持的实现应覆写此方法,确保 provider 节点的 span 在合适时机 + * (如首包探测完成后)从线程栈中弹出 + */ + default void detach() {} }