From a2da80df29d65d743d9b0470e535e075cbcdd00a Mon Sep 17 00:00:00 2001 From: GhostC <1276537536@qq.com> Date: Sun, 24 May 2026 22:26:22 +0800 Subject: [PATCH] feat(web): add batch upload mode with one-click deep review Support multi-package folder/zip uploads, per-skill LLM state, batch orchestration with progress and retry, cross-skill aggregate stats, and CSV export. Bump Next/postcss deps and fix AbortError on cancelled reviews. Co-authored-by: Cursor --- web/app/api/sample/[id]/route.ts | 2 + web/app/page.tsx | 281 +++++++----- web/components/BatchReviewPanel.tsx | 544 +++++++++++++++++++++++ web/components/BatchStatsPanel.tsx | 187 ++++++++ web/components/Uploader.tsx | 169 +++++-- web/lib/scoring/batch-csv.ts | 158 +++++++ web/lib/scoring/batch-stats.ts | 221 +++++++++ web/lib/scoring/run-skill-deep-review.ts | 115 +++++ web/lib/spec/loader.ts | 278 ++++++++++-- web/package-lock.json | 131 +++--- web/package.json | 7 +- 11 files changed, 1826 insertions(+), 267 deletions(-) create mode 100644 web/components/BatchReviewPanel.tsx create mode 100644 web/components/BatchStatsPanel.tsx create mode 100644 web/lib/scoring/batch-csv.ts create mode 100644 web/lib/scoring/batch-stats.ts create mode 100644 web/lib/scoring/run-skill-deep-review.ts diff --git a/web/app/api/sample/[id]/route.ts b/web/app/api/sample/[id]/route.ts index bc85732..b06d985 100644 --- a/web/app/api/sample/[id]/route.ts +++ b/web/app/api/sample/[id]/route.ts @@ -43,6 +43,7 @@ interface SamplePayload { files: SampleFile[]; entryFile: string; rootName: string; + packagePath: string; } async function walk(dir: string, base: string): Promise { @@ -86,6 +87,7 @@ export async function GET(_req: NextRequest, { params }: { params: Promise<{ id: files, entryFile: "SKILL.md", rootName: id, + packagePath: "", }; return NextResponse.json(payload, { headers: { "Cache-Control": "public, max-age=300" }, diff --git a/web/app/page.tsx b/web/app/page.tsx index 411ab0b..d67ad43 100644 --- a/web/app/page.tsx +++ b/web/app/page.tsx @@ -1,6 +1,10 @@ "use client"; import { useEffect, useMemo, useState } from "react"; -import Uploader, { type SkillTypeChoice, type SampleEntry } from "@/components/Uploader"; +import Uploader, { type SkillTypeChoice, type SampleEntry, type UploadMode } from "@/components/Uploader"; +import BatchReviewPanel, { + DEFAULT_SKILL_RUN_STATE, + type SkillRunState, +} from "@/components/BatchReviewPanel"; import ScoreRadar from "@/components/ScoreRadar"; import PillarSection from "@/components/PillarSection"; import SuggestionCard from "@/components/SuggestionCard"; @@ -9,14 +13,12 @@ import SubSkillsCard from "@/components/SubSkillsCard"; import { RUBRIC } from "@/lib/rubric/rubric"; import { parseSkill } from "@/lib/spec/parser"; import { aggregateScore, type GeneralWeightOverrides } from "@/lib/scoring/aggregate"; -import { runLlmReview } from "@/lib/scoring/llm-client"; -import { fetchMarketSurvey } from "@/lib/market/client"; -import { isMarketSurvey } from "@/lib/market/types"; +import { runSkillDeepReview } from "@/lib/scoring/run-skill-deep-review"; import { MESSAGES, type Lang } from "@/lib/i18n/messages"; import type { CheckResult, PillarResult, ScoreReport, ValueType } from "@/lib/rubric/types"; import type { LlmReviewResponse } from "@/lib/llm/types"; import type { MarketSurveyResult } from "@/lib/market/types"; -import type { LoadedSkill } from "@/lib/spec/loader"; +import { skillInstanceKey, type LoadedSkill } from "@/lib/spec/loader"; import { FINANCE_SCENARIOS, getFinancePillarsForScenario, type FinanceScenarioId } from "@/lib/domain/finance"; import { buildFinanceExpertReport, @@ -134,131 +136,152 @@ const FINANCE_SAMPLE_BY_SCENARIO: Record(null); + const [batchSkills, setBatchSkills] = useState(null); + const [activeBatchIndex, setActiveBatchIndex] = useState(0); + const [runStates, setRunStates] = useState>({}); const [lang, setLang] = useState("zh"); const [pdfBusy, setPdfBusy] = useState(false); const [weights, setWeights] = useState(() => defaultGeneralWeights()); - /** 用户主动点"启动完整评测"才会变 true(v3 分两层流程) */ - const [llmEnabled, setLlmEnabled] = useState(false); - const [llmResults, setLlmResults] = useState | null>(null); - const [llmMeta, setLlmMeta] = useState<{ valueType?: ValueType; reason?: string } | null>(null); - const [llmState, setLlmState] = useState({ status: "idle" }); - const [retryNonce, setRetryNonce] = useState(0); - const [marketSurvey, setMarketSurvey] = useState(null); const [reviewMode, setReviewMode] = useState("general"); const [financeScenario, setFinanceScenario] = useState("stock_trading"); const [financeWeights, setFinanceWeights] = useState({}); const [activeReportTab, setActiveReportTab] = useState<"finance" | "general">("finance"); const [skillTypeChoice, setSkillTypeChoice] = useState("auto"); + const [uploadMode, setUploadMode] = useState("single"); + const [batchOrchestratorRunning, setBatchOrchestratorRunning] = useState(false); + + const activeLoaded = + uploadMode === "batch" && batchSkills ? batchSkills[activeBatchIndex] : loaded; + const activeSkillKey = activeLoaded ? skillInstanceKey(activeLoaded) : null; + const activeState = activeSkillKey ? runStates[activeSkillKey] ?? DEFAULT_SKILL_RUN_STATE : DEFAULT_SKILL_RUN_STATE; + + const setActiveState = (updater: (current: SkillRunState) => SkillRunState) => { + if (!activeSkillKey) return; + setRunStates((prev) => ({ + ...prev, + [activeSkillKey]: updater(prev[activeSkillKey] ?? DEFAULT_SKILL_RUN_STATE), + })); + }; const skill = useMemo(() => { - if (!loaded) return null; + if (!activeLoaded) return null; return parseSkill({ - rawText: loaded.rawText, - files: loaded.files, - entryFile: loaded.entryFile, + rawText: activeLoaded.rawText, + files: activeLoaded.files, + entryFile: activeLoaded.entryFile, }); - }, [loaded]); + }, [activeLoaded]); - // 切换 skill 时重置所有 LLM / market 状态 useEffect(() => { - setLlmResults(null); - setLlmMeta(null); - setLlmState({ status: "idle" }); - setLlmEnabled(false); - setMarketSurvey(null); - }, [loaded]); + setFinanceWeights({}); + }, [financeScenario]); useEffect(() => { - if (!llmEnabled) { - setLlmResults(null); - setLlmMeta(null); - setLlmState({ status: "idle" }); - setMarketSurvey(null); + if (!activeSkillKey) return; + const state = runStates[activeSkillKey] ?? DEFAULT_SKILL_RUN_STATE; + if (!state.llmEnabled && (state.llmState.status !== "idle" || state.llmResults || state.llmMeta || state.marketSurvey)) { + setActiveState((prev) => ({ + ...prev, + llmState: { status: "idle" }, + llmResults: null, + llmMeta: null, + marketSurvey: null, + })); } - }, [llmEnabled]); + }, [activeSkillKey, runStates]); useEffect(() => { - setFinanceWeights({}); - }, [financeScenario]); - - useEffect(() => { - if (!skill || !llmEnabled) return; + if (batchOrchestratorRunning) return; + if (!skill || !activeSkillKey || !activeState.llmEnabled) return; let cancelled = false; const ctl = new AbortController(); - setLlmState({ status: "running" }); - setMarketSurvey(null); + setActiveState((prev) => ({ ...prev, llmState: { status: "running" }, marketSurvey: null })); (async () => { try { - // 1) 先做 GitHub 市场调研(失败不阻塞,软降级为 null) - const survey = await fetchMarketSurvey( - { - name: typeof skill.meta.name === "string" ? skill.meta.name : undefined, - description: typeof skill.meta.description === "string" ? skill.meta.description : undefined, - tags: Array.isArray(skill.meta.tags) ? (skill.meta.tags as string[]) : undefined, - body: skill.body, - }, - { signal: ctl.signal }, - ).catch((e) => { - if ((e as Error).name === "AbortError") throw e; - return { error: "network" as const, detail: (e as Error).message }; - }); - if (cancelled) return; - setMarketSurvey(survey); - - // 2) 再调 LLM,把 survey(成功时)塞进去 - const surveyForLlm = isMarketSurvey(survey) && survey.repos.length > 0 ? survey : undefined; - const { response, results } = await runLlmReview(skill, RUBRIC, { + const result = await runSkillDeepReview(skill, RUBRIC, { signal: ctl.signal, - marketSurvey: surveyForLlm, lang, - expertReview: reviewMode === "finance" - ? { domain: "finance", scenario: financeScenario } - : undefined, + reviewMode, + financeScenario, skillType: skillTypeChoice, }); - if (cancelled) return; - setLlmResults(results); - setLlmMeta({ - valueType: response.meta?.value_type, - reason: response.meta?.value_type_reason, - }); - setLlmState({ status: "ok", response }); - } catch (e) { - if (cancelled) return; - if ((e as Error).name === "AbortError") return; - const msg = (e as Error).message || "unknown"; - const m = msg.match(/^(\d+)::([a-z_]+)::(.*)$/s); - const httpStatus = m ? Number(m[1]) : undefined; - const reason = (m?.[2] as LlmErrorReason | undefined) ?? - (httpStatus === 429 ? "rate_limited" : "unknown"); - const detail = m?.[3] ?? msg; - setLlmState({ status: "error", message: detail, reason, httpStatus }); + if (cancelled || "aborted" in result) { + setActiveState((prev) => + prev.llmState.status === "running" + ? { ...prev, llmState: { status: "idle" } } + : prev, + ); + return; + } + if (result.ok) { + setActiveState((prev) => ({ + ...prev, + llmResults: result.llmResults, + llmMeta: result.llmMeta, + llmState: { status: "ok", response: result.response }, + marketSurvey: result.marketSurvey, + llmEnabled: true, + })); + } else { + setActiveState((prev) => ({ + ...prev, + llmState: { + status: "error", + message: result.message, + reason: result.reason, + httpStatus: result.httpStatus, + }, + marketSurvey: result.marketSurvey, + llmEnabled: true, + })); + } + } catch { + if (!cancelled) return; + setActiveState((prev) => + prev.llmState.status === "running" + ? { ...prev, llmState: { status: "idle" } } + : prev, + ); } })(); return () => { cancelled = true; ctl.abort(); }; - }, [skill, llmEnabled, retryNonce, lang, reviewMode, financeScenario, skillTypeChoice]); + }, [ + skill, + activeSkillKey, + activeState.llmEnabled, + activeState.retryNonce, + lang, + reviewMode, + financeScenario, + skillTypeChoice, + batchOrchestratorRunning, + ]); const report: ScoreReport | null = useMemo(() => { if (!skill) return null; const base = aggregateScore(skill, RUBRIC, { weightOverrides: weights, - llmResults: llmResults ?? undefined, + llmResults: activeState.llmResults ?? undefined, language: lang, skillType: skillTypeChoice, }); return { ...base, - valueType: llmMeta?.valueType, - valueTypeReason: llmMeta?.reason, + valueType: activeState.llmMeta?.valueType, + valueTypeReason: activeState.llmMeta?.reason, }; - }, [skill, weights, llmResults, llmMeta, lang, skillTypeChoice]); + }, [skill, weights, activeState.llmResults, activeState.llmMeta, lang, skillTypeChoice]); const t = MESSAGES[lang]; - const llmIdle = llmState.status === "idle" || llmState.status === "error"; - const showFullEvalCta = report && !report.llmComplete && llmState.status !== "running"; + const llmIdle = activeState.llmState.status === "idle" || activeState.llmState.status === "error"; + const showFullEvalCta = + report && + !report.llmComplete && + activeState.llmState.status !== "running" && + !(uploadMode === "batch" && batchSkills); const financeSample = FINANCE_SAMPLE_BY_SCENARIO[financeScenario]; const sampleId = reviewMode === "finance" ? financeSample.id : "pr-reviewer"; const sampleLabel = reviewMode === "finance" @@ -292,9 +315,9 @@ export default function HomePage() { [lang], ); const financeExpertReport = useMemo(() => { - if (reviewMode !== "finance" || !llmResults) return null; - return buildFinanceExpertReport(llmResults, financeScenario, financeWeights); - }, [reviewMode, llmResults, financeScenario, financeWeights]); + if (reviewMode !== "finance" || !activeState.llmResults) return null; + return buildFinanceExpertReport(activeState.llmResults, financeScenario, financeWeights); + }, [reviewMode, activeState.llmResults, financeScenario, financeWeights]); return (
@@ -327,18 +350,60 @@ export default function HomePage() { /> { + if (Array.isArray(payload)) { + if (uploadMode === "batch") { + setLoaded(null); + setBatchSkills(payload); + setActiveBatchIndex(0); + setRunStates({}); + } else { + setBatchSkills(null); + setLoaded(payload[0]!); + setRunStates({}); + } + } else { + setBatchSkills(null); + setLoaded(payload); + setRunStates({}); + } + }} sampleId={sampleId} sampleLabel={sampleLabel} samples={reviewMode === "finance" ? undefined : generalSamples} skillTypeChoice={skillTypeChoice} onSkillTypeChange={setSkillTypeChoice} + uploadMode={uploadMode} + onUploadModeChange={(mode) => { + setUploadMode(mode); + if (mode === "single" && batchSkills) { + setLoaded(batchSkills[activeBatchIndex] ?? batchSkills[0]!); + setBatchSkills(null); + setActiveBatchIndex(0); + } + }} /> )} {report && skill && (
+ {uploadMode === "batch" && batchSkills && ( + + )} {/* ===== Dashboard header: 通用模式只展示通用;垂类模式展示通用基线 + 当前垂类 ===== */}
{formatSpec(report.spec, lang)}
{t.generatedAt}
{new Date(report.generatedAt).toLocaleString()}
- {llmState.status === "ok" && ( + {activeState.llmState.status === "ok" && ( <>
{t.llmProvider}
- {llmState.response.provider} - {llmState.response.cached && ({t.llmCached})} + {activeState.llmState.response.provider} + {activeState.llmState.response.cached && ({t.llmCached})}
)} @@ -404,7 +469,11 @@ export default function HomePage() { )}
)} - {llmState.status === "running" && ( + {activeState.llmState.status === "running" && (
@@ -476,22 +545,22 @@ export default function HomePage() {
)} - {llmState.status === "ok" && ( + {activeState.llmState.status === "ok" && (
{t.llmDone} - {llmState.response.provider === "mock" && ( + {activeState.llmState.response.provider === "mock" && ( {t.llmMockBanner} )}
)} - {llmState.status === "error" && ( + {activeState.llmState.status === "error" && (
-
{t.llmErrorTitle[llmState.reason] ?? t.llmFailed}
-
{t.llmErrorHint[llmState.reason] ?? ""}
+
{t.llmErrorTitle[activeState.llmState.reason] ?? t.llmFailed}
+
{t.llmErrorHint[activeState.llmState.reason] ?? ""}
@@ -544,7 +613,7 @@ export default function HomePage() { report={report} financeExpertReport={financeExpertReport} lang={lang} - filename={loaded?.rootName || "report"} + filename={activeLoaded?.rootName || "report"} pdfBusy={pdfBusy} setPdfBusy={setPdfBusy} /> @@ -563,12 +632,12 @@ export default function HomePage() { onWeightsChange={setWeights} onResetWeights={() => setWeights(defaultGeneralWeights())} extra={ - p.id === "market" && (llmState.status === "running" || marketSurvey) + p.id === "market" && (activeState.llmState.status === "running" || activeState.marketSurvey) ? ( ) : undefined @@ -582,7 +651,7 @@ export default function HomePage() { report={report} financeExpertReport={financeExpertReport} lang={lang} - filename={loaded?.rootName || "report"} + filename={activeLoaded?.rootName || "report"} pdfBusy={pdfBusy} setPdfBusy={setPdfBusy} /> diff --git a/web/components/BatchReviewPanel.tsx b/web/components/BatchReviewPanel.tsx new file mode 100644 index 0000000..e97376a --- /dev/null +++ b/web/components/BatchReviewPanel.tsx @@ -0,0 +1,544 @@ +"use client"; + +import { useCallback, useMemo, useRef, useState } from "react"; +import type { Lang } from "@/lib/i18n/messages"; +import type { SkillTypeChoice } from "@/components/Uploader"; +import BatchStatsPanel from "@/components/BatchStatsPanel"; +import { RUBRIC } from "@/lib/rubric/rubric"; +import { parseSkill } from "@/lib/spec/parser"; +import { aggregateScore, type GeneralWeightOverrides } from "@/lib/scoring/aggregate"; +import { + type BatchAggregateStats, + type BatchSkillScoreEntry, + computeBatchAggregateStats, + dimensionPercent, + pillarPercent, + roundStat, +} from "@/lib/scoring/batch-stats"; +import { downloadBatchScoresCsv } from "@/lib/scoring/batch-csv"; +import { runSkillDeepReview } from "@/lib/scoring/run-skill-deep-review"; +import { skillInstanceKey, type LoadedSkill } from "@/lib/spec/loader"; +import type { CheckResult, ScoreReport, ValueType } from "@/lib/rubric/types"; +import type { LlmReviewResponse } from "@/lib/llm/types"; +import type { MarketSurveyResult } from "@/lib/market/types"; +import type { FinanceScenarioId } from "@/lib/domain/finance"; +import { buildFinanceExpertReport } from "@/lib/domain/finance-score"; +import type { FinanceWeightOverrides } from "@/lib/domain/finance-score"; + +type ReviewMode = "general" | "finance"; + +type LlmState = + | { status: "idle" } + | { status: "running" } + | { status: "ok"; response: LlmReviewResponse } + | { status: "error"; message: string; reason: string; httpStatus?: number }; + +export type SkillRunState = { + llmResults: Map | null; + llmMeta: { valueType?: ValueType; reason?: string } | null; + llmState: LlmState; + llmEnabled: boolean; + retryNonce: number; + marketSurvey: MarketSurveyResult | null; +}; + +export const DEFAULT_SKILL_RUN_STATE: SkillRunState = { + llmResults: null, + llmMeta: null, + llmState: { status: "idle" }, + llmEnabled: false, + retryNonce: 0, + marketSurvey: null, +}; + +interface Props { + lang: Lang; + batchSkills: LoadedSkill[]; + runStates: Record; + setRunStates: React.Dispatch>>; + activeBatchIndex: number; + setActiveBatchIndex: (index: number) => void; + reviewMode: ReviewMode; + financeScenario: FinanceScenarioId; + financeWeights: FinanceWeightOverrides; + weights: GeneralWeightOverrides; + skillTypeChoice: SkillTypeChoice; + onOrchestratorRunningChange?: (running: boolean) => void; +} + +function skillLabel(item: LoadedSkill): string { + return item.packagePath ? `${item.rootName} (${item.packagePath})` : item.rootName; +} + +function buildEntry( + item: LoadedSkill, + state: SkillRunState, + opts: { + lang: Lang; + weights: GeneralWeightOverrides; + skillTypeChoice: SkillTypeChoice; + reviewMode: ReviewMode; + financeScenario: FinanceScenarioId; + financeWeights: FinanceWeightOverrides; + }, +): BatchSkillScoreEntry { + const key = skillInstanceKey(item); + const parsed = parseSkill({ + rawText: item.rawText, + files: item.files, + entryFile: item.entryFile, + }); + const base = aggregateScore(parsed, RUBRIC, { + weightOverrides: opts.weights, + llmResults: state.llmResults ?? undefined, + language: opts.lang, + skillType: opts.skillTypeChoice, + }); + const report: ScoreReport = { + ...base, + valueType: state.llmMeta?.valueType, + valueTypeReason: state.llmMeta?.reason, + }; + const financeReport = + opts.reviewMode === "finance" && state.llmResults + ? buildFinanceExpertReport(state.llmResults, opts.financeScenario, opts.financeWeights) + : null; + return { + key, + label: skillLabel(item), + report, + llmComplete: report.llmComplete, + financeReport, + }; +} + +export default function BatchReviewPanel({ + lang, + batchSkills, + runStates, + setRunStates, + activeBatchIndex, + setActiveBatchIndex, + reviewMode, + financeScenario, + financeWeights, + weights, + skillTypeChoice, + onOrchestratorRunningChange, +}: Props) { + const zh = lang === "zh"; + const abortRef = useRef(null); + const [batchRunning, setBatchRunning] = useState(false); + const [progress, setProgress] = useState<{ + done: number; + total: number; + currentLabel: string; + } | null>(null); + + const entryOpts = useMemo( + () => ({ + lang, + weights, + skillTypeChoice, + reviewMode, + financeScenario, + financeWeights, + }), + [lang, weights, skillTypeChoice, reviewMode, financeScenario, financeWeights], + ); + + const batchScoreEntries = useMemo( + () => + batchSkills.map((item) => { + const key = skillInstanceKey(item); + const state = runStates[key] ?? DEFAULT_SKILL_RUN_STATE; + return buildEntry(item, state, entryOpts); + }), + [batchSkills, runStates, entryOpts], + ); + + const batchAggregateStats: BatchAggregateStats = useMemo( + () => computeBatchAggregateStats(batchScoreEntries), + [batchScoreEntries], + ); + + const completedCount = batchScoreEntries.filter((e) => e.llmComplete).length; + const failedCount = batchSkills.filter((item) => { + const s = runStates[skillInstanceKey(item)] ?? DEFAULT_SKILL_RUN_STATE; + return s.llmState.status === "error"; + }).length; + const pendingCount = batchSkills.length - completedCount - failedCount; + + const setSkillState = useCallback( + (key: string, updater: (prev: SkillRunState) => SkillRunState) => { + setRunStates((prev) => ({ + ...prev, + [key]: updater(prev[key] ?? DEFAULT_SKILL_RUN_STATE), + })); + }, + [setRunStates], + ); + + const runReviewForKey = useCallback( + async (key: string, signal: AbortSignal) => { + const item = batchSkills.find((s) => skillInstanceKey(s) === key); + if (!item) return; + + setSkillState(key, (prev) => ({ + ...prev, + llmState: { status: "running" }, + marketSurvey: null, + })); + + const parsed = parseSkill({ + rawText: item.rawText, + files: item.files, + entryFile: item.entryFile, + }); + + let result: Awaited>; + try { + result = await runSkillDeepReview(parsed, RUBRIC, { + signal, + lang, + reviewMode, + financeScenario, + skillType: skillTypeChoice, + }); + } catch { + if (signal.aborted) { + setSkillState(key, (prev) => + prev.llmState.status === "running" + ? { ...prev, llmState: { status: "idle" } } + : prev, + ); + } + return; + } + + if (signal.aborted || "aborted" in result) { + setSkillState(key, (prev) => + prev.llmState.status === "running" + ? { ...prev, llmState: { status: "idle" } } + : prev, + ); + return; + } + + if (result.ok) { + setSkillState(key, (prev) => ({ + ...prev, + llmResults: result.llmResults, + llmMeta: result.llmMeta, + llmState: { status: "ok", response: result.response }, + marketSurvey: result.marketSurvey, + llmEnabled: true, + })); + } else { + setSkillState(key, (prev) => ({ + ...prev, + llmState: { + status: "error", + message: result.message, + reason: result.reason, + httpStatus: result.httpStatus, + }, + marketSurvey: result.marketSurvey, + llmEnabled: true, + })); + } + }, + [batchSkills, setSkillState, lang, reviewMode, financeScenario, skillTypeChoice], + ); + + const keysNeedingReview = useCallback(() => { + return batchSkills + .map((item) => skillInstanceKey(item)) + .filter((key) => { + const state = runStates[key] ?? DEFAULT_SKILL_RUN_STATE; + if (state.llmState.status === "running") return false; + if (state.llmState.status === "ok") { + const item = batchSkills.find((s) => skillInstanceKey(s) === key)!; + const entry = buildEntry(item, state, entryOpts); + return !entry.llmComplete; + } + return true; + }); + }, [batchSkills, runStates, entryOpts]); + + const runBatch = useCallback( + async (keys: string[]) => { + if (keys.length === 0 || batchRunning) return; + const ctl = new AbortController(); + abortRef.current = ctl; + setBatchRunning(true); + onOrchestratorRunningChange?.(true); + setProgress({ done: 0, total: keys.length, currentLabel: "" }); + + try { + for (let i = 0; i < keys.length; i++) { + if (ctl.signal.aborted) break; + const key = keys[i]!; + const item = batchSkills.find((s) => skillInstanceKey(s) === key); + setProgress({ + done: i, + total: keys.length, + currentLabel: item ? skillLabel(item) : key, + }); + await runReviewForKey(key, ctl.signal); + if (ctl.signal.aborted) break; + setProgress({ done: i + 1, total: keys.length, currentLabel: "" }); + } + } catch { + // aborted mid-batch — state already reset per skill + } + + setProgress(null); + setBatchRunning(false); + onOrchestratorRunningChange?.(false); + abortRef.current = null; + }, + [batchRunning, batchSkills, runReviewForKey, onOrchestratorRunningChange], + ); + + const stopBatch = () => { + abortRef.current?.abort(); + abortRef.current = null; + setProgress(null); + setBatchRunning(false); + onOrchestratorRunningChange?.(false); + }; + + const pillarCols = RUBRIC.pillars; + const dimensionCols = RUBRIC.pillars.flatMap((p) => + p.dimensions.map((d) => ({ ...d, pillarId: p.id })), + ); + + return ( +
+
+
+

+ {zh ? "批量上传的 Skill 包" : "Batch uploaded skill packages"} +

+

+ {batchSkills.length} {zh ? "个 skill" : "skills"} +

+

+ {zh + ? `深度评测完成 ${completedCount} · 失败 ${failedCount} · 待评测 ${pendingCount}` + : `Deep review done ${completedCount} · failed ${failedCount} · pending ${pendingCount}`} +

+
+
+ + {batchRunning && ( + + )} + +
+
+ + {(batchRunning || progress) && ( +
+
+ + {progress?.currentLabel + ? `${zh ? "正在评测" : "Reviewing"}: ${progress.currentLabel}` + : zh ? "批量评测中…" : "Batch review running…"} + + + {progress ? `${progress.done}/${progress.total}` : "—"} + +
+
+
0 + ? `${(progress.done / progress.total) * 100}%` + : "0%", + }} + /> +
+
+ )} + +
+ {batchSkills.map((item, index) => { + const key = skillInstanceKey(item); + const itemState = runStates[key] ?? DEFAULT_SKILL_RUN_STATE; + const entry = batchScoreEntries[index]!; + const statusLabel = + itemState.llmState.status === "ok" && entry.llmComplete + ? zh ? "深度评测完成" : "Deep review done" + : itemState.llmState.status === "error" + ? zh ? "评测失败" : "Failed" + : itemState.llmState.status === "running" + ? zh ? "评测中…" : "Running…" + : zh ? "待深度评测" : "Pending"; + const scoreLabel = + entry.llmComplete ? `${roundStat(entry.report.total, 1)}` : "—"; + + return ( +
+ + {itemState.llmState.status === "error" && !batchRunning && ( + + )} +
+ ); + })} +
+ + {completedCount > 0 && ( +
+

+ {zh ? "全部 Skill 深度评测分数" : "All skills — deep review scores"} +

+
+ + + + + + {pillarCols.map((p) => ( + + ))} + + + + {batchScoreEntries.map((entry) => ( + + + + {pillarCols.map((p) => { + const pillar = entry.report.pillars.find((x) => x.id === p.id); + const pct = pillar ? pillarPercent(pillar) : null; + return ( + + ); + })} + + ))} + +
{zh ? "Skill" : "Skill"}{zh ? "总分" : "Total"} + {zh ? p.name_zh : p.name_en} +
+ {entry.label} + + {entry.llmComplete ? roundStat(entry.report.total, 1) : "—"} + + {pct != null && entry.llmComplete ? roundStat(pct, 1) : "—"} +
+
+ +
+ + {zh ? "各维度得分明细" : "Dimension scores (detail)"} + +
+ + + + + {dimensionCols.map((d) => ( + + ))} + + + + {batchScoreEntries.map((entry) => ( + + + {dimensionCols.map((d) => { + let pct: number | null = null; + for (const pillar of entry.report.pillars) { + const dim = pillar.dimensions.find((x) => x.id === d.id); + if (dim) { + pct = dimensionPercent(dim); + break; + } + } + return ( + + ); + })} + + ))} + +
{zh ? "Skill" : "Skill"} + {zh ? d.name_zh : d.name_en} +
+ {entry.label} + + {pct != null && entry.llmComplete ? roundStat(pct, 1) : "—"} +
+
+
+
+ )} + + +
+ ); +} diff --git a/web/components/BatchStatsPanel.tsx b/web/components/BatchStatsPanel.tsx new file mode 100644 index 0000000..a78cc28 --- /dev/null +++ b/web/components/BatchStatsPanel.tsx @@ -0,0 +1,187 @@ +"use client"; + +import type { Lang } from "@/lib/i18n/messages"; +import { + type BatchAggregateStats, + type MetricBatchStats, + type NumericStats, + roundStat, +} from "@/lib/scoring/batch-stats"; + +interface Props { + lang: Lang; + stats: BatchAggregateStats; + showFinance: boolean; +} + +const STAT_COLS: Array<{ key: keyof NumericStats; zh: string; en: string }> = [ + { key: "mean", zh: "平均", en: "Mean" }, + { key: "max", zh: "最高", en: "Max" }, + { key: "min", zh: "最低", en: "Min" }, + { key: "variance", zh: "方差", en: "Variance" }, + { key: "median", zh: "中位数", en: "Median" }, +]; + +function formatCell(stats: NumericStats | null, key: keyof NumericStats): string { + if (!stats) return "—"; + const v = stats[key]; + if (key === "count") return String(v); + return String(roundStat(v as number, key === "variance" ? 2 : 1)); +} + +function StatsTable({ + lang, + title, + subtitle, + rows, + valueSuffix = "", +}: { + lang: Lang; + title: string; + subtitle?: string; + rows: Array<{ id: string; name: string; stats: NumericStats | null }>; + valueSuffix?: string; +}) { + if (rows.length === 0) return null; + return ( +
+
+

{title}

+ {subtitle &&

{subtitle}

} +
+
+ + + + + {STAT_COLS.map((col) => ( + + ))} + + + + + {rows.map((row) => ( + + + {STAT_COLS.map((col) => ( + + ))} + + + ))} + +
+ {lang === "zh" ? "指标" : "Metric"} + + {lang === "zh" ? col.zh : col.en} + {valueSuffix ? ` (${valueSuffix})` : ""} + + n +
+ {row.name} + + {formatCell(row.stats, col.key)} + + {row.stats?.count ?? "—"} +
+
+
+ ); +} + +function metricRows(metrics: MetricBatchStats[], lang: Lang) { + return metrics + .filter((m) => m.stats != null) + .map((m) => ({ + id: m.id, + name: lang === "zh" ? m.nameZh : m.nameEn, + stats: m.stats, + })); +} + +export default function BatchStatsPanel({ lang, stats, showFinance }: Props) { + const zh = lang === "zh"; + const progress = + stats.completedCount === 0 + ? zh + ? "完成至少 1 个 skill 的「SkillLens 深度评测」后,将在此显示批量汇总统计。" + : "Run SkillLens Deep Review on at least one skill to see batch aggregate statistics." + : zh + ? `基于 ${stats.completedCount}/${stats.totalCount} 个已完成深度评测的 skill(得分 0–100,支柱/维度为满分百分比)。` + : `Based on ${stats.completedCount}/${stats.totalCount} skills with completed Deep Review (scores 0–100; pillars/dimensions as % of max).`; + + const totalRow = stats.total + ? [{ id: "total", name: zh ? "总分" : "Total score", stats: stats.total }] + : []; + + const pillarRows = metricRows(stats.pillars, lang); + const dimensionRows = metricRows(stats.dimensions, lang); + const financeTotalRow = stats.financeTotal + ? [{ id: "finance-total", name: zh ? "金融专家总分" : "Finance expert total", stats: stats.financeTotal }] + : []; + const financePillarRows = metricRows(stats.financePillars, lang); + + return ( +
+
+

+ {zh ? "批量深度评测汇总" : "Batch deep review summary"} +

+

+ {zh ? "跨 Skill 统计" : "Cross-skill statistics"} +

+

{progress}

+
+ + {stats.completedCount > 0 && ( + <> + + {pillarRows.length > 0 && ( + + )} + {dimensionRows.length > 0 && ( + + )} + {showFinance && (financeTotalRow.length > 0 || financePillarRows.length > 0) && ( + <> + {financeTotalRow.length > 0 && ( + + )} + {financePillarRows.length > 0 && ( + + )} + + )} + + )} +
+ ); +} diff --git a/web/components/Uploader.tsx b/web/components/Uploader.tsx index 9ac3f73..809f8e4 100644 --- a/web/components/Uploader.tsx +++ b/web/components/Uploader.tsx @@ -2,10 +2,12 @@ import { useRef, useState } from "react"; import type { Lang } from "@/lib/i18n/messages"; import { MESSAGES } from "@/lib/i18n/messages"; -import { loadFromFileList, type LoadedSkill } from "@/lib/spec/loader"; +import { loadFromFileList, toUploadResult, type LoadedSkill } from "@/lib/spec/loader"; import type { SkillType } from "@/lib/llm/types"; export type SkillTypeChoice = SkillType | "auto"; +export type UploadMode = "single" | "batch"; +export type UploadResult = LoadedSkill | LoadedSkill[]; /** * One "load sample" button on the dropzone. Pre-bundled skills (e.g. the @@ -26,7 +28,7 @@ export interface SampleEntry { interface Props { lang: Lang; - onLoad: (loaded: LoadedSkill) => void; + onLoad: (loaded: UploadResult) => void; /** Single-sample mode (legacy). Used when only one example is appropriate * for the current scenario, e.g. the finance-scenario flow. */ sampleId?: string; @@ -36,6 +38,8 @@ interface Props { samples?: SampleEntry[]; skillTypeChoice?: SkillTypeChoice; onSkillTypeChange?: (choice: SkillTypeChoice) => void; + uploadMode?: UploadMode; + onUploadModeChange?: (mode: UploadMode) => void; } const SKILL_TYPE_OPTIONS: Array<{ @@ -73,6 +77,8 @@ export default function Uploader({ samples, skillTypeChoice = "auto", onSkillTypeChange, + uploadMode = "single", + onUploadModeChange, }: Props) { const t = MESSAGES[lang]; const fileInputRef = useRef(null); @@ -88,7 +94,20 @@ export default function Uploader({ setBusy(true); try { const loaded = await loadFromFileList(files); - onLoad(loaded); + const result = toUploadResult(loaded); + if (uploadMode === "batch" && !Array.isArray(result)) { + setError( + lang === "zh" + ? "批量模式需要上传包含多个 skill 包的文件夹或 zip(每个子目录一份 SKILL.md)。" + : "Batch mode requires a folder or zip with multiple skill packages (one SKILL.md per package).", + ); + return; + } + if (uploadMode === "single" && Array.isArray(result) && result.length > 1) { + onLoad(result); + return; + } + onLoad(result); } catch (e) { setError((e as Error).message || String(e)); } finally { @@ -98,15 +117,32 @@ export default function Uploader({ function loadPasted() { if (!pasteBuf.trim()) return; + if (uploadMode === "batch") { + setError( + lang === "zh" + ? "批量模式请使用文件夹或 zip 上传,不支持粘贴单个 SKILL.md。" + : "Batch mode uses folder or zip upload; pasted text is not supported.", + ); + return; + } onLoad({ rawText: pasteBuf, files: [{ path: "SKILL.md", size: pasteBuf.length }], entryFile: "SKILL.md", rootName: "pasted", + packagePath: "", }); } async function loadSample(id: string, skillType?: SkillTypeChoice) { + if (uploadMode === "batch") { + setError( + lang === "zh" + ? "批量模式下请使用「选择文件夹」上传多个 skill 包。" + : "In batch mode, use “Folder” to upload multiple skill packages.", + ); + return; + } setError(""); setBusy(true); try { @@ -131,43 +167,101 @@ export default function Uploader({ return (
- {onSkillTypeChange && ( -
-
-

- {lang === "zh" ? "skill 类型" : "Skill type"} -

-

- {lang === "zh" - ? "影响 SkillLens 评估视角和 LLM 改进建议;多 skill 嵌套时优先选 pipeline。" - : "Sets the evaluation lens and tunes LLM fix recommendations. For multi-skill packages choose pipeline."} -

-
-
- {SKILL_TYPE_OPTIONS.map((opt) => { - const active = skillTypeChoice === opt.id; - const t = lang === "zh" ? opt.zh : opt.en; - return ( + {(onSkillTypeChange || onUploadModeChange) && ( +
+ {onSkillTypeChange && ( +
+
+

+ {lang === "zh" ? "skill 类型" : "Skill type"} +

+

+ {lang === "zh" + ? "影响评估视角与 LLM 改进建议;单包内多子 skill 时优先选 pipeline。" + : "Sets the evaluation lens and LLM fixes; for one package with child skills, prefer pipeline."} +

+
+
+ {SKILL_TYPE_OPTIONS.map((opt) => { + const active = skillTypeChoice === opt.id; + const label = lang === "zh" ? opt.zh : opt.en; + return ( + + ); + })} +
+
+ )} + {onUploadModeChange && ( +
+
+

+ {lang === "zh" ? "上传模式" : "Upload mode"} +

+

+ {lang === "zh" + ? "与 skill 类型并列;批量模式用于多个独立 skill 包及跨包汇总统计。" + : "Alongside skill type; batch mode is for multiple packages and cross-package stats."} +

+
+
- ); - })} -
+ +
+
+ )}
)}
+ {uploadMode === "batch" && ( +

+ {lang === "zh" + ? "每个子目录需有且仅有一份 SKILL.md;完成后可对每个 skill 深度评测并查看跨包汇总。" + : "Each subfolder needs exactly one SKILL.md; run deep review per skill, then see cross-package stats."} +

+ )}