From 87e0e188837ea6273c48bee3ba690fb36a227fe1 Mon Sep 17 00:00:00 2001 From: Vincent Koc <25068+vincentkoc@users.noreply.github.com> Date: Mon, 22 Jun 2026 11:39:54 +0800 Subject: [PATCH 1/2] fix: retry failed release RTT rows --- scripts/resolve-openclaw-channel-package.mjs | 9 ++++++--- scripts/resolve-openclaw-channel-package.test.mjs | 3 ++- scripts/resolve-openclaw-discord-package.mjs | 2 +- scripts/resolve-openclaw-discord-package.test.mjs | 11 +++++++---- scripts/resolve-openclaw-surface-package.mjs | 5 ++++- scripts/resolve-openclaw-surface-package.test.mjs | 3 ++- 6 files changed, 22 insertions(+), 11 deletions(-) diff --git a/scripts/resolve-openclaw-channel-package.mjs b/scripts/resolve-openclaw-channel-package.mjs index 83fce71f..8a4fbb42 100644 --- a/scripts/resolve-openclaw-channel-package.mjs +++ b/scripts/resolve-openclaw-channel-package.mjs @@ -114,9 +114,11 @@ const explicitVersions = readListEnv("INPUT_VERSIONS"); const versionLimit = readPositiveIntegerEnv("INPUT_VERSION_LIMIT", DEFAULT_VERSION_LIMIT); const channelRows = releaseRows(await readChannelRttRows()); const measured = new Set( - channelRows.map( - (row) => `${row.channel?.id}\0${row.package?.spec}\0${row.package?.version}`, - ), + channelRows + .filter((row) => row.run?.status === "pass") + .map( + (row) => `${row.channel?.id}\0${row.package?.spec}\0${row.package?.version}`, + ), ); const queue = []; @@ -124,6 +126,7 @@ for (const channelId of channelIds) { const channel = channelConfig.get(channelId); const measuredVersions = channelRows .filter((row) => row.channel?.id === channelId) + .filter((row) => row.run?.status === "pass") .map((row) => row.package.version) .filter((version) => parseVersion(version)); const latestMeasured = measuredVersions.sort(compareVersions).at(-1); diff --git a/scripts/resolve-openclaw-channel-package.test.mjs b/scripts/resolve-openclaw-channel-package.test.mjs index 97c339fb..b386c16e 100644 --- a/scripts/resolve-openclaw-channel-package.test.mjs +++ b/scripts/resolve-openclaw-channel-package.test.mjs @@ -94,7 +94,7 @@ test("queues explicit channel release versions even when already measured", asyn ]); }); -test("does not auto-requeue imported channel release versions", async () => { +test("auto-requeues failed channel release versions", async () => { const workspace = await makeWorkspace(); await writeJsonl(path.join(workspace, "data/channels/slack/2026.5.16-beta.6.jsonl"), [ row("2026.5.16-beta.6", "slack", "fail"), @@ -119,6 +119,7 @@ test("does not auto-requeue imported channel release versions", async () => { ); const matrix = JSON.parse(outputs.matrix); assert.deepEqual(matrix.map((entry) => `${entry.channel}:${entry.version}`), [ + "slack:2026.5.16-beta.6", "whatsapp:2026.5.16-beta.6", "slack:2026.5.16-beta.7", "whatsapp:2026.5.16-beta.7", diff --git a/scripts/resolve-openclaw-discord-package.mjs b/scripts/resolve-openclaw-discord-package.mjs index 5c217776..0b83cc1d 100644 --- a/scripts/resolve-openclaw-discord-package.mjs +++ b/scripts/resolve-openclaw-discord-package.mjs @@ -178,7 +178,7 @@ if (requestedVersions.length > 0) { .slice(0, rssBackfillLimit); } else { const measured = new Set( - discordRows.map((row) => `${row.package.spec}\0${row.package.version}`), + successfulDiscordRows.map((row) => `${row.package.spec}\0${row.package.version}`), ); queue = (await npmVersions()) .filter((version) => typeof version === "string" && parseVersion(version)) diff --git a/scripts/resolve-openclaw-discord-package.test.mjs b/scripts/resolve-openclaw-discord-package.test.mjs index fadfe12c..eee69f87 100644 --- a/scripts/resolve-openclaw-discord-package.test.mjs +++ b/scripts/resolve-openclaw-discord-package.test.mjs @@ -115,7 +115,7 @@ test("queues missing Discord releases from the Telegram baseline", async () => { assert.deepEqual(JSON.parse(outputs.matrix).map((pkg) => pkg.version), ["2026.5.12"]); }); -test("does not requeue imported failed Discord releases by default", async () => { +test("auto-requeues failed Discord releases by default", async () => { const workspace = await makeWorkspace(); await writeJsonl(path.join(workspace, "data/channels/telegram.jsonl"), [ releaseRow("2026.5.12", "telegram"), @@ -142,10 +142,13 @@ test("does not requeue imported failed Discord releases by default", async () => }); const outputs = parseOutputs(stdout); - assert.equal(outputs.count, "1"); - assert.equal(outputs.missing_baseline_count, "1"); + assert.equal(outputs.count, "2"); + assert.equal(outputs.missing_baseline_count, "2"); assert.equal(outputs.reason, "missing-discord-release-versions"); - assert.deepEqual(JSON.parse(outputs.matrix).map((pkg) => pkg.version), ["2026.5.16-beta.6"]); + assert.deepEqual(JSON.parse(outputs.matrix).map((pkg) => pkg.version), [ + "2026.5.16-beta.5", + "2026.5.16-beta.6", + ]); }); test("queues requested Discord releases even when a failed row was imported", async () => { diff --git a/scripts/resolve-openclaw-surface-package.mjs b/scripts/resolve-openclaw-surface-package.mjs index f3e0912c..edcd861a 100644 --- a/scripts/resolve-openclaw-surface-package.mjs +++ b/scripts/resolve-openclaw-surface-package.mjs @@ -121,7 +121,9 @@ for (const surfaceId of surfaceIds) { } const measured = new Set( [...rowsBySurface.entries()].flatMap(([surfaceId, rows]) => - rows.map((row) => `${surfaceId}\0${row.package?.spec}\0${row.package?.version}`), + rows + .filter((row) => row.run?.status === "pass") + .map((row) => `${surfaceId}\0${row.package?.spec}\0${row.package?.version}`), ), ); @@ -130,6 +132,7 @@ for (const surfaceId of surfaceIds) { const surface = surfaceConfig.get(surfaceId); const surfaceRows = rowsBySurface.get(surfaceId) ?? []; const measuredVersions = surfaceRows + .filter((row) => row.run?.status === "pass") .map((row) => row.package.version) .filter((version) => parseVersion(version)); const latestMeasured = measuredVersions.sort(compareVersions).at(-1); diff --git a/scripts/resolve-openclaw-surface-package.test.mjs b/scripts/resolve-openclaw-surface-package.test.mjs index 70940b0a..43427441 100644 --- a/scripts/resolve-openclaw-surface-package.test.mjs +++ b/scripts/resolve-openclaw-surface-package.test.mjs @@ -93,7 +93,7 @@ test("queues explicit surface release versions even when already measured", asyn ]); }); -test("does not auto-requeue imported surface release versions", async () => { +test("auto-requeues failed surface release versions", async () => { const workspace = await makeWorkspace(); await writeJsonl(path.join(workspace, "data/surfaces/control-ui/2026.6.1-beta.3.jsonl"), [ row("2026.6.1-beta.3", "control-ui", "fail"), @@ -113,6 +113,7 @@ test("does not auto-requeue imported surface release versions", async () => { const outputs = parseOutputs(stdout); const matrix = JSON.parse(outputs.matrix); assert.deepEqual(matrix.map((entry) => `${entry.surface}:${entry.version}`), [ + "control-ui:2026.6.1-beta.3", "control-ui:2026.6.1", ]); }); From fed7dcf5f38062904a92316779d5c9740cff9f16 Mon Sep 17 00:00:00 2001 From: Vincent Koc <25068+vincentkoc@users.noreply.github.com> Date: Mon, 22 Jun 2026 11:57:03 +0800 Subject: [PATCH 2/2] fix: import Discord qa evidence artifacts --- .github/workflows/main-discord-rtt.yml | 2 +- .../workflows/stable-release-discord-rtt.yml | 4 +- scripts/import-discord-rtt.mjs | 41 +++++++++++++++- scripts/import-discord-rtt.test.mjs | 48 +++++++++++++++++++ 4 files changed, 91 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main-discord-rtt.yml b/.github/workflows/main-discord-rtt.yml index 6dc8915b..5fad4e08 100644 --- a/.github/workflows/main-discord-rtt.yml +++ b/.github/workflows/main-discord-rtt.yml @@ -129,7 +129,7 @@ jobs: for sample in $(seq 1 "$samples"); do output_dir="${output_root}/sample-${sample}" - summary_path="${output_dir}/discord-qa-summary.json" + summary_path="${output_dir}/qa-evidence.json" observed_path="${output_dir}/discord-qa-observed-messages.json" metrics_path="${output_dir}/resource-metrics.env" status=1 diff --git a/.github/workflows/stable-release-discord-rtt.yml b/.github/workflows/stable-release-discord-rtt.yml index c4d6be65..18a95e3c 100644 --- a/.github/workflows/stable-release-discord-rtt.yml +++ b/.github/workflows/stable-release-discord-rtt.yml @@ -219,7 +219,7 @@ jobs: for sample in $(seq 1 "$samples"); do output_dir="${output_root}/sample-${sample}" - summary_path="${output_dir}/discord-qa-summary.json" + summary_path="${output_dir}/qa-evidence.json" observed_path="${output_dir}/discord-qa-observed-messages.json" metrics_path="${output_dir}/resource-metrics.env" status=1 @@ -347,7 +347,7 @@ jobs: sample_paths="$RUNNER_TEMP/openclaw-discord-release-${version}-rtt-samples.tsv" : >"$sample_paths" while IFS= read -r sample_dir; do - summary_path="${sample_dir}/discord-qa-summary.json" + summary_path="${sample_dir}/qa-evidence.json" observed_path="${sample_dir}/discord-qa-observed-messages.json" metrics_path="${sample_dir}/resource-metrics.env" if [[ ! -f "$summary_path" || ! -f "$observed_path" || ! -f "$metrics_path" ]]; then diff --git a/scripts/import-discord-rtt.mjs b/scripts/import-discord-rtt.mjs index 137befae..0e5d86ae 100644 --- a/scripts/import-discord-rtt.mjs +++ b/scripts/import-discord-rtt.mjs @@ -91,6 +91,45 @@ function validateSummary(value) { return summary; } +function normalizeEvidenceSummary(value) { + if (value?.kind !== "openclaw.qa.evidence-summary") { + return value; + } + const entries = Array.isArray(value.entries) ? value.entries : []; + const entry = entries.find((item) => item?.test?.id === "discord-canary"); + if (!entry) { + throw new Error("qa evidence missing discord-canary."); + } + const generatedAt = requireString(value.generatedAt, "qa evidence generatedAt"); + const timing = requireObject(entry.result?.timing, "qa evidence discord-canary timing"); + const rttMs = timing.rttMs; + if (typeof rttMs !== "number" || !Number.isFinite(rttMs)) { + throw new Error("qa evidence discord-canary timing must include finite rttMs."); + } + const passed = entries.filter((item) => item?.result?.status === "pass").length; + return { + startedAt: generatedAt, + finishedAt: generatedAt, + counts: { + total: entries.length, + passed, + failed: entries.length - passed, + }, + scenarios: [ + { + id: "discord-canary", + status: entry.result?.status === "pass" ? "pass" : "fail", + rttMs, + details: entry.result?.details, + }, + ], + credentials: { + source: entry.execution?.provider?.fixture ?? entry.execution?.provider?.auth, + role: "ci", + }, + }; +} + function safeRunLabel(input) { return input.replace(/[^a-zA-Z0-9.-]+/gu, "_").replace(/^_+|_+$/gu, ""); } @@ -223,7 +262,7 @@ function extractGatewayResourceMetrics(summary) { } async function readSample(entry, index) { - const summary = validateSummary(await readJson(path.resolve(entry.summaryPath))); + const summary = validateSummary(normalizeEvidenceSummary(await readJson(path.resolve(entry.summaryPath)))); const observedMessages = await readJson(path.resolve(entry.observedMessagesPath)); const resources = entry.resourceMetricsPath ? await readResourceMetrics(path.resolve(entry.resourceMetricsPath)) diff --git a/scripts/import-discord-rtt.test.mjs b/scripts/import-discord-rtt.test.mjs index be19d879..de0e9de5 100644 --- a/scripts/import-discord-rtt.test.mjs +++ b/scripts/import-discord-rtt.test.mjs @@ -55,6 +55,54 @@ test("does not write failed Discord runs when pass is required", async () => { await assert.rejects(fs.stat(path.join(workspace, "runs/discord")), { code: "ENOENT" }); }); +test("imports Discord qa-evidence summaries", async () => { + const workspace = await makeWorkspace(); + const sampleDir = path.join(workspace, "sample-1"); + await fs.mkdir(sampleDir, { recursive: true }); + await fs.writeFile( + path.join(sampleDir, "qa-evidence.json"), + `${JSON.stringify({ + kind: "openclaw.qa.evidence-summary", + schemaVersion: 2, + generatedAt: "2026-06-22T03:54:37.214Z", + entries: [ + { + test: { id: "discord-canary", title: "Discord canary echo" }, + execution: { provider: { fixture: "mock-openai" } }, + result: { status: "pass", timing: { rttMs: 1903 } }, + }, + ], + })}\n`, + ); + await fs.writeFile(path.join(sampleDir, "discord-qa-observed-messages.json"), "[]\n"); + await fs.writeFile( + path.join(workspace, "samples.tsv"), + `${path.join(sampleDir, "qa-evidence.json")}\t${path.join( + sampleDir, + "discord-qa-observed-messages.json", + )}\n`, + ); + + await execFileAsync(process.execPath, [ + IMPORT_SCRIPT, + path.join(workspace, "samples.tsv"), + "--spec", + "openclaw@2026.6.10-beta.1", + "--version", + "2026.6.10-beta.1", + "--require-pass", + ], { cwd: workspace }); + + const [row] = (await fs.readFile(path.join(workspace, "data/channels/discord/2026.6.10-beta.1.jsonl"), "utf8")) + .trim() + .split("\n") + .map((line) => JSON.parse(line)); + assert.equal(row.run.status, "pass"); + assert.equal(row.rtt.p50Ms, 1903); + assert.deepEqual(row.rtt.sources, ["summary-rtt"]); + assert.equal(row.mode.providerMode, "mock-openai"); +}); + test("imports Discord resource metrics without changing RTT stats", async () => { const workspace = await makeWorkspace(); const sampleDir = path.join(workspace, "sample-1");