From 0468c8054c113f0e170f2184f6eb72666f885995 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 23 Jun 2026 16:19:35 -0700 Subject: [PATCH 1/4] fix: content-anchored fixture matcher selection + relaxed-turnIndex divergence detection in router Anchor fixture matcher selection on request content rather than positional turnIndex, and add relaxed-turnIndex divergence detection: warn when a match is found by content but diverges from the recorded turnIndex, with an AIMOCK_STRICT_TURN_INDEX opt-out, per-fixture-identity throttle, and accurate matchedBy reporting for predicate/regex fixtures. --- src/router.ts | 430 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 387 insertions(+), 43 deletions(-) diff --git a/src/router.ts b/src/router.ts index 1ab60c1..fe6ec21 100644 --- a/src/router.ts +++ b/src/router.ts @@ -1,4 +1,10 @@ -import type { ChatCompletionRequest, ChatMessage, ContentPart, Fixture } from "./types.js"; +import type { + ChatCompletionRequest, + ChatMessage, + ContentPart, + Fixture, + FixtureMatch, +} from "./types.js"; import { isImageResponse, isAudioResponse, @@ -19,15 +25,23 @@ export function getLastMessageByRole(messages: ChatMessage[], role: string): Cha * Concatenate the text content of every `system` role message in order. * Hosts that build a system context from multiple sources (persona, agent * context entries, tool guidance) often emit several system messages in one - * request; this joins them with newlines so a substring matcher sees the - * whole context as one body. + * request; this joins SEPARATE system messages with newlines so a substring + * matcher sees the whole context as one body. + * + * Empty handling is symmetric with {@link getTextContent}: a system message + * with no extractable text (`null`) contributes nothing, while a message that + * extracts to an empty string is a present-but-empty body. 
We skip only the + * `null` (no-text) case; a message that extracts to `""` is still pushed and so + * adds an empty segment (and its separating newline) to the joined body. This + * mirrors getTextContent keeping "no text" (`null`) distinct from "empty text" (`""`). */ export function getSystemText(messages: ChatMessage[]): string { const parts: string[] = []; for (const m of messages) { if (m.role !== "system") continue; const text = getTextContent(m.content); - if (text) parts.push(text); + if (text === null) continue; + parts.push(text); } return parts.join("\n"); } @@ -36,12 +50,20 @@ export function getSystemText(messages: ChatMessage[]): string { * Extract the text content from a message's content field. * Handles both plain string content and array-of-parts content * (e.g. `[{type: "text", text: "..."}]` as sent by some SDKs). + * + * Multi-part text is joined with `""` (the parts form one logical body split + * across segments). Empty handling is symmetric with the string path: a string + * `""` returns `""`, and an array containing at least one text part whose + * combined text is empty likewise returns `""` (NOT `null`). `null` is reserved + * for "no text content at all" — null content, or an array with no text parts — + * so callers can distinguish "absent" from "present but empty" the same way for + * both content shapes. */ export function getTextContent(content: string | ContentPart[] | null): string | null { if (typeof content === "string") return content; if (Array.isArray(content)) { const texts = content - .filter((p) => p.type === "text" && typeof p.text === "string" && p.text !== "") + .filter((p) => p.type === "text" && typeof p.text === "string") .map((p) => p.text as string); return texts.length > 0 ? 
texts.join("") : null; } @@ -62,6 +84,107 @@ export function getTextContent(content: string | ContentPart[] | null): string | export interface MatchFixtureDiagnostic { fixture: Fixture | null; skippedBySequenceOrTurn: number; + /** + * `true` when the served fixture was selected by relaxed content-anchored + * matching even though its `turnIndex` is defined and does NOT equal the + * request's assistant-message count — i.e. the legacy strict turnIndex gate + * (now opt-in via `AIMOCK_STRICT_TURN_INDEX`) WOULD HAVE rejected it. Absent / + * falsy on canonical-position matches, non-turnIndexed matches, and misses. + * Additive optional field — existing handler destructures are unaffected. + */ + turnIndexRelaxed?: boolean; + /** + * How the served fixture was selected: `"turnIndex"` only when it is the sole + * content-matching candidate whose `turnIndex` sits exactly at the current + * assistant count and no earlier-registered fallback won the tie; `"content"` + * otherwise (fallback, tie-break, or off-by-N match). Absent on misses. Additive optional field. + */ + matchedBy?: "content" | "turnIndex"; +} + +/** + * Optional matcher tuning. + * + * `strictTurnIndex` restores the legacy behaviour where `turnIndex` must equal + * the request's assistant-message count exactly (a hard reject gate). It is set + * by the record path, where a miss proxies upstream to capture a fresh turn; an + * earlier-turn fixture must not shadow a longer request or the new turn would + * never be recorded. Replay (the default, `false`) treats `turnIndex` as a + * non-fatal disambiguator instead — see {@link selectByTurnIndex}. + */ +export interface MatchOptions { + strictTurnIndex?: boolean; + /** + * Optional sink for the one-shot relaxed-turnIndex divergence warning. Handlers + * pass their `defaults.logger`; the structural `{ warn }` shape avoids an + * import cycle with logger.ts and keeps the matcher decoupled. When omitted no + * warning is emitted (the diagnostic fields are still populated). 
The Logger's + * own level gate keeps a passing programmatic run (silent default) quiet. + */ + logger?: { warn(...args: unknown[]): void }; +} + +/** + * Process-level opt-out: when `AIMOCK_STRICT_TURN_INDEX=1` (or `true`) is set, + * REPLAY selection restores the legacy hard turnIndex gate — a content-matching + * fixture whose `turnIndex` is defined and `!== assistantCount` is rejected, + * reproducing origin/main semantics. Follows the `AIMOCK_ALLOW_PRIVATE_URLS` + * precedent for parsing/precedence. Read per-call (not cached) so tests can flip + * it. Does NOT affect the record path, which is already strict regardless. + */ +function strictTurnIndexEnv(): boolean { + const v = process.env.AIMOCK_STRICT_TURN_INDEX; + return v === "1" || v === "true"; +} + +/** + * Process-level set of fixtures for which the relaxed-turnIndex divergence + * warning has already fired, so each divergent fixture warns at most ONCE per + * process (throttle). Keyed by the selected fixture's OBJECT IDENTITY: the + * `Fixture` references in the server's fixtures array are stable across replays + * (the array is held by reference and only fully replaced on a fixtures reset), + * so identity uniquely distinguishes divergent fixtures and warns each exactly + * once. A `WeakSet` was chosen over the previous `JSON.stringify(match)` key for + * two reasons: (1) stringifying the match DROPS `predicate` functions and + * serialises any RegExp matcher to `{}`, so two distinct fixtures differing only + * by a predicate/regex collided to one key and the second's warning was silently + * suppressed; and (2) a string `Set` only grows, accumulating an entry per + * divergent shape on a long-lived server, whereas a `WeakSet` auto-evicts when a + * fixture object is released (e.g. after a fixtures reset drops the references). + * `let` because `WeakSet` has no `.clear()`, so the test hook reassigns a fresh + * one. 
+ */ +let warnedRelaxedFixtures = new WeakSet(); + +/** + * Test-only hook to clear the throttle state between cases. Not part of the + * public contract. `WeakSet` has no `.clear()`, so reassign a fresh instance. + */ +export function _resetTurnIndexRelaxWarnings(): void { + warnedRelaxedFixtures = new WeakSet(); +} + +/** + * Build the {@link MatchOptions} a request handler must pass to + * {@link matchFixtureDiagnostic} / {@link matchFixture}, derived from whether + * the handler is about to record on a miss. + * + * EVERY record-capable handler (OpenAI chat, Anthropic messages, Responses, + * Gemini, Bedrock, Bedrock-Converse, Cohere, Ollama, …) must build its match + * options through THIS helper rather than hand-rolling `{ strictTurnIndex }` at + * the call site. Recording proxies upstream on a miss to capture a fresh turn; + * if `strictTurnIndex` is left false during recording, an earlier-turn fixture + * can content-shadow a longer request, the `if (!fixture)` record branch never + * fires, and the new turn is SILENTLY never recorded. Funnelling the decision + * through one helper makes that wiring impossible for a future handler to miss: + * pass `recording = true` whenever the handler's own record gate is satisfied + * (i.e. it will call `proxyAndRecord` on a miss), `false` otherwise. + */ +export function recordMatchOptions( + recording: boolean, + logger?: { warn(...args: unknown[]): void }, +): MatchOptions { + return { strictTurnIndex: recording, logger }; } /** @@ -77,12 +200,27 @@ export function matchFixtureDiagnostic( req: ChatCompletionRequest, matchCounts?: Map<Fixture, number>, requestTransform?: (req: ChatCompletionRequest) => ChatCompletionRequest, + options?: MatchOptions, ): MatchFixtureDiagnostic { // Apply transform once before matching — used for stripping dynamic data const effective = requestTransform ? 
requestTransform(req) : req; const useExactMatch = !!requestTransform; + // In record mode the server proxies to the upstream on a miss, so a fixture + // already captured for an EARLIER turn must NOT shadow a longer (later-turn) + // request — otherwise the new turn would never be proxied and recorded. + // There turnIndex stays a strict hard gate. Replay (the default) instead + // treats turnIndex as a non-fatal disambiguator so a canonical multi-bubble + // run isn't falsely rejected for an off-by-N assistant count. + // Strict turnIndex is in force when the record path requests it OR the + // process-level AIMOCK_STRICT_TURN_INDEX opt-out is set (which restores the + // legacy hard gate for replay too). Record mode passes `true` explicitly; the + // env only matters when the caller left it `false`/unset (replay). + const strictTurnIndex = (options?.strictTurnIndex ?? false) || strictTurnIndexEnv(); let skippedBySequenceOrTurn = 0; + // Every fixture whose content / shape predicates (and sequenceIndex gate) + // pass. turnIndex is applied afterwards as a non-fatal disambiguator. + const contentMatches: Fixture[] = []; for (const fixture of fixtures) { const { match } = fixture; @@ -165,31 +303,38 @@ export function matchFixtureDiagnostic( // name AND a default activity list whose positions in the serialised // context JSON aren't stable). if (match.systemMessage !== undefined) { - const text = getSystemText(effective.messages); - if (!text) continue; const sm = match.systemMessage; - if (Array.isArray(sm)) { - // Empty array is treated as "no constraint" → effectively matches - // unconditionally. Validation rejects this at load time for JSON - // fixtures; programmatic callers that pass [] get the same - // permissive behaviour as not setting systemMessage at all. 
- let allPresent = true; - for (const needle of sm) { - if (!text.includes(needle)) { - allPresent = false; - break; + // Empty array is treated as "no constraint" → matches unconditionally, + // INCLUDING requests with no system text at all. This is the documented + // contract (same permissive behaviour as not setting systemMessage), so + // it must be honored BEFORE the no-system-text guard below — otherwise a + // request without a system message would be wrongly skipped. Validation + // rejects [] at load time for JSON fixtures; programmatic callers that + // pass [] get this permissive behaviour. + if (Array.isArray(sm) && sm.length === 0) { + // no constraint — fall through to the next predicate + } else { + const text = getSystemText(effective.messages); + if (!text) continue; + if (Array.isArray(sm)) { + let allPresent = true; + for (const needle of sm) { + if (!text.includes(needle)) { + allPresent = false; + break; + } + } + if (!allPresent) continue; + } else if (typeof sm === "string") { + if (useExactMatch) { + if (text !== sm) continue; + } else { + if (!text.includes(sm)) continue; } - } - if (!allPresent) continue; - } else if (typeof sm === "string") { - if (useExactMatch) { - if (text !== sm) continue; } else { - if (!text.includes(sm)) continue; + sm.lastIndex = 0; + if (!sm.test(text)) continue; } - } else { - sm.lastIndex = 0; - if (!sm.test(text)) continue; } } @@ -257,33 +402,231 @@ export function matchFixtureDiagnostic( if (hasTool !== match.hasToolResult) continue; } - // At this point every SHAPE predicate above has passed. The sequenceIndex - // and turnIndex gates below reject based on per-test count / turn STATE, - // not request shape — a fixture that fails only these is a "candidate that - // was skipped by sequence/turn state", which we count separately so callers - // can disambiguate the strict-mode 503 message. 
- let skippedByState = false; - - // sequenceIndex — check against the fixture's match count + // At this point every SHAPE / CONTENT predicate above has passed, so this + // fixture is a genuine CONTENT match for the request. The sequenceIndex and + // turnIndex constraints below are POSITION state, not request shape. + // + // sequenceIndex remains a hard, stateful gate: it consumes sequenced + // siblings one call at a time (and an exhausted index intentionally falls + // through to a later fixture). A fixture that matched the shape but fails + // ONLY the sequenceIndex gate is a "candidate skipped by sequence/turn + // state", counted separately so callers can disambiguate the strict-mode + // 503 message. if (match.sequenceIndex !== undefined && matchCounts !== undefined) { const count = matchCounts.get(fixture) ?? 0; - if (count !== match.sequenceIndex) skippedByState = true; + if (count !== match.sequenceIndex) { + skippedBySequenceOrTurn++; + continue; + } } - if (!skippedByState && match.turnIndex !== undefined) { + // turnIndex is normally NOT a hard gate (replay). Multi-step agents emit + // several assistant bubbles per logical turn, so a canonical run's assistant + // count routinely differs from a fixture's hardcoded turnIndex even when the + // request content matches exactly. Rejecting a uniquely content-matching + // fixture on absolute position produced false "empty assistant response" + // misses. Instead we collect every content match and use turnIndex only as a + // non-fatal DISAMBIGUATOR to choose AMONG several content-matching fixtures + // (see selectByTurnIndex below). Content that does not match any fixture + // still matches nothing — only the position gate is relaxed. + // + // Under strictTurnIndex (record mode) turnIndex stays a hard, exact gate so + // an earlier-turn capture can't shadow a longer request; the miss then + // proxies upstream and records the new turn. 
+ if (strictTurnIndex && match.turnIndex !== undefined) { const assistantCount = effective.messages.filter((m) => m.role === "assistant").length; - if (assistantCount !== match.turnIndex) skippedByState = true; + if (assistantCount !== match.turnIndex) { + skippedBySequenceOrTurn++; + continue; + } + } + + contentMatches.push(fixture); + } + + if (contentMatches.length === 0) { + return { fixture: null, skippedBySequenceOrTurn }; + } + + const assistantCount = effective.messages.filter((m) => m.role === "assistant").length; + const { fixture: selected, byUniquePosition } = selectByTurnIndex(contentMatches, assistantCount); + + // Divergence predicate: the served fixture carries a turnIndex that does NOT + // sit at the current assistant position. Under strict matching this fixture + // would have been rejected at the gate above, so serving it here is the (rare, + // off-by-N) relaxed behaviour change PR #276 introduced. Computed from values + // already in hand — no second matching pass. + const selectedTurn = selected.match.turnIndex; + const turnIndexRelaxed = selectedTurn !== undefined && selectedTurn !== assistantCount; + // `matchedBy` reports "turnIndex" ONLY when the selection was genuinely decided + // by a UNIQUE positional criterion (a single candidate whose turnIndex sits + // exactly at the current assistant count). A canonical-position fixture that + // tied with another at-position candidate, or that lost the exact-turn + // tie-break to an earlier fallback, was decided by REGISTRATION ORDER, not by + // position — those are "content". `selectByTurnIndex` reports which it was. + const matchedBy: "content" | "turnIndex" = byUniquePosition ? "turnIndex" : "content"; + + if (turnIndexRelaxed && options?.logger) { + // Throttle: warn at most once per divergent fixture per process. 
Keyed by + // the fixture's OBJECT IDENTITY so distinct fixtures whose match serialises + // identically (predicate/regex collisions) each warn, and entries auto-evict + // when the fixture is released (see warnedRelaxedFixtures above). + if (!warnedRelaxedFixtures.has(selected)) { + warnedRelaxedFixtures.add(selected); + // Human-readable description for the message only (NOT the throttle key, + // which is the fixture's object identity). `JSON.stringify(match)` is + // unfit here: it DROPS `predicate` functions entirely and collapses any + // RegExp matcher to `{}`, so a predicate/regex fixture's warning read + // "served fixture {}" / "{"userMessage":{}}". `describeMatch` instead + // summarizes the present matcher KEYS (annotating predicate/regex values) + // so the warned fixture is identifiable. + const idx = fixtures.indexOf(selected); + const desc = describeMatch(selected.match, idx); + options.logger.warn( + `turnIndex relaxed: served fixture ${desc} at assistantCount=${assistantCount} ` + + `(scripted turnIndex=${selectedTurn}); set AIMOCK_STRICT_TURN_INDEX=1 to restore strict matching`, + ); + } + } + + return { fixture: selected, skippedBySequenceOrTurn, turnIndexRelaxed, matchedBy }; +} + +/** + * Build a stable, human-readable identifier for a fixture's match shape for the + * relaxed-turnIndex warning. The previous `JSON.stringify(match)` was unfit: it + * DROPS `predicate` functions (non-serialisable) and serialises any RegExp + * matcher to `{}`, so a predicate- or regex-gated fixture's warning collapsed to + * an uninformative "served fixture {}" / `{"userMessage":{}}` blob. + * + * Instead we list the PRESENT matcher keys in declaration order, annotating each + * by VALUE KIND so predicates and regexes survive: `predicate(fn)`, + * `userMessage(regex)`, `userMessage("hello")`, `turnIndex=0`, etc. The + * fixture's array `index` (when known, i.e. 
`>= 0`) is prefixed as the stable + * positional identifier — the `Fixture` type carries no `id`/`name`, so its + * registration index is the only stable handle. String/number values are shown + * inline (truncated) so a content match remains recognisable; the whole string + * is capped to keep the log line bounded. + */ +function describeMatch(match: FixtureMatch, index: number): string { + const parts: string[] = []; + for (const [key, value] of Object.entries(match)) { + if (value === undefined) continue; + if (typeof value === "function") { + parts.push(`${key}(fn)`); + } else if (value instanceof RegExp) { + parts.push(`${key}(${value})`); + } else if (typeof value === "string") { + const v = value.length > 40 ? `${value.slice(0, 40)}…` : value; + parts.push(`${key}(${JSON.stringify(v)})`); + } else if (Array.isArray(value)) { + parts.push(`${key}(${value.length} item${value.length === 1 ? "" : "s"})`); + } else { + parts.push(`${key}=${String(value)}`); } + } + const keys = parts.length > 0 ? parts.join(", ") : "no matchers"; + const prefix = index >= 0 ? `#${index} ` : ""; + return `${prefix}{ ${keys} }`.slice(0, 160); +} + +/** + * Choose one fixture from a set that all CONTENT-matched the same request, + * using `turnIndex` purely as a position disambiguator (never as a reject + * gate). + * + * The selection rule is applied UNIFORMLY regardless of candidate count (a + * single candidate is NOT special-cased), so the same request never flips its + * answer just because an unrelated content-matching fixture was registered. + * Within every tier ties are broken by REGISTRATION ORDER — the + * earliest-registered eligible candidate wins — preserving the historical + * greedy "first matching fixture wins" contract. + * + * 1. Prefer the turnIndexed candidate whose `turnIndex` is closest to + * `assistantCount` WITHOUT exceeding it (the highest `turnIndex <= + * assistantCount`). 
A behind-the-count scripted turn (turnIndex < + * assistantCount) beats a plain fallback — an explicit position is a + * stronger signal than an unpositioned default. A negative `turnIndex` such + * as -1 is a valid at/behind position (the seed is `-Infinity`, never a `-1` + * sentinel that would mis-skip it). Earlier registration breaks ties among + * equal turnIndexes. + * 2. EXACT-turn tie-break: when the best at/behind scripted turn sits at the + * EXACT current position (`turnIndex === assistantCount`) a plain fallback + * also answers "right now", so the two are equally eligible and REGISTRATION + * ORDER decides — a later-registered `turnIndex:0` does NOT override an + * earlier-registered fallback, and vice-versa. + * 3. Otherwise every turnIndexed candidate is still AHEAD of the conversation. + * An explicit future turn must NOT answer an earlier point, so a plain + * fallback (eligible at every position) is the better answer — applied + * uniformly, INCLUDING when the fallback is the sole partner of a single + * future-turn fixture (the single/multi asymmetry this fixes). + * 4. Otherwise (pure script, every candidate turnIndexed and all ahead) the + * script genuinely has no earlier answer, so serve the lowest `turnIndex` + * candidate — the false-red-kill for a lone scripted turn whose run has + * FEWER assistant bubbles than its `turnIndex`; registration order breaks + * ties. + * + * A future-turn fixture therefore NEVER answers an earlier-point request when an + * eligible alternative (a fallback, or an at/behind scripted turn) exists — the + * future-turn guard is enforced uniformly for single and multiple candidates. 
+ * + * Returns the selected fixture alongside `byUniquePosition`: `true` ONLY when the + * choice was decided by a UNIQUE positional criterion — the served fixture's + * `turnIndex` sits EXACTLY at `assistantCount`, no earlier fallback overrode it + * (tier 2), and no other candidate shared that exact position (so registration + * order did not break a tie). `matchFixtureDiagnostic` maps this to + * `matchedBy === "turnIndex"`; every other selection path (tie-break, + * registration order, behind/ahead scripted turn, fallback) is `"content"`. + */ +function selectByTurnIndex( + candidates: Fixture[], + assistantCount: number, +): { fixture: Fixture; byUniquePosition: boolean } { + // The first non-turnIndexed candidate is the registration-order-first plain + // fallback (eligible at every position). Tracked by index so the exact-turn + // tie-break can compare registration order against the chosen scripted turn. + const fallbackIdx = candidates.findIndex((f) => f.match.turnIndex === undefined); - if (skippedByState) { - skippedBySequenceOrTurn++; - continue; + // Tier 1: closest scripted turn at/before the current count. Strict `>` + // preserves registration order on equal turnIndexes; `-Infinity` seed so a + // negative turnIndex is a legitimate at/behind candidate, not a sentinel skip. + let bestIdx = -1; + let bestTurn = -Infinity; + for (let i = 0; i < candidates.length; i++) { + const t = candidates[i].match.turnIndex; + if (t === undefined) continue; + if (t <= assistantCount && t > bestTurn) { + bestIdx = i; + bestTurn = t; } + } - return { fixture, skippedBySequenceOrTurn }; + if (bestIdx !== -1) { + // Tier 2: exact-turn tie with a fallback → earlier registration wins. A + // fallback won the tie, so position did NOT uniquely decide → content. 
+ if (bestTurn === assistantCount && fallbackIdx !== -1 && fallbackIdx < bestIdx) { + return { fixture: candidates[fallbackIdx], byUniquePosition: false }; + } + // A UNIQUE positional decision requires the chosen turn to sit EXACTLY at + // the current count AND to be the only candidate at that exact position — + // otherwise registration order, not position, broke the tie. + const atExactPosition = + bestTurn === assistantCount && + candidates.filter((f) => f.match.turnIndex === assistantCount).length === 1; + return { fixture: candidates[bestIdx], byUniquePosition: atExactPosition }; } - return { fixture: null, skippedBySequenceOrTurn }; + // Tier 3: every scripted turn is ahead. A plain fallback answers this earlier + // point; first-registered fallback wins. + if (fallbackIdx !== -1) return { fixture: candidates[fallbackIdx], byUniquePosition: false }; + + // Tier 4: pure script, all turnIndexed and all ahead. Serve the lowest + // scripted turn; registration order breaks ties (first of the lowest wins). 
+ let lowest = candidates[0]; + for (const f of candidates) { + if ((f.match.turnIndex as number) < (lowest.match.turnIndex as number)) lowest = f; + } + return { fixture: lowest, byUniquePosition: false }; } /** @@ -296,6 +639,7 @@ export function matchFixture( req: ChatCompletionRequest, matchCounts?: Map<Fixture, number>, requestTransform?: (req: ChatCompletionRequest) => ChatCompletionRequest, + options?: MatchOptions, ): Fixture | null { - return matchFixtureDiagnostic(fixtures, req, matchCounts, requestTransform).fixture; + return matchFixtureDiagnostic(fixtures, req, matchCounts, requestTransform, options).fixture; } From d71b7d3813b87970e88ddb84d001d30582bba97a Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 23 Jun 2026 16:19:47 -0700 Subject: [PATCH 2/4] fix: thread recordMatchOptions/logger through record-path handlers Pass recordMatchOptions and the logger through the server record path into each provider handler (messages, responses, gemini, bedrock, bedrock-converse, cohere, ollama) so record mode keeps turnIndex a strict gate (an earlier-turn fixture cannot shadow a longer request and block recording of the new turn) and replay can emit the relaxed-turnIndex divergence warning. 
--- src/bedrock-converse.ts | 10 +++++++++- src/bedrock.ts | 10 +++++++++- src/cohere.ts | 10 +++++++++- src/gemini.ts | 6 +++++- src/messages.ts | 6 +++++- src/ollama.ts | 14 +++++++++++++- src/responses.ts | 6 +++++- src/server.ts | 7 ++++++- 8 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts index e9b34c8..26b0e7c 100644 --- a/src/bedrock-converse.ts +++ b/src/bedrock-converse.ts @@ -34,7 +34,7 @@ import { strictNoMatchMessage, strictNoMatchLogLine, } from "./helpers.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { writeErrorResponse } from "./sse-writer.js"; import { writeEventStream } from "./aws-event-stream.js"; import { createInterruptionSignal } from "./interruption.js"; @@ -608,6 +608,10 @@ export async function handleConverse( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. + recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { @@ -920,6 +924,10 @@ export async function handleConverseStream( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. 
+ recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { diff --git a/src/bedrock.ts b/src/bedrock.ts index f8e29e4..1c83c49 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -45,7 +45,7 @@ import { strictNoMatchMessage, strictNoMatchLogLine, } from "./helpers.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { writeErrorResponse } from "./sse-writer.js"; import { writeEventStream } from "./aws-event-stream.js"; import { createInterruptionSignal } from "./interruption.js"; @@ -407,6 +407,10 @@ export async function handleBedrock( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. + recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { @@ -1114,6 +1118,10 @@ export async function handleBedrockStream( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. 
+ recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { diff --git a/src/cohere.ts b/src/cohere.ts index e6b307d..75949f3 100644 --- a/src/cohere.ts +++ b/src/cohere.ts @@ -42,7 +42,7 @@ import { strictNoMatchMessage, strictNoMatchLogLine, } from "./helpers.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; @@ -871,6 +871,10 @@ export async function handleCohere( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. + recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { @@ -1300,6 +1304,10 @@ export async function handleCohereEmbed( syntheticReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. 
+ recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { diff --git a/src/gemini.ts b/src/gemini.ts index c7cdd7d..f2e8a9f 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -38,7 +38,7 @@ import { strictNoMatchMessage, strictNoMatchLogLine, } from "./helpers.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; @@ -708,6 +708,10 @@ export async function handleGemini( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. + recordMatchOptions(!!defaults.record, defaults.logger), ); const path = req.url ?? `/v1beta/models/${model}:generateContent`; diff --git a/src/messages.ts b/src/messages.ts index e97ae83..a1a8c4b 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -37,7 +37,7 @@ import { strictNoMatchMessage, strictNoMatchLogLine, } from "./helpers.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; @@ -1152,6 +1152,10 @@ export async function handleMessages( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. 
+ recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { diff --git a/src/ollama.ts b/src/ollama.ts index 7cecb93..acba505 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -40,7 +40,7 @@ import { strictNoMatchMessage, strictNoMatchLogLine, } from "./helpers.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { writeErrorResponse } from "./sse-writer.js"; import { writeNDJSONStream } from "./ndjson-writer.js"; import { createInterruptionSignal } from "./interruption.js"; @@ -585,6 +585,10 @@ export async function handleOllama( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. + recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { @@ -969,6 +973,10 @@ export async function handleOllamaGenerate( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. + recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { @@ -1304,6 +1312,10 @@ export async function handleOllamaEmbeddings( syntheticReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. 
+ recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { diff --git a/src/responses.ts b/src/responses.ts index 80cae88..c61ab47 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -36,7 +36,7 @@ import { strictNoMatchMessage, strictNoMatchLogLine, } from "./helpers.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; import { createInterruptionSignal } from "./interruption.js"; import type { RecordedTimings } from "./types.js"; @@ -967,6 +967,10 @@ export async function handleResponses( completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // Record mode proxies on a miss to capture a fresh turn (see record gate + // below), so keep turnIndex strict to prevent an earlier-turn fixture from + // shadowing a longer request and skipping the new turn's recording. + recordMatchOptions(!!defaults.record, defaults.logger), ); if (fixture) { diff --git a/src/server.ts b/src/server.ts index 108cd39..b254e5f 100644 --- a/src/server.ts +++ b/src/server.ts @@ -9,7 +9,7 @@ import type { RecordProviderKey, } from "./types.js"; import { Journal } from "./journal.js"; -import { matchFixtureDiagnostic } from "./router.js"; +import { matchFixtureDiagnostic, recordMatchOptions } from "./router.js"; import { validateFixtures, entryToFixture } from "./fixture-loader.js"; import { writeSSEStream, writeErrorResponse } from "./sse-writer.js"; import { createInterruptionSignal } from "./interruption.js"; @@ -564,6 +564,11 @@ async function handleCompletions( body, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform, + // In record mode a miss proxies upstream to capture a fresh turn, so an + // earlier-turn capture must not shadow a longer request via the relaxed + // turnIndex disambiguator — keep turnIndex a strict gate while recording. 
+ // This handler's record gate (below) is `defaults.record && providerKey`. + recordMatchOptions(!!(defaults.record && providerKey), defaults.logger), ); if (fixture) { From d7f199a7365f96badd4da783a5b7f47cf750e468 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 23 Jun 2026 16:19:52 -0700 Subject: [PATCH 3/4] test: content-anchored matching + relaxed-turnIndex warn coverage Cover content-anchored fixture matching, record-path wiring, and relaxed turnIndex detect/warn/opt-out behavior (red-green). --- .../content-anchored-match-fixes.test.ts | 315 ++++++++++++++++ src/__tests__/router.test.ts | 37 +- .../strict-no-match-diagnostic.test.ts | 34 +- src/__tests__/turn-index-relaxation.test.ts | 348 ++++++++++++++++++ src/__tests__/turn-index.test.ts | 102 +++++ 5 files changed, 823 insertions(+), 13 deletions(-) create mode 100644 src/__tests__/content-anchored-match-fixes.test.ts create mode 100644 src/__tests__/turn-index-relaxation.test.ts diff --git a/src/__tests__/content-anchored-match-fixes.test.ts b/src/__tests__/content-anchored-match-fixes.test.ts new file mode 100644 index 0000000..23e94d4 --- /dev/null +++ b/src/__tests__/content-anchored-match-fixes.test.ts @@ -0,0 +1,315 @@ +import { describe, it, test, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { matchFixture, matchFixtureDiagnostic, getTextContent, getSystemText } from "../router.js"; +import { LLMock } from "../llmock.js"; +import type { ChatCompletionRequest, Fixture } from "../types.js"; + +// =========================================================================== +// CR fixes for the content-anchored fixture-matching change. One concern: +// the content-anchored selection logic + record-path wiring must be correct. 
+// =========================================================================== + +function makeReq(overrides: Partial<ChatCompletionRequest> = {}): ChatCompletionRequest { + return { + model: "gpt-4o", + messages: [{ role: "user", content: "hello" }], + ...overrides, + }; +} + +function makeFixture( + match: Fixture["match"], + response: Fixture["response"] = { content: "ok" }, +): Fixture { + return { match, response }; +} + +// --------------------------------------------------------------------------- +// F2 — selectByTurnIndex asymmetry + registration-order break +// --------------------------------------------------------------------------- + +describe("F2: future-turn guard applied uniformly (single + multi candidate)", () => { + it("single content-matching fixture whose turnIndex is AHEAD of the conversation does not answer an at-turn-0 request via the relaxed path", () => { + // A turnIndex-3 fixture is AHEAD of an at-turn-0 request, so it must NOT + // answer while a non-turn fallback also content-matches — the same + // future-turn guard the multi-candidate path enforces. (When such a + // fixture is the ONLY content match, replay still serves it; this test + // pins that the single- and multi-candidate paths agree whenever a + // non-turn fallback alternative exists.) + const fixtures = [ + makeFixture({ userMessage: "step", turnIndex: 3 }, { content: "future" }), + makeFixture({ userMessage: "step" }, { content: "fallback" }), + ]; + // assistantCount 0; turnIndex 3 is ahead → the non-turn fallback must win, + // not the future-turn fixture.
+ const got = matchFixture(fixtures, makeReq({ messages: [{ role: "user", content: "step" }] })); + expect(got?.response).toEqual({ content: "fallback" }); + }); +}); + +describe("F2: registration order preserved among equally-eligible candidates", () => { + it("a later-registered turnIndex'd fixture does NOT override an earlier-registered non-turnIndex'd one when both are eligible", () => { + // Both are content matches and both are eligible at assistantCount 0 + // (turnIndex 0 <= 0). The first-registered fixture must win (registration + // order tie-break), regardless of which one carries a turnIndex. + const fixtures = [ + makeFixture({ userMessage: "tie" }, { content: "first-registered" }), + makeFixture({ userMessage: "tie", turnIndex: 0 }, { content: "second-registered" }), + ]; + const got = matchFixture(fixtures, makeReq({ messages: [{ role: "user", content: "tie" }] })); + expect(got?.response).toEqual({ content: "first-registered" }); + }); +}); + +// --------------------------------------------------------------------------- +// F3 — fallback must not serve a future-turn fixture +// --------------------------------------------------------------------------- + +describe("F3: fallback does not serve a future-turn fixture to an at-turn-0 request", () => { + it("a turn-3 fixture must not answer an at-turn-0 request when a fallback alternative exists", () => { + const fixtures = [ + makeFixture({ userMessage: "go", turnIndex: 3 }, { content: "turn-3" }), + makeFixture({ userMessage: "go", turnIndex: 5 }, { content: "turn-5" }), + makeFixture({ userMessage: "go" }, { content: "plain-fallback" }), + ]; + // assistantCount 0; every turnIndexed candidate (3, 5) is ahead → the plain + // fallback answers, NOT the lowest future turn. 
+ const got = matchFixture(fixtures, makeReq({ messages: [{ role: "user", content: "go" }] })); + expect(got?.response).toEqual({ content: "plain-fallback" }); + }); +}); + +// --------------------------------------------------------------------------- +// F4 — text-join + empty-handling consistency +// --------------------------------------------------------------------------- + +describe("F4: getTextContent / getSystemText consistent multi-part + empty handling", () => { + it("empty-string string content and empty-text array content are treated the same (both null/empty)", () => { + // String "" historically returns "" (skipped via !text); array of only + // empty text returns null. After the fix both collapse to the same empty + // semantic so content matching is symmetric. + const fromString = getTextContent(""); + const fromArray = getTextContent([{ type: "text", text: "" }]); + expect(Boolean(fromString)).toBe(false); + expect(Boolean(fromArray)).toBe(false); + }); + + it("getSystemText joins multi-part text within a single system message the same way getTextContent does", () => { + const joined = getTextContent([ + { type: "text", text: "alpha" }, + { type: "text", text: "beta" }, + ]); + const sys = getSystemText([ + { + role: "system", + content: [ + { type: "text", text: "alpha" }, + { type: "text", text: "beta" }, + ], + }, + ]); + // A single system message's parts must read identically through both paths. + expect(sys).toBe(joined); + }); + + it("systemMessage:[] matches unconditionally even when the request has no system text (F5 fold)", () => { + const fixtures = [makeFixture({ systemMessage: [] }, { content: "unconditional" })]; + // No system message at all — the empty-array contract is "no constraint". 
+ const got = matchFixture(fixtures, makeReq({ messages: [{ role: "user", content: "x" }] })); + expect(got?.response).toEqual({ content: "unconditional" }); + }); +}); + +// --------------------------------------------------------------------------- +// F1 — sequenceIndex consumed by a declined fixture +// --------------------------------------------------------------------------- + +describe("F1: sequence match-count bumps only for the SELECTED fixture", () => { + it("a sequenced fixture that passes its gate but is NOT served by selectByTurnIndex is not consumed", async () => { + const mock = new LLMock(); + await mock.start(); + try { + mock.reset(); + // A turnIndex'd fixture B (registered FIRST so it wins the position tie) + // AND a sequenced fixture A at sequenceIndex 0 that also content-matches. + // At assistantCount 1, selectByTurnIndex serves B (turnIndex 1 == count, + // registered first). A passed its sequence gate (count 0 == index 0) but + // must NOT have its count consumed, because B — not A — was served. + mock.on({ userMessage: "seq", turnIndex: 1 }, { content: "B-turn-1" }); + mock.on({ userMessage: "seq", sequenceIndex: 0 }, { content: "A-seq-0" }); + mock.on({ userMessage: "seq", sequenceIndex: 1 }, { content: "A-seq-1" }); + + // assistantCount 1 → B (turnIndex 1) is the closest scripted turn → served. + const res1 = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4", + stream: false, + messages: [ + { role: "user", content: "seq" }, + { role: "assistant", content: "prior" }, + { role: "user", content: "seq" }, + ], + }), + }); + expect(res1.status).toBe(200); + const body1 = (await res1.json()) as { choices: { message: { content: string } }[] }; + expect(body1.choices[0].message.content).toBe("B-turn-1"); + + // Now an at-turn-0 request: sequence A must STILL be at index 0 (not + // consumed by the prior request which served B). 
So we get A-seq-0. + const res2 = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4", + stream: false, + messages: [{ role: "user", content: "seq" }], + }), + }); + expect(res2.status).toBe(200); + const body2 = (await res2.json()) as { choices: { message: { content: string } }[] }; + // If the prior request had wrongly consumed A's index, this would serve + // A-seq-1 instead. Correct behavior: A is untouched → A-seq-0. + expect(body2.choices[0].message.content).toBe("A-seq-0"); + } finally { + await mock.stop(); + } + }); +}); + +// --------------------------------------------------------------------------- +// F6 — strictTurnIndex wired on ALL record-capable handlers (not just OpenAI) +// --------------------------------------------------------------------------- + +interface FakeUpstream { + url: string; + close: () => Promise<void>; + getHits: () => number; +} + +function startAnthropicUpstream(): Promise<FakeUpstream> { + let hits = 0; + return new Promise((resolve) => { + const server = http.createServer((req, res) => { + let raw = ""; + req.on("data", (c) => (raw += c)); + req.on("end", () => { + void raw; + hits++; + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + id: "msg_rec", + type: "message", + role: "assistant", + model: "claude-3-5-sonnet-20241022", + content: [{ type: "text", text: "recorded-second-turn" }], + stop_reason: "end_turn", + usage: { input_tokens: 1, output_tokens: 1 }, + }), + ); + }); + }); + server.listen(0, "127.0.0.1", () => { + const addr = server.address() as { port: number }; + resolve({ + url: `http://127.0.0.1:${addr.port}`, + close: () => + new Promise((r) => { + server.close(() => r()); + }), + getHits: () => hits, + }); + }); + }); +} + +describe("F6: record mode strictTurnIndex wired on the Anthropic (non-OpenAI) handler", () => { + let mock: LLMock | undefined; + let upstream: Awaited<ReturnType<typeof startAnthropicUpstream>> |
undefined; + let tmpDir: string | undefined; + + afterEach(async () => { + await mock?.stop(); + mock = undefined; + await upstream?.close(); + upstream = undefined; + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } + }); + + test("an earlier-turn fixture must NOT shadow a longer record request → the new turn IS proxied/recorded", async () => { + upstream = await startAnthropicUpstream(); + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aimock-anthropic-record-")); + mock = new LLMock({ + port: 0, + record: { providers: { anthropic: upstream!.url }, fixturePath: tmpDir }, + }); + await mock.start(); + + // A turnIndex-0 fixture that content-matches the user message. A longer + // (turn-1) request arrives. Under the BUGGY default (strictTurnIndex + // unset on this handler), the turn-0 fixture content-shadows the longer + // request → fixture served → recording never fires. With strictTurnIndex + // wired (record mode), turn-0 != turn-1 → MISS → proxy + record. + mock.on({ userMessage: "record-me", turnIndex: 0 }, { content: "stale-turn-0" }); + + const res = await fetch(`${mock.url}/v1/messages`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": "test-key", + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + stream: false, + messages: [ + { role: "user", content: "record-me" }, + { role: "assistant", content: "first turn" }, + { role: "user", content: "record-me" }, + ], + }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { content: { type: string; text: string }[] }; + // Must be the freshly recorded upstream turn, NOT the stale turn-0 fixture. + expect(body.content[0].text).toBe("recorded-second-turn"); + // Upstream WAS hit (the new turn was proxied + recorded). 
+ expect(upstream!.getHits()).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// F6 (unit) — matcher-level proof the shared MatchOptions builder is honored +// --------------------------------------------------------------------------- + +describe("F6 (unit): strictTurnIndex makes an earlier-turn fixture MISS a longer request", () => { + it("default (false) shadows; strict (true) misses → record branch can fire", () => { + const fixtures = [makeFixture({ userMessage: "rec", turnIndex: 0 }, { content: "turn-0" })]; + const longer = makeReq({ + messages: [ + { role: "user", content: "rec" }, + { role: "assistant", content: "a" }, + { role: "user", content: "rec" }, + ], + }); + // Replay default: the lone content match is served (false-red kill). + const replayed = matchFixtureDiagnostic(fixtures, longer); + expect(replayed.fixture).not.toBeNull(); + // Record (strict): turn-0 != turn-1 → MISS so the handler proxies + records. + const recorded = matchFixtureDiagnostic(fixtures, longer, undefined, undefined, { + strictTurnIndex: true, + }); + expect(recorded.fixture).toBeNull(); + }); +}); diff --git a/src/__tests__/router.test.ts b/src/__tests__/router.test.ts index 3a2590b..5088573 100644 --- a/src/__tests__/router.test.ts +++ b/src/__tests__/router.test.ts @@ -99,9 +99,12 @@ describe("getTextContent", () => { expect(getTextContent([])).toBeNull(); }); - it("returns null for array with only empty-string text parts", () => { + it("returns the empty string (NOT null) for an array with a present-but-empty text part", () => { + // Symmetric with the string path: getTextContent("") returns "", so an + // array carrying a present-but-empty text part likewise returns "" — a + // present-but-empty body, distinct from `null` (no text content at all). 
const parts: ContentPart[] = [{ type: "text", text: "" }]; - expect(getTextContent(parts)).toBeNull(); + expect(getTextContent(parts)).toBe(""); }); }); @@ -959,7 +962,12 @@ describe("matchFixture — turnIndex", () => { expect(matchFixture([fixture], req)).toBe(fixture); }); - it("skips when assistant message count does not equal turnIndex", () => { + it("a uniquely content-matching fixture matches even when the assistant count differs from turnIndex (content-anchored)", () => { + // turnIndex is a non-fatal disambiguator on replay: a fixture that is the + // ONLY content match must not be rejected because the request has an extra + // (or missing) assistant bubble vs the fixture's hardcoded turnIndex. This + // is the false-red ("empty assistant response") this matcher fixes — + // multi-step agents emit several assistant bubbles per logical turn. const fixture = makeFixture({ userMessage: "hello", turnIndex: 2 }); const req = makeReq({ messages: [ @@ -968,7 +976,7 @@ describe("matchFixture — turnIndex", () => { { role: "user", content: "hello" }, ], }); - expect(matchFixture([fixture], req)).toBeNull(); + expect(matchFixture([fixture], req)).toBe(fixture); }); it("turnIndex 0 matches when no assistant messages present", () => { @@ -1013,7 +1021,12 @@ describe("matchFixture — turnIndex", () => { expect(matchFixture([turn0, turn1, turn2], req2)).toBe(turn2); }); - it("falls through to non-turnIndex fixture when no turnIndex matches", () => { + it("a scripted turn at/before the assistant count wins over an unpositioned fallback (closest-turn disambiguation)", () => { + // Two content matches: a turnIndex:0 fixture and an unpositioned fallback. + // With assistantCount = 2, turnIndex:0 is the closest scripted turn at or + // before the conversation, so it disambiguates and wins. An overshooting + // run lands on the nearest scripted turn rather than missing (the + // content-anchored replacement for the old exact-equality fall-through). 
const turnOnly = makeFixture({ userMessage: "hello", turnIndex: 0 }, { content: "turn-0" }); const fallback = makeFixture({ userMessage: "hello" }, { content: "fallback" }); const req = makeReq({ @@ -1025,7 +1038,19 @@ describe("matchFixture — turnIndex", () => { { role: "user", content: "hello" }, ], }); - expect(matchFixture([turnOnly, fallback], req)).toBe(fallback); + expect(matchFixture([turnOnly, fallback], req)).toBe(turnOnly); + }); + + it("an unpositioned fallback wins when every scripted turn is still AHEAD of the conversation", () => { + // assistantCount = 0 but the only turnIndexed candidate is turnIndex:1. + // A future scripted turn must not answer an earlier point in the + // conversation, so the unpositioned fallback wins. + const futureTurn = makeFixture({ userMessage: "hello", turnIndex: 1 }, { content: "turn-1" }); + const fallback = makeFixture({ userMessage: "hello" }, { content: "fallback" }); + const req = makeReq({ + messages: [{ role: "user", content: "hello" }], + }); + expect(matchFixture([futureTurn, fallback], req)).toBe(fallback); }); }); diff --git a/src/__tests__/strict-no-match-diagnostic.test.ts b/src/__tests__/strict-no-match-diagnostic.test.ts index ba3a840..45cbf28 100644 --- a/src/__tests__/strict-no-match-diagnostic.test.ts +++ b/src/__tests__/strict-no-match-diagnostic.test.ts @@ -99,13 +99,31 @@ describe("matchFixtureDiagnostic", () => { expect(result.skippedBySequenceOrTurn).toBe(1); }); - it("counts a fixture that matched the shape but failed the turnIndex gate", () => { + it("does NOT skip a uniquely content-matching fixture on a turnIndex mismatch (content-anchored replay)", () => { const fixture: Fixture = { match: { userMessage: "hello", turnIndex: 1 }, response: { content: "hi" }, }; - // Request has zero assistant turns, so turnIndex: 1 cannot match. 
+ // Request has zero assistant turns, but turnIndex is a non-fatal + // disambiguator on the replay path — a fixture that is the only content + // match must not be rejected for an off-by-N assistant count. It matches, + // and nothing is "skipped by sequence/turn state". const result = matchFixtureDiagnostic([fixture], chatRequest("hello")); + expect(result.fixture).toBe(fixture); + expect(result.skippedBySequenceOrTurn).toBe(0); + }); + + it("keeps turnIndex a strict skip gate under strictTurnIndex (record mode)", () => { + const fixture: Fixture = { + match: { userMessage: "hello", turnIndex: 1 }, + response: { content: "hi" }, + }; + // In record mode a miss proxies upstream to capture the new turn, so an + // earlier-turn capture must not shadow a longer request — turnIndex stays + // an exact reject gate and the shape-matching candidate is counted skipped. + const result = matchFixtureDiagnostic([fixture], chatRequest("hello"), undefined, undefined, { + strictTurnIndex: true, + }); expect(result.fixture).toBeNull(); expect(result.skippedBySequenceOrTurn).toBe(1); }); @@ -216,17 +234,19 @@ describe("strict-mode 503 sequence/turn disambiguation", () => { expect(body.error.type).toBe("invalid_request_error"); }); - it("turnIndex mismatch → skipped-by-state message (503, envelope intact)", async () => { + it("turnIndex mismatch on a unique content match → 200 (content-anchored replay, no false strict miss)", async () => { const fixtures: Fixture[] = [ { match: { userMessage: "hello", turnIndex: 1 }, response: { content: "hi" } }, ]; + // No record config → replay path. The request has 0 assistant turns but the + // fixture is the only content match, so the relaxed turnIndex disambiguator + // matches it instead of producing a false strict-mode 503 ("empty assistant + // response"). This is the regression this matcher change fixes. 
+ server = await createServer(fixtures, { port: 0, strict: true }); - // Request has 0 assistant turns, so turnIndex:1 is skipped by turn state. const res = await httpPost(`${server.url}/v1/chat/completions`, chatRequest("hello")); - expect(res.status).toBe(503); + expect(res.status).toBe(200); const body = JSON.parse(res.body); - expect(body.error.message).toMatch(SKIPPED_BY_STATE_RE); - expect(body.error.type).toBe("invalid_request_error"); + expect(body.choices[0].message.content).toBe("hi"); }); it("invokes a stateful match.predicate EXACTLY ONCE on the strict no-match path", async () => { diff --git a/src/__tests__/turn-index-relaxation.test.ts b/src/__tests__/turn-index-relaxation.test.ts new file mode 100644 index 0000000..895668d --- /dev/null +++ b/src/__tests__/turn-index-relaxation.test.ts @@ -0,0 +1,348 @@ +import { describe, it, expect, vi, afterEach } from "vitest"; +import { matchFixtureDiagnostic, _resetTurnIndexRelaxWarnings } from "../router.js"; +import { LLMock } from "../llmock.js"; +import type { ChatCompletionRequest, Fixture } from "../types.js"; + +// =========================================================================== +// turnIndex relaxation: detection + warn + opt-out (AIMOCK_STRICT_TURN_INDEX). +// The replay matcher relaxed turnIndex from a hard reject gate to a non-fatal +// disambiguator (PR #276). These tests cover the strictly-additive +// detect/warn/opt-out package layered on top of that change. +// =========================================================================== + +function makeReq(overrides: Partial<ChatCompletionRequest> = {}): ChatCompletionRequest { + return { + model: "gpt-4o", + messages: [{ role: "user", content: "hello" }], + ...overrides, + }; +} + +function makeFixture( + match: Fixture["match"], + response: Fixture["response"] = { content: "ok" }, +): Fixture { + return { match, response }; +} + +// A fake logger whose warn() can be spied on. Structural — matches the subset +// of Logger the router uses.
+function fakeLogger() { + return { warn: vi.fn() }; +} + +// A divergent request: one assistant bubble already present (assistantCount 1), +// but the only content-matching fixture is scripted at turnIndex 0 (defined and +// != assistantCount). Under relaxed replay it is SERVED; the strict gate WOULD +// HAVE rejected it. +function divergentRequest(): ChatCompletionRequest { + return makeReq({ + messages: [ + { role: "user", content: "diverge" }, + { role: "assistant", content: "first turn" }, + { role: "user", content: "diverge" }, + ], + }); +} + +afterEach(() => { + _resetTurnIndexRelaxWarnings(); + delete process.env.AIMOCK_STRICT_TURN_INDEX; + vi.restoreAllMocks(); +}); + +describe("turnIndex relaxation: divergence detect + warn", () => { + it("serves the divergent fixture, warns exactly once, sets turnIndexRelaxed=true", () => { + const logger = fakeLogger(); + const fixtures = [makeFixture({ userMessage: "diverge", turnIndex: 0 }, { content: "served" })]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + // (a) served + expect(diag.fixture).not.toBeNull(); + expect(diag.fixture?.response).toEqual({ content: "served" }); + // (b) warn fires exactly once + expect(logger.warn).toHaveBeenCalledTimes(1); + expect(String(logger.warn.mock.calls[0][0])).toContain("turnIndex relaxed"); + expect(String(logger.warn.mock.calls[0][0])).toContain("AIMOCK_STRICT_TURN_INDEX=1"); + // (c) diagnostic field + expect(diag.turnIndexRelaxed).toBe(true); + expect(diag.matchedBy).toBe("content"); + }); +}); + +describe("turnIndex relaxation: AIMOCK_STRICT_TURN_INDEX opt-out", () => { + it("with the env var set, the divergent fixture is NOT served (strict gate restored)", () => { + process.env.AIMOCK_STRICT_TURN_INDEX = "1"; + const logger = fakeLogger(); + const fixtures = [makeFixture({ userMessage: "diverge", turnIndex: 0 }, { content: "served" })]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), 
undefined, undefined, { + logger, + }); + expect(diag.fixture).toBeNull(); + // No relaxed serve → no warn. + expect(logger.warn).not.toHaveBeenCalled(); + expect(diag.turnIndexRelaxed).toBeFalsy(); + }); +}); + +describe("turnIndex relaxation: warn throttle", () => { + it("multiple divergent requests for the same fixture warn exactly once", () => { + const logger = fakeLogger(); + const fixtures = [makeFixture({ userMessage: "diverge", turnIndex: 0 }, { content: "served" })]; + matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { logger }); + matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { logger }); + matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { logger }); + expect(logger.warn).toHaveBeenCalledTimes(1); + }); +}); + +describe("turnIndex relaxation: throttle keyed by fixture identity (no collision)", () => { + it("two DISTINCT divergent fixtures whose match serializes identically still each warn", () => { + // Both fixtures carry a `predicate` (dropped by JSON.stringify) plus the + // SAME serializable fields, so `JSON.stringify(match)` is byte-identical for + // the two — yet they are different fixture objects mapping to different + // responses. A throttle keyed on the serialized match collides and + // suppresses the SECOND warn; a throttle keyed on fixture IDENTITY warns for + // BOTH. They are served in separate calls so each is the selected fixture. + const logger = fakeLogger(); + const predicate = () => true; + const fixtureA = makeFixture( + { userMessage: "diverge", turnIndex: 0, predicate }, + { content: "served-A" }, + ); + const fixtureB = makeFixture( + { userMessage: "diverge", turnIndex: 0, predicate }, + { content: "served-B" }, + ); + // Sanity: the two matches serialize identically (the collision precondition). 
+ expect(JSON.stringify(fixtureA.match)).toBe(JSON.stringify(fixtureB.match)); + + const diagA = matchFixtureDiagnostic([fixtureA], divergentRequest(), undefined, undefined, { + logger, + }); + const diagB = matchFixtureDiagnostic([fixtureB], divergentRequest(), undefined, undefined, { + logger, + }); + + expect(diagA.fixture?.response).toEqual({ content: "served-A" }); + expect(diagB.fixture?.response).toEqual({ content: "served-B" }); + expect(diagA.turnIndexRelaxed).toBe(true); + expect(diagB.turnIndexRelaxed).toBe(true); + // Identity-keyed throttle → both distinct fixtures warn (2 total). A + // serialized-match key collides and suppresses the second → only 1. + expect(logger.warn).toHaveBeenCalledTimes(2); + }); + + it("the SAME fixture object served twice still warns exactly once (identity throttle)", () => { + const logger = fakeLogger(); + const fixture = makeFixture({ userMessage: "diverge", turnIndex: 0 }, { content: "served" }); + matchFixtureDiagnostic([fixture], divergentRequest(), undefined, undefined, { logger }); + matchFixtureDiagnostic([fixture], divergentRequest(), undefined, undefined, { logger }); + expect(logger.warn).toHaveBeenCalledTimes(1); + }); +}); + +describe("turnIndex relaxation: quiet on green", () => { + it("canonical-position match (assistantCount === turnIndex) does not warn or flag relaxed", () => { + const logger = fakeLogger(); + // assistantCount 1, fixture turnIndex 1 → canonical position, no divergence. 
+ const fixtures = [makeFixture({ userMessage: "diverge", turnIndex: 1 }, { content: "served" })]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + expect(diag.fixture).not.toBeNull(); + expect(logger.warn).not.toHaveBeenCalled(); + expect(diag.turnIndexRelaxed).toBeFalsy(); + expect(diag.matchedBy).toBe("turnIndex"); + }); + + it("non-relaxed match (no turnIndex on the fixture) does not warn or flag relaxed", () => { + const logger = fakeLogger(); + const fixtures = [makeFixture({ userMessage: "diverge" }, { content: "served" })]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + expect(diag.fixture).not.toBeNull(); + expect(logger.warn).not.toHaveBeenCalled(); + expect(diag.turnIndexRelaxed).toBeFalsy(); + expect(diag.matchedBy).toBe("content"); + }); +}); + +// --------------------------------------------------------------------------- +// matchedBy accuracy: "turnIndex" must mean the selection was genuinely decided +// by a UNIQUE positional (turnIndex === assistantCount) criterion, not merely +// that the served fixture happens to carry turnIndex === assistantCount when a +// tie-break / registration-order rule actually chose it. +// --------------------------------------------------------------------------- +describe("turnIndex relaxation: matchedBy accuracy", () => { + it("reports 'content' when the canonical-position serve was decided by tie-break, not unique position", () => { + const logger = fakeLogger(); + // TWO content-matching fixtures both at the exact current position + // (turnIndex 1 === assistantCount 1). selectByTurnIndex picks the + // first by REGISTRATION ORDER (tie-break) — position did not uniquely + // decide it — so matchedBy must be "content". 
+ const fixtures = [ + makeFixture({ userMessage: "diverge", turnIndex: 1 }, { content: "first" }), + makeFixture({ userMessage: "diverge", turnIndex: 1 }, { content: "second" }), + ]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + expect(diag.fixture?.response).toEqual({ content: "first" }); + expect(diag.turnIndexRelaxed).toBeFalsy(); + expect(diag.matchedBy).toBe("content"); + }); + + it("reports 'content' when an at-position fixture loses the exact-turn tie-break to an earlier fallback", () => { + const logger = fakeLogger(); + // Earlier-registered plain fallback + a turnIndex-1 fixture at the exact + // position. Tier 2 hands the exact-turn tie to the earlier fallback, so the + // served fixture has no turnIndex → matchedBy "content" (already correct, + // pinned as a regression guard). + const fixtures = [ + makeFixture({ userMessage: "diverge" }, { content: "fallback" }), + makeFixture({ userMessage: "diverge", turnIndex: 1 }, { content: "scripted" }), + ]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + expect(diag.fixture?.response).toEqual({ content: "fallback" }); + expect(diag.matchedBy).toBe("content"); + }); + + it("reports 'turnIndex' for a genuine unique positional match", () => { + const logger = fakeLogger(); + // A single fixture sitting at the exact current position with no competing + // candidate — position uniquely decided the serve. 
+ const fixtures = [makeFixture({ userMessage: "diverge", turnIndex: 1 }, { content: "served" })]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + expect(diag.fixture?.response).toEqual({ content: "served" }); + expect(diag.turnIndexRelaxed).toBeFalsy(); + expect(diag.matchedBy).toBe("turnIndex"); + }); +}); + +// --------------------------------------------------------------------------- +// desc informativeness: the relaxed-warn message must identify the fixture +// meaningfully even when its match carries a predicate or RegExp (which +// JSON.stringify drops / collapses to {}). The message must NOT be the literal +// "{}" blob. +// --------------------------------------------------------------------------- +describe("turnIndex relaxation: warn desc informativeness", () => { + it("names matcher KEYS (not '{}') for a predicate-only divergent fixture", () => { + const logger = fakeLogger(); + // A predicate-gated fixture with turnIndex 0 → divergent at assistantCount + // 1. JSON.stringify(match) drops the predicate fn and the response, so the + // old descriptor read "served fixture {}". The new descriptor must surface + // the matcher KEY names instead. + const predicate = (req: ChatCompletionRequest) => + req.messages.some((m) => m.role === "user" && m.content === "diverge"); + const fixtures = [makeFixture({ predicate, turnIndex: 0 }, { content: "served" })]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + expect(diag.fixture?.response).toEqual({ content: "served" }); + expect(logger.warn).toHaveBeenCalledTimes(1); + const msg = String(logger.warn.mock.calls[0][0]); + expect(msg).toContain("turnIndex relaxed"); + // The descriptor must NOT collapse to the empty-object blob. + expect(msg).not.toContain("served fixture {}"); + // It must name the present matcher key. 
+ expect(msg).toContain("predicate"); + }); + + it("summarizes a regex matcher value (no collapsed '{}' blob) for a regex divergent fixture", () => { + const logger = fakeLogger(); + // A RegExp userMessage matcher serialises to `{}` under JSON.stringify, so + // the legacy descriptor read `{"userMessage":{},"turnIndex":0}` — the key + // name survives but the value is a meaningless empty object. The new + // descriptor must summarize the matcher TYPE (e.g. `userMessage(regex)`) + // rather than emit the collapsed `{}` value. + const fixtures = [makeFixture({ userMessage: /diverge/, turnIndex: 0 }, { content: "served" })]; + const diag = matchFixtureDiagnostic(fixtures, divergentRequest(), undefined, undefined, { + logger, + }); + expect(diag.fixture?.response).toEqual({ content: "served" }); + expect(logger.warn).toHaveBeenCalledTimes(1); + const msg = String(logger.warn.mock.calls[0][0]); + // RED on the old JSON descriptor, which embedded the collapsed value blob. + expect(msg).not.toContain('"userMessage":{}'); + expect(msg).toContain("userMessage"); + }); +}); + +// --------------------------------------------------------------------------- +// End-to-end: the warn must fire through a real handler (server.ts), proving +// the logger is threaded via recordMatchOptions — and stay silent at the +// default (silent) log level so a passing programmatic run is not spammed. +// --------------------------------------------------------------------------- + +describe("turnIndex relaxation: end-to-end through the real OpenAI handler", () => { + let mock: LLMock | undefined; + + afterEach(async () => { + await mock?.stop(); + mock = undefined; + }); + + it("emits the relaxed warn via the logger at warn level when serving a divergent fixture", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + mock = new LLMock({ port: 0, logLevel: "warn" }); + // turnIndex 0, but the request has one prior assistant bubble → divergent. 
+ mock.on({ userMessage: "e2e-diverge", turnIndex: 0 }, { content: "relaxed-serve" }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4", + stream: false, + messages: [ + { role: "user", content: "e2e-diverge" }, + { role: "assistant", content: "prior turn" }, + { role: "user", content: "e2e-diverge" }, + ], + }), + }); + expect(res.status).toBe(200); + const body = (await res.json()) as { choices: { message: { content: string } }[] }; + expect(body.choices[0].message.content).toBe("relaxed-serve"); + + const warned = warnSpy.mock.calls.some((c) => + c.some((a) => typeof a === "string" && a.includes("turnIndex relaxed")), + ); + expect(warned).toBe(true); + }); + + it("stays silent at the default (silent) log level even when serving a divergent fixture", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + mock = new LLMock({ port: 0 }); // default logLevel = silent + mock.on({ userMessage: "e2e-quiet", turnIndex: 0 }, { content: "relaxed-serve" }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4", + stream: false, + messages: [ + { role: "user", content: "e2e-quiet" }, + { role: "assistant", content: "prior turn" }, + { role: "user", content: "e2e-quiet" }, + ], + }), + }); + expect(res.status).toBe(200); + + const warned = warnSpy.mock.calls.some((c) => + c.some((a) => typeof a === "string" && a.includes("turnIndex relaxed")), + ); + expect(warned).toBe(false); + }); +}); diff --git a/src/__tests__/turn-index.test.ts b/src/__tests__/turn-index.test.ts index 6a32cb2..0791f0a 100644 --- a/src/__tests__/turn-index.test.ts +++ b/src/__tests__/turn-index.test.ts @@ -651,3 +651,105 @@ describe("turnIndex independence from sequenceIndex", () 
=> { } }); }); + +// --------------------------------------------------------------------------- +// 7. Content-anchored matching — turnIndex is a non-fatal disambiguator +// (false-red kill + no-over-match safety + multi-fixture tiebreak) +// --------------------------------------------------------------------------- + +describe("content-anchored turnIndex matching (replay)", () => { + let mock: LLMock; + + beforeAll(async () => { + mock = new LLMock(); + await mock.start(); + }); + + afterAll(async () => { + await mock.stop(); + }); + + it("false-red KILLED: a multi-bubble run with MORE assistant turns than turnIndex still matches", async () => { + mock.reset(); + // Canonical fixture says turnIndex 1, but a multi-step agent emitted an + // extra assistant bubble, so the live request carries TWO assistant turns. + // Before the fix the exact turnIndex gate rejected this content match, + // yielding an empty body → "empty assistant response" → false RED. + mock.on({ userMessage: "ship it", turnIndex: 1 }, { content: "Shipped." }); + + const res = await chatPost(mock.url, [ + { role: "user", content: "ship it" }, + { role: "assistant", content: "thinking..." }, + { role: "assistant", content: "still working..." }, + { role: "user", content: "ship it" }, + ]); + expect(res.status).toBe(200); + const body = (await res.json()) as { choices: { message: { content: string } }[] }; + // Non-empty response — the content match is honored despite the off-by-one + // assistant count. + expect(body.choices[0].message.content).toBe("Shipped."); + }); + + it("false-red KILLED: a run with FEWER assistant turns than turnIndex still matches", async () => { + mock.reset(); + mock.on({ userMessage: "deploy now", turnIndex: 3 }, { content: "Deployed." }); + + // assistantCount 0, fixture turnIndex 3 — uniquely content-matching, so it + // matches instead of returning an empty body. 
+ const res = await chatPost(mock.url, [{ role: "user", content: "deploy now" }]); + expect(res.status).toBe(200); + const body = (await res.json()) as { choices: { message: { content: string } }[] }; + expect(body.choices[0].message.content).toBe("Deployed."); + }); + + it("no over-match (safety): a request whose CONTENT matches no fixture still strict-misses", async () => { + const strict = new LLMock({ strict: true }); + await strict.start(); + try { + strict.on({ userMessage: "known phrase", turnIndex: 1 }, { content: "known answer" }); + + // Different content entirely — relaxing the position gate must NOT make a + // position-adjacent fixture match. Content still gates. + const res = await chatPost(strict.url, [ + { role: "user", content: "a completely unrelated request" }, + { role: "assistant", content: "x" }, + ]); + expect(res.status).toBe(503); + } finally { + await strict.stop(); + } + }); + + it("multi-fixture disambiguation: the position tiebreak picks the closest scripted turn", async () => { + mock.reset(); + mock.on({ userMessage: "step", turnIndex: 0 }, { content: "answer-0" }); + mock.on({ userMessage: "step", turnIndex: 1 }, { content: "answer-1" }); + mock.on({ userMessage: "step", turnIndex: 2 }, { content: "answer-2" }); + + // assistantCount 1 → among the three content matches, turnIndex 1 is the + // closest scripted turn at/before the count → answer-1. + const res1 = await chatPost(mock.url, [ + { role: "user", content: "step" }, + { role: "assistant", content: "answer-0" }, + { role: "user", content: "step" }, + ]); + expect(res1.status).toBe(200); + const body1 = (await res1.json()) as { choices: { message: { content: string } }[] }; + expect(body1.choices[0].message.content).toBe("answer-1"); + + // assistantCount 5 OVERSHOOTS the script (extra bubbles) → the highest + // scripted turn at/before the count (turnIndex 2) answers, NOT a miss. 
+ const res2 = await chatPost(mock.url, [ + { role: "user", content: "step" }, + { role: "assistant", content: "a" }, + { role: "assistant", content: "b" }, + { role: "assistant", content: "c" }, + { role: "assistant", content: "d" }, + { role: "assistant", content: "e" }, + { role: "user", content: "step" }, + ]); + expect(res2.status).toBe(200); + const body2 = (await res2.json()) as { choices: { message: { content: string } }[] }; + expect(body2.choices[0].message.content).toBe("answer-2"); + }); +}); From 1bd3cde6cf29851a4fc55d771db33e2151332b58 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Tue, 23 Jun 2026 16:19:58 -0700 Subject: [PATCH 4/4] docs: relaxed turnIndex replay semantics + AIMOCK_STRICT_TURN_INDEX Document content-anchored relaxed-turnIndex replay matching, the divergence warning direction, and the AIMOCK_STRICT_TURN_INDEX opt-out. --- CHANGELOG.md | 8 ++++++++ README.md | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fb4fa0..c0d5b63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +### Changed + +- Replay matching is content-anchored: `turnIndex` disambiguates, no longer a hard reject gate (#276) +- Empirical over 9769 real requests: 3213 false-miss fixes, 0 new misses, 0 wrong-fixture (#276) +- Diverges only on off-by-N assistant count in either direction (behind OR ahead of turn) (#276) +- New `turnIndexRelaxed` match diagnostic + one-shot logger warn on a divergent relaxed serve (#276) +- `AIMOCK_STRICT_TURN_INDEX=1` restores the legacy strict turnIndex gate for replay (#276) + ### Fixed - Gemini Interactions mock now emits the SDK 2.x event protocol on both paths — streamed SSE (`step.*`, `interaction.created`/`completed`, tool args via `arguments_delta`) and non-streaming responses (`steps`/`output_text`); legacy 1.x recorded fixtures still parse (#279) diff --git a/README.md b/README.md index 21aa696..70207bb 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,10 
+ @@ docker run -d -p 4010:4010 -v "$(pwd)/fixtures:/fixtures" ghcr.io/copilotkit/aim Private and link-local addresses (loopback, RFC1918, CGNAT, cloud metadata, ULA, multicast) are rejected by default to prevent SSRF. For local development or tests that need to hit `127.0.0.1`, opt out with `AIMOCK_ALLOW_PRIVATE_URLS=1`. Tarball and zip URL support is intentionally deferred. +### Replay matching & `AIMOCK_STRICT_TURN_INDEX` + +On replay, `turnIndex` is a non-fatal disambiguator, not a hard reject gate: a content-matching fixture is served even when its scripted `turnIndex` differs from the request's assistant-message count. This eliminates false "no fixture matched" misses for multi-bubble agent runs (multi-step agents can emit several assistant bubbles per logical turn). When a served fixture diverges from its scripted `turnIndex`, the match diagnostic carries `turnIndexRelaxed: true` and aimock logs a warning once per fixture. The warning is emitted at the `warn` log level, so it does not appear at the default `silent` level. To restore the legacy strict behavior where a defined `turnIndex` must equal the assistant count exactly, set `AIMOCK_STRICT_TURN_INDEX=1`. The record path is always strict regardless of this flag. + ## Framework Guides Test your AI agents with aimock — no API keys, no network calls: [LangChain](https://aimock.copilotkit.dev/integrate-langchain) · [CrewAI](https://aimock.copilotkit.dev/integrate-crewai) · [PydanticAI](https://aimock.copilotkit.dev/integrate-pydanticai) · [LlamaIndex](https://aimock.copilotkit.dev/integrate-llamaindex) · [Mastra](https://aimock.copilotkit.dev/integrate-mastra) · [Google ADK](https://aimock.copilotkit.dev/integrate-adk) · [Microsoft Agent Framework](https://aimock.copilotkit.dev/integrate-maf)