diff --git a/.changeset/persistence-layer.md b/.changeset/persistence-layer.md new file mode 100644 index 000000000..0ea1e36c8 --- /dev/null +++ b/.changeset/persistence-layer.md @@ -0,0 +1,31 @@ +--- +'@tanstack/ai': minor +'@tanstack/ai-sandbox': patch +'@tanstack/ai-client': minor +'@tanstack/ai-claude-code': patch +'@tanstack/ai-codex': patch +'@tanstack/ai-gemini-cli': patch +'@tanstack/ai-opencode': patch +'@tanstack/ai-persistence': minor +'@tanstack/ai-persistence-sql': minor +'@tanstack/ai-persistence-sqlite': minor +'@tanstack/ai-persistence-postgres': minor +'@tanstack/ai-persistence-cloudflare': minor +'@tanstack/ai-persistence-drizzle': minor +'@tanstack/ai-persistence-prisma': minor +'@tanstack/ai-sandbox-persistence': minor +--- + +Persistence + resumable runs as composable `chat()` middleware. + +`withPersistence(...)` makes any run durable: it loads/saves thread message history (server-authoritative), creates/updates run records, persists every AG-UI `StreamChunk` to an append-only event log, and persists usage. It is fully **optional** — a `chat()` with no persistence middleware is byte-for-byte unchanged, and it works for both non-sandbox and sandbox (agent-mode) runs. + +**Resume.** Each persisted chunk carries an in-band, opaque `cursor` (a monotonic per-run sequence). A client that disconnects mid-run reconnects with the run's `runId` + last `cursor`; `chat({ cursor })` replays the persisted event tail after that cursor, then — for harness adapters that re-attach to their still-running in-sandbox process — continues live. The headless `ChatClient` tracks the cursor and exposes `resume()` / `getResumeState()` / `maybeAutoResume()` with an `autoResume` opt-out. + +**Event model.** The persisted log is the AG-UI `StreamChunk` stream itself (no parallel event type); agent activity (file changes, process output, approvals, artifacts, sandbox lifecycle) rides on well-known `CUSTOM` events catalogued in `@tanstack/ai`. 
+ +**Backends (shared SQL core + thin adapters).** One SQL implementation behind a minimal `SqlDriver` (`@tanstack/ai-persistence-sql`), with backends for SQLite (`-sqlite`, node:sqlite/better-sqlite3), Postgres (`-postgres`, pg), Cloudflare D1 (`-cloudflare`), and bring-your-own Drizzle (`-drizzle`) and Prisma (`-prisma`). Raw drivers auto-migrate (versioned, opt-out); ORMs own their schema. `memoryPersistence()` ships in the persistence core (`@tanstack/ai-persistence`) for tests/examples. + +**Agent mode.** `@tanstack/ai-sandbox-persistence` bridges a durable SQL-backed `SandboxStore` and the durable `LockStore` into `withSandbox`, so sandbox resume and ensure-locking survive across processes. The shared `locks` capability now lives in `@tanstack/ai` (one token across the sandbox and persistence layers); `@tanstack/ai-sandbox` re-exports it for back-compat. + +Approvals are persisted and a durable approval controller feeds decisions back into the existing deny-and-replay flow. Cloudflare is compile-verified (Workers runtime), Postgres runtime-verification is via Docker, and live harness re-attach is verified with the real CLIs; everything else is unit/integration-tested. The Playwright E2E suite is a follow-up. diff --git a/docs/config.json b/docs/config.json index 611a9c8fa..568f1fe91 100644 --- a/docs/config.json +++ b/docs/config.json @@ -302,6 +302,16 @@ } ] }, + { + "label": "Persistence", + "children": [ + { + "label": "Overview", + "to": "persistence/overview", + "addedAt": "2026-06-18" + } + ] + }, { "label": "Advanced", "children": [ diff --git a/docs/persistence/overview.md b/docs/persistence/overview.md new file mode 100644 index 000000000..88e422d32 --- /dev/null +++ b/docs/persistence/overview.md @@ -0,0 +1,164 @@ +--- +title: Persistence Overview +id: overview +--- + +Persistence makes a `chat()` run **durable** and **resumable** — without changing +how you write `chat()`. 
It is composable middleware, so it is entirely optional: +a run with no persistence middleware behaves exactly as before, and the same +middleware works for plain model adapters and for sandbox-backed harness adapters. + +`withPersistence(...)`: + +- loads and saves the thread's message history (the server is authoritative), +- records each run (status, usage, errors), +- appends every streamed AG-UI event to an append-only **event log**, +- stamps each streamed chunk with an opaque **cursor** so a disconnected client + can resume, +- and (in agent mode) persists approvals and artifacts. + +## Installation + +Pick a backend. SQLite is the simplest durable option: + +```sh +npm install @tanstack/ai-persistence @tanstack/ai-persistence-sqlite +``` + +Other backends: `@tanstack/ai-persistence-postgres`, `-cloudflare`, `-drizzle`, +`-prisma`. For tests and prototypes, `memoryPersistence()` ships in +`@tanstack/ai-persistence`. + +## Server: a persisted, resumable endpoint + +```ts +import { chat } from '@tanstack/ai' +import { anthropicText } from '@tanstack/ai-anthropic/adapters' +import { withPersistence } from '@tanstack/ai-persistence' +import { sqlitePersistence } from '@tanstack/ai-persistence-sqlite' + +// Build once and reuse across requests. +const persistence = sqlitePersistence({ + path: '.tanstack-ai/state.sqlite', + mode: 'chat', +}) + +export async function POST(request: Request) { + // `runId` is reused on a resume; `cursor` is present only when resuming. + const { messages, threadId, runId, cursor } = await request.json() + + return chat({ + threadId, + runId, + cursor, + adapter: anthropicText({ model: 'claude-sonnet-4-6' }), + messages, + middleware: [withPersistence(persistence)], + }).toResponse() +} +``` + +When `cursor` is present, `chat()` replays the persisted events after that +cursor instead of re-running the adapter — so a reconnecting client catches up +without duplicating work or burning tokens. 
+ +## Client: automatic resume + +The headless client tracks the last cursor it saw and can resume an interrupted +run. In React: + +```tsx +import { useChat } from '@tanstack/ai-react' + +function Chat() { + const chat = useChat({ + threadId: 'thread-123', + transport: { api: '/api/chat' }, + // Auto-resume is on by default; opt out with `autoResume: false`. + }) + + // Call on mount / when the tab comes back online to continue an + // interrupted run where it left off: + // useEffect(() => { chat.maybeAutoResume() }, []) + + return <>{/* ...render chat.messages... */}</> +} +``` + +`chat.getResumeState()` returns `{ runId, cursor }` for the active/interrupted +run (or `null`), which you can persist to resume across a full page reload; +`chat.resume()` continues it on demand. + +## Modes + +`mode` declares how much is persisted: + +| Mode | Persists | +| --- | --- | +| `'messages'` | thread message history only | +| `'chat'` | messages + runs + event log + usage (resumable conversations) | +| `'agent'` | everything in `chat`, plus sandbox records, approvals, and artifacts | + +## Bring your own database + +`sqlitePersistence` / `postgresPersistence` accept a connection (`{ path }` / +`{ connectionString }`) **or** an existing handle. Drizzle and Prisma users pass +their client directly: + +```ts +import { drizzlePersistence } from '@tanstack/ai-persistence-drizzle' +import { prismaPersistence } from '@tanstack/ai-persistence-prisma' + +const a = drizzlePersistence({ db, dialect: 'postgres', mode: 'chat' }) +const b = prismaPersistence({ prisma, dialect: 'postgres', mode: 'chat' }) +``` + +Raw drivers create and migrate their tables automatically (opt out with +`{ migrate: false }` and apply the exported `ddl(...)` / `migrate(...)` +yourself). Drizzle and Prisma own their own schema/migrations. 
+ +## Agent mode + sandboxes + +For sandbox-backed harness runs, `@tanstack/ai-sandbox-persistence` provides a +durable, SQL-backed sandbox store and a distributed lock so sandbox resume and +ensure-locking survive across processes: + +```ts +import { withSandbox, defineSandbox } from '@tanstack/ai-sandbox' +import { dockerSandbox } from '@tanstack/ai-sandbox-docker' +import { withPersistence } from '@tanstack/ai-persistence' +import { sqlitePersistence, createSqliteDriver } from '@tanstack/ai-persistence-sqlite' +import { + withPersistenceBridge, + createSqlSandboxStore, +} from '@tanstack/ai-sandbox-persistence' +import { claudeCode } from '@tanstack/ai-claude-code' + +const dbPath = '.tanstack-ai/state.sqlite' +const driver = createSqliteDriver({ path: dbPath }) +const persistence = sqlitePersistence({ path: dbPath, mode: 'agent' }) + +const repoSandbox = defineSandbox({ + id: 'repo-agent', + provider: dockerSandbox({ image: 'node:22' }), +}) + +chat({ + threadId, + runId, + adapter: claudeCode({ model: 'claude-sonnet-4-6' }), + messages, + middleware: [ + withPersistence(persistence), + withPersistenceBridge({ + persistence, + sandboxStore: createSqlSandboxStore(driver), + }), + withSandbox(repoSandbox), + ], +}).toResponse() +``` + +A harness adapter (which runs the agent inside the still-running sandbox) can +re-attach to its process on resume and continue live after replaying the event +tail. diff --git a/examples/sandbox-issue-triage/triage.ts b/examples/sandbox-issue-triage/triage.ts index 035d1ae8d..6dd62e797 100644 --- a/examples/sandbox-issue-triage/triage.ts +++ b/examples/sandbox-issue-triage/triage.ts @@ -125,8 +125,7 @@ export async function runTriage(options: RunTriageOptions): Promise { hooks: { onFile: (e) => { fileEvents.push(e) - const mark = - e.type === 'create' ? '+' : e.type === 'delete' ? '-' : '~' + const mark = e.type === 'create' ? '+' : e.type === 'delete' ? 
'-' : '~' console.log(` [${mark}] ${e.type} ${e.path}`) }, }, diff --git a/knip.json b/knip.json index a5e8a03e1..39655ddab 100644 --- a/knip.json +++ b/knip.json @@ -44,6 +44,9 @@ }, "packages/ai-vue-ui": { "ignore": ["src/use-chat-context.ts"] + }, + "packages/ai-persistence-postgres": { + "ignoreDependencies": ["pg"] } } } diff --git a/packages/ai-claude-code/src/adapters/text.ts b/packages/ai-claude-code/src/adapters/text.ts index c5f59c0fc..5abb36416 100644 --- a/packages/ai-claude-code/src/adapters/text.ts +++ b/packages/ai-claude-code/src/adapters/text.ts @@ -115,6 +115,12 @@ export class ClaudeCodeTextAdapter< // Harness adapter: requires a sandbox to run the agent CLI inside. override readonly requires = [SandboxCapability] as const + // The agent runs inside the (persistent) sandbox, so on resume the engine can + // re-attach to the still-running process and continue live after replaying the + // persisted event tail (rather than ending at replay). Live re-attach behavior + // is verified with the real CLI; the engine seam is unit-tested. + readonly supportsReattach = true + private readonly adapterConfig: ClaudeCodeTextConfig constructor(config: ClaudeCodeTextConfig, model: TModel) { diff --git a/packages/ai-client/src/chat-client.ts b/packages/ai-client/src/chat-client.ts index 468dc7618..7f3398d41 100644 --- a/packages/ai-client/src/chat-client.ts +++ b/packages/ai-client/src/chat-client.ts @@ -102,6 +102,14 @@ export class ChatClient< // focused on streaming. Undefined when no `persistence` adapter is configured. private readonly persistor?: ChatPersistor private currentRunId: string | null = null + // Resume tracking: the latest in-band cursor seen for the active run, so a + // reconnect can replay events after it. Cleared when the run terminates. 
+ private lastResume: { runId: string; cursor: string } | null = null + private readonly autoResume: boolean + // When set, the next streamResponse() resumes this run/cursor instead of + // starting a fresh run (consumed once). + private pendingResumeRunId: string | null = null + private pendingResumeCursor: string | null = null // Track the legacy `body` option and the canonical `forwardedProps` // option as separate slots so that `updateOptions({ forwardedProps })` // doesn't wipe a previously-set `body` (and vice versa). They are @@ -170,6 +178,7 @@ export class ChatClient< constructor(options: ChatClientOptions) { this.uniqueId = options.id || this.generateUniqueId('chat') this.threadId = options.threadId || this.generateUniqueId('thread') + this.autoResume = options.autoResume ?? true if (options.persistence) { this.persistor = new ChatPersistor( options.persistence, @@ -489,6 +498,66 @@ export class ChatClient< } } + /** + * Observe the in-band resume cursor on each chunk so a reconnect can replay + * after the last seen event. Cleared when the run reaches a terminal event. + */ + private observeResumeCursor(chunk: StreamChunk): void { + if (chunk.type === 'RUN_FINISHED' || chunk.type === 'RUN_ERROR') { + // A server-signaled terminal event completes the run — drop its resume + // state. (A stream that merely ends without a terminal is an interruption + // and keeps its resume state so it can be continued.) + const runId = getChunkRunId(chunk) + if (!runId || this.lastResume?.runId === runId) { + this.lastResume = null + } + return + } + const cursor = + 'cursor' in chunk && typeof chunk.cursor === 'string' + ? chunk.cursor + : undefined + if (cursor && this.currentRunId) { + this.lastResume = { runId: this.currentRunId, cursor } + } + } + + /** + * The resume state for the active/interrupted run (the run id plus the last + * cursor seen), or null when there is nothing to resume. 
Apps can persist this + * to resume across a full reload; in-session reconnects use it automatically + * via {@link maybeAutoResume}. + */ + getResumeState(): { runId: string; cursor: string } | null { + return this.lastResume ? { ...this.lastResume } : null + } + + /** + * Resume a run by replaying its persisted events after the last cursor, then + * continuing live — without re-sending messages. Uses the supplied state, or + * the tracked in-session state. No-op (returns false) when there is nothing to + * resume or a stream is already in flight. + */ + resume(state?: { runId: string; cursor: string }): Promise { + const target = state ?? this.lastResume + if (!target || this.isLoading) return Promise.resolve(false) + this.pendingResumeRunId = target.runId + this.pendingResumeCursor = target.cursor + return this.streamResponse() + } + + /** + * Auto-resume hook for framework integrations to call on mount / when the tab + * comes back online. Honors the `autoResume` option (default true) and only + * fires when an interrupted run is tracked and no stream is in flight. + */ + maybeAutoResume(): Promise { + if (!this.autoResume || this.isLoading || !this.lastResume) { + return Promise.resolve(false) + } + return this.resume() + } + private generateUniqueId(prefix: string): string { return `${prefix}-${Date.now()}-${Math.random().toString(36).substring(7)}` } @@ -696,6 +765,7 @@ export class ChatClient< // per-run error only clears that run, while a runId-less RUN_ERROR is // treated as a session-level error that clears every active run. 
this.updateRunLifecycle(chunk) + this.observeResumeCursor(chunk) // Yield control back to event loop for UI updates await new Promise((resolve) => setTimeout(resolve, 0)) } @@ -854,7 +924,14 @@ export class ChatClient< // Track generation so a superseded stream's cleanup doesn't clobber the new one const generation = ++this.streamGeneration - const runId = `run-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + // Resuming reuses the original runId so the server replays that run's events. + const resumeRunId = this.pendingResumeRunId + const resumeCursor = this.pendingResumeCursor + this.pendingResumeRunId = null + this.pendingResumeCursor = null + const runId = + resumeRunId ?? + `run-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` this.currentRunId = runId this.setIsLoading(true) @@ -945,6 +1022,7 @@ export class ChatClient< : { type: 'object' }, })), forwardedProps: { ...mergedBody }, + ...(resumeCursor ? { cursor: resumeCursor } : {}), } this.devtoolsBridge.beginRun(runContext.runId, this.threadId) activeDevtoolsRunId = runContext.runId diff --git a/packages/ai-client/src/connection-adapters.ts b/packages/ai-client/src/connection-adapters.ts index 3c4010047..56e4d2e1c 100644 --- a/packages/ai-client/src/connection-adapters.ts +++ b/packages/ai-client/src/connection-adapters.ts @@ -199,6 +199,12 @@ export interface RunAgentInputContext { threadId: string runId: string parentRunId?: string + /** + * Resume cursor. When set, the request resumes `runId` — the server replays + * persisted events after this cursor (see `chat({ cursor })`). On a resume the + * client sends no new messages. + */ + cursor?: string /** Client-declared tools to advertise in the request payload. 
*/ clientTools?: Array<{ name: string @@ -443,6 +449,7 @@ function buildRunAgentInputBody( ...(runContext?.parentRunId !== undefined && { parentRunId: runContext.parentRunId, }), + ...(runContext?.cursor !== undefined && { cursor: runContext.cursor }), state: {}, messages: wireMessages, tools: runContext?.clientTools ?? [], diff --git a/packages/ai-client/src/types.ts b/packages/ai-client/src/types.ts index fa00811d7..d5cd7d3c4 100644 --- a/packages/ai-client/src/types.ts +++ b/packages/ai-client/src/types.ts @@ -391,6 +391,14 @@ export interface ChatClientBaseOptions< */ threadId?: string + /** + * Whether to auto-resume an interrupted run when {@link maybeAutoResume} is + * called (e.g. by a framework integration on mount / when the tab comes back + * online). Requires server-side persistence so the run's events can be + * replayed by `runId + cursor`. Defaults to `true`; set `false` to opt out. + */ + autoResume?: boolean + /** * Arbitrary client-controlled JSON forwarded to the server in the * AG-UI `RunAgentInput.forwardedProps` field. Use this for per-session diff --git a/packages/ai-client/tests/chat-client-resume.test.ts b/packages/ai-client/tests/chat-client-resume.test.ts new file mode 100644 index 000000000..2c5583f9a --- /dev/null +++ b/packages/ai-client/tests/chat-client-resume.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, it } from 'vitest' +import { EventType } from '@tanstack/ai/client' +import { ChatClient } from '../src/chat-client' +import type { + ConnectConnectionAdapter, + RunAgentInputContext, +} from '../src/connection-adapters' +import type { StreamChunk } from '@tanstack/ai/client' + +/** + * Adapter that records each connect's runContext and yields scripted chunks. + * A script can be a function of the live `runContext` (so a test can emit a + * RUN_FINISHED carrying the same runId the client generated and passed in). 
+ */ +type Script = + | Array + | ((ctx: RunAgentInputContext | undefined) => Array) + +function recordingAdapter(scripts: Array