/** Loosely typed view of the global object so the suite can install and remove a fake `Bun`. */
const globalWithBun = globalThis as unknown as {
    Bun?: {
        spawn?: unknown
    }
}
// Snapshot taken at module load; afterEach restores it after every test.
const originalBun = globalWithBun.Bun

/** Fake subprocess handed back by the mocked `Bun.spawn`. */
type MockProc = {
    terminal: Bun.Terminal
    killed: boolean
    exitCode: number | null
    signalCode: string | null
    kill: ReturnType<typeof vi.fn>
    onExit?: (code: number | null) => void
}

/** Builds a fresh fake subprocess whose `kill` flips its own `killed` flag. */
function makeMockProc(): MockProc {
    const mock: MockProc = {
        terminal: {
            write: vi.fn(),
            resize: vi.fn(),
            close: vi.fn(),
        } as unknown as Bun.Terminal,
        killed: false,
        exitCode: null,
        signalCode: null,
        // Mutates the object being built, not whichever mock the module variable points at.
        kill: vi.fn(() => { mock.killed = true }),
    }
    return mock
}

let proc: MockProc

/** Options object (second argument) of the first `Bun.spawn` call made by the manager. */
function firstSpawnOptions() {
    const spawnMock = globalWithBun.Bun?.spawn as ReturnType<typeof vi.fn>
    return spawnMock.mock.calls[0][1]
}

describe('AgentPtyManager', () => {
    beforeEach(() => {
        proc = makeMockProc()
        globalWithBun.Bun = {
            spawn: vi.fn(() => proc),
        }
    })

    afterEach(() => {
        if (originalBun === undefined) {
            delete globalWithBun.Bun
        } else {
            globalWithBun.Bun = originalBun
        }
    })

    it('spawns a process with terminal option', () => {
        const manager = new AgentPtyManager()
        const onData = vi.fn()

        manager.spawn({
            command: 'claude',
            args: ['--model', 'sonnet'],
            cwd: '/workspace/project',
            cols: 80,
            rows: 24,
            onData,
        })

        expect(globalWithBun.Bun?.spawn).toHaveBeenCalledWith(
            ['claude', '--model', 'sonnet'],
            expect.objectContaining({
                cwd: '/workspace/project',
                terminal: expect.objectContaining({
                    cols: 80,
                    rows: 24,
                    data: expect.any(Function),
                }),
            })
        )
        expect(manager.isRunning).toBe(true)
    })

    it('calls onData callback when terminal emits data', () => {
        const manager = new AgentPtyManager()
        const onData = vi.fn()

        manager.spawn({
            command: 'claude',
            onData,
        })

        const terminalConfig = firstSpawnOptions().terminal
        const data = new TextEncoder().encode('hello from claude')

        terminalConfig.data(proc.terminal, data)

        expect(onData).toHaveBeenCalledWith('hello from claude')
    })

    it('writes data to terminal', () => {
        const manager = new AgentPtyManager()

        manager.spawn({
            command: 'claude',
            onData: vi.fn(),
        })

        manager.write('test input\n')

        expect(proc.terminal.write).toHaveBeenCalledWith('test input\n')
    })

    it('resizes terminal dimensions', () => {
        const manager = new AgentPtyManager()

        manager.spawn({
            command: 'claude',
            cols: 80,
            rows: 24,
            onData: vi.fn(),
        })

        manager.resize(120, 40)

        expect(proc.terminal.resize).toHaveBeenCalledWith(120, 40)
    })

    it('kills the process and cleans up', () => {
        const manager = new AgentPtyManager()

        manager.spawn({
            command: 'claude',
            onData: vi.fn(),
        })

        manager.kill()

        expect(proc.kill).toHaveBeenCalled()
        expect(proc.terminal.close).toHaveBeenCalled()
        expect(manager.isRunning).toBe(false)
    })

    it('reports exit code via onExit callback', () => {
        const manager = new AgentPtyManager()
        const onExit = vi.fn()

        manager.spawn({
            command: 'claude',
            onData: vi.fn(),
            onExit,
        })

        const onExitHandler = firstSpawnOptions().onExit

        onExitHandler(proc, 0)

        expect(onExit).toHaveBeenCalledWith(0, null)
        expect(manager.exitCode).toBe(0)
    })

    it('does not call spawn if Bun is unavailable', () => {
        delete globalWithBun.Bun
        const manager = new AgentPtyManager()
        const onError = vi.fn()

        manager.spawn({
            command: 'claude',
            onData: vi.fn(),
            onError,
        })

        expect(onError).toHaveBeenCalledWith(
            expect.objectContaining({ message: expect.stringContaining('Bun') })
        )
        expect(manager.isRunning).toBe(false)
    })

    // The next three tests state "no error is thrown" as an explicit assertion.
    it('does not write if not spawned', () => {
        const manager = new AgentPtyManager()
        expect(() => manager.write('data')).not.toThrow()
    })

    it('does not resize if not spawned', () => {
        const manager = new AgentPtyManager()
        expect(() => manager.resize(80, 24)).not.toThrow()
    })

    it('does not kill if not spawned', () => {
        const manager = new AgentPtyManager()
        expect(() => manager.kill()).not.toThrow()
    })

    it('tracks exit code and signal code', () => {
        const manager = new AgentPtyManager()

        manager.spawn({
            command: 'claude',
            onData: vi.fn(),
        })

        const onExitHandler = firstSpawnOptions().onExit

        // The manager reads the signal from the subprocess object, not from a callback argument.
        proc.signalCode = 'SIGTERM'
        onExitHandler(proc, null)

        expect(manager.exitCode).toBe(null)
        expect(manager.signalCode).toBe('SIGTERM')
        expect(manager.isRunning).toBe(false)
    })

    it('applies environment variables from filtered env', () => {
        const manager = new AgentPtyManager()

        manager.spawn({
            command: 'claude',
            env: { TERM: 'xterm-256color', CUSTOM_VAR: 'value' },
            onData: vi.fn(),
        })

        expect(firstSpawnOptions().env).toEqual({ TERM: 'xterm-256color', CUSTOM_VAR: 'value' })
    })
})
/** Options accepted by {@link AgentPtyManager.spawn}. */
export type AgentPtyOptions = {
    /** Executable to launch. */
    command: string
    /** Arguments placed after `command`; defaults to none. */
    args?: string[]
    /** Working directory handed to `Bun.spawn` as-is. */
    cwd?: string
    /** Child environment; when omitted the child receives this process's `process.env`. */
    env?: Record<string, string | undefined>
    /** Initial terminal width in columns (default 80). */
    cols?: number
    /** Initial terminal height in rows (default 24). */
    rows?: number
    /** Receives each non-empty chunk of terminal output, decoded with `TextDecoder` (UTF-8). */
    onData: (data: string) => void
    /** Invoked when the child exits, with its exit code and the subprocess's `signalCode`. */
    onExit?: (code: number | null, signal: string | null) => void
    /** Invoked (instead of throwing) when the process cannot be started. */
    onError?: (error: Error) => void
}

/** Returns the `Bun` global when it exists, or `null` when it is undefined (e.g. under Node). */
function getOptionalBun(): typeof Bun | null {
    return typeof Bun === 'undefined' ? null : Bun
}

/**
 * Owns a single child process attached to a Bun terminal (PTY).
 *
 * Failures to start are reported through `onError`; `spawn` itself does not
 * throw. `write`, `resize` and `kill` are safe to call at any time and do
 * nothing when there is no live process.
 */
export class AgentPtyManager {
    private proc: Bun.Subprocess | null = null
    private terminal: Bun.Terminal | null = null
    private _exitCode: number | null = null
    private _signalCode: string | null = null
    private _isRunning: boolean = false

    /** Exit code of the most recent child, or `null` if it has not exited (or exited without one). */
    get exitCode(): number | null {
        return this._exitCode
    }

    /** `signalCode` reported by the most recent child's subprocess object at exit, else `null`. */
    get signalCode(): string | null {
        return this._signalCode
    }

    /** True from a successful `spawn` until the child exits or `kill` is called. */
    get isRunning(): boolean {
        return this._isRunning
    }

    /**
     * Starts `opts.command` attached to a new terminal.
     *
     * Reports through `opts.onError` and returns without spawning when a child
     * is already running, when `Bun.spawn` is unavailable, or when the spawned
     * process has no terminal attached.
     */
    spawn(opts: AgentPtyOptions): void {
        // Replacing a live child would orphan it together with its terminal.
        if (this._isRunning) {
            opts.onError?.(new Error('A PTY process is already running; call kill() before spawning another'))
            return
        }

        const bun = getOptionalBun()
        if (!bun || typeof bun.spawn !== 'function') {
            const err = new Error('Bun.spawn is unavailable in this runtime')
            opts.onError?.(err)
            return
        }

        // Drop leftovers of a previous run so its terminal and exit status do
        // not leak into this one.
        this.releaseTerminal()
        this.proc = null
        this._exitCode = null
        this._signalCode = null

        const cmd = opts.command
        const args = opts.args ?? []
        const decoder = new TextDecoder()
        // Set by onExit. Per Bun's documentation the callback may run before
        // `Bun.spawn` returns, in which case the manager must not report the
        // already-dead child as running.
        let exited = false

        try {
            this.proc = bun.spawn([cmd, ...args], {
                cwd: opts.cwd,
                env: opts.env ?? process.env,
                terminal: {
                    cols: opts.cols ?? 80,
                    rows: opts.rows ?? 24,
                    data: (_terminal, data) => {
                        // stream: true keeps multi-byte characters that are
                        // split across chunks intact.
                        const text = decoder.decode(data, { stream: true })
                        if (text) {
                            opts.onData(text)
                        }
                    },
                },
                onExit: (subprocess, exitCode) => {
                    exited = true
                    this._exitCode = exitCode
                    this._signalCode = subprocess.signalCode ?? null
                    this._isRunning = false
                    opts.onExit?.(this._exitCode, this._signalCode)
                },
            })

            this.terminal = this.proc.terminal ?? null
            if (!this.terminal) {
                try {
                    this.proc.kill()
                } catch (error) {
                    logger.debug('[AgentPtyManager] Failed to kill process after missing terminal', { error })
                }
                this.proc = null
                const err = new Error('Failed to attach terminal to spawned process')
                opts.onError?.(err)
                return
            }

            this._isRunning = !exited
        } catch (error) {
            logger.debug('[AgentPtyManager] Failed to spawn process', { error })
            this.proc = null
            this.terminal = null
            opts.onError?.(error instanceof Error ? error : new Error(String(error)))
        }
    }

    /** Sends `data` to the terminal; ignored when no process is running. */
    write(data: string): void {
        if (!this.terminal || !this._isRunning) {
            return
        }
        this.terminal.write(data)
    }

    /** Resizes the terminal; ignored when no process is running. */
    resize(cols: number, rows: number): void {
        if (!this.terminal || !this._isRunning) {
            return
        }
        this.terminal.resize(cols, rows)
    }

    /**
     * Stops the child if it is still running and always closes the terminal.
     *
     * The terminal is closed even when the child has already exited on its
     * own; skipping that case left the terminal open. Calling `kill` again,
     * or before `spawn`, is a no-op.
     */
    kill(): void {
        const proc = this.proc
        if (proc && this._isRunning && !proc.killed && proc.exitCode === null) {
            try {
                proc.kill()
            } catch (error) {
                logger.debug('[AgentPtyManager] Failed to kill process', { error })
            }
        }

        this.releaseTerminal()
        this.proc = null
        this._isRunning = false
    }

    /** Closes the current terminal (if any) and forgets it; close errors are only logged. */
    private releaseTerminal(): void {
        const terminal = this.terminal
        this.terminal = null
        if (!terminal) {
            return
        }
        try {
            terminal.close()
        } catch (error) {
            logger.debug('[AgentPtyManager] Failed to close terminal', { error })
        }
    }
}
/**
 * Hoisted fake of AgentPtyManager plus knobs the tests use to drive it:
 * toggle running state, toggle keystroke echo, and emit output/exit events
 * through the callbacks captured from `spawn`.
 */
const harness = vi.hoisted(() => {
    let _isRunning = true
    let _onExit: ((code: number | null, signal: string | null) => void) | null = null
    let _onData: ((data: string) => void) | null = null
    let _echo = true

    const m = {
        get isRunning() { return _isRunning },
        spawn: vi.fn((opts: Record<string, unknown>) => {
            _onExit = (opts.onExit as typeof _onExit) ?? null
            _onData = (opts.onData as typeof _onData) ?? null
        }),
        // By default simulate the agent echoing keystrokes back as output so the
        // echo-confirm in runAgentPty proceeds on the first attempt.
        write: vi.fn((data: string) => {
            if (_echo) _onData?.(data)
        }),
        kill: vi.fn(() => { _isRunning = false }),
        resize: vi.fn(),
    }

    return {
        setRunning(v: boolean) { _isRunning = v },
        setEcho(v: boolean) { _echo = v },
        triggerExit(code: number | null = 0, signal: string | null = null) {
            _isRunning = false
            _onExit?.(code, signal)
        },
        triggerData(data: string) { _onData?.(data) },
        reset() {
            _isRunning = true; _onExit = null; _onData = null; _echo = true
            m.spawn.mockClear(); m.write.mockClear(); m.kill.mockClear(); m.resize.mockClear()
        },
        m,
    }
})

vi.mock('@/agent/AgentPtyManager', () => ({
    AgentPtyManager: vi.fn(function() { return harness.m }),
}))
vi.mock('@/lib', () => ({ logger: { debug: vi.fn() } }))
vi.mock('@/parsers/specialCommands', () => ({
    parseSpecialCommand: (msg: string) => {
        if (msg === '/clear') return { type: 'clear' }
        if (msg === '/compact') return { type: 'compact' }
        return { type: 'message' }
    },
}))

import { runAgentPty } from '../runAgentPty'

/** A promise together with the function that resolves it, so a test decides when a message "arrives". */
function deferred<T>(): { promise: Promise<T>; resolve: (v: T) => void } {
    let resolve!: (v: T) => void
    // `promise` is listed first: its executor runs synchronously and assigns
    // `resolve` before the `resolve` property below is read.
    return { promise: new Promise<T>((r) => { resolve = r }), resolve }
}

/** What `nextMessage` resolves to: a message, or `null` to end the session. */
type NextMessage = { message: string } | null

type Opts = Parameters<typeof runAgentPty>[0]

/** Baseline options with a short idle window; individual tests override what they exercise. */
function makeOpts(overrides: Partial<Opts> = {}): Opts {
    return {
        command: 'testagent',
        args: [],
        cwd: '/tmp',
        debugPrefix: '[test]',
        idleReadyMs: 20,
        nextMessage: vi.fn(),
        onReady: vi.fn(),
        onMessage: vi.fn(),
        ...overrides,
    }
}

const tick = (ms = 0) => new Promise<void>((r) => setTimeout(r, ms))

// Drive past the markerless waitForInputReady: emit output, then let the idle
// window + polling loop elapse.
async function reachReady() {
    harness.triggerData('boot')
    await tick(220)
}

describe('runAgentPty', () => {
    afterEach(() => { harness.reset() })

    it('spawns with the given command/args/cwd and calls onReady', async () => {
        const msg = deferred<NextMessage>()
        const onReady = vi.fn()
        const opts = makeOpts({ command: 'mycli', args: ['--foo'], cwd: '/work', onReady, nextMessage: () => msg.promise })
        const promise = runAgentPty(opts)
        await tick(0)
        expect(harness.m.spawn).toHaveBeenCalled()
        const spawnArgs = harness.m.spawn.mock.calls[0][0] as { command: string; args: string[]; cwd: string }
        expect(spawnArgs.command).toBe('mycli')
        expect(spawnArgs.args).toEqual(['--foo'])
        expect(spawnArgs.cwd).toBe('/work')
        expect(onReady).toHaveBeenCalled()
        await reachReady()
        msg.resolve(null)
        await promise
    })

    it('injects envVars/extraEnv into the spawn env only (not process.env)', async () => {
        const msg = deferred<NextMessage>()
        const opts = makeOpts({
            envVars: { FLAVOR_TOKEN: 'tok' },
            extraEnv: { CLAUDE_CONFIG_DIR: '/tmp/iso-cfg' },
            nextMessage: () => msg.promise,
        })
        const promise = runAgentPty(opts)
        await tick(0)
        const spawnEnv = (harness.m.spawn.mock.calls[0][0] as { env: Record<string, string> }).env
        expect(spawnEnv.FLAVOR_TOKEN).toBe('tok')
        expect(spawnEnv.CLAUDE_CONFIG_DIR).toBe('/tmp/iso-cfg')
        // TERM is always set so interactive TUI agents initialize correctly.
        expect(spawnEnv.TERM).toBeTruthy()
        // process.env must stay clean so the parent's scanner is unaffected.
        expect(process.env.CLAUDE_CONFIG_DIR).toBeUndefined()
        expect(process.env.FLAVOR_TOKEN).toBeUndefined()
        await reachReady()
        msg.resolve(null)
        await promise
    })

    it('removes unsetEnv keys from the spawn env (CLAUDECODE stripping)', async () => {
        const msg = deferred<NextMessage>()
        const opts = makeOpts({
            extraEnv: { CLAUDECODE: '1', KEEP_ME: 'yes' },
            unsetEnv: ['CLAUDECODE'],
            nextMessage: () => msg.promise,
        })
        const promise = runAgentPty(opts)
        await tick(0)
        const spawnEnv = (harness.m.spawn.mock.calls[0][0] as { env: Record<string, string> }).env
        // CLAUDECODE is stripped so the child claude isn't treated as a nested
        // session (which stops it writing its transcript); unrelated vars are kept.
        expect(spawnEnv.CLAUDECODE).toBeUndefined()
        expect(spawnEnv.KEEP_ME).toBe('yes')
        await reachReady()
        msg.resolve(null)
        await promise
    })

    it('auto-approves the trust prompt with Enter (not consuming the first message)', async () => {
        const msg = deferred<NextMessage>()
        const opts = makeOpts({ trustMarkers: ['trust this folder'], nextMessage: () => msg.promise })
        const promise = runAgentPty(opts)
        await tick(0)
        // Agent shows the first-run trust screen.
        harness.triggerData('Quick safety check: Is this a project you trust this folder? 1. Yes')
        await tick(40)
        // Driver auto-approves with Enter (default highlight = Yes).
        expect(harness.m.write).toHaveBeenCalledWith('\r')
        msg.resolve(null)
        await promise
    })

    it('submits the first message only after ready, with CR separate from text', async () => {
        const msg1 = deferred<NextMessage>()
        const msg2 = deferred<NextMessage>()
        const nextMessage = vi.fn()
            .mockImplementationOnce(() => msg1.promise)
            .mockImplementationOnce(() => msg2.promise)
        const promise = runAgentPty(makeOpts({ nextMessage }))
        await reachReady()
        msg1.resolve({ message: 'hello' })
        await tick(300)
        // text then CR, as separate writes
        expect(harness.m.write).toHaveBeenCalledWith('hello')
        expect(harness.m.write).toHaveBeenCalledWith('\r')
        msg2.resolve(null)
        await promise
    })

    it('retries the write when the agent does not echo (stdin not ready yet)', async () => {
        const msg1 = deferred<NextMessage>()
        const msg2 = deferred<NextMessage>()
        const nextMessage = vi.fn()
            .mockImplementationOnce(() => msg1.promise)
            .mockImplementationOnce(() => msg2.promise)
        const promise = runAgentPty(makeOpts({ nextMessage }))
        await reachReady()
        harness.setEcho(false) // agent ignores input → no echo
        msg1.resolve({ message: 'hi' })
        await tick(2500) // 3 attempts × 700ms echo wait
        const textWrites = harness.m.write.mock.calls.filter((c) => c[0] === 'hi').length
        expect(textWrites).toBe(3)
        msg2.resolve(null)
        harness.setRunning(false)
        await promise
    })

    it('ignores /clear and /compact in the loop', async () => {
        const msg1 = deferred<NextMessage>()
        const msg2 = deferred<NextMessage>()
        const msg3 = deferred<NextMessage>()
        const nextMessage = vi.fn()
            .mockImplementationOnce(() => msg1.promise)
            .mockImplementationOnce(() => msg2.promise)
            .mockImplementationOnce(() => msg3.promise)
        const promise = runAgentPty(makeOpts({ nextMessage }))
        await reachReady()
        msg1.resolve({ message: '/clear' })
        await tick(60)
        expect(harness.m.write).not.toHaveBeenCalledWith('/clear')
        msg2.resolve({ message: '/compact' })
        await tick(60)
        expect(harness.m.write).not.toHaveBeenCalledWith('/compact')
        msg3.resolve(null)
        await promise
    })

    it('stops and kills on exit', async () => {
        const msg1 = deferred<NextMessage>()
        const onExit = vi.fn()
        const nextMessage = vi.fn().mockImplementationOnce(() => msg1.promise)
        const promise = runAgentPty(makeOpts({ nextMessage, onExit }))
        await reachReady()
        harness.triggerExit(0)
        msg1.resolve({ message: 'late' })
        await promise
        expect(onExit).toHaveBeenCalledWith(0)
        expect(harness.m.kill).toHaveBeenCalled()
    })

    it('aborts via signal', async () => {
        const msg1 = deferred<NextMessage>()
        const msg2 = deferred<NextMessage>()
        const controller = new AbortController()
        const nextMessage = vi.fn()
            .mockImplementationOnce(() => msg1.promise)
            .mockImplementationOnce(() => msg2.promise)
        const promise = runAgentPty(makeOpts({ nextMessage, signal: controller.signal }))
        await reachReady()
        msg1.resolve({ message: 'first' })
        await tick(120)
        controller.abort()
        msg2.resolve({ message: 'should not send' })
        await promise
        expect(harness.m.write).not.toHaveBeenCalledWith('should not send')
        expect(harness.m.kill).toHaveBeenCalled()
    })
})
/**
 * Shared driver for running an interactive agent CLI (e.g. claude) inside a
 * PTY. All flavor-specific behavior is supplied via options:
 *  - `command` / `args` / `cwd` / `envVars` / `extraEnv` — how to spawn
 *  - `promptMarkers` — strings that indicate the agent's input prompt has
 *    rendered. When provided, input-ready is gated on seeing one of them (e.g.
 *    claude's ink TUI). When omitted, falls back to an output-idle heuristic
 *    (for an agent with no detectable prompt marker).
 *
 * The driver handles the parts every PTY agent shares: spawn lifecycle,
 * waiting until the agent is ready before sending the first message, echo-
 * confirmed submit with retry (so the first keystrokes aren't dropped while the
 * agent wires up stdin), and the message loop.
 */
export type RunAgentPtyOpts = {
    command: string
    args: string[]
    cwd: string
    /**
     * Flavor env vars added to the SPAWNED process's environment only. This
     * process's own `process.env` is never modified.
     */
    envVars?: Record<string, string>
    /** Additional env vars (e.g. DISABLE_AUTOUPDATER) applied after envVars, so they win on conflict. */
    extraEnv?: Record<string, string>
    /**
     * Env var names to REMOVE from the spawned process's environment. claude uses
     * this to strip CLAUDECODE / CLAUDE_CODE_* so the child isn't mistaken for a
     * nested session (which would stop it writing its JSONL transcript).
     * Names are matched exactly — there is no wildcard expansion, so every
     * variable to strip must be listed. Removal runs last, so it also drops keys
     * supplied through `envVars` / `extraEnv`.
     */
    unsetEnv?: string[]
    /** Output substrings that signal the input prompt has rendered. */
    promptMarkers?: string[]
    /**
     * Output substrings that indicate a trust/safety prompt the agent shows on
     * first run in a folder (e.g. claude's "Is this a project you trust?").
     * When detected, the driver auto-approves it (Enter selects the default
     * "Yes" option) so the trust screen doesn't get mistaken for the input
     * prompt and the first user message isn't consumed by it.
     */
    trustMarkers?: string[]
    /** Idle window (ms) used to decide output has settled. */
    idleReadyMs?: number
    /**
     * Output substrings shown while the agent is actively working (e.g. claude's
     * "esc to interrupt" footer / spinner). When seen, `onThinkingChange(true)`.
     */
    busyMarkers?: string[]
    /**
     * Output substrings shown when the agent is back at an idle input prompt
     * (e.g. claude's "for shortcuts" hint). When seen, `onThinkingChange(false)`.
     */
    idleMarkers?: string[]
    debugPrefix: string
    signal?: AbortSignal
    /** Supplies the next user message; resolving to `null` ends the session. */
    nextMessage: () => Promise<{ message: string } | null>
    /** Called once right after a successful spawn (before the input prompt is confirmed ready). */
    onReady: () => void
    /** Receives every chunk of PTY output. */
    onMessage: (data: string) => void
    /**
     * Fired when the agent's working/idle state changes, derived from
     * busy/idle markers in the PTY output. Drives the chat "thinking" indicator
     * (PTY agents have no streaming protocol to read this from). Tracks the live
     * spinner, so it stays accurate even through a long silent inference.
     */
    onThinkingChange?: (thinking: boolean) => void
    onExit?: (code: number | null) => void
    /**
     * Called once the PTY is spawned with controls for the live terminal. The
     * agent-terminal viewer uses `resize` to repaint the TUI on (re)subscribe so
     * the current screen is shown instead of a stale/black buffer replay. Controls
     * become no-ops after the process exits.
     */
    registerControls?: (controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void }) => void
}

/** TERM value given to the child when this process has none. */
const DEFAULT_TERM = 'xterm-256color'
/** Initial PTY size. */
const INITIAL_COLS = 80
const INITIAL_ROWS = 24
/** waitForInputReady: overall budget, polling period, and the "give up on markers" idle window. */
const READY_TIMEOUT_MS = 20000
const READY_POLL_MS = 80
const READY_FALLBACK_IDLE_MS = 3000
/** submitMessage: write attempts, per-attempt echo wait, polling period, pauses around the CR. */
const ECHO_ATTEMPTS = 3
const ECHO_WAIT_MS = 700
const ECHO_POLL_MS = 40
const PRE_SUBMIT_DELAY_MS = 150
const POST_SUBMIT_DELAY_MS = 50
/** Output silence after which a lingering "thinking" state is forced back to idle. */
const IDLE_SILENCE_MS = 3000

/**
 * Builds the environment for the spawned agent without touching `process.env`.
 *
 * Precedence, lowest to highest: this process's environment, `TERM`,
 * `envVars`, `extraEnv`. Names in `unsetEnv` are removed last.
 */
function buildSpawnEnv(opts: Pick<RunAgentPtyOpts, 'envVars' | 'extraEnv' | 'unsetEnv'>): Record<string, string> {
    const env: Record<string, string> = {}
    for (const [key, value] of Object.entries(process.env)) {
        if (value !== undefined) env[key] = value
    }
    // PTY agents with a full TUI need TERM set. `||` (not `??`) so an empty
    // TERM is replaced too.
    env.TERM = process.env.TERM || DEFAULT_TERM
    Object.assign(env, opts.envVars, opts.extraEnv)
    for (const key of opts.unsetEnv ?? []) {
        delete env[key]
    }
    return env
}

/**
 * True when `marker` occurs in `chunk`, or straddles the boundary between the
 * previous output (`previousTail`) and `chunk`.
 *
 * Only the last `marker.length - 1` characters of the previous output are
 * considered, so every match includes at least one character of the new chunk
 * and a marker that was already reported is never reported again.
 */
function chunkHasMarker(previousTail: string, chunk: string, marker: string): boolean {
    const overlap = marker.length - 1
    // slice(-0) would return the whole string, hence the explicit guard.
    const prefix = overlap > 0 ? previousTail.slice(-overlap) : ''
    return (prefix + chunk).includes(marker)
}

/**
 * Runs one interactive PTY session: spawns the agent, waits for its prompt,
 * then feeds it messages from `opts.nextMessage` until that resolves to `null`,
 * the process exits, or `opts.signal` aborts. The process is always killed
 * before the returned promise settles.
 *
 * A failed spawn is logged and makes the function return early; `onReady` is
 * not called in that case.
 */
export async function runAgentPty(opts: RunAgentPtyOpts): Promise<void> {
    const { debugPrefix } = opts
    logger.debug(`${debugPrefix} Starting PTY session`)

    // Flavor env vars are injected into the spawned process's environment ONLY —
    // never into this process's process.env. This keeps CLAUDE_CONFIG_DIR (used
    // by claudePty to isolate folder-trust) scoped to the child, so the parent's
    // session scanner still resolves transcripts against the real ~/.claude.
    const spawnEnv = buildSpawnEnv(opts)

    const manager = new AgentPtyManager()
    const signal = opts.signal
    const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms))

    const markers = opts.promptMarkers ?? []
    const hasMarkers = markers.length > 0
    const trustMarkers = opts.trustMarkers ?? []
    const idleReadyMs = opts.idleReadyMs ?? (hasMarkers ? 500 : 1000)

    let lastOutputAt = 0
    let sawOutput = false
    // Number of output chunks received. Echo confirmation compares this counter
    // rather than timestamps: two chunks can share a millisecond, which made a
    // genuine echo look like "no output" and caused a duplicate write.
    let outputChunks = 0
    // For marker-based agents (claude): true once the input prompt rendered.
    let promptSeen = false
    // Whether the first-run trust/safety prompt has been auto-approved.
    let trustHandled = false

    // Working/idle state derived from busy/idle markers, reported only on change.
    const busyMarkers = opts.busyMarkers ?? []
    const idleMarkers = opts.idleMarkers ?? []
    const hasBusyMarkers = busyMarkers.length > 0
    let thinking = false

    // Markers can be split across two PTY reads. Keep just enough of the
    // previous output to recognise one that straddles the chunk boundary.
    const maxMarkerOverlap = Math.max(
        0,
        ...[...markers, ...trustMarkers, ...busyMarkers, ...idleMarkers].map((m) => m.length - 1),
    )
    let outputTail = ''
    const seenIn = (list: readonly string[], chunk: string): boolean =>
        list.some((m) => chunkHasMarker(outputTail, chunk, m))

    // Output-silence watchdog against a stuck "thinking" indicator. The post-submit
    // setThinking(true) is optimistic, and the idle MARKER that should clear it can
    // be missed, so the spinner can stick long after the turn ends — or forever if
    // the turn never started (a --resume replay swallowed the first message). A
    // working claude repaints its spinner footer every few hundred ms, so once
    // output has been SILENT for IDLE_SILENCE_MS while we still think it's busy,
    // the turn is really over → force idle. Scoped to agents with a busy marker.
    let idleWatchdog: ReturnType<typeof setTimeout> | null = null
    const disarmIdleWatchdog = (): void => {
        if (idleWatchdog) { clearTimeout(idleWatchdog); idleWatchdog = null }
    }
    // (Re)start the silence timer. Called when thinking begins and on every output
    // chunk while thinking, so the window only elapses once claude has gone quiet.
    const armIdleWatchdog = (): void => {
        if (!hasBusyMarkers || !thinking) return
        disarmIdleWatchdog()
        idleWatchdog = setTimeout(() => {
            idleWatchdog = null
            if (thinking) {
                logger.debug(`${debugPrefix} idle watchdog: ${IDLE_SILENCE_MS}ms of silence; forcing idle`)
                thinking = false
                opts.onThinkingChange?.(false)
            }
        }, IDLE_SILENCE_MS)
        // Do not let the watchdog alone keep the event loop alive.
        idleWatchdog.unref?.()
    }
    const setThinking = (next: boolean): void => {
        if (next === thinking) {
            if (next) armIdleWatchdog() // refresh the silence window on repeated busy signals
            return
        }
        thinking = next
        if (next) armIdleWatchdog()
        else disarmIdleWatchdog()
        opts.onThinkingChange?.(next)
    }

    // Wait until the agent's TUI is ready to receive input. Marker-based agents
    // require both the prompt marker AND settled output; markerless agents use
    // idle alone. A longer-idle fallback prevents hanging if a marker never
    // matches (UI change). Returns silently once the timeout elapses.
    const waitForInputReady = async (timeoutMs = READY_TIMEOUT_MS): Promise<void> => {
        const start = Date.now()
        while (Date.now() - start < timeoutMs) {
            if (signal?.aborted || !manager.isRunning) return
            const idle = Date.now() - lastOutputAt
            if (hasMarkers) {
                if (promptSeen && idle >= idleReadyMs) return
            } else if (sawOutput && idle >= idleReadyMs) {
                return
            }
            if (sawOutput && idle >= READY_FALLBACK_IDLE_MS) return
            await sleep(READY_POLL_MS)
        }
    }

    // Type the text, confirm the agent ingested it (its TUI echoes keystrokes →
    // output), then submit with CR. If no echo comes back, stdin isn't wired up
    // yet, so retry — this is what was dropping the first message. CR is sent
    // separately so the text isn't submitted before it's buffered.
    const submitMessage = async (message: string): Promise<void> => {
        let echoed = false
        for (let attempt = 0; attempt < ECHO_ATTEMPTS && !echoed; attempt++) {
            const before = outputChunks
            manager.write(message)
            const waitStart = Date.now()
            while (Date.now() - waitStart < ECHO_WAIT_MS) {
                if (signal?.aborted || !manager.isRunning) return
                if (outputChunks > before) { echoed = true; break }
                await sleep(ECHO_POLL_MS)
            }
            if (!echoed && process.env.DEBUG_PTY) {
                logger.debug(`${debugPrefix} no echo after write (attempt ${attempt + 1}); retrying`)
            }
        }
        await sleep(PRE_SUBMIT_DELAY_MS)
        manager.write('\r')
        await sleep(POST_SUBMIT_DELAY_MS)
    }

    const abortHandler = () => {
        logger.debug(`${debugPrefix} Abort signal received`)
        manager.kill()
    }
    signal?.addEventListener('abort', abortHandler, { once: true })

    try {
        manager.spawn({
            command: opts.command,
            args: opts.args,
            cwd: opts.cwd,
            env: spawnEnv,
            cols: INITIAL_COLS,
            rows: INITIAL_ROWS,
            onData: (data) => {
                sawOutput = true
                outputChunks++
                lastOutputAt = Date.now()
                // Auto-approve the first-run trust/safety prompt (Enter = default
                // "Yes"). Do this BEFORE prompt detection so the trust screen
                // isn't mistaken for the input prompt — otherwise the first user
                // message gets consumed as the trust answer.
                if (!trustHandled && seenIn(trustMarkers, data)) {
                    trustHandled = true
                    logger.debug(`${debugPrefix} trust prompt detected; auto-approving with Enter`)
                    manager.write('\r')
                } else if (hasMarkers && !promptSeen && seenIn(markers, data)) {
                    promptSeen = true
                }
                // Track the working/idle state from the live footer. The busy
                // marker (spinner/"esc to interrupt") wins when both appear in a
                // chunk; chunks with neither leave the state unchanged.
                if (seenIn(busyMarkers, data)) {
                    setThinking(true)
                } else if (seenIn(idleMarkers, data)) {
                    setThinking(false)
                } else if (thinking) {
                    // Still producing output (e.g. streaming response text with no
                    // footer marker in this chunk) — keep the silence watchdog at bay.
                    armIdleWatchdog()
                }
                // Remember the end of the output for the next chunk's straddle check.
                if (maxMarkerOverlap > 0) {
                    outputTail = (outputTail + data).slice(-maxMarkerOverlap)
                }
                if (process.env.DEBUG_PTY) logger.debug(`${debugPrefix} onData: ${data.length} bytes`)
                opts.onMessage(data)
            },
            onExit: (code) => {
                logger.debug(`${debugPrefix} Process exited with code ${code}`)
                setThinking(false)
                opts.onExit?.(code)
            },
            onError: (error) => {
                logger.debug(`${debugPrefix} PTY error: ${error.message}`, error)
            },
        })

        if (!manager.isRunning) {
            logger.debug(`${debugPrefix} Failed to spawn ${opts.command} PTY`)
            return
        }

        opts.registerControls?.({
            resize: (cols: number, rows: number) => {
                if (!manager.isRunning) return
                if (!Number.isInteger(cols) || !Number.isInteger(rows) || cols < 1 || rows < 1) return
                manager.resize(cols, rows)
            },
            // Inject raw keystrokes into the live TUI — used to drive in-place
            // settings changes (e.g. claude's `/model`/`/effort` slash commands)
            // without re-spawning the process.
            sendKeys: (data: string) => {
                if (!manager.isRunning || !data) return
                manager.write(data)
            }
        })

        opts.onReady()

        // Spawn the agent up-front and wait until its prompt is ready BEFORE any
        // message arrives, so the first user message is processed immediately
        // instead of being consumed as the spawn trigger.
        await waitForInputReady()

        while (manager.isRunning) {
            if (signal?.aborted) {
                logger.debug(`${debugPrefix} Aborted`)
                break
            }

            const next = await opts.nextMessage()
            if (!next) {
                logger.debug(`${debugPrefix} No more input; waiting for process to finish`)
                break
            }

            if (!manager.isRunning) {
                logger.debug(`${debugPrefix} Process exited while waiting for message`)
                break
            }

            const cmd = parseSpecialCommand(next.message)
            if (cmd.type === 'clear' || cmd.type === 'compact') {
                logger.debug(`${debugPrefix} ${cmd.type} command - ignoring in PTY mode`)
                continue
            }

            // Queue semantics: wait until output goes idle (agent back at the
            // prompt) before sending the next queued message.
            await waitForInputReady()
            if (!manager.isRunning || signal?.aborted) {
                break
            }

            if (process.env.DEBUG_PTY) logger.debug(`${debugPrefix} write(loop): ${next.message}`)
            await submitMessage(next.message)
            // The agent is now working on this input — show "thinking" right away
            // (a busy marker reinforces it; the idle marker clears it when done).
            setThinking(true)
        }
    } finally {
        disarmIdleWatchdog()
        signal?.removeEventListener('abort', abortHandler)
        manager.kill()
        logger.debug(`${debugPrefix} PTY session ended`)
    }
}
await new Promise(r => setTimeout(r, 10)) + } +} + +// Real PTY spawn requires the Bun runtime (Bun.spawn terminal). Vitest runs +// its test workers under Node, where Bun is undefined, so skip there. Run with +// the Bun runtime to exercise these. +describe.skipIf(typeof Bun === 'undefined')('claudePty real PTY', () => { + let manager: AgentPtyManager + + afterEach(() => { + manager?.kill() + }) + + it('onData fires for every write (messages 1, 2, 3)', async () => { + manager = new AgentPtyManager() + const onData = vi.fn() + const onError = vi.fn((err: Error) => { + console.error('[test] spawn error:', err.message) + }) + + manager.spawn({ + command: 'bash', + args: ['-c', 'while IFS= read -r line; do echo "echo:$line"; done'], + onData, + onError, + }) + + expect(manager.isRunning).toBe(true) + if (!manager.isRunning) { + console.error('[test] manager not running, onError calls:', onError.mock.calls) + return + } + + manager.write('first\n') + await waitForOutput(onData) + expect(onData).toHaveBeenCalled() + const firstCalls = onData.mock.calls.length + const firstOutput = onData.mock.calls.map(c => c[0]).join('') + expect(firstOutput).toContain('echo:first') + onData.mockClear() + + manager.write('second\n') + await waitForOutput(onData) + expect(onData).toHaveBeenCalled() + const secondOutput = onData.mock.calls.map(c => c[0]).join('') + expect(secondOutput).toContain('echo:second') + onData.mockClear() + + manager.write('third\n') + await waitForOutput(onData) + expect(onData).toHaveBeenCalled() + const thirdOutput = onData.mock.calls.map(c => c[0]).join('') + expect(thirdOutput).toContain('echo:third') + }) +}) diff --git a/cli/src/claude/__tests__/claudePty.test.ts b/cli/src/claude/__tests__/claudePty.test.ts new file mode 100644 index 000000000..989d07a1b --- /dev/null +++ b/cli/src/claude/__tests__/claudePty.test.ts @@ -0,0 +1,104 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +// claudePty is a thin wrapper over the shared runAgentPty 
driver. Here we only +// verify it forwards the correct claude-specific options; the PTY behavior +// (spawn/ready/echo-submit/loop) is tested in runAgentPty.test.ts. +vi.mock('@/agent/runAgentPty', () => ({ + runAgentPty: vi.fn(async () => {}), +})) + +vi.mock('@/lib', () => ({ + logger: { debug: vi.fn() }, +})) + +// Trust isolation is unit-tested in trustedConfigDir.test.ts; here we only +// verify claudePty wires it into the spawn env and cleans up afterwards. +vi.mock('@/claude/trustedConfigDir', () => ({ + prepareTrustedConfigDir: vi.fn(() => '/tmp/fake-cfg'), + cleanupTrustedConfigDir: vi.fn(), +})) + +import { claudePty } from '../claudePty' +import { runAgentPty } from '@/agent/runAgentPty' +import { cleanupTrustedConfigDir, prepareTrustedConfigDir } from '@/claude/trustedConfigDir' + +type ClaudePtyOpts = Parameters[0] + +function makeOpts(overrides: Partial = {}): ClaudePtyOpts { + return { + sessionId: 'test-session', + path: '/tmp/test', + nextMessage: vi.fn(), + onReady: vi.fn(), + onMessage: vi.fn(), + ...overrides, + } +} + +function lastCall() { + const mock = vi.mocked(runAgentPty) + return mock.mock.calls[mock.mock.calls.length - 1]![0] +} + +describe('claudePty wrapper', () => { + afterEach(() => { + vi.mocked(runAgentPty).mockClear() + }) + + it('spawns the claude command', async () => { + await claudePty(makeOpts()) + expect(runAgentPty).toHaveBeenCalled() + expect(lastCall().command).toBe('claude') + expect(lastCall().cwd).toBe('/tmp/test') + }) + + it('includes --settings when provided, preserving claudeArgs', async () => { + await claudePty(makeOpts({ hookSettingsPath: '/tmp/hooks/h.json', claudeArgs: ['--model', 'opus'] })) + const args = lastCall().args + const idx = args.indexOf('--settings') + expect(idx).toBeGreaterThanOrEqual(0) + expect(args[idx + 1]).toBe('/tmp/hooks/h.json') + expect(args).toEqual(expect.arrayContaining(['--model', 'opus'])) + }) + + it('omits --settings when no hookSettingsPath', async () => { + await 
claudePty(makeOpts({ claudeArgs: ['--model', 'opus'] })) + expect(lastCall().args).not.toContain('--settings') + }) + + it('passes claude prompt + trust markers and DISABLE_AUTOUPDATER', async () => { + await claudePty(makeOpts()) + expect(lastCall().promptMarkers).toEqual(expect.arrayContaining(['for shortcuts'])) + // '❯' must NOT be a prompt marker — it appears in the trust screen too. + expect(lastCall().promptMarkers).not.toContain('❯') + expect(lastCall().trustMarkers).toEqual(expect.arrayContaining(['trust this folder'])) + expect(lastCall().extraEnv).toMatchObject({ DISABLE_AUTOUPDATER: '1' }) + }) + + it('forwards callbacks and signal', async () => { + const nextMessage = vi.fn() + const onReady = vi.fn() + const onMessage = vi.fn() + const onExit = vi.fn() + const controller = new AbortController() + await claudePty(makeOpts({ nextMessage, onReady, onMessage, onExit, signal: controller.signal })) + const call = lastCall() + expect(call.nextMessage).toBe(nextMessage) + expect(call.onReady).toBe(onReady) + expect(call.onMessage).toBe(onMessage) + expect(call.onExit).toBe(onExit) + expect(call.signal).toBe(controller.signal) + }) + + it('passes claudeEnvVars as envVars', async () => { + await claudePty(makeOpts({ claudeEnvVars: { FOO: 'bar' } })) + expect(lastCall().envVars).toEqual({ FOO: 'bar' }) + }) + + it('isolates folder trust via CLAUDE_CONFIG_DIR and cleans up after', async () => { + await claudePty(makeOpts({ path: '/work/dir' })) + expect(prepareTrustedConfigDir).toHaveBeenCalledWith('/work/dir') + expect(lastCall().extraEnv).toMatchObject({ CLAUDE_CONFIG_DIR: '/tmp/fake-cfg' }) + expect(cleanupTrustedConfigDir).toHaveBeenCalledWith('/tmp/fake-cfg') + }) +}) diff --git a/cli/src/claude/__tests__/trustedConfigDir.test.ts b/cli/src/claude/__tests__/trustedConfigDir.test.ts new file mode 100644 index 000000000..299633593 --- /dev/null +++ b/cli/src/claude/__tests__/trustedConfigDir.test.ts @@ -0,0 +1,149 @@ +import { afterEach, beforeEach, describe, 
expect, it, vi } from 'vitest' + +vi.mock('node:fs', () => ({ + mkdtempSync: vi.fn(() => '/tmp/hapi-claude-cfg-abc'), + readdirSync: vi.fn(() => ['.credentials.json', 'projects', 'settings.json']), + symlinkSync: vi.fn(), + readFileSync: vi.fn(() => JSON.stringify({ projects: { '/other': { hasTrustDialogAccepted: true } } })), + writeFileSync: vi.fn(), + rmSync: vi.fn(), +})) +vi.mock('node:os', () => ({ + homedir: () => '/home/user', + tmpdir: () => '/tmp', +})) +vi.mock('@/lib', () => ({ logger: { debug: vi.fn() } })) + +import { mkdtempSync, readdirSync, readFileSync, rmSync, symlinkSync, writeFileSync } from 'node:fs' +import { cleanupTrustedConfigDir, prepareTrustedConfigDir } from '../trustedConfigDir' + +function findWrite(suffix: string) { + return vi.mocked(writeFileSync).mock.calls.find((c) => String(c[0]).endsWith(suffix)) +} + +describe('prepareTrustedConfigDir', () => { + beforeEach(() => { + delete process.env.CLAUDE_CONFIG_DIR + }) + afterEach(() => { + vi.clearAllMocks() + }) + + it('symlinks every real config entry into the temp dir', () => { + prepareTrustedConfigDir('/work') + expect(symlinkSync).toHaveBeenCalledWith('/home/user/.claude/.credentials.json', '/tmp/hapi-claude-cfg-abc/.credentials.json') + expect(symlinkSync).toHaveBeenCalledWith('/home/user/.claude/projects', '/tmp/hapi-claude-cfg-abc/projects') + expect(symlinkSync).toHaveBeenCalledWith('/home/user/.claude/settings.json', '/tmp/hapi-claude-cfg-abc/settings.json') + }) + + it('never symlinks a .claude.json entry (would writethrough to the real file)', () => { + // A custom CLAUDE_CONFIG_DIR can itself hold a .claude.json. Symlinking it + // and then writeFileSync-ing the trust-patched copy would follow the link + // and mutate the real file — so the entry must be skipped, not linked. 
+ vi.mocked(readdirSync).mockReturnValueOnce(['.credentials.json', '.claude.json'] as never) + prepareTrustedConfigDir('/work') + const linkedDotJson = vi.mocked(symlinkSync).mock.calls.find((c) => String(c[1]).endsWith('/.claude.json')) + expect(linkedDotJson).toBeUndefined() + // The private copy is still written into the temp dir. + expect(findWrite('.claude.json')![0]).toBe('/tmp/hapi-claude-cfg-abc/.claude.json') + }) + + it('writes a private .claude.json with the working folder pre-trusted', () => { + prepareTrustedConfigDir('/work') + const call = findWrite('.claude.json') + expect(call).toBeDefined() + // The copy lives in the temp dir, NOT in the user's home. + expect(call![0]).toBe('/tmp/hapi-claude-cfg-abc/.claude.json') + const written = JSON.parse(String(call![1])) + expect(written.projects['/work'].hasTrustDialogAccepted).toBe(true) + }) + + it('preserves the user\'s existing trusted projects in the copy', () => { + prepareTrustedConfigDir('/work') + const written = JSON.parse(String(findWrite('.claude.json')![1])) + expect(written.projects['/other'].hasTrustDialogAccepted).toBe(true) + }) + + it('never writes to the real ~/.claude.json', () => { + prepareTrustedConfigDir('/work') + const homeWrite = vi.mocked(writeFileSync).mock.calls.find((c) => c[0] === '/home/user/.claude.json') + expect(homeWrite).toBeUndefined() + // It only reads the real one. 
+ expect(readFileSync).toHaveBeenCalledWith('/home/user/.claude.json', 'utf-8') + }) + + it('honors an existing CLAUDE_CONFIG_DIR as the real config source', () => { + process.env.CLAUDE_CONFIG_DIR = '/custom/cfg' + prepareTrustedConfigDir('/work') + expect(symlinkSync).toHaveBeenCalledWith('/custom/cfg/.credentials.json', '/tmp/hapi-claude-cfg-abc/.credentials.json') + }) + + it('returns the temp dir path on success', () => { + expect(prepareTrustedConfigDir('/work')).toBe('/tmp/hapi-claude-cfg-abc') + }) + + it('returns undefined (no throw) when preparation fails', () => { + vi.mocked(mkdtempSync).mockImplementationOnce(() => { throw new Error('no tmp') }) + expect(prepareTrustedConfigDir('/work')).toBeUndefined() + }) +}) + +describe('cleanupTrustedConfigDir', () => { + afterEach(() => vi.clearAllMocks()) + + it('recursively removes the temp dir', () => { + cleanupTrustedConfigDir('/tmp/hapi-claude-cfg-abc') + expect(rmSync).toHaveBeenCalledWith('/tmp/hapi-claude-cfg-abc', expect.objectContaining({ recursive: true, force: true })) + }) + + it('is a no-op for undefined', () => { + cleanupTrustedConfigDir(undefined) + expect(rmSync).not.toHaveBeenCalled() + }) +}) + +// Archive (KillSession) and SIGTERM/SIGINT end the runner with process.exit(), +// which skips claudePty's finally → cleanupTrustedConfigDir never runs. A +// process 'exit' handler must reap whatever is still pending so /tmp doesn't +// accumulate hapi-claude-cfg-* across sessions. 
+describe('exit-time reaping of leaked dirs', () => { + afterEach(() => { + vi.restoreAllMocks() + delete process.env.CLAUDE_CONFIG_DIR + }) + + it('registers a process exit handler that reaps still-pending dirs', async () => { + vi.resetModules() + const onSpy = vi.spyOn(process, 'on') + const { prepareTrustedConfigDir } = await import('../trustedConfigDir') + prepareTrustedConfigDir('/work') + const exitHandler = onSpy.mock.calls.find((c) => c[0] === 'exit')?.[1] as (() => void) | undefined + expect(exitHandler).toBeDefined() + vi.mocked(rmSync).mockClear() + exitHandler!() + expect(rmSync).toHaveBeenCalledWith('/tmp/hapi-claude-cfg-abc', expect.objectContaining({ recursive: true, force: true })) + }) + + it('does not reap a dir already cleaned up via cleanupTrustedConfigDir', async () => { + vi.resetModules() + const onSpy = vi.spyOn(process, 'on') + const { prepareTrustedConfigDir, cleanupTrustedConfigDir } = await import('../trustedConfigDir') + const dir = prepareTrustedConfigDir('/work') + cleanupTrustedConfigDir(dir) + const exitHandler = onSpy.mock.calls.find((c) => c[0] === 'exit')?.[1] as (() => void) | undefined + vi.mocked(rmSync).mockClear() + exitHandler?.() + expect(rmSync).not.toHaveBeenCalled() + }) + + it('registers the exit handler only once across multiple prepares', async () => { + vi.resetModules() + const onSpy = vi.spyOn(process, 'on') + const { prepareTrustedConfigDir } = await import('../trustedConfigDir') + prepareTrustedConfigDir('/a') + prepareTrustedConfigDir('/b') + prepareTrustedConfigDir('/c') + const exitRegistrations = onSpy.mock.calls.filter((c) => c[0] === 'exit') + expect(exitRegistrations).toHaveLength(1) + }) +}) diff --git a/cli/src/claude/claudePty.ts b/cli/src/claude/claudePty.ts new file mode 100644 index 000000000..c5ebd937f --- /dev/null +++ b/cli/src/claude/claudePty.ts @@ -0,0 +1,97 @@ +import { runAgentPty } from "@/agent/runAgentPty" +import { cleanupTrustedConfigDir, prepareTrustedConfigDir } from 
"@/claude/trustedConfigDir" + +export type ClaudePtyOpts = { + sessionId: string | null + path: string + claudeEnvVars?: Record + claudeArgs?: string[] + /** + * Path to a Claude settings file registering a SessionStart hook. When + * present, `--settings ` is appended so the interactive (PTY) Claude + * reports its freshly created sessionId back to Hapi, enabling the session + * scanner to tail the matching jsonl transcript for structured messages. + */ + hookSettingsPath?: string + signal?: AbortSignal + nextMessage: () => Promise<{ message: string } | null> + onReady: () => void + onMessage: (data: string) => void + onThinkingChange?: (thinking: boolean) => void + onExit?: (code: number | null) => void + registerControls?: (controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void }) => void +} + +function buildClaudePtyArgs(opts: ClaudePtyOpts): string[] { + const args: string[] = [] + if (opts.hookSettingsPath) { + args.push('--settings', opts.hookSettingsPath) + } + if (opts.claudeArgs) { + args.push(...opts.claudeArgs) + } + return args +} + +// claude's ink TUI renders these strings once the input prompt is ready. +// NOTE: '❯' is intentionally excluded — it also appears in the first-run trust +// prompt ("❯ 1. Yes, I trust this folder"), so using it as a prompt marker +// would make the trust screen look like the input prompt. +const CLAUDE_PROMPT_MARKERS = ['for shortcuts', 'bypass permissions', 'esc to interrupt'] +// First-run trust/safety prompt. Primary suppression is an isolated +// CLAUDE_CONFIG_DIR with the folder pre-trusted (see prepareTrustedConfigDir); +// these markers are a fallback so the driver auto-approves (Enter = Yes) if the +// prompt still appears. We deliberately do NOT touch the user's ~/.claude.json. +const CLAUDE_TRUST_MARKERS = ['trust this folder', 'Yes, I trust', 'safety check'] +// Footer shown while generating ("… (esc to interrupt)") vs at an idle input +// prompt ("? for shortcuts"). 
Drives the chat thinking indicator. +const CLAUDE_BUSY_MARKERS = ['esc to interrupt'] +const CLAUDE_IDLE_MARKERS = ['for shortcuts'] + +// When claude is launched from a process that itself inherited Claude Code's env +// (e.g. the runner started from inside a Claude session, a hook, or a sub-agent), +// the child claude sees CLAUDECODE / CLAUDE_CODE_* and treats itself as a nested +// session — and STOPS WRITING ITS JSONL TRANSCRIPT (so HAPI's scanner has nothing +// to forward to chat). Strip these markers so the spawned claude is a clean, +// top-level session that persists its transcript. (Note: CLAUDE_CONFIG_DIR is +// NOT matched and is preserved.) +function claudeInheritedEnvKeys(): string[] { + return Object.keys(process.env).filter( + (k) => k === 'CLAUDECODE' || k.startsWith('CLAUDE_CODE_') + ) +} + +export async function claudePty(opts: ClaudePtyOpts): Promise { + // Pre-trust the folder in a throwaway config dir so the trust prompt never + // shows — without mutating the user's real ~/.claude.json. + const configDir = prepareTrustedConfigDir(opts.path) + try { + return await runAgentPty({ + command: 'claude', + args: buildClaudePtyArgs(opts), + cwd: opts.path, + envVars: opts.claudeEnvVars, + extraEnv: { + DISABLE_AUTOUPDATER: '1', + ...(configDir ? { CLAUDE_CONFIG_DIR: configDir } : {}), + }, + // Drop inherited CLAUDECODE / CLAUDE_CODE_* so claude saves its + // transcript (see claudeInheritedEnvKeys). 
+ unsetEnv: claudeInheritedEnvKeys(), + promptMarkers: CLAUDE_PROMPT_MARKERS, + trustMarkers: CLAUDE_TRUST_MARKERS, + busyMarkers: CLAUDE_BUSY_MARKERS, + idleMarkers: CLAUDE_IDLE_MARKERS, + debugPrefix: '[claudePty]', + signal: opts.signal, + nextMessage: opts.nextMessage, + onReady: opts.onReady, + onMessage: opts.onMessage, + onThinkingChange: opts.onThinkingChange, + onExit: opts.onExit, + registerControls: opts.registerControls, + }) + } finally { + cleanupTrustedConfigDir(configDir) + } +} diff --git a/cli/src/claude/trustedConfigDir.ts b/cli/src/claude/trustedConfigDir.ts new file mode 100644 index 000000000..f653421a1 --- /dev/null +++ b/cli/src/claude/trustedConfigDir.ts @@ -0,0 +1,111 @@ +import { mkdtempSync, readdirSync, readFileSync, rmSync, symlinkSync, writeFileSync } from "node:fs" +import { homedir, tmpdir } from "node:os" +import { join } from "node:path" +import { logger } from "@/lib" + +// Temp config dirs still pending cleanup. The normal path removes a dir via +// cleanupTrustedConfigDir (claudePty's finally), but session archive (KillSession +// RPC) and SIGTERM/SIGINT terminate the runner with process.exit(), which skips +// that finally. A synchronous 'exit' handler reaps whatever is still registered +// so these temp dirs don't pile up in /tmp across sessions. +const pendingConfigDirs = new Set() +let exitHandlerRegistered = false + +function ensureExitCleanupRegistered(): void { + if (exitHandlerRegistered) return + exitHandlerRegistered = true + // 'exit' callbacks must be synchronous; rmSync fits. It does not follow + // symlinks, so the real ~/.claude the dir links to is preserved. 
+ process.on('exit', () => { + for (const dir of pendingConfigDirs) { + try { + rmSync(dir, { recursive: true, force: true }) + } catch { + // best-effort; process is exiting + } + } + pendingConfigDirs.clear() + }) +} + +/** + * Build an isolated CLAUDE_CONFIG_DIR that shares the user's real Claude state + * but pre-trusts the working folder — so the first-run "Is this a project you + * trust?" prompt never appears in PTY mode, WITHOUT mutating the user's own + * ~/.claude.json. + * + * How: every entry in the real config dir (credentials, projects/transcripts, + * settings, hooks, ...) is symlinked into a fresh temp dir, so login state and + * transcripts stay shared with the real install. Only `.claude.json` is a + * private copy, with `projects[cwd].hasTrustDialogAccepted = true` added. + * + * Claude resolves `.claude.json` and everything else from CLAUDE_CONFIG_DIR, so + * pointing the spawned process at this temp dir suppresses the trust prompt. + * The parent process's process.env is left untouched (see runAgentPty), so the + * session scanner still resolves transcripts against the real ~/.claude (which + * the symlinked `projects` entry points back to). + * + * Returns the temp dir path, or undefined if preparation failed (caller then + * falls back to the runtime trust-prompt auto-approve). + */ +export function prepareTrustedConfigDir(cwd: string): string | undefined { + try { + const realConfigDir = process.env.CLAUDE_CONFIG_DIR || join(homedir(), '.claude') + const realDotJson = join(homedir(), '.claude.json') + + const dir = mkdtempSync(join(tmpdir(), 'hapi-claude-cfg-')) + pendingConfigDirs.add(dir) + ensureExitCleanupRegistered() + + // Share all real config state via symlinks (login, transcripts, settings). + // `.claude.json` is skipped here — it lives in homedir, not in the config + // dir, and we want a private trust-patched copy anyway. 
+ for (const entry of readdirSync(realConfigDir)) { + // Never symlink `.claude.json`: we write a private trust-patched copy + // below, and writeFileSync would follow the symlink and mutate the + // real file (only reachable when CLAUDE_CONFIG_DIR points at a dir + // that itself holds a .claude.json; the default ~/.claude does not). + if (entry === '.claude.json') { + continue + } + try { + symlinkSync(join(realConfigDir, entry), join(dir, entry)) + } catch (e) { + logger.debug(`[trustedConfigDir] failed to symlink ${entry}`, e) + } + } + + // Private .claude.json with the folder pre-trusted. Original untouched. + let config: Record = {} + try { + config = JSON.parse(readFileSync(realDotJson, 'utf-8')) + } catch (e) { + logger.debug('[trustedConfigDir] could not read ~/.claude.json; starting fresh', e) + } + const projects = (config.projects ?? {}) as Record> + projects[cwd] = { ...(projects[cwd] ?? {}), hasTrustDialogAccepted: true } + config.projects = projects + writeFileSync(join(dir, '.claude.json'), JSON.stringify(config)) + + logger.debug(`[trustedConfigDir] prepared isolated config at ${dir} (folder pre-trusted)`) + return dir + } catch (e) { + logger.debug('[trustedConfigDir] preparation failed; relying on trust auto-approve', e) + return undefined + } +} + +/** + * Remove a temp config dir created by prepareTrustedConfigDir. Symlinked entries + * are unlinked (Node's rm does not follow symlinks), so the real ~/.claude state + * they point to is preserved. 
+ */ +export function cleanupTrustedConfigDir(dir: string | undefined): void { + if (!dir) return + pendingConfigDirs.delete(dir) + try { + rmSync(dir, { recursive: true, force: true }) + } catch (e) { + logger.debug(`[trustedConfigDir] cleanup failed for ${dir}`, e) + } +} From d877cbfb29da90f8f2daf51567c3ea99696f52fd Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:15 +0900 Subject: [PATCH 03/11] perf(scanner): incremental byte reads and adaptive polling --- cli/src/claude/utils/sessionScanner.ts | 103 ++++++++++++------ .../common/session/BaseSessionScanner.ts | 73 ++++++++++++- 2 files changed, 143 insertions(+), 33 deletions(-) diff --git a/cli/src/claude/utils/sessionScanner.ts b/cli/src/claude/utils/sessionScanner.ts index d97dd2230..2e2244af4 100644 --- a/cli/src/claude/utils/sessionScanner.ts +++ b/cli/src/claude/utils/sessionScanner.ts @@ -1,6 +1,6 @@ import { RawJSONLines, RawJSONLinesSchema } from "../types"; import { basename, join } from "node:path"; -import { readFile } from "node:fs/promises"; +import { open, stat } from "node:fs/promises"; import { logger } from "@/ui/logger"; import { getProjectPath } from "./path"; import { BaseSessionScanner, SessionFileScanEntry, SessionFileScanResult, SessionFileScanStats } from "@/modules/common/session/BaseSessionScanner"; @@ -35,6 +35,11 @@ export async function createSessionScanner(opts: { }, onNewSession: (sessionId: string) => { scanner.onNewSession(sessionId); + }, + // Call when the user submits input so the scanner polls fast right away + // (rather than waiting up to the idle interval for the first response). 
+ markActive: () => { + scanner.markActive(); } }; } @@ -51,7 +56,12 @@ class ClaudeSessionScanner extends BaseSessionScanner { private readonly scannedSessions = new Set(); constructor(opts: { sessionId: string | null; workingDirectory: string; onMessage: (message: RawJSONLines) => void }) { - super({ intervalMs: 3000 }); + // fs.watch (in BaseSessionScanner.ensureWatcher) drives near-real-time + // updates; the poll is a fallback for missed watch events. Adaptive: 5s + // while idle (cheap — a stat, then an incremental read of only new + // bytes), 100ms while a response/tool-call is active or just after user + // input, dropping back after 3s of quiet. + super({ intervalMs: 5000, activeIntervalMs: 100, activeWindowMs: 3000 }); this.projectDir = getProjectPath(opts.workingDirectory); this.onMessage = opts.onMessage; this.currentSessionId = opts.sessionId; @@ -83,11 +93,11 @@ class ClaudeSessionScanner extends BaseSessionScanner { return; } const sessionFile = this.sessionFilePath(this.currentSessionId); - const { events, totalLines } = await readSessionLog(sessionFile, 0); + const { events, nextCursor } = await readSessionLog(sessionFile, 0); logger.debug(`[SESSION_SCANNER] Marking ${events.length} existing messages as processed from session ${this.currentSessionId}`); const keys = events.map((entry) => messageKey(entry.event)); this.seedProcessedKeys(keys); - this.setCursor(sessionFile, totalLines); + this.setCursor(sessionFile, nextCursor); } protected async beforeScan(): Promise { @@ -113,10 +123,10 @@ class ClaudeSessionScanner extends BaseSessionScanner { if (sessionId) { this.scannedSessions.add(sessionId); } - const { events, totalLines } = await readSessionLog(filePath, cursor); + const { events, nextCursor } = await readSessionLog(filePath, cursor); return { events, - nextCursor: totalLines + nextCursor }; } @@ -169,52 +179,81 @@ function messageKey(message: RawJSONLines): string { } /** - * Read and parse session log file. 
- * Returns only valid conversation messages, silently skipping internal events. + * Incrementally read and parse a session log file. + * + * The cursor is a BYTE OFFSET into the (append-only) JSONL. Each scan stats the + * file and reads only the bytes after the cursor — so the cost is O(new content) + * regardless of how large the conversation has grown, instead of re-reading the + * whole file every poll. A trailing partial line (a write in progress) is left + * unconsumed until its newline arrives. If the file shrank (compaction rewrote + * it), the cursor resets to 0 and the whole file is re-read (dedup by uuid in the + * base scanner absorbs any re-sent events). */ -async function readSessionLog(filePath: string, startLine: number): Promise<{ events: SessionFileScanEntry[]; totalLines: number }> { - logger.debug(`[SESSION_SCANNER] Reading session file: ${filePath}`); - let file: string; +async function readSessionLog(filePath: string, startByte: number): Promise<{ events: SessionFileScanEntry[]; nextCursor: number }> { + let size: number; try { - file = await readFile(filePath, 'utf-8'); + size = (await stat(filePath)).size; } catch (error) { logger.debug(`[SESSION_SCANNER] Session file not found: ${filePath}`); - return { events: [], totalLines: startLine }; + return { events: [], nextCursor: startByte }; + } + + let from = startByte; + if (from > size) { + from = 0; // file was truncated/rewritten — re-read from the top } - const lines = file.split('\n'); - const hasTrailingEmpty = lines.length > 0 && lines[lines.length - 1] === ''; - const totalLines = hasTrailingEmpty ? 
lines.length - 1 : lines.length; - let effectiveStartLine = startLine; - if (effectiveStartLine > totalLines) { - effectiveStartLine = 0; + if (from >= size) { + return { events: [], nextCursor: size }; // no new bytes } + + let chunk: Buffer; + const fd = await open(filePath, 'r'); + try { + const length = size - from; + const buffer = Buffer.allocUnsafe(length); + // fd.read may return fewer bytes than requested even for a regular file; + // the tail of an allocUnsafe buffer is uninitialized heap, so only the + // first `bytesRead` bytes are valid. Operating past them would let a stray + // 0x0a in garbage advance the cursor past never-read data → dropped lines. + const { bytesRead } = await fd.read(buffer, 0, length, from); + chunk = buffer.subarray(0, bytesRead); + } finally { + await fd.close(); + } + + // Consume only through the last newline; keep any trailing partial line for + // the next scan (`from` always sits on a line boundary, so the chunk's first + // line is always complete). + const lastNewline = chunk.lastIndexOf(0x0a); + if (lastNewline === -1) { + return { events: [], nextCursor: from }; + } + const nextCursor = from + lastNewline + 1; + const text = chunk.subarray(0, lastNewline).toString('utf-8'); + const messages: SessionFileScanEntry[] = []; - for (let index = effectiveStartLine; index < lines.length; index += 1) { - const l = lines[index]; + for (const l of text.split('\n')) { + if (l.trim() === '') { + continue; + } try { - if (l.trim() === '') { - continue; - } - let message = JSON.parse(l); - - // Silently skip known internal Claude Code events - // These are state/tracking events, not conversation messages + const message = JSON.parse(l); + // Silently skip known internal Claude Code state/tracking events. 
if (message.type && INTERNAL_CLAUDE_EVENT_TYPES.has(message.type)) { continue; } - - let parsed = RawJSONLinesSchema.safeParse(message); + const parsed = RawJSONLinesSchema.safeParse(message); if (!parsed.success) { // Unknown message types are silently skipped. continue; } - messages.push({ event: parsed.data, lineIndex: index }); + messages.push({ event: parsed.data }); } catch (e) { logger.debug(`[SESSION_SCANNER] Error processing message: ${e}`); continue; } } - return { events: messages, totalLines }; + return { events: messages, nextCursor }; } function sessionIdFromPath(filePath: string): string | null { diff --git a/cli/src/modules/common/session/BaseSessionScanner.ts b/cli/src/modules/common/session/BaseSessionScanner.ts index e19d0e751..9ffe44873 100644 --- a/cli/src/modules/common/session/BaseSessionScanner.ts +++ b/cli/src/modules/common/session/BaseSessionScanner.ts @@ -22,7 +22,17 @@ export type SessionFileScanStats = { }; type BaseSessionScannerOptions = { + /** Poll interval while idle (no recent events). */ intervalMs: number; + /** + * Poll interval while "active" — i.e. shortly after a user input or a new + * event. Defaults to `intervalMs` (adaptive polling disabled). Set lower + * (e.g. 100ms) for snappy updates during a live response without paying that + * cost while idle. + */ + activeIntervalMs?: number; + /** How long to stay on `activeIntervalMs` after the last activity. 
*/ + activeWindowMs?: number; }; export abstract class BaseSessionScanner { @@ -33,9 +43,25 @@ export abstract class BaseSessionScanner { private intervalId: ReturnType | null = null; private stopped = false; private scanPromise: Promise | null = null; + private currentIntervalMs: number; + private activeUntil = 0; protected constructor(private readonly options: BaseSessionScannerOptions) { this.sync = new InvalidateSync(() => this.scan()); + this.currentIntervalMs = options.intervalMs; + } + + private get idleIntervalMs(): number { + return this.options.intervalMs; + } + private get activeIntervalMs(): number { + return this.options.activeIntervalMs ?? this.options.intervalMs; + } + private get activeWindowMs(): number { + return this.options.activeWindowMs ?? 3000; + } + private get adaptiveEnabled(): boolean { + return this.activeIntervalMs < this.idleIntervalMs; } protected abstract findSessionFiles(): Promise; @@ -105,7 +131,47 @@ export abstract class BaseSessionScanner { public async start(): Promise { await this.initialize(); await this.sync.invalidateAndAwait(); - this.intervalId = setInterval(() => this.sync.invalidate(), this.options.intervalMs); + this.startInterval(this.idleIntervalMs); + } + + private startInterval(ms: number): void { + if (this.intervalId) { + clearInterval(this.intervalId); + } + this.currentIntervalMs = ms; + this.intervalId = setInterval(() => this.tick(), ms); + } + + private tick(): void { + // Drop back to the idle interval once the active window lapses. + if ( + this.adaptiveEnabled && + this.currentIntervalMs === this.activeIntervalMs && + Date.now() >= this.activeUntil + ) { + this.startInterval(this.idleIntervalMs); + } + this.sync.invalidate(); + } + + /** + * Signal external activity (e.g. the user just submitted input) so the + * scanner polls at `activeIntervalMs` and re-scans immediately. New events + * found during a scan extend the window automatically. 
+ */ + public markActive(): void { + this.extendActiveWindow(); + this.sync.invalidate(); + } + + private extendActiveWindow(): void { + if (!this.adaptiveEnabled) { + return; + } + this.activeUntil = Date.now() + this.activeWindowMs; + if (this.currentIntervalMs !== this.activeIntervalMs) { + this.startInterval(this.activeIntervalMs); + } } public async cleanup(): Promise { @@ -179,6 +245,11 @@ export abstract class BaseSessionScanner { for (const key of newKeys) { this.recordProcessedKey(key); } + if (newEvents.length > 0) { + // A live response/tool-call is streaming in — stay on the fast + // interval so the next chunk is picked up promptly. + this.extendActiveWindow(); + } } await this.afterScan(); } From e62320d7a97c35e21d38e79f561080fcb8bc62a7 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 04/11] refactor(claude): extract question-answer input builders to a shared util MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the AskUserQuestion / request_user_input answer-to-input builders (and the question tool-name predicates) out of the SDK permission handler into a standalone util. No behavior change — the SDK handler imports them — so the PTY permission bridge can reuse them without pulling in the SDK handler's dependencies. 
--- cli/src/claude/utils/permissionHandler.ts | 75 ++----------------- cli/src/claude/utils/questionAnswerInput.ts | 81 +++++++++++++++++++++ 2 files changed, 88 insertions(+), 68 deletions(-) create mode 100644 cli/src/claude/utils/questionAnswerInput.ts diff --git a/cli/src/claude/utils/permissionHandler.ts b/cli/src/claude/utils/permissionHandler.ts index d154b7182..549d609a2 100644 --- a/cli/src/claude/utils/permissionHandler.ts +++ b/cli/src/claude/utils/permissionHandler.ts @@ -21,6 +21,13 @@ import { type PendingPermissionRequest, type PermissionCompletion } from "@/modules/common/permission/BasePermissionHandler"; +import { + isAskUserQuestionToolName, + isRequestUserInputToolName, + isQuestionToolName, + buildAskUserQuestionUpdatedInput, + buildRequestUserInputUpdatedInput +} from "./questionAnswerInput"; interface PermissionResponse { id: string; @@ -34,17 +41,6 @@ interface PermissionResponse { const PLAN_EXIT_MODES: PermissionMode[] = ['default', 'acceptEdits', 'auto', 'bypassPermissions']; -function isAskUserQuestionToolName(toolName: string): boolean { - return toolName === 'AskUserQuestion' || toolName === 'ask_user_question'; -} - -function isRequestUserInputToolName(toolName: string): boolean { - return toolName === 'request_user_input'; -} - -function isQuestionToolName(toolName: string): boolean { - return isAskUserQuestionToolName(toolName) || isRequestUserInputToolName(toolName); -} function formatAskUserQuestionAnswers(answers: Record | Record, input: unknown): string { // Normalize nested format to flat format for display @@ -100,63 +96,6 @@ function formatAskUserQuestionAnswers(answers: Record | Record : `User answered:\n${body}`; } -function buildAskUserQuestionUpdatedInput(input: unknown, answers: Record | Record): Record { - // Normalize incoming answers (web sends Record; - // codex pathway sends nested Record) into a - // single Record shape we can iterate. 
- const indexedAnswers: Record = {}; - for (const [key, value] of Object.entries(answers)) { - if (Array.isArray(value)) { - indexedAnswers[key] = value; - } else if (value && typeof value === 'object' && 'answers' in value) { - indexedAnswers[key] = value.answers; - } - } - - if (!isObject(input)) { - return { answers: {} }; - } - - // claude code 2.x's built-in AskUserQuestion tool expects - // answers: Record - // and joins multi-select answers with a comma; it then echoes them - // verbatim in the tool result (`mapToolResultToToolResultBlockParam`). - // Sending the index-keyed `string[]` shape we receive from the web - // makes claude's lookup miss every question, producing the empty - // "User has answered your questions: ." result that locks the turn. - const questions = Array.isArray(input.questions) ? input.questions : []; - const claudeShapedAnswers: Record = {}; - for (let i = 0; i < questions.length; i += 1) { - const q = questions[i]; - if (!q || typeof q !== 'object') continue; - const questionText = (q as { question?: unknown }).question; - if (typeof questionText !== 'string' || questionText.length === 0) continue; - const selections = indexedAnswers[String(i)]; - if (!selections || selections.length === 0) continue; - claudeShapedAnswers[questionText] = selections.join(','); - } - - return { - ...input, - answers: claudeShapedAnswers - }; -} - -/** - * Build updated input for request_user_input tool - * The answers format is nested: { answers: { [id]: { answers: string[] } } } - */ -function buildRequestUserInputUpdatedInput(input: unknown, answers: unknown): Record { - if (!isObject(input)) { - return { answers }; - } - - return { - ...input, - answers - }; -} - export class PermissionHandler extends BasePermissionHandler { private toolCalls: { id: string, name: string, input: any, used: boolean }[] = []; private responses = new Map(); diff --git a/cli/src/claude/utils/questionAnswerInput.ts b/cli/src/claude/utils/questionAnswerInput.ts new file 
mode 100644 index 000000000..b25f0f486 --- /dev/null +++ b/cli/src/claude/utils/questionAnswerInput.ts @@ -0,0 +1,81 @@ +/** + * Helpers for turning a web-collected answer set into the tool input that + * claude's built-in question tools (AskUserQuestion / request_user_input) + * expect. Shared by the SDK permission handler (canUseTool path) and the PTY + * permission bridge (PreToolUse hook path), which both pre-fill the answers via + * the tool's updatedInput so claude echoes them instead of prompting. + */ + +import { isObject } from "@hapi/protocol"; + +export function isAskUserQuestionToolName(toolName: string): boolean { + return toolName === 'AskUserQuestion' || toolName === 'ask_user_question'; +} + +export function isRequestUserInputToolName(toolName: string): boolean { + return toolName === 'request_user_input'; +} + +export function isQuestionToolName(toolName: string): boolean { + return isAskUserQuestionToolName(toolName) || isRequestUserInputToolName(toolName); +} + +export function buildAskUserQuestionUpdatedInput( + input: unknown, + answers: Record | Record +): Record { + // Normalize incoming answers (web sends Record; + // codex pathway sends nested Record) into a + // single Record shape we can iterate. + const indexedAnswers: Record = {}; + for (const [key, value] of Object.entries(answers)) { + if (Array.isArray(value)) { + indexedAnswers[key] = value; + } else if (value && typeof value === 'object' && 'answers' in value) { + indexedAnswers[key] = value.answers; + } + } + + if (!isObject(input)) { + return { answers: {} }; + } + + // claude code 2.x's built-in AskUserQuestion tool expects + // answers: Record + // and joins multi-select answers with a comma; it then echoes them + // verbatim in the tool result (`mapToolResultToToolResultBlockParam`). + // Sending the index-keyed `string[]` shape we receive from the web + // makes claude's lookup miss every question, producing the empty + // "User has answered your questions: ." 
result that locks the turn. + const questions = Array.isArray(input.questions) ? input.questions : []; + const claudeShapedAnswers: Record = {}; + for (let i = 0; i < questions.length; i += 1) { + const q = questions[i]; + if (!q || typeof q !== 'object') continue; + const questionText = (q as { question?: unknown }).question; + if (typeof questionText !== 'string' || questionText.length === 0) continue; + const selections = indexedAnswers[String(i)]; + if (!selections || selections.length === 0) continue; + claudeShapedAnswers[questionText] = selections.join(','); + } + + return { + ...input, + answers: claudeShapedAnswers + }; +} + +/** + * Build updated input for the request_user_input tool. The answers format is + * nested: { answers: { [id]: { answers: string[] } } }. + */ +export function buildRequestUserInputUpdatedInput(input: unknown, answers: unknown): Record { + if (!isObject(input)) { + return { answers }; + } + + return { + ...input, + answers + }; +} From 278af3e82bd8a12c47ed397a34e4f7b2b5374a32 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 05/11] feat(pty): drive the claude PTY launcher with chat, model, resume and approvals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Launch claude inside a PTY and drive it from the web: chat input, in-place /model and /effort changes (auto-confirming claude's "Switch model?" dialog), and --resume. Tool approvals are bridged to the web instead of the SDK's canUseTool (which a PTY agent doesn't have): a PreToolUse hook forwards each tool call to the runner, which surfaces it in the existing web approval modal (reusing state.requests + the permission RPC — no web changes) and returns allow/deny to claude. - generateHookSettings registers the PreToolUse hook (PTY only) with a generous timeout so the blocking hook survives a slow phone approval. 
- sessionHookForwarder branches on the stdin hook_event_name: PreToolUse posts to a new endpoint and echoes claude's hookSpecificOutput decision on stdout; SessionStart keeps its fire-and-forget behavior. - startHookServer gains a /hook/pre-tool-use endpoint that awaits the decision. - PtyPermissionHandler reuses BasePermissionHandler. Read-only tools auto-allow; bypassPermissions allows all; everything else asks the web. "Allow for session" is honored, including Bash's command-qualified form (Bash(<command>) / Bash(<prefix>:*)). Question tools (AskUserQuestion / request_user_input) are routed to the web too: the picked answers are injected back via the tool's updatedInput so claude echoes them instead of prompting in its TUI. Decisions are always allow/deny (never ask, which would fall back to the TUI prompt) and fail closed on errors. A default-mode PTY session therefore prompts for permission — and asks its questions — in the chat like the SDK path; the explicit YOLO toggle still opts into --yolo. --- .../agent/__tests__/bracketedPaste.test.ts | 28 + cli/src/agent/__tests__/loopBase.test.ts | 76 +++ cli/src/agent/__tests__/runAgentPty.test.ts | 126 +++- .../agent/__tests__/runnerLifecycle.test.ts | 51 ++ cli/src/agent/bracketedPaste.ts | 15 + cli/src/agent/loopBase.ts | 37 +- cli/src/agent/runAgentPty.ts | 56 +- cli/src/agent/runnerLifecycle.ts | 17 +- cli/src/agent/sessionBase.ts | 15 + cli/src/api/apiMachine.ts | 5 +- cli/src/api/apiSession.ts | 120 +++- .../__tests__/claudePtyLauncher.test.ts | 244 ++++++++ cli/src/claude/claudePty.ts | 3 + cli/src/claude/claudePtyLauncher.ts | 577 ++++++++++++++++++ cli/src/claude/loop.ts | 11 +- cli/src/claude/runClaude.ts | 49 +- cli/src/claude/session.ts | 21 + .../claude/utils/ptyPermissionHandler.test.ts | 213 +++++++ cli/src/claude/utils/ptyPermissionHandler.ts | 248 ++++++++ .../claude/utils/sessionHookForwarder.test.ts | 151 +++++ cli/src/claude/utils/sessionHookForwarder.ts | 146 ++++- cli/src/claude/utils/startHookServer.test.ts | 
83 ++- cli/src/claude/utils/startHookServer.ts | 83 +++ cli/src/commands/agentCommandOptions.ts | 6 +- cli/src/commands/claude.ts | 2 +- cli/src/commands/resume.ts | 4 +- cli/src/gemini/runGemini.ts | 6 +- .../common/hooks/generateHookSettings.test.ts | 79 +++ .../common/hooks/generateHookSettings.ts | 44 +- cli/src/modules/common/rpcTypes.ts | 1 + cli/src/opencode/runOpencode.ts | 6 +- cli/src/runner/buildCliArgs.test.ts | 46 ++ cli/src/runner/run.ts | 7 +- cli/src/ui/ink/ResumeSessionPicker.tsx | 4 +- 34 files changed, 2494 insertions(+), 86 deletions(-) create mode 100644 cli/src/agent/__tests__/bracketedPaste.test.ts create mode 100644 cli/src/agent/__tests__/loopBase.test.ts create mode 100644 cli/src/agent/__tests__/runnerLifecycle.test.ts create mode 100644 cli/src/agent/bracketedPaste.ts create mode 100644 cli/src/claude/__tests__/claudePtyLauncher.test.ts create mode 100644 cli/src/claude/claudePtyLauncher.ts create mode 100644 cli/src/claude/utils/ptyPermissionHandler.test.ts create mode 100644 cli/src/claude/utils/ptyPermissionHandler.ts create mode 100644 cli/src/claude/utils/sessionHookForwarder.test.ts create mode 100644 cli/src/modules/common/hooks/generateHookSettings.test.ts diff --git a/cli/src/agent/__tests__/bracketedPaste.test.ts b/cli/src/agent/__tests__/bracketedPaste.test.ts new file mode 100644 index 000000000..53159435b --- /dev/null +++ b/cli/src/agent/__tests__/bracketedPaste.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from 'vitest' +import { bracketPasteIfMultiline } from '../bracketedPaste' + +const START = '\x1b[200~' +const END = '\x1b[201~' + +describe('bracketPasteIfMultiline', () => { + it('leaves a single-line message untouched', () => { + expect(bracketPasteIfMultiline('hello world')).toBe('hello world') + }) + + it('wraps a multiline message in bracketed-paste markers', () => { + expect(bracketPasteIfMultiline('line 1\nline 2')).toBe(`${START}line 1\nline 2${END}`) + }) + + it('wraps an attachment-formatted prompt 
(@path\\n\\ntext)', () => { + expect(bracketPasteIfMultiline('@/tmp/a.png\n\ndescribe this')) + .toBe(`${START}@/tmp/a.png\n\ndescribe this${END}`) + }) + + it('wraps a trailing newline (so it is not interpreted as a premature submit)', () => { + expect(bracketPasteIfMultiline('text\n')).toBe(`${START}text\n${END}`) + }) + + it('leaves an empty string untouched', () => { + expect(bracketPasteIfMultiline('')).toBe('') + }) +}) diff --git a/cli/src/agent/__tests__/loopBase.test.ts b/cli/src/agent/__tests__/loopBase.test.ts new file mode 100644 index 000000000..fcde251e3 --- /dev/null +++ b/cli/src/agent/__tests__/loopBase.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it, vi } from 'vitest' +import { runLocalRemoteLoop, type LoopLauncher, type SessionMode } from '../loopBase' + +// runLocalRemoteLoop only ever touches session.onModeChange, so a minimal fake +// session suffices. +function fakeSession() { + return { onModeChange: vi.fn() } +} + +type Reason = 'switch' | 'exit' + +function launcher(...reasons: Reason[]): LoopLauncher> { + let i = 0 + return vi.fn(async () => reasons[Math.min(i++, reasons.length - 1)]) +} + +async function run(opts: { + startingMode?: SessionMode + runLocal: LoopLauncher> + runRemote: LoopLauncher> + runPty?: LoopLauncher> +}) { + const session = fakeSession() + await runLocalRemoteLoop({ + session: session as never, + startingMode: opts.startingMode, + logTag: 'test', + runLocal: opts.runLocal, + runRemote: opts.runRemote, + runPty: opts.runPty, + }) + return session +} + +describe('runLocalRemoteLoop mode selection', () => { + it('a non-PTY session hands off local→SDK remote even when a runPty launcher is registered', async () => { + // The regression: claude always registers runPty, so a normal local + // session pressing space must still reach the SDK remote launcher, not + // PTY (which is opt-in). 
+ const runLocal = launcher('switch') + const runRemote = launcher('exit') + const runPty = launcher('exit') + + const session = await run({ startingMode: 'local', runLocal, runRemote, runPty }) + + expect(runRemote).toHaveBeenCalledTimes(1) + expect(runPty).not.toHaveBeenCalled() + // The external mode reported is 'remote'. + expect(session.onModeChange).toHaveBeenCalledWith('remote') + }) + + it('defaults (no startingMode) behave as a local→remote session', async () => { + const runLocal = launcher('switch') + const runRemote = launcher('exit') + const runPty = launcher('exit') + + await run({ runLocal, runRemote, runPty }) + + expect(runRemote).toHaveBeenCalledTimes(1) + expect(runPty).not.toHaveBeenCalled() + }) + + it('a PTY session toggles local↔pty and never uses the SDK remote launcher', async () => { + // pty → (switch) local → (switch) pty → (exit) + const runPty = launcher('switch', 'exit') + const runLocal = launcher('switch') + const runRemote = launcher('exit') + + const session = await run({ startingMode: 'pty', runLocal, runRemote, runPty }) + + expect(runPty).toHaveBeenCalledTimes(2) + expect(runRemote).not.toHaveBeenCalled() + // PTY is reported to the hub/UI as 'remote'. + expect(session.onModeChange).toHaveBeenCalledWith('remote') + }) +}) diff --git a/cli/src/agent/__tests__/runAgentPty.test.ts b/cli/src/agent/__tests__/runAgentPty.test.ts index 74455b921..74779baa8 100644 --- a/cli/src/agent/__tests__/runAgentPty.test.ts +++ b/cli/src/agent/__tests__/runAgentPty.test.ts @@ -4,13 +4,22 @@ const harness = vi.hoisted(() => { let _isRunning = true let _onExit: ((code: number | null, signal: string | null) => void) | null = null let _onData: ((data: string) => void) | null = null + let _onError: ((error: Error) => void) | null = null let _echo = true + let _spawnError: Error | null = null const m = { get isRunning() { return _isRunning }, spawn: vi.fn((opts: Record) => { _onExit = (opts.onExit as typeof _onExit) ?? 
null _onData = (opts.onData as typeof _onData) ?? null + _onError = (opts.onError as typeof _onError) ?? null + // Simulate the manager reporting a spawn failure: onError fires and + // the process never enters the running state. + if (_spawnError) { + _isRunning = false + _onError?.(_spawnError) + } }), // By default simulate the agent echoing keystrokes back as output so the // echo-confirm in runAgentPty proceeds on the first attempt. @@ -24,13 +33,14 @@ const harness = vi.hoisted(() => { return { setRunning(v: boolean) { _isRunning = v }, setEcho(v: boolean) { _echo = v }, + setSpawnError(err: Error | null) { _spawnError = err }, triggerExit(code: number | null = 0, signal: string | null = null) { _isRunning = false _onExit?.(code, signal) }, triggerData(data: string) { _onData?.(data) }, reset() { - _isRunning = true; _onExit = null; _onData = null; _echo = true + _isRunning = true; _onExit = null; _onData = null; _onError = null; _echo = true; _spawnError = null m.spawn.mockClear(); m.write.mockClear(); m.kill.mockClear(); m.resize.mockClear() }, m, @@ -83,6 +93,29 @@ async function reachReady() { describe('runAgentPty', () => { afterEach(() => { harness.reset() }) + it('rejects (does not silently return) when the PTY fails to spawn', async () => { + // A real failure such as `claude` not installed or the terminal failing + // to attach: the manager reports onError and never enters running state. + // runAgentPty must throw so the caller surfaces the error instead of + // treating a never-started PTY as a clean exit and respawning. + harness.setSpawnError(new Error('claude: command not found')) + const nextMessage = vi.fn() + const onReady = vi.fn() + + await expect(runAgentPty(makeOpts({ nextMessage, onReady }))) + .rejects.toThrow('claude: command not found') + + // It bailed before reaching the message loop / ready callback. 
+ expect(nextMessage).not.toHaveBeenCalled() + expect(onReady).not.toHaveBeenCalled() + }) + + it('rejects with a generic error if spawn fails without an onError detail', async () => { + harness.setRunning(false) // not running, but no onError fired + const promise = runAgentPty(makeOpts({ command: 'mycli', nextMessage: vi.fn() })) + await expect(promise).rejects.toThrow('Failed to spawn mycli PTY') + }) + it('spawns with the given command/args/cwd and calls onReady', async () => { const msg = deferred<{ message: string } | null>() const onReady = vi.fn() @@ -94,12 +127,31 @@ describe('runAgentPty', () => { expect(spawnArgs.command).toBe('mycli') expect(spawnArgs.args).toEqual(['--foo']) expect(spawnArgs.cwd).toBe('/work') - expect(onReady).toHaveBeenCalled() + // onReady fires only once the prompt is actually ready, not right after + // spawn — so it has NOT been called yet here. + expect(onReady).not.toHaveBeenCalled() await reachReady() + expect(onReady).toHaveBeenCalled() msg.resolve(null) await promise }) + it('rejects (and never calls onReady) if the PTY exits before becoming ready', async () => { + // Spawn succeeds, but the agent exits before rendering a usable prompt + // (bad config, invalid args, auth failure). This must be treated as a + // failure — not a ready session — so the caller's give-up breaker counts + // it instead of respawning forever. 
+ const onReady = vi.fn() + const nextMessage = vi.fn() + const promise = runAgentPty(makeOpts({ command: 'mycli', onReady, nextMessage })) + await tick(0) + harness.triggerExit(1) // exits before any ready output + + await expect(promise).rejects.toThrow('mycli PTY exited before becoming ready') + expect(onReady).not.toHaveBeenCalled() + expect(nextMessage).not.toHaveBeenCalled() + }) + it('injects envVars/extraEnv into the spawn env only (not process.env)', async () => { const msg = deferred<{ message: string } | null>() const opts = makeOpts({ @@ -172,6 +224,76 @@ describe('runAgentPty', () => { await promise }) + it('fires onMessageSubmitted after the write completes, once per real message (not for /clear)', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const msg3 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + .mockImplementationOnce(() => msg3.promise) + const onMessageSubmitted = vi.fn() + const promise = runAgentPty(makeOpts({ nextMessage, onMessageSubmitted })) + await reachReady() + + // /clear is dropped before the submit path → no post-submit callback, + // so a first-message verifier armed here would never fire on a no-op. + msg1.resolve({ message: '/clear' }) + await tick(60) + expect(onMessageSubmitted).not.toHaveBeenCalled() + + // A real message fires the callback exactly once, AFTER text + CR were + // written — the contract that stops a verifier racing the submit. 
+ msg2.resolve({ message: 'hello' }) + await tick(300) + expect(onMessageSubmitted).toHaveBeenCalledTimes(1) + expect(onMessageSubmitted).toHaveBeenCalledWith('hello') + const lastWriteOrder = Math.max(...harness.m.write.mock.invocationCallOrder) + expect(onMessageSubmitted.mock.invocationCallOrder[0]).toBeGreaterThan(lastWriteOrder) + + msg3.resolve(null) + await promise + }) + + it('bracketed-paste wraps a multiline message so only the final CR submits', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + const promise = runAgentPty(makeOpts({ nextMessage })) + await reachReady() + // e.g. an attachment-formatted prompt or a batched queue flush. + msg1.resolve({ message: '@/tmp/a.png\n\ndescribe this' }) + await tick(300) + // The whole block is written once, bracketed — embedded newlines stay + // literal instead of each acting as Enter. + expect(harness.m.write).toHaveBeenCalledWith('\x1b[200~@/tmp/a.png\n\ndescribe this\x1b[201~') + // The raw (unbracketed) multiline text must never be written. + expect(harness.m.write).not.toHaveBeenCalledWith('@/tmp/a.png\n\ndescribe this') + // Exactly one CR submits the whole paste. 
+ const crWrites = harness.m.write.mock.calls.filter((c) => c[0] === '\r').length + expect(crWrites).toBe(1) + msg2.resolve(null) + await promise + }) + + it('does not bracket a single-line message', async () => { + const msg1 = deferred<{ message: string } | null>() + const msg2 = deferred<{ message: string } | null>() + const nextMessage = vi.fn() + .mockImplementationOnce(() => msg1.promise) + .mockImplementationOnce(() => msg2.promise) + const promise = runAgentPty(makeOpts({ nextMessage })) + await reachReady() + msg1.resolve({ message: 'hello world' }) + await tick(300) + expect(harness.m.write).toHaveBeenCalledWith('hello world') + expect(harness.m.write).not.toHaveBeenCalledWith('\x1b[200~hello world\x1b[201~') + msg2.resolve(null) + await promise + }) + it('retries the write when the agent does not echo (stdin not ready yet)', async () => { const msg1 = deferred<{ message: string } | null>() const msg2 = deferred<{ message: string } | null>() diff --git a/cli/src/agent/__tests__/runnerLifecycle.test.ts b/cli/src/agent/__tests__/runnerLifecycle.test.ts new file mode 100644 index 000000000..23aaab984 --- /dev/null +++ b/cli/src/agent/__tests__/runnerLifecycle.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from 'vitest' +import { setControlledByUser } from '../runnerLifecycle' +import type { ApiSessionClient } from '@/api/apiSession' + +// Minimal stand-in that applies the update handlers and records the result, so +// we can assert how startingMode/controlledByUser evolve across mode changes. 
+function fakeSession() { + const state = { agentState: {} as Record, metadata: {} as Record } + const session = { + updateAgentState: (h: (s: Record) => Record) => { state.agentState = h(state.agentState) }, + updateMetadata: (h: (m: Record) => Record) => { state.metadata = h(state.metadata) }, + } + return { session: session as unknown as ApiSessionClient, state } +} + +describe('setControlledByUser', () => { + it('keeps a PTY launch identity across a pty → local → pty handoff', () => { + const { session, state } = fakeSession() + + // Launch as PTY. + setControlledByUser(session, 'pty') + expect(state.metadata.startingMode).toBe('pty') + expect(state.agentState.startingMode).toBe('pty') + expect(state.agentState.controlledByUser).toBe(false) + + // Hand off to local — user is now driving locally, but the session is + // still PTY-backed so its launch identity must not change. + setControlledByUser(session, 'local') + expect(state.metadata.startingMode).toBe('pty') + expect(state.agentState.controlledByUser).toBe(true) + + // Hand back to PTY (reported as external mode 'remote'): the terminal + // toggle must remain available, i.e. startingMode stays 'pty'. 
+ setControlledByUser(session, 'remote') + expect(state.metadata.startingMode).toBe('pty') + expect(state.agentState.startingMode).toBe('pty') + expect(state.agentState.controlledByUser).toBe(false) + }) + + it('tracks the collaboration mode for a non-PTY session (unchanged behavior)', () => { + const { session, state } = fakeSession() + + setControlledByUser(session, 'remote') + expect(state.metadata.startingMode).toBe('remote') + expect(state.agentState.controlledByUser).toBe(false) + + setControlledByUser(session, 'local') + expect(state.metadata.startingMode).toBe('local') + expect(state.agentState.controlledByUser).toBe(true) + }) +}) diff --git a/cli/src/agent/bracketedPaste.ts b/cli/src/agent/bracketedPaste.ts new file mode 100644 index 000000000..5aca5936f --- /dev/null +++ b/cli/src/agent/bracketedPaste.ts @@ -0,0 +1,15 @@ +// Bracketed-paste (DECSET 2004) framing for PTY input. +// +// Interactive TUIs that enable bracketed-paste mode (claude does — its init +// emits ESC[?2004h) treat the bytes between these markers as a single literal +// paste, so embedded newlines are inserted as text instead of being acted on +// as Enter. A multiline message written raw would otherwise submit its first +// line on its own and run the rest as separate prompts/slash-commands. Wrap +// such a message before writing it; a trailing CR (sent separately by the +// caller) is what actually submits the whole block. +const PASTE_START = '\x1b[200~' +const PASTE_END = '\x1b[201~' + +export function bracketPasteIfMultiline(text: string): string { + return text.includes('\n') ? 
`${PASTE_START}${text}${PASTE_END}` : text +} diff --git a/cli/src/agent/loopBase.ts b/cli/src/agent/loopBase.ts index 6e2c35bd2..65e28916d 100644 --- a/cli/src/agent/loopBase.ts +++ b/cli/src/agent/loopBase.ts @@ -3,12 +3,15 @@ import type { AgentSessionBase } from './sessionBase'; export type LoopLauncher = (session: TSession) => Promise<'switch' | 'exit'>; +export type SessionMode = 'local' | 'remote' | 'pty'; + export async function runLocalRemoteSession>(opts: { session: TSession; - startingMode?: 'local' | 'remote'; + startingMode?: SessionMode; logTag: string; runLocal: LoopLauncher; runRemote: LoopLauncher; + runPty?: LoopLauncher; onSessionReady?: (session: TSession) => void; }): Promise { if (opts.onSessionReady) { @@ -20,18 +23,20 @@ export async function runLocalRemoteSession>(opts: { session: TSession; - startingMode?: 'local' | 'remote'; + startingMode?: SessionMode; logTag: string; runLocal: LoopLauncher; runRemote: LoopLauncher; + runPty?: LoopLauncher; }): Promise { - let mode: 'local' | 'remote' = opts.startingMode ?? 'local'; + let mode: SessionMode = opts.startingMode ?? 'local'; while (true) { logger.debug(`[${opts.logTag}] Iteration with mode: ${mode}`); @@ -42,8 +47,13 @@ export async function runLocalRemoteLoop> return; } - mode = 'remote'; - opts.session.onModeChange(mode); + // Leaving local mode returns to this session's remote variant. PTY + // is OPT-IN: only a session that started in PTY mode hands off to the + // PTY launcher. A normal local/remote session must still use the SDK + // remote launcher even though claude always registers a runPty + // launcher (so `opts.runPty` is truthy for every claude session). + mode = opts.startingMode === 'pty' && opts.runPty ? 'pty' : 'remote'; + opts.session.onModeChange(mode === 'pty' ? 
'remote' : mode); continue; } @@ -57,5 +67,20 @@ export async function runLocalRemoteLoop> opts.session.onModeChange(mode); continue; } + + if (mode === 'pty') { + if (!opts.runPty) { + throw new Error('PTY mode selected but no runPty launcher provided'); + } + + const reason = await opts.runPty(opts.session); + if (reason === 'exit') { + return; + } + + mode = 'local'; + opts.session.onModeChange(mode); + continue; + } } } diff --git a/cli/src/agent/runAgentPty.ts b/cli/src/agent/runAgentPty.ts index bc00647ba..ad880f0b5 100644 --- a/cli/src/agent/runAgentPty.ts +++ b/cli/src/agent/runAgentPty.ts @@ -1,5 +1,6 @@ import { AgentPtyManager } from "@/agent/AgentPtyManager" import { parseSpecialCommand } from "@/parsers/specialCommands" +import { bracketPasteIfMultiline } from "@/agent/bracketedPaste" import { logger } from "@/lib" /** @@ -65,6 +66,16 @@ export type RunAgentPtyOpts = { */ onThinkingChange?: (thinking: boolean) => void onExit?: (code: number | null) => void + /** + * Fired after a message has been written to the PTY (text + CR) by the + * driver's submit path. Callers that want to verify/repair delivery of a + * message must hook here rather than at nextMessage time: nextMessage + * returns BEFORE waitForInputReady + submitMessage run, so a verifier + * started there can race the driver's own submit (and on a slow resume, + * fire its repair keystrokes before the message was ever sent — duplicating + * it). This hook guarantees the submit already happened. + */ + onMessageSubmitted?: (message: string) => void | Promise /** * Called once the PTY is spawned with controls for the live terminal. The * agent-terminal viewer uses `resize` to repaint the TUI on (re)subscribe so @@ -180,10 +191,15 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { // yet, so retry — this is what was dropping the first message. CR is sent // separately so the text isn't submitted before it's buffered. 
const submitMessage = async (message: string): Promise => { + // Multiline web messages (batched queue flush, attachment prompts, + // multiline composer input) must be bracketed-pasted so their embedded + // newlines stay literal instead of each submitting a partial line. The + // trailing CR sent separately below is what submits the whole block. + const payload = bracketPasteIfMultiline(message) let echoed = false for (let attempt = 0; attempt < 3 && !echoed; attempt++) { const before = lastOutputAt - manager.write(message) + manager.write(payload) const waitStart = Date.now() while (Date.now() - waitStart < 700) { if (signal?.aborted || !manager.isRunning) return @@ -206,6 +222,10 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { signal?.addEventListener('abort', abortHandler, { once: true }) try { + // Captured so a spawn failure can be re-thrown (not swallowed): the PTY + // manager reports failure via onError + isRunning=false rather than a + // throw from spawn(). + let spawnError: Error | null = null manager.spawn({ command: opts.command, args: opts.args, @@ -248,13 +268,17 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { opts.onExit?.(code) }, onError: (error) => { + spawnError = error logger.debug(`${debugPrefix} PTY error: ${error.message}`, error) }, }) if (!manager.isRunning) { - logger.debug(`${debugPrefix} Failed to spawn ${opts.command} PTY`) - return + // Surface the failure instead of returning as if it succeeded — + // otherwise the caller (e.g. ClaudePtyLauncher) treats a never-started + // PTY as a clean exit and silently respawns, hiding real errors like + // `claude` not being installed or the terminal failing to attach. + throw spawnError ?? 
new Error(`Failed to spawn ${opts.command} PTY`) } opts.registerControls?.({ @@ -272,13 +296,26 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { } }) - opts.onReady() - - // Spawn the agent up-front and wait until its prompt is ready BEFORE any - // message arrives, so the first user message is processed immediately - // instead of being consumed as the spawn trigger. + // Wait until the prompt is actually usable BEFORE any message arrives, so + // the first user message is processed immediately instead of being + // consumed as the spawn trigger. await waitForInputReady() + // A successful spawn() does not mean the agent reached a working prompt: + // it can spawn and then exit before rendering one (bad config, invalid + // args, auth failure). Distinguish that from a healthy start so onReady() + // — which the caller uses to mark the session "ready" and to reset its + // launch-failure breaker — only fires for a genuinely usable prompt. A + // user abort during startup is a clean stop, not a failure. + if (signal?.aborted) { + return + } + if (!manager.isRunning) { + throw new Error(`${opts.command} PTY exited before becoming ready`) + } + + opts.onReady() + while (manager.isRunning) { if (signal?.aborted) { logger.debug(`${debugPrefix} Aborted`) @@ -311,6 +348,9 @@ export async function runAgentPty(opts: RunAgentPtyOpts): Promise { if (process.env.DEBUG_PTY) logger.debug(`${debugPrefix} write(loop): ${next.message}`) await submitMessage(next.message) + // The message has now been written to the PTY; let a caller verify it + // actually landed (and repair it) without racing this submit path. + await opts.onMessageSubmitted?.(next.message) // The agent is now working on this input — show "thinking" right away // (a busy marker reinforces it; the idle marker clears it when done). 
setThinking(true) diff --git a/cli/src/agent/runnerLifecycle.ts b/cli/src/agent/runnerLifecycle.ts index 0ae8faa9e..9e251c6a4 100644 --- a/cli/src/agent/runnerLifecycle.ts +++ b/cli/src/agent/runnerLifecycle.ts @@ -135,10 +135,23 @@ export function createRunnerLifecycle(options: RunnerLifecycleOptions): RunnerLi } } -export function setControlledByUser(session: ApiSessionClient, mode: 'local' | 'remote'): void { +export function setControlledByUser(session: ApiSessionClient, mode: 'local' | 'remote' | 'pty'): void { session.updateAgentState((currentState) => ({ ...currentState, - controlledByUser: mode === 'local' + controlledByUser: mode === 'local', + // Persist the launch mode so reopen/resume can restore it. 'pty' is an + // immutable launch identity (the web gates the agent-terminal toggle on + // it), so once set it must survive later local/remote collaboration-mode + // changes — otherwise a pty→local→pty handoff reports external mode + // 'remote' and would rewrite it, hiding the terminal toggle for a session + // whose PTY is still running. + startingMode: currentState.startingMode === 'pty' ? 'pty' : mode + })) + // Also surface it in metadata so the web can gate the agent-terminal toggle + // (only PTY sessions have an agent PTY to view). + session.updateMetadata((metadata) => ({ + ...metadata, + startingMode: metadata.startingMode === 'pty' ? 'pty' : mode })) } diff --git a/cli/src/agent/sessionBase.ts b/cli/src/agent/sessionBase.ts index a0f47b46d..58d9072b8 100644 --- a/cli/src/agent/sessionBase.ts +++ b/cli/src/agent/sessionBase.ts @@ -131,6 +131,21 @@ export class AgentSessionBase { } }; + private _killHandler: (() => void) | null = null; + + // Graceful-shutdown hook shared by all flavors. The active launcher + // registers a teardown handler (e.g. 
abort the PTY) via setKillHandler; the + // runner lifecycle's onBeforeClose calls kill() before process.exit so the + // resource is released through the normal finally path rather than relying on + // last-resort reapers. No-op when no handler is registered (e.g. local mode). + setKillHandler = (handler: () => void): void => { + this._killHandler = handler; + }; + + kill = (): void => { + this._killHandler?.(); + }; + protected getKeepAliveRuntime(): { permissionMode?: SessionPermissionMode diff --git a/cli/src/api/apiMachine.ts b/cli/src/api/apiMachine.ts index 61f15fe17..a3c633cd6 100644 --- a/cli/src/api/apiMachine.ts +++ b/cli/src/api/apiMachine.ts @@ -249,7 +249,7 @@ export class ApiMachineClient { setRPCHandlers({ spawnSession, stopSession, requestShutdown }: MachineRpcHandlers): void { this.rpcHandlerManager.registerHandler(RPC_METHODS.SpawnHappySession, async (params: any) => { - const { directory, sessionId, resumeSessionId, machineId, approvedNewDirectoryCreation, agent, model, effort, modelReasoningEffort, yolo, permissionMode, token, sessionType, worktreeName } = params || {} + const { directory, sessionId, resumeSessionId, machineId, approvedNewDirectoryCreation, agent, model, effort, modelReasoningEffort, yolo, permissionMode, token, sessionType, worktreeName, startingMode } = params || {} if (!directory) { throw new Error('Directory is required') @@ -274,7 +274,8 @@ export class ApiMachineClient { permissionMode, token, sessionType, - worktreeName + worktreeName, + startingMode }) switch (result.type) { diff --git a/cli/src/api/apiSession.ts b/cli/src/api/apiSession.ts index d187eba2b..b0e6f60fa 100644 --- a/cli/src/api/apiSession.ts +++ b/cli/src/api/apiSession.ts @@ -11,8 +11,10 @@ import type { RawJSONLines } from '@/claude/types' import { configuration } from '@/configuration' import { AGENT_MESSAGE_PAYLOAD_TYPE } from "@hapi/protocol" import type { SessionEndReason } from '@hapi/protocol' -import type { ClientToServerEvents, 
ServerToClientEvents, Update } from '@hapi/protocol' +import type { ClientToServerEvents, ServerToClientEvents, TerminalOutputPayload, Update } from '@hapi/protocol' import { + AgentTerminalRefreshPayloadSchema, + AgentTerminalResizePayloadSchema, TerminalClosePayloadSchema, TerminalOpenPayloadSchema, TerminalResizePayloadSchema, @@ -48,6 +50,10 @@ const SYSTEM_INJECTION_PREFIXES = [ '', ] +// Cap for the runner-side in-memory agent-terminal screen buffer (matches the +// hub's scrollback ring). The tail always holds the latest full-screen redraw. +const AGENT_TERMINAL_LOCAL_BUFFER_BYTES = 256 * 1024 + function extractRawUserTextContent(content: unknown): string | null { if (typeof content === 'string') { return content @@ -82,7 +88,11 @@ function extractRawUserTextContent(content: unknown): string | null { */ export function isExternalUserMessage(body: RawJSONLines): body is Extract { if (body.type !== 'user') return false - const text = extractRawUserTextContent(body.message.content) + // Defensive: a malformed/minimal user line may lack `.message`. Treat it as + // a non-external (forwardable) message rather than throwing. + const message = (body as { message?: { content?: unknown } }).message + if (!message || typeof message !== 'object') return false + const text = extractRawUserTextContent(message.content) if (text === null) return false if (body.isSidechain === true) return false if (body.isMeta === true) return false @@ -169,6 +179,18 @@ export class ApiSessionClient extends EventEmitter { private hasConnectedOnce = false readonly rpcHandlerManager: RpcHandlerManager private readonly terminalManager: TerminalManager + private agentTerminalResize: ((cols: number, rows: number) => void) | null = null + private lastAgentTerminalSize: { cols: number; rows: number } | null = null + // The agent PTY emits a high-frequency byte stream (spinners ~10Hz, full + // redraws). 
Only forward it to the hub while a viewer is actually subscribed + // to the agent terminal — otherwise the hub relays it to an empty room and + // buffers it for no one. Enabled on (re)subscribe, disabled when the last + // viewer leaves. Default false: chat-only users never open the raw terminal, + // so nothing is streamed for them. + private agentTerminalActive = false + // In-memory copy of the recent agent-PTY screen, captured regardless of the + // network gate so a subscribing viewer can be replayed the current screen. + private agentTerminalLocalBuffer = '' private agentStateLock = new AsyncLock() private metadataLock = new AsyncLock() @@ -282,6 +304,29 @@ export class ApiSessionClient extends EventEmitter { this.terminalManager.close(payload.terminalId) })) + // Read-only agent-terminal viewer: resize the agent PTY to the viewer's + // size, and force a repaint when a viewer (re)subscribes so it sees the + // live screen instead of a stale/black buffer replay. + this.socket.on('agent-terminal:resize', handleTerminalEvent(AgentTerminalResizePayloadSchema, (payload) => { + this.lastAgentTerminalSize = { cols: payload.cols, rows: payload.rows } + this.agentTerminalResize?.(payload.cols, payload.rows) + })) + + this.socket.on('agent-terminal:refresh', handleTerminalEvent(AgentTerminalRefreshPayloadSchema, () => { + // A viewer is subscribed → start streaming (enable BEFORE replay so + // the bytes flow), replay the locally-captured current screen (works + // even for resumed sessions that don't repaint), then nudge a repaint + // as a belt-and-suspenders for any truncated head sequence. + this.agentTerminalActive = true + this.emitAgentTerminalLocalReplay() + this.forceAgentTerminalRepaint() + })) + + this.socket.on('agent-terminal:idle', handleTerminalEvent(AgentTerminalRefreshPayloadSchema, () => { + // Last viewer left — stop streaming the PTY to the hub. 
+ this.agentTerminalActive = false + })) + this.socket.on('update', (data: Update, ack?: (response: { removed: boolean }) => void) => { try { if (!data.body) return @@ -565,6 +610,77 @@ export class ApiSessionClient extends EventEmitter { }) } + emitAgentTerminalOutput(data: string): void { + // Always capture the screen locally (in-memory, no network) so a late + // subscriber can be replayed the CURRENT screen without depending on a + // TUI repaint — resumed (`--resume`) sessions don't reliably redraw on + // SIGWINCH, which is what caused the reopen black screen. + this.agentTerminalLocalBuffer = + (this.agentTerminalLocalBuffer + data).slice(-AGENT_TERMINAL_LOCAL_BUFFER_BYTES) + // Gate only the NETWORK forward: with no viewer the hub would relay this + // high-frequency byte stream (spinners ~10Hz) to an empty room. On + // subscribe, 'agent-terminal:refresh' flips this on and replays the local + // buffer (see the handler), so nothing is lost. + if (!this.agentTerminalActive) return + const payload: TerminalOutputPayload = { + sessionId: this.sessionId, + terminalId: 'agent', + data + } + this.socket.emit('agent-terminal:output', payload) + } + + private emitAgentTerminalLocalReplay(): void { + if (!this.agentTerminalLocalBuffer) return + this.socket.emit('agent-terminal:output', { + sessionId: this.sessionId, + terminalId: 'agent', + data: this.agentTerminalLocalBuffer + }) + } + + /** + * Tell the hub to drop its scrollback buffer for this session. Called when a + * fresh agent PTY spawns (e.g. after archive→restart) so a re-subscribing + * viewer replays only the NEW session's screen, not a stale mix of the old + * one's output and its alt-screen-exit. + */ + resetAgentTerminal(): void { + // New PTY → drop the previous screen from both the hub buffer and our + // local copy so neither replays stale output. 
+ this.agentTerminalLocalBuffer = '' + this.socket.emit('agent-terminal:reset', { sessionId: this.sessionId }) + } + + /** + * Register (or clear) the live agent-PTY controls. The PTY launcher calls + * this once the agent is spawned so the agent-terminal viewer can resize / + * repaint it. Passing null (on exit) makes the controls no-ops. + */ + setAgentTerminalControls(controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void } | null): void { + this.agentTerminalResize = controls?.resize ?? null + } + + // Force the agent TUI to repaint its current screen. A plain same-size resize + // is a no-op (the kernel only sends SIGWINCH on an actual size change), so we + // nudge one row smaller then back — a single transient frame, imperceptible — + // which guarantees the TUI redraws the full current screen for a freshly + // (re)subscribed viewer. + private forceAgentTerminalRepaint(): void { + const resize = this.agentTerminalResize + if (!resize) return + const initial = this.lastAgentTerminalSize ?? { cols: 80, rows: 24 } + resize(initial.cols, Math.max(1, initial.rows - 1)) + // Restore to the LATEST known size (a concurrent viewer resize may have + // updated it in the meantime) so the nudge never shrinks the final view. + setTimeout(() => { + const r = this.agentTerminalResize + if (!r) return + const cur = this.lastAgentTerminalSize ?? 
initial + r(cur.cols, cur.rows) + }, 30) + } + keepAlive( thinking: boolean, mode: 'local' | 'remote', diff --git a/cli/src/claude/__tests__/claudePtyLauncher.test.ts b/cli/src/claude/__tests__/claudePtyLauncher.test.ts new file mode 100644 index 000000000..91ebb9793 --- /dev/null +++ b/cli/src/claude/__tests__/claudePtyLauncher.test.ts @@ -0,0 +1,244 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +const harness = vi.hoisted(() => ({ + scannerOnMessage: null as ((message: Record) => void) | null, + scannerOpts: null as Record | null, + cleanupCalls: 0, + foundCallbacks: [] as Array<(sessionId: string) => void>, +})) + +// claudePty is never reached because the mocked base skips the run loop +// (exitReason starts as 'exit'); we only exercise the scanner wiring. +vi.mock('../claudePty', () => ({ + claudePty: vi.fn(), +})) + +vi.mock('../utils/sessionScanner', () => ({ + createSessionScanner: async (opts: { onMessage: (message: Record) => void }) => { + harness.scannerOnMessage = opts.onMessage + harness.scannerOpts = opts + return { + cleanup: async () => { harness.cleanupCalls += 1 }, + onNewSession: () => {}, + } + }, +})) + +vi.mock('@/ui/ink/RemoteModeDisplay', () => ({ + RemoteModeDisplay: () => null, +})) + +vi.mock('@/ui/logger', () => ({ + logger: { debug: vi.fn() }, +})) + +// Minimal RemoteLauncherBase stand-in: start() drives runMainLoop once, and +// exitReason='exit' means the while-loop body (the actual PTY spawn) is skipped. 
+vi.mock('@/modules/common/remote/RemoteLauncherBase', () => ({ + RemoteLauncherBase: class { + protected exitReason: string | null = 'exit' + protected hasTTY = false + protected messageBuffer = { addMessage: () => {} } + constructor(_logPath?: string) {} + protected setupAbortHandlers() {} + protected clearAbortHandlers() {} + protected async requestExit() {} + async start(): Promise<'exit'> { + await (this as unknown as { runMainLoop: () => Promise }).runMainLoop() + return 'exit' + } + }, +})) + +import { claudePtyLauncher, decideRespawn, lastUserPromptText, transcriptConfirmsDelivery } from '../claudePtyLauncher' + +describe('decideRespawn', () => { + it('recovers (respawns at once) when the launch reached a ready prompt', () => { + // A mid-session crash after the prompt was ready — counter is irrelevant. + expect(decideRespawn(true, 0, 3)).toBe('recover') + expect(decideRespawn(true, 99, 3)).toBe('recover') + }) + + it('backs off while immediate failures are under the cap', () => { + expect(decideRespawn(false, 1, 3)).toBe('backoff') + expect(decideRespawn(false, 2, 3)).toBe('backoff') + }) + + it('gives up once immediate failures reach the cap', () => { + // Deterministic failure (e.g. claude not installed) — end the session. 
+ expect(decideRespawn(false, 3, 3)).toBe('give-up') + expect(decideRespawn(false, 4, 3)).toBe('give-up') + }) +}) + +describe('transcriptConfirmsDelivery', () => { + const userLine = (text: string) => JSON.stringify({ type: 'user', message: { content: text } }) + const assistantLine = (text: string) => + JSON.stringify({ type: 'assistant', message: { content: [{ type: 'text', text }] } }) + + it('confirms when the just-submitted message is the last prompt', () => { + const transcript = [userLine('first'), assistantLine('ok'), userLine('continue the task')].join('\n') + expect(transcriptConfirmsDelivery(transcript, 'continue the task')).toBe(true) + }) + + it('does NOT confirm on a stale substring of the prior prompt (resume false-positive guard)', () => { + // Prior turn typed "please continue the task"; on --resume the new message + // "continue" has NOT landed yet, so the last prompt is still the prior one. + // A substring check would wrongly confirm and suppress the re-type. + const transcript = [userLine('please continue the task'), assistantLine('done')].join('\n') + expect(transcriptConfirmsDelivery(transcript, 'continue')).toBe(false) + }) + + it('ignores trailing whitespace differences', () => { + const transcript = userLine('hello world\n') + expect(transcriptConfirmsDelivery(transcript, 'hello world')).toBe(true) + }) + + it('falls back to whole-file match when no user prompt parses', () => { + expect(transcriptConfirmsDelivery('not json\n{"type":"assistant"}', 'assistant')).toBe(true) + expect(transcriptConfirmsDelivery('', 'anything')).toBe(false) + }) +}) + +describe('lastUserPromptText', () => { + const userLine = (text: string) => JSON.stringify({ type: 'user', message: { content: text } }) + const userBlocks = (text: string) => + JSON.stringify({ type: 'user', message: { content: [{ type: 'text', text }] } }) + const assistantLine = (text: string) => + JSON.stringify({ type: 'assistant', message: { content: [{ type: 'text', text }] } }) + const 
toolResultLine = JSON.stringify({ + type: 'user', + message: { content: [{ type: 'tool_result', content: 'PINGA file output' }] }, + }) + + it('returns the most recent typed prompt, ignoring assistant turns', () => { + const transcript = [userLine('PINGA'), assistantLine('ok'), userLine('PONGB')].join('\n') + expect(lastUserPromptText(transcript)).toBe('PONGB') + }) + + it('does not match stale pre-resume history (the false-positive guard)', () => { + // Replayed history contains PINGA; the just-submitted prompt is PONGB. + const transcript = [userLine('PINGA'), assistantLine('A'), userBlocks('PONGB')].join('\n') + const result = lastUserPromptText(transcript) + expect(result).toBe('PONGB') + // The whole-file substring would have matched PINGA; the anchored check must not. + expect(result?.includes('PINGA')).toBe(false) + }) + + it('skips tool_result user entries (no typed text)', () => { + const transcript = [userLine('PINGA'), toolResultLine].join('\n') + expect(lastUserPromptText(transcript)).toBe('PINGA') + }) + + it('returns null when there is no parseable user prompt', () => { + expect(lastUserPromptText('')).toBeNull() + expect(lastUserPromptText('not json\n{"type":"assistant"}')).toBeNull() + expect(lastUserPromptText(toolResultLine)).toBeNull() + }) +}) + +function createSessionStub() { + const sentMessages: Array> = [] + return { + session: { + sessionId: 'pty-session', + path: '/tmp/pty-test', + startedBy: 'terminal' as const, + startingMode: 'remote' as const, + claudeEnvVars: {}, + claudeArgs: [], + hookSettingsPath: '/tmp/hooks/pty.json', + consumeOneTimeFlags: () => {}, + setKillHandler: (_handler: () => void) => {}, + setConfigChangeHandler: (_handler: (() => void) | null) => {}, + getModel: () => null, + getEffort: () => undefined, + addSessionFoundCallback: (cb: (sessionId: string) => void) => { harness.foundCallbacks.push(cb) }, + removeSessionFoundCallback: () => {}, + client: { + sendClaudeSessionMessage: (msg: Record) => { 
sentMessages.push(msg) }, + sendSessionEvent: () => {}, + emitAgentTerminalOutput: () => {}, + setAgentTerminalControls: () => {}, + rpcHandlerManager: { registerHandler: () => {} }, + }, + }, + sentMessages, + } +} + +describe('claudePtyLauncher structured message forwarding', () => { + afterEach(() => { + harness.scannerOnMessage = null + harness.scannerOpts = null + harness.cleanupCalls = 0 + harness.foundCallbacks = [] + }) + + it('creates the scanner with the session id and working directory', async () => { + const { session } = createSessionStub() + await claudePtyLauncher(session as never) + + expect(harness.scannerOpts).toMatchObject({ + sessionId: 'pty-session', + workingDirectory: '/tmp/pty-test', + }) + }) + + it('registers a session-found callback and cleans up the scanner', async () => { + const { session } = createSessionStub() + await claudePtyLauncher(session as never) + + expect(harness.foundCallbacks).toHaveLength(1) + expect(harness.cleanupCalls).toBe(1) + }) + + it('registers a kill handler so the lifecycle can tear down the PTY on archive', async () => { + const { session } = createSessionStub() + let killHandler: (() => void) | undefined + session.setKillHandler = (h: () => void) => { killHandler = h } + await claudePtyLauncher(session as never) + // onBeforeClose calls session.kill() → this handler → launcher.abort(). 
+ expect(killHandler).toBeTypeOf('function') + }) + + it('filters out summary messages', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'summary', leafUuid: '1' }) + + expect(sentMessages).toHaveLength(0) + }) + + it('filters out invisible system messages', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'system', subtype: 'init', uuid: '1' }) + harness.scannerOnMessage!({ type: 'system', subtype: 'stop_hook_summary', uuid: '2' }) + harness.scannerOnMessage!({ type: 'system', uuid: '3' }) + + expect(sentMessages).toHaveLength(0) + }) + + it('filters out isMeta and isCompactSummary messages', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'user', isMeta: true, uuid: '1' }) + harness.scannerOnMessage!({ type: 'assistant', isCompactSummary: true, uuid: '2' }) + + expect(sentMessages).toHaveLength(0) + }) + + it('forwards normal conversation messages to the hub', async () => { + const { session, sentMessages } = createSessionStub() + await claudePtyLauncher(session as never) + + harness.scannerOnMessage!({ type: 'user', uuid: '1' }) + harness.scannerOnMessage!({ type: 'assistant', uuid: '2' }) + + expect(sentMessages).toHaveLength(2) + }) +}) diff --git a/cli/src/claude/claudePty.ts b/cli/src/claude/claudePty.ts index c5ebd937f..6841e6802 100644 --- a/cli/src/claude/claudePty.ts +++ b/cli/src/claude/claudePty.ts @@ -17,6 +17,8 @@ export type ClaudePtyOpts = { nextMessage: () => Promise<{ message: string } | null> onReady: () => void onMessage: (data: string) => void + /** Fired after the driver has written a message to the PTY. See runAgentPty. 
*/ + onMessageSubmitted?: (message: string) => void | Promise onThinkingChange?: (thinking: boolean) => void onExit?: (code: number | null) => void registerControls?: (controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void }) => void @@ -87,6 +89,7 @@ export async function claudePty(opts: ClaudePtyOpts): Promise { nextMessage: opts.nextMessage, onReady: opts.onReady, onMessage: opts.onMessage, + onMessageSubmitted: opts.onMessageSubmitted, onThinkingChange: opts.onThinkingChange, onExit: opts.onExit, registerControls: opts.registerControls, diff --git a/cli/src/claude/claudePtyLauncher.ts b/cli/src/claude/claudePtyLauncher.ts new file mode 100644 index 000000000..f357bfa4d --- /dev/null +++ b/cli/src/claude/claudePtyLauncher.ts @@ -0,0 +1,577 @@ +import React from "react" +import { Session } from "./session" +import { RemoteModeDisplay } from "@/ui/ink/RemoteModeDisplay" +import { claudePty } from "./claudePty" +import { bracketPasteIfMultiline } from "@/agent/bracketedPaste" +import { createSessionScanner } from "./utils/sessionScanner" +import { getProjectPath } from "./utils/path" +import { isClaudeChatVisibleMessage } from "./utils/chatVisibility" +import { isExternalUserMessage } from "@/api/apiSession" +import type { SessionEffort, SessionModel } from "@/api/types" +import { logger } from "@/ui/logger" +import { readFile } from "node:fs/promises" +import { join } from "node:path" +import { + RemoteLauncherBase, + type RemoteLauncherDisplayContext, + type RemoteLauncherExitReason +} from "@/modules/common/remote/RemoteLauncherBase" + +// Delay before respawning the PTY after a launch failure, so a persistent +// failure surfaces its error at a steady cadence instead of a tight respawn loop. +const RESPAWN_BACKOFF_MS = 1000 +// Give up after this many consecutive launches that never reached a ready +// prompt. 
// Such failures are deterministic (claude not installed, terminal can't
// attach) and will not recover by respawning — bound them so the session ends
// with a clear error instead of retrying forever. A launch that DOES reach
// ready resets the counter, so genuine mid-session crash recovery stays
// unbounded.
const MAX_IMMEDIATE_LAUNCH_FAILURES = 3

// Extract the text of the LAST typed user prompt from a claude transcript
// (JSONL). Tool-result user entries and assistant turns carry no prompt text and
// are skipped, so the result is the most recent thing the human actually typed.
// Lines that are blank, are not valid JSON, or parse to something other than an
// object (e.g. a bare `null`, number or string) are ignored.
// Returns null when nothing parseable is found (caller falls back).
export function lastUserPromptText(transcript: string): string | null {
    let last: string | null = null
    for (const line of transcript.split('\n')) {
        const trimmed = line.trim()
        if (!trimmed) continue
        let parsed: unknown
        try {
            parsed = JSON.parse(trimmed)
        } catch {
            continue
        }
        // JSON.parse legitimately returns null / primitives for lines such as
        // `null` or `42`; reading `.type` off null would throw, so skip anything
        // that is not an object before touching its fields.
        if (typeof parsed !== 'object' || parsed === null) continue
        const entry = parsed as { type?: unknown; message?: { content?: unknown } | null }
        if (entry.type !== 'user') continue
        const content = entry.message?.content
        let text: string | null = null
        if (typeof content === 'string') {
            text = content
        } else if (Array.isArray(content)) {
            // Block form: keep only the typed text blocks (tool_result blocks
            // carry no prompt text) and concatenate them in order.
            const parts = content
                .filter((part): part is { type?: string; text?: string } =>
                    typeof part === 'object' && part !== null)
                .filter((part) => part.type === 'text' && typeof part.text === 'string')
                .map((part) => part.text as string)
            if (parts.length > 0) text = parts.join('')
        }
        // Only a non-empty prompt replaces the previous candidate.
        if (text !== null && text.length > 0) last = text
    }
    return last
}

// Whether `text` was actually delivered as the latest user prompt in a claude
// transcript. claude writes the user prompt to its JSONL the moment it ingests it
// (before the API call), so a hit confirms delivery. On --resume the file also
// contains the REPLAYED prior conversation, so a plain whole-file substring match
// would false-positive on stale history (e.g.
a short "continue") and suppress +// the re-type self-correction. Anchor on the LAST typed user prompt and require +// EQUALITY: only the just-submitted message can be the last prompt, and equality +// (not substring) keeps a new message that is a substring of the prior turn from +// matching stale content. Falls back to a whole-file check only when no user +// prompt parses (e.g. a fresh transcript with nothing to false-match yet). +export function transcriptConfirmsDelivery(transcript: string, text: string): boolean { + const lastPrompt = lastUserPromptText(transcript) + if (lastPrompt !== null) return lastPrompt.trim() === text.trim() + return transcript.includes(text) +} + +export type RespawnDecision = 'recover' | 'backoff' | 'give-up' + +// Decide what to do after a PTY launch threw, given whether it ever reached a +// ready prompt and how many consecutive launches have failed without doing so. +// A launch that reached ready is a recoverable mid-session crash → respawn at +// once. One that never did is a deterministic failure (claude not installed, +// terminal can't attach) → back off, then give up once too many have piled up +// so the session ends instead of looping forever. +export function decideRespawn( + reachedReady: boolean, + immediateFailures: number, + maxImmediateFailures: number +): RespawnDecision { + if (reachedReady) return 'recover' + return immediateFailures >= maxImmediateFailures ? 'give-up' : 'backoff' +} + +class ClaudePtyLauncher extends RemoteLauncherBase { + private readonly session: Session + private abortController: AbortController | null = null + // Claude's own session UUID (discovered via the SessionStart hook). Used to + // --resume the conversation if Claude ever has to be re-spawned (e.g. a crash) + // so the conversation continues with the current model/effort. + private claudeSessionId: string | null = null + // Live PTY controls (raw keystroke injection) for in-place /model and /effort. 
+ private ptyControls: { sendKeys: (data: string) => void } | null = null + // The model/effort currently applied to the running Claude TUI, so a config + // change only drives the slash command for what actually changed. + private appliedModel: SessionModel = null + private appliedEffort: SessionEffort = null + // When set, PTY output is fed here to detect claude's "Switch model?" dialog + // (across chunks, ANSI-stripped) and accept it with Enter. + private confirmWatch: { feed: (chunk: string) => void } | null = null + // Coalesce rapid model+effort changes into a single apply pass. + private configApplyScheduled = false + // True once claude's SessionStart hook has fired for the CURRENT spawn (reset + // each (re)launch). Gates the first message so a --resume that's still + // replaying its transcript doesn't eat the keystrokes (the input box renders + // before the replay redraw completes; typing then is lost). See waitForSessionStart. + private sessionStartSeen = false + private sessionStartResolvers: Array<() => void> = [] + + private sleep(ms: number): Promise { return new Promise((r) => setTimeout(r, ms)) } + + // Apply a mid-session model/effort change to the LIVE claude TUI via its + // /model and /effort slash commands — no re-spawn, so the conversation and + // scrollback are preserved. claude's /model pops a "Switch model?" dialog + // (default = Yes); we accept it with Enter. 
private scheduleConfigApply(): void {
    if (this.configApplyScheduled) return
    this.configApplyScheduled = true
    setTimeout(() => {
        this.configApplyScheduled = false
        // Fire-and-forget, but never let a failed apply become an unhandled
        // rejection — log it and carry on.
        this.applyConfigChange().catch((error: unknown) => {
            const reason = error instanceof Error ? error.message : String(error)
            logger.debug(`[pty]: failed to apply config change: ${reason}`)
        })
    }, 120)
}

// Push the session's current model/effort into the running TUI. Only the
// setting that differs from what was last applied is sent, and each applied
// value is recorded up-front so a repeated call does not re-send it. A null /
// empty value is recorded but sends nothing. No-op while no PTY controls are
// registered.
private async applyConfigChange(): Promise<void> {
    const controls = this.ptyControls
    if (!controls) return
    const model = this.session.getModel()
    const effort = this.session.getEffort()
    if (model !== this.appliedModel) {
        this.appliedModel = model
        if (model) {
            logger.debug(`[pty]: applying model change via /model ${model}`)
            controls.sendKeys(`/model ${model}\r`)
            // Must complete (dialog accepted or timed out) before any further
            // keystrokes are typed, or they would land in the dialog.
            await this.confirmModelDialog()
        }
    }
    if (effort !== this.appliedEffort) {
        this.appliedEffort = effort
        if (effort) {
            logger.debug(`[pty]: applying effort change via /effort ${effort}`)
            controls.sendKeys(`/effort ${effort}\r`)
            await this.sleep(300)
        }
    }
}

// Watch PTY output for claude's "Switch model?" dialog and accept it with
// Enter. Resolves once the accepting Enter has been WRITTEN (200ms after the
// dialog is detected), or after `timeoutMs` if the dialog never shows. Resolving
// only after the Enter matters: the caller types the next slash command as soon
// as this settles, and resolving at detection time would let those keystrokes
// hit the still-open dialog, followed by a stray Enter.
private confirmModelDialog(timeoutMs = 3500): Promise<void> {
    return new Promise<void>((resolve) => {
        let settled = false
        let buf = ''
        // Match the dialog across chunks, with ANSI escapes stripped (the TUI
        // interleaves color codes between words, so a raw regex misses it).
        const marker = /yes,\s*switch|switch model|no,\s*go back/i
        const watch = {
            feed: (chunk: string): void => {
                buf = (buf + chunk.replace(/\x1b\[[0-9;?]*[a-zA-Z]/g, '')).slice(-2000)
                if (!marker.test(buf)) return
                clearTimeout(timer)
                // Stop scanning right away so later chunks cannot trigger a
                // second Enter while the first one is pending.
                if (this.confirmWatch === watch) this.confirmWatch = null
                // Default-highlighted option is "Yes, switch" — Enter accepts.
                setTimeout(() => {
                    this.ptyControls?.sendKeys('\r')
                    finish()
                }, 200)
            }
        }
        const finish = (): void => {
            if (settled) return
            settled = true
            // Only clear the watch if it is still ours.
            if (this.confirmWatch === watch) this.confirmWatch = null
            resolve()
        }
        const timer = setTimeout(finish, timeoutMs)
        this.confirmWatch = watch
    })
}

// Re-derive Claude's spawn args each (re)launch: --model/--effort/--resume are
// dynamic (the model/effort can change mid-session, and a re-spawn must resume
// the existing conversation), so strip any stale copies from the base args and
// append the current values.
+    private buildSpawnArgs(): string[] {
+        const dynamicFlags = new Set(['--model', '--effort', '--resume'])
+        const source = this.session.claudeArgs ?? []
+        const kept: string[] = []
+        // A HAPI-resume uuid given in the initial args is remembered so the first
+        // spawn (before the SessionStart hook reports Claude's own id) still resumes.
+        let resumeFromArgs: string | null = null
+        let idx = 0
+        while (idx < source.length) {
+            const flag = source[idx]
+            if (!dynamicFlags.has(flag)) {
+                kept.push(flag)
+                idx += 1
+                continue
+            }
+            // A dynamic flag owns the following token unless that token is
+            // itself a flag (or the list ends).
+            const takesValue = idx + 1 < source.length && !source[idx + 1].startsWith('-')
+            if (takesValue && flag === '--resume') resumeFromArgs = source[idx + 1]
+            idx += takesValue ? 2 : 1
+        }
+        const spawnArgs = [...kept]
+        const resumeId = this.claudeSessionId ?? resumeFromArgs
+        const model = this.session.getModel()
+        const effort = this.session.getEffort()
+        if (resumeId) spawnArgs.push('--resume', resumeId)
+        if (model) spawnArgs.push('--model', model)
+        if (effort) spawnArgs.push('--effort', effort)
+        return spawnArgs
+    }
+
+    // Returns the value of the first `--resume <id>` pair in the initial args
+    // (set by the runner when reopening/resuming an existing conversation), or
+    // null when there is none. The id seeds the scanner with the
+    // already-forwarded transcript so a resume does not re-emit prior turns:
+    // the new runner's scanner starts with no memory of what the previous
+    // lifetime already sent.
+    private resumeIdFromArgs(): string | null {
+        const source = this.session.claudeArgs ?? []
+        const at = source.findIndex((arg, i) =>
+            arg === '--resume' && i + 1 < source.length && !source[i + 1].startsWith('-')
+        )
+        return at === -1 ? null : source[at + 1]
+    }
+
+    // Resolve once claude's SessionStart hook fires for the current spawn (or after
+    // `timeoutMs` as a fallback so a missed hook never hangs the message loop).
+    private waitForSessionStart(timeoutMs: number): Promise<void> {
+        if (this.sessionStartSeen) return Promise.resolve()
+        return new Promise<void>((resolve) => {
+            // Registered resolver: invoked by markSessionStartSeen(); cancels the
+            // fallback timer so the timeout path does not run afterwards.
+            const wrapped = () => { clearTimeout(timer); resolve() }
+            const timer = setTimeout(() => {
+                // Timed out: unregister ourselves so a late hook does not call a
+                // resolver whose promise has already settled.
+                this.sessionStartResolvers = this.sessionStartResolvers.filter((r) => r !== wrapped)
+                logger.debug('[pty]: SessionStart hook gate timed out; proceeding with first message')
+                resolve()
+            }, timeoutMs)
+            this.sessionStartResolvers.push(wrapped)
+        })
+    }
+
+    // Record that the SessionStart hook fired and release every pending
+    // waitForSessionStart() caller. splice(0) empties the list before the
+    // resolvers run, so each is invoked at most once.
+    private markSessionStartSeen(): void {
+        this.sessionStartSeen = true
+        const resolvers = this.sessionStartResolvers.splice(0)
+        for (const r of resolvers) r()
+    }
+
+    // Path of the live claude transcript (used to confirm a submitted message was
+    // actually ingested). Resolves against the REAL ~/.claude (not the isolated
+    // CLAUDE_CONFIG_DIR), mirroring the scanner. Null until a claude session id
+    // is known.
+    private transcriptPath(): string | null {
+        if (!this.claudeSessionId) return null
+        return join(getProjectPath(this.session.path), `${this.claudeSessionId}.jsonl`)
+    }
+
+    // True when the transcript file can be read and transcriptConfirmsDelivery
+    // accepts `text` against its contents. Best-effort: a missing path or any
+    // read error counts as "not delivered" rather than throwing.
+    private async transcriptHasText(text: string): Promise<boolean> {
+        const path = this.transcriptPath()
+        if (!path) return false
+        try {
+            return transcriptConfirmsDelivery(await readFile(path, 'utf-8'), text)
+        } catch {
+            return false
+        }
+    }
+
+    // Self-correcting delivery for the FIRST message after a (re)spawn. The driver
+    // submits it right after nextMessage returns, but a claude --resume that's still
+    // painting its replayed conversation can swallow those keystrokes (the input box
+    // renders, then a late redraw wipes the typed text) — the message never reaches
+    // claude and no response ever comes. Confirm the prompt landed in the transcript
+    // and re-type it if not. Guarded by claudeSessionId so we never blindly re-send
+    // when we can't verify.
+    private async ensureFirstMessageDelivered(text: string, signal: AbortSignal): Promise<void> {
+        if (!this.claudeSessionId) return
+        const trimmed = text.trim()
+        if (!trimmed) return
+        // Up to 3 repair rounds: poll the transcript for 5 s, then re-type.
+        for (let attempt = 0; attempt < 3; attempt++) {
+            const deadline = Date.now() + 5000
+            while (Date.now() < deadline) {
+                if (signal.aborted || !!this.exitReason) return
+                if (await this.transcriptHasText(trimmed)) return
+                await this.sleep(500)
+            }
+            const controls = this.ptyControls
+            if (signal.aborted || !!this.exitReason || !controls) return
+            logger.debug(`[pty]: first message not in transcript after submit; re-typing (attempt ${attempt + 1})`)
+            // Match the driver's submit path: a multiline first message must be
+            // bracketed-pasted on repair too, otherwise the re-typed newlines act
+            // as Enter and Claude receives split prompts instead of the message.
+            controls.sendKeys(bracketPasteIfMultiline(trimmed))
+            await this.sleep(200)
+            // onExit nulls ptyControls and a respawn replaces it; either can
+            // happen during the pause, so re-check before sending Enter instead
+            // of dereferencing a controls object that is no longer current.
+            if (signal.aborted || this.ptyControls !== controls) return
+            controls.sendKeys('\r')
+        }
+    }
+
+    constructor(session: Session) {
+        super(process.env.DEBUG ? session.logPath : undefined)
+        this.session = session
+        // Let the runner lifecycle (onBeforeClose) tear down the PTY gracefully
+        // on archive/SIGTERM: aborting the controller triggers runAgentPty's
+        // synchronous manager.kill(), so the child dies before process.exit.
+        session.setKillHandler(() => { void this.abort() })
+    }
+
+    protected createDisplay(context: RemoteLauncherDisplayContext): React.ReactElement {
+        return React.createElement(RemoteModeDisplay, context)
+    }
+
+    // Abort the current launch's controller, if there is one and it is not
+    // already aborted.
+    private async abort(): Promise<void> {
+        if (this.abortController && !this.abortController.signal.aborted) {
+            this.abortController.abort()
+        }
+    }
+
+    private async handleAbortRequest(): Promise<void> {
+        logger.debug('[pty]: doAbort')
+        await this.abort()
+    }
+
+    private async handleSwitchRequest(): Promise<void> {
+        logger.debug('[pty]: doSwitch')
+        await this.requestExit('switch', async () => {
+            await this.abort()
+        })
+    }
+
+    private async handleExitFromUi(): Promise<void> {
+        logger.debug('[pty]: Exiting via Ctrl-C')
+        await this.requestExit('exit', async () => {
+            await this.abort()
+        })
+    }
+
+    private async handleSwitchFromUi(): Promise<void> {
+        logger.debug('[pty]: Switching to local mode via double space')
+        await this.handleSwitchRequest()
+    }
+
+    public async launch(): Promise<'switch' | 'exit'> {
+        return this.start({
+            onExit: () => this.handleExitFromUi(),
+            onSwitchToLocal: () => this.handleSwitchFromUi()
+        })
+    }
+
+    // Main PTY loop: wires the transcript scanner and config-change handler,
+    // then (re)launches claudePty until an exit reason is set. Handlers and the
+    // scanner are torn down in the outer `finally`.
+    protected async runMainLoop(): Promise<void> {
+        logger.debug('[claudePtyLauncher] Starting PTY launcher')
+        logger.debug(`[claudePtyLauncher] TTY available: ${this.hasTTY}`)
+
+        const session = this.session
+        const messageBuffer = this.messageBuffer
+
+        this.setupAbortHandlers(session.client.rpcHandlerManager, {
+            onAbort: () => this.handleAbortRequest(),
+            onSwitch: () => this.handleSwitchRequest()
+        })
+
+        // Phase 2 — structured message recovery. The interactive (PTY) Claude
+        // writes a jsonl transcript just like local mode; tail it and forward
+        // RawJSONLines to the hub via sendClaudeSessionMessage so the existing
+        // mobile structured UI works unchanged (hub/web 0-line change). The
+        // sessionId is discovered through the SessionStart hook registered via
+        // claudePty's `--settings` (session.hookSettingsPath). Mirrors
+        // claudeLocalLauncher's filtering exactly.
+        // On resume, seed the scanner with the claude session being --resume'd so
+        // its already-forwarded transcript is marked processed (initialize() reads
+        // it and seeds the keys + sets the cursor to EOF). Without this, the fresh
+        // scanner re-reads the resumed transcript from byte 0 and re-emits every
+        // prior turn — the "previous last response shows again" bug. For a fresh
+        // spawn there's no --resume id and the transcript doesn't exist yet, so the
+        // HAPI sessionId placeholder is harmless (the real id arrives via the hook).
+        const resumeId = this.resumeIdFromArgs()
+        if (resumeId) this.claudeSessionId = resumeId
+        const scanner = await createSessionScanner({
+            sessionId: resumeId ?? session.sessionId,
+            workingDirectory: session.path,
+            onMessage: (message) => {
+                if (message.type === 'summary') return
+                if (message.isMeta || message.isCompactSummary) return
+                if (!isClaudeChatVisibleMessage(message)) return
+                // The user's own typed prompt is already shown (and persisted) by
+                // the webapp composer; the PTY transcript echoes it back, which
+                // would duplicate it in the chat. Skip forwarding the external user
+                // message. Tool-result "user" messages (no plain text) still flow.
+                if (isExternalUserMessage(message)) return
+                session.client.sendClaudeSessionMessage(message)
+            }
+        })
+        const handleSessionFound = (sessionId: string) => {
+            this.claudeSessionId = sessionId
+            // claude is now loaded (fresh or resume): release the first-message gate.
+            this.markSessionStartSeen()
+            scanner.onNewSession(sessionId)
+        }
+        session.addSessionFoundCallback(handleSessionFound)
+
+        // Seed the applied model/effort with what we spawn with, so the first real
+        // change is detected (and not the initial sync that sets the same value).
+        this.appliedModel = session.getModel()
+        this.appliedEffort = session.getEffort()
+
+        // Mid-session model/effort change: drive claude's live /model and /effort
+        // slash commands (in-place, conversation preserved) instead of re-spawning.
+        session.setConfigChangeHandler(() => this.scheduleConfigApply())
+
+        try {
+            let previousSessionId: string | null = null
+            // Consecutive launches that threw before ever reaching a ready prompt.
+            let consecutiveImmediateFailures = 0
+            while (!this.exitReason) {
+                logger.debug('[pty]: launch iteration')
+                messageBuffer.addMessage('═'.repeat(40), 'status')
+
+                const isNewSession = session.sessionId !== previousSessionId
+                if (isNewSession) {
+                    messageBuffer.addMessage('Starting new Claude PTY session...', 'status')
+                    logger.debug(`[pty]: New session (previous: ${previousSessionId}, current: ${session.sessionId})`)
+                } else {
+                    messageBuffer.addMessage('Continuing Claude PTY session...', 'status')
+                }
+
+                previousSessionId = session.sessionId
+                const controller = new AbortController()
+                this.abortController = controller
+
+                // Reset the per-spawn gate. The hook fires anew on each (re)spawn;
+                // a fresh resume must re-arm the wait. `claudeSessionId` stays set so
+                // buildSpawnArgs can still --resume on a crash re-spawn.
+                this.sessionStartSeen = false
+                // Only the FIRST message of this spawn is gated on SessionStart;
+                // later messages arrive while claude is already idle-ready.
+                let gatedFirstMessage = false
+                // Did this launch reach a ready prompt? Distinguishes a recoverable
+                // mid-session crash from a deterministic launch failure.
+                let reachedReady = false
+                // The first message that the driver actually SUBMITS gets a
+                // background delivery check (a slow --resume replay can swallow
+                // the keystrokes). Armed from onMessageSubmitted, not nextMessage,
+                // so it never runs before the driver's own submit — otherwise the
+                // check could re-type the message before it was first sent.
+                let firstSubmitVerified = false
+
+                try {
+                    await claudePty({
+                        sessionId: session.sessionId,
+                        path: session.path,
+                        claudeEnvVars: session.claudeEnvVars,
+                        claudeArgs: this.buildSpawnArgs(),
+                        hookSettingsPath: session.hookSettingsPath,
+                        signal: controller.signal,
+                        nextMessage: async () => {
+                            const msg = await session.queue.waitForMessagesAndGetAsString(controller.signal)
+                            if (!msg) return null
+                            // Hold the first message until claude's SessionStart hook
+                            // has fired (claude finished loading). A --resume keeps
+                            // replaying its transcript for a beat after the input box
+                            // first renders; typing during that window is swallowed by
+                            // the redraw. Gating here lets runAgentPty's post-return
+                            // waitForInputReady settle on the *real* idle prompt.
+                            if (!gatedFirstMessage) {
+                                gatedFirstMessage = true
+                                await this.waitForSessionStart(15000)
+                                if (controller.signal.aborted) return null
+                            }
+                            // User just submitted input — Claude is about to respond,
+                            // so poll fast for the incoming turn instead of waiting
+                            // for the idle interval.
+                            scanner.markActive()
+                            // The webapp sends `/model <name>` as a chat message
+                            // (not a config RPC), so it reaches Claude's TUI as text
+                            // and pops a "Switch model?" dialog. Arm the auto-accept
+                            // before the message is written so we confirm it.
+                            if (/^\/model\s+\S/i.test(msg.message.trim())) {
+                                void this.confirmModelDialog(6000)
+                            }
+                            return { message: msg.message }
+                        },
+                        onMessageSubmitted: (message: string) => {
+                            // Verify only the first actually-submitted message: a
+                            // slow --resume replay can eat its keystrokes. Runs
+                            // after the driver's submit, so it repairs a dropped
+                            // message instead of racing/duplicating a pending one.
+                            if (firstSubmitVerified) return
+                            firstSubmitVerified = true
+                            // Fire-and-forget, but never let a failed repair (e.g.
+                            // sendKeys throwing on a dead PTY) surface as an
+                            // unhandled rejection.
+                            void this.ensureFirstMessageDelivered(message, controller.signal).catch((err: unknown) => {
+                                logger.debug('[pty]: first-message delivery check failed', err)
+                            })
+                        },
+                        onReady: () => {
+                            reachedReady = true
+                            consecutiveImmediateFailures = 0
+                            logger.debug('[pty]: claude PTY ready')
+                            session.client.sendSessionEvent({ type: 'ready' })
+                        },
+                        onMessage: (data: string) => {
+                            if (process.env.DEBUG_PTY) {
+                                logger.debug(`[pty:onMessage] received ${data.length} bytes: ${data.slice(0, 80)}`)
+                            }
+                            // Accept claude's "Switch model?" dialog when a /model
+                            // change is in flight.
+                            if (this.confirmWatch) this.confirmWatch.feed(data)
+                            session.client.emitAgentTerminalOutput(data)
+                        },
+                        onThinkingChange: (thinking: boolean) => {
+                            session.onThinkingChange(thinking)
+                        },
+                        registerControls: (controls) => {
+                            this.ptyControls = controls
+                            // Fresh PTY — drop the hub's stale scrollback (e.g. from
+                            // before an archive→restart) before its output streams.
+                            session.client.resetAgentTerminal()
+                            session.client.setAgentTerminalControls(controls)
+                        },
+                        onExit: (code: number | null) => {
+                            logger.debug(`[pty]: claude PTY exited with code ${code}`)
+                            this.ptyControls = null
+                            session.client.sendSessionEvent({
+                                type: 'message',
+                                message: `Process exited with code ${code}`
+                            })
+                        },
+                    })
+
+                    session.consumeOneTimeFlags()
+
+                    if (!this.exitReason && controller.signal.aborted) {
+                        session.client.sendSessionEvent({ type: 'message', message: 'Aborted by user' })
+                    }
+                } catch (e) {
+                    logger.debug('[pty]: launch error', e)
+                    if (!this.exitReason) {
+                        const detail = e instanceof Error ? e.message : String(e)
+                        session.client.sendSessionEvent({ type: 'message', message: `PTY process error: ${detail}` })
+                        if (!reachedReady) consecutiveImmediateFailures++
+                        const decision = decideRespawn(reachedReady, consecutiveImmediateFailures, MAX_IMMEDIATE_LAUNCH_FAILURES)
+                        if (decision === 'give-up') {
+                            // Deterministic failure (claude not installed, terminal
+                            // can't attach): end the session with a clear error
+                            // instead of respawning forever.
+                            session.client.sendSessionEvent({
+                                type: 'message',
+                                message: `Claude PTY failed to start after ${MAX_IMMEDIATE_LAUNCH_FAILURES} attempts; ending session.`
+                            })
+                            this.exitReason = 'exit'
+                            break
+                        }
+                        if (decision === 'backoff') {
+                            // An immediate failure has no natural latency, so back
+                            // off before respawning — otherwise it CPU-spins and
+                            // floods the session with error events. A crash AFTER
+                            // ready ('recover') already carries real startup latency
+                            // and reset the counter, so it respawns at once (crash
+                            // recovery stays unbounded and undelayed).
+                            await this.sleep(RESPAWN_BACKOFF_MS)
+                        }
+                        continue
+                    }
+                } finally {
+                    logger.debug('[pty]: launch done')
+                    this.abortController = null
+                }
+            }
+        } finally {
+            session.setConfigChangeHandler(null)
+            session.client.setAgentTerminalControls(null)
+            session.removeSessionFoundCallback(handleSessionFound)
+            await scanner.cleanup()
+            logger.debug('[pty]: main loop ended')
+        }
+    }
+
+    protected async cleanup(): Promise<void> {
+        this.clearAbortHandlers(this.session.client.rpcHandlerManager)
+        logger.debug('[pty]: cleanup done')
+    }
+}
+
+// Entry point used by the session loop: run one PTY launcher for `session` and
+// report which exit the user chose.
+export async function claudePtyLauncher(session: Session): Promise<'switch' | 'exit'> {
+    const launcher = new ClaudePtyLauncher(session)
+    return launcher.launch()
+}
diff --git a/cli/src/claude/loop.ts b/cli/src/claude/loop.ts index a7846171b..89d251777 100644 --- a/cli/src/claude/loop.ts +++ b/cli/src/claude/loop.ts @@ -1,10 +1,11 @@ import { ApiSessionClient } from "@/api/apiSession" import { MessageQueue2 } from "@/utils/MessageQueue2" import { logger } from "@/ui/logger" -import { runLocalRemoteSession } from "@/agent/loopBase" +import { runLocalRemoteSession, type SessionMode } from "@/agent/loopBase" import { Session } from "./session" import { claudeLocalLauncher } from "./claudeLocalLauncher" import { claudeRemoteLauncher } from "./claudeRemoteLauncher" +import { claudePtyLauncher } from "./claudePtyLauncher" import { ApiClient } from "@/lib" import type { SessionEffort, SessionModel } from "@/api/types" import type { ClaudePermissionMode } from "@hapi/protocol/types" @@ -27,7 +28,7 @@ interface LoopOptions { model?: SessionModel effort?: SessionEffort permissionMode?: PermissionMode - startingMode?: 'local' | 'remote' + startingMode?: 'local' | 'remote' | 'pty' startedBy?: 'runner' | 'terminal' onModeChange: (mode: 'local' | 'remote') => void mcpServers: Record @@ -48,6 +49,7 @@ export async function loop(opts: LoopOptions) { const logPath = logger.logFilePath; const startedBy = opts.startedBy ??
'terminal'; const startingMode = opts.startingMode ?? 'local'; + const sessionMode: 'local' | 'remote' = startingMode === 'pty' ? 'remote' : startingMode; const session = new Session({ api: opts.api, client: opts.session, @@ -60,9 +62,9 @@ export async function loop(opts: LoopOptions) { messageQueue: opts.messageQueue, allowedTools: opts.allowedTools, onModeChange: opts.onModeChange, - mode: startingMode, + mode: sessionMode, startedBy, - startingMode, + startingMode: sessionMode, hookSettingsPath: opts.hookSettingsPath, permissionMode: opts.permissionMode ?? 'default', model: opts.model, @@ -75,6 +77,7 @@ export async function loop(opts: LoopOptions) { logTag: 'loop', runLocal: claudeLocalLauncher, runRemote: claudeRemoteLauncher, + runPty: claudePtyLauncher, onSessionReady: opts.onSessionReady }); } diff --git a/cli/src/claude/runClaude.ts b/cli/src/claude/runClaude.ts index 4472aee92..e4d15bfcc 100644 --- a/cli/src/claude/runClaude.ts +++ b/cli/src/claude/runClaude.ts @@ -1,3 +1,4 @@ +import { randomUUID } from 'node:crypto'; import { logger } from '@/ui/logger'; import { loop } from '@/claude/loop'; import { AgentState, SessionEffort, SessionModel } from '@/api/types'; @@ -9,6 +10,7 @@ import { parseSpecialCommand } from '@/parsers/specialCommands'; import { getEnvironmentInfo } from '@/ui/doctor'; import { startHappyServer } from '@/claude/utils/startHappyServer'; import { startHookServer } from '@/claude/utils/startHookServer'; +import { PtyPermissionHandler } from '@/claude/utils/ptyPermissionHandler'; import { generateHookSettingsFile, cleanupHookSettingsFile } from '@/modules/common/hooks/generateHookSettings'; import { registerKillSessionHandler } from './registerKillSessionHandler'; import type { Session } from './session'; @@ -27,7 +29,7 @@ export interface StartOptions { model?: string effort?: string permissionMode?: PermissionMode - startingMode?: 'local' | 'remote' + startingMode?: 'local' | 'remote' | 'pty' shouldStartRunner?: boolean 
claudeEnvVars?: Record claudeArgs?: string[] @@ -67,6 +69,7 @@ export async function runClaude(options: StartOptions = {}): Promise { flavor: 'claude', startedBy, workingDirectory, + tag: options.startingMode === 'pty' ? `__hapi_pty__claude-${randomUUID()}` : undefined, agentState: initialState, model: initialModel ?? undefined, effort: initialEffort ?? undefined @@ -97,6 +100,12 @@ export async function runClaude(options: StartOptions = {}): Promise { // Variable to track current session instance (updated via onSessionReady callback) const currentSessionRef: { current: Session | null } = { current: null }; + // PTY mode has no SDK canUseTool callback, so tool approvals are bridged from + // a PreToolUse hook to the web via this handler (assigned below once the + // permission-mode state exists). Null in SDK/local/remote modes. + const isPtyMode = options.startingMode === 'pty'; + let ptyPermissionHandler: PtyPermissionHandler | null = null; + const formatFailureReason = (message: string): string => { const maxLength = 200; if (message.length <= maxLength) { @@ -118,13 +127,26 @@ export async function runClaude(options: StartOptions = {}): Promise { currentSession.onSessionFound(sessionId); } } + }, + // PTY-mode tool-approval bridge. Resolves once the user answers in the + // web modal (may take minutes). Allows by default if the handler isn't + // up yet (should not happen in PTY mode). + onPreToolUse: async (data) => { + if (!ptyPermissionHandler) { + return { permissionDecision: 'allow' }; + } + const toolUseId = data.tool_use_id || `${data.tool_name ?? 'tool'}-${data.session_id ?? ''}`; + return ptyPermissionHandler.requestDecision(toolUseId, data.tool_name ?? 
'', data.tool_input); } }); logger.debug(`[START] Hook server started on port ${hookServer.port}`); const hookSettingsPath = generateHookSettingsFile(hookServer.port, hookServer.token, { filenamePrefix: 'session-hook', - logLabel: 'generateHookSettings' + logLabel: 'generateHookSettings', + // PTY sessions rely on the PreToolUse hook for approvals; the SDK path + // must NOT register it (it uses canUseTool instead). + includePreToolUse: isPtyMode }); logger.debug(`[START] Generated hook settings file: ${hookSettingsPath}`); @@ -137,7 +159,13 @@ export async function runClaude(options: StartOptions = {}): Promise { session, logTag: 'claude', stopKeepAlive: () => currentSessionRef.current?.stopKeepAlive(), + // Tear down the PTY before process.exit. For PTY mode + // the launcher registers a kill handler that aborts the controller → + // runAgentPty's manager.kill() runs synchronously. No-op in local/remote + // mode where no handler is registered. + onBeforeClose: () => { currentSessionRef.current?.kill(); }, onAfterClose: () => { + ptyPermissionHandler?.cancelAll('Session ended'); happyServer.stop(); hookServer.stop(); cleanupHookSettingsFile(hookSettingsPath, 'generateHookSettings'); @@ -187,6 +215,23 @@ export async function runClaude(options: StartOptions = {}): Promise { sessionInstance.setEffort(currentEffort); logger.debug(`[loop] Synced session config for keepalive: permissionMode=${currentPermissionMode}, model=${currentModel ?? 'auto'}, effort=${currentEffort ?? 'auto'}`); }; + + // Bring up the PTY tool-approval bridge now that the permission-mode state + // exists. It reads the live mode (web dropdown can change it mid-session) and + // routes any "approve & switch mode" choice back into that same state. 
+ if (isPtyMode) { + ptyPermissionHandler = new PtyPermissionHandler(session, { + getPermissionMode: () => currentPermissionMode, + onModeChange: (mode) => { + if (!isPermissionModeAllowedForFlavor(mode, 'claude')) { + return; + } + currentPermissionMode = mode as PermissionMode; + currentSessionRef.current?.setPermissionMode(mode as PermissionMode); + syncSessionModes(); + } + }); + } session.onUserMessage((message, localId) => { const sessionPermissionMode = currentSessionRef.current?.getPermissionMode(); if (sessionPermissionMode && isPermissionModeAllowedForFlavor(sessionPermissionMode, 'claude')) { diff --git a/cli/src/claude/session.ts b/cli/src/claude/session.ts index 2106d312f..87a080a0e 100644 --- a/cli/src/claude/session.ts +++ b/cli/src/claude/session.ts @@ -85,14 +85,35 @@ export class Session extends AgentSessionBase { return this.permissionMode as PermissionMode | undefined; } + // Fired when the model or effort actually changes mid-session. The PTY + // launcher uses this to drive Claude's live /model and /effort slash + // commands in place (no re-spawn), so the change takes effect while the + // conversation and scrollback are preserved. + private configChangeHandler: (() => void) | null = null; + setConfigChangeHandler = (handler: (() => void) | null): void => { + this.configChangeHandler = handler; + }; + setModel = (model: SessionModel): void => { + if (model === this.model) return; this.model = model; + this.configChangeHandler?.(); }; setEffort = (effort: SessionEffort): void => { + if (effort === this.effort) return; this.effort = effort; + this.configChangeHandler?.(); }; + getModel(): SessionModel { + return this.model ?? null; + } + + getEffort(): SessionEffort { + return this.effort ??
null; + } + recordLocalLaunchFailure = (message: string, exitReason: LocalLaunchExitReason): void => { this.localLaunchFailure = { message, exitReason }; }; diff --git a/cli/src/claude/utils/ptyPermissionHandler.test.ts b/cli/src/claude/utils/ptyPermissionHandler.test.ts new file mode 100644 index 000000000..dd03bc24a --- /dev/null +++ b/cli/src/claude/utils/ptyPermissionHandler.test.ts @@ -0,0 +1,213 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { PermissionMode } from '@hapi/protocol/types'; +import { PtyPermissionHandler } from './ptyPermissionHandler'; +import type { PermissionHandlerClient } from '@/modules/common/permission/BasePermissionHandler'; +import { RPC_METHODS } from '@hapi/protocol/rpcMethods'; + +type PermissionRpcHandler = (response: { + id: string; + approved: boolean; + reason?: string; + mode?: PermissionMode; + allowTools?: string[]; + answers?: Record | Record; +}) => Promise | void; + +function createFakeClient() { + let permissionHandler: PermissionRpcHandler | null = null; + const state: { requests: Record; completedRequests: Record } = { + requests: {}, + completedRequests: {} + }; + + const client: PermissionHandlerClient = { + rpcHandlerManager: { + registerHandler: vi.fn((method: string, handler: unknown) => { + if (method === RPC_METHODS.Permission) { + permissionHandler = handler as PermissionRpcHandler; + } + }) + }, + updateAgentState: vi.fn((handler: (s: any) => any) => { + Object.assign(state, handler(state)); + }) + }; + + return { + client, + state, + respond: (response: Parameters[0]) => { + if (!permissionHandler) throw new Error('Permission RPC handler not registered'); + return permissionHandler(response); + } + }; +} + +describe('PtyPermissionHandler', () => { + it('auto-allows pure read-only tools without a web round trip', async () => { + const { client, state } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + for (const tool of 
['Read', 'Glob', 'Grep', 'LS', 'NotebookRead', 'TodoWrite']) { + const decision = await handler.requestDecision(`id-${tool}`, tool, {}); + expect(decision.permissionDecision).toBe('allow'); + } + // never surfaced a request to the web + expect(Object.keys(state.requests)).toHaveLength(0); + }); + + it('routes AskUserQuestion to the web and injects the picked answers via updatedInput', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const input = { questions: [{ question: 'Pick a color?', header: 'Color' }] }; + const pending = handler.requestDecision('q1', 'AskUserQuestion', input); + // surfaced in agent state so the web shows the question card + expect(state.requests['q1']).toMatchObject({ tool: 'AskUserQuestion' }); + + await respond({ id: 'q1', approved: true, answers: { '0': ['Blue'] } }); + const decision = await pending; + expect(decision.permissionDecision).toBe('allow'); + // claude's AskUserQuestion expects answers keyed by question text + expect(decision.updatedInput).toMatchObject({ answers: { 'Pick a color?': 'Blue' } }); + }); + + it('denies AskUserQuestion when no answers are provided', async () => { + const { client, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('q2', 'AskUserQuestion', { questions: [{ question: 'X?' 
}] }); + await respond({ id: 'q2', approved: true, answers: {} }); + const decision = await pending; + expect(decision.permissionDecision).toBe('deny'); + }); + + it('denies AskUserQuestion when answers cannot be mapped to questions (never stalls)', async () => { + const { client, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + // Web sends a non-empty answer, but the index doesn't line up with any + // question text, so the claude-shaped map comes out empty. Allowing here + // would make claude echo "answered: ." and lock the turn — deny instead. + const pending = handler.requestDecision('q3', 'AskUserQuestion', { questions: [{ question: 'X?' }] }); + await respond({ id: 'q3', approved: true, answers: { '5': ['Stray'] } }); + const decision = await pending; + expect(decision.permissionDecision).toBe('deny'); + expect(decision.updatedInput).toBeUndefined(); + }); + + it('auto-allows everything in bypassPermissions (the --yolo mapping)', async () => { + const { client, state } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'bypassPermissions' }); + + const decision = await handler.requestDecision('b1', 'Bash', { command: 'rm -rf /tmp/x' }); + expect(decision.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests)).toHaveLength(0); + }); + + it('routes gated tools to the web modal and resolves allow on approval', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('tool-1', 'Bash', { command: 'ls' }); + // surfaced in agent state for the web modal + expect(state.requests['tool-1']).toMatchObject({ tool: 'Bash' }); + + await respond({ id: 'tool-1', approved: true }); + const decision = await pending; + expect(decision.permissionDecision).toBe('allow'); + 
expect(decision.updatedInput).toEqual({ command: 'ls' }); + }); + + it('resolves deny (never ask) when the user rejects', async () => { + const { client, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('tool-2', 'Write', { file_path: '/etc/x' }); + await respond({ id: 'tool-2', approved: false, reason: 'nope' }); + const decision = await pending; + expect(decision.permissionDecision).toBe('deny'); + expect(decision.reason).toContain('nope'); + }); + + it('remembers "allow for session" tools and skips re-prompting', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('w-1', 'WebFetch', { url: 'https://a' }); + await respond({ id: 'w-1', approved: true, allowTools: ['WebFetch'] }); + expect((await first).permissionDecision).toBe('allow'); + + // second call to the same tool is auto-allowed without a new request + const before = Object.keys(state.requests).length; + const second = await handler.requestDecision('w-2', 'WebFetch', { url: 'https://b' }); + expect(second.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests).length).toBe(before); + }); + + it('honors "allow for session" for a Bash command (web sends Bash())', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('b-1', 'Bash', { command: 'echo hi' }); + // web's "Allow For Session" for claude Bash sends the command-qualified id + await respond({ id: 'b-1', approved: true, allowTools: ['Bash(echo hi)'] }); + expect((await first).permissionDecision).toBe('allow'); + + // same command auto-allows without a new web request + const before = Object.keys(state.requests).length; + 
const second = await handler.requestDecision('b-2', 'Bash', { command: 'echo hi' }); + expect(second.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests).length).toBe(before); + }); + + it('still prompts for a different Bash command after a literal session-allow', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('b-1', 'Bash', { command: 'echo hi' }); + await respond({ id: 'b-1', approved: true, allowTools: ['Bash(echo hi)'] }); + await first; + + // a DIFFERENT command is not covered by the literal allow → surfaces a request + handler.requestDecision('b-2', 'Bash', { command: 'rm -rf /' }); + expect(state.requests['b-2']).toMatchObject({ tool: 'Bash' }); + }); + + it('honors a Bash prefix session-allow (Bash(:*))', async () => { + const { client, state, respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const first = handler.requestDecision('p-1', 'Bash', { command: 'npm test' }); + await respond({ id: 'p-1', approved: true, allowTools: ['Bash(npm:*)'] }); + await first; + + const before = Object.keys(state.requests).length; + const second = await handler.requestDecision('p-2', 'Bash', { command: 'npm run build' }); + expect(second.permissionDecision).toBe('allow'); + expect(Object.keys(state.requests).length).toBe(before); + }); + + it('propagates a mode switch chosen alongside the approval', async () => { + const { client, respond } = createFakeClient(); + const onModeChange = vi.fn(); + const handler = new PtyPermissionHandler(client, { + getPermissionMode: () => 'default', + onModeChange + }); + + const pending = handler.requestDecision('e-1', 'Edit', { file_path: '/x' }); + await respond({ id: 'e-1', approved: true, mode: 'acceptEdits' }); + await pending; + 
expect(onModeChange).toHaveBeenCalledWith('acceptEdits'); + }); + + it('cancelAll rejects in-flight requests (deny path for teardown)', async () => { + const { client, respond: _respond } = createFakeClient(); + const handler = new PtyPermissionHandler(client, { getPermissionMode: () => 'default' }); + + const pending = handler.requestDecision('c-1', 'Bash', { command: 'sleep 999' }); + handler.cancelAll('Session ended'); + await expect(pending).rejects.toThrow('Session ended'); + }); +}); diff --git a/cli/src/claude/utils/ptyPermissionHandler.ts b/cli/src/claude/utils/ptyPermissionHandler.ts new file mode 100644 index 000000000..529da6837 --- /dev/null +++ b/cli/src/claude/utils/ptyPermissionHandler.ts @@ -0,0 +1,248 @@ +/** + * Permission bridge for PTY-mode claude sessions. + * + * The SDK path routes tool approvals through the SDK's `canUseTool` callback + * (see {@link ../utils/permissionHandler.ts}). A PTY-mode claude has no such + * callback — it would render permission prompts in its own TUI and stall the + * chat-driven flow. Instead, a PreToolUse hook forwards each tool call here; we + * either auto-allow it or surface it in the web approval modal (reusing the + * exact `state.requests` + `permission` RPC machinery the SDK path uses) and + * return the resulting allow/deny to claude. + * + * We MUST always resolve to `allow` or `deny` — never `ask` — because `ask` + * makes claude fall back to its own (TUI) prompt, which blocks the PTY. 
+ */ + +import type { PermissionMode } from '@hapi/protocol/types'; +import { + BasePermissionHandler, + resolveToolAutoApprovalDecision, + type PendingPermissionRequest, + type PermissionCompletion, + type PermissionHandlerClient +} from '@/modules/common/permission/BasePermissionHandler'; +import { logger } from '@/ui/logger'; +import { + isAskUserQuestionToolName, + isRequestUserInputToolName, + isQuestionToolName, + buildAskUserQuestionUpdatedInput, + buildRequestUserInputUpdatedInput +} from './questionAnswerInput'; + +export type PtyPermissionDecision = { + permissionDecision: 'allow' | 'deny'; + reason?: string; + updatedInput?: Record; +}; + +// The web-driven response delivered over the `permission` RPC. Same shape the +// SDK PermissionHandler consumes, so the existing web approval UI works as-is. +type PermissionResponse = { + id: string; + approved: boolean; + reason?: string; + mode?: PermissionMode; + allowTools?: string[]; + // Picked answers for the question tools (AskUserQuestion / request_user_input). + answers?: Record | Record; +}; + +// Tools claude itself does not prompt for in default mode: pure read-only +// file/search/state tools. Auto-allow them so PTY default mode isn't flooded +// with an approval prompt for every Read/Grep. Network/exec/write tools still +// go to the web. Question tools (AskUserQuestion / request_user_input) are NOT +// here on purpose — they are routed to the web so the user answers in the chat, +// and the picked answers are injected back via the tool's updatedInput. +const PTY_AUTO_ALLOW_TOOLS = new Set([ + 'Read', + 'Glob', + 'Grep', + 'LS', + 'NotebookRead', + 'TodoWrite' +]); + +export type PtyPermissionHandlerOptions = { + /** Reads the session's CURRENT permission mode (web dropdown can change it mid-session). */ + getPermissionMode: () => PermissionMode | undefined; + /** Propagate a mode change requested via the web approval (e.g. "approve & switch to acceptEdits"). 
*/ + onModeChange?: (mode: PermissionMode) => void; +}; + +/** + * Permission handler for PTY-mode sessions: auto-allows what is safe or already + * approved, and parks everything else as a pending request until the web + * approval modal answers. + * + * NOTE(review): the type arguments on `BasePermissionHandler` (and on + * `PendingPermissionRequest` in handlePermissionResponse) appear to have been + * lost from this text — confirm them against the base class declaration. + */ +export class PtyPermissionHandler extends BasePermissionHandler { + private readonly options: PtyPermissionHandlerOptions; + // Tools the user chose to always allow this session ("allow for session"). + private readonly sessionAllowedTools = new Set<string>(); + // Bash "allow for session" arrives command-qualified (Bash(<command>) or + // Bash(<prefix>:*)), so it needs literal/prefix matching rather than a plain + // tool-name set — mirrors the SDK PermissionHandler. + private readonly allowedBashLiterals = new Set<string>(); + private readonly allowedBashPrefixes = new Set<string>(); + + constructor(client: PermissionHandlerClient, options: PtyPermissionHandlerOptions) { + super(client); + this.options = options; + } + + /** + * Decide whether a PTY tool call may proceed. Resolves immediately for + * auto-allowed tools/modes; otherwise registers a pending request that + * resolves when the user answers in the web modal. + * + * @param toolUseId - id the pending request is registered under + * @param toolName - name of the tool claude wants to run + * @param input - raw tool input; only `command` is inspected, and only for Bash + * @returns the allow/deny decision (never `ask`) + */ + requestDecision(toolUseId: string, toolName: string, input: unknown): Promise<PtyPermissionDecision> { + const mode = this.options.getPermissionMode(); + + // 1. Already allowed for the session via a prior approval. + if (toolName === 'Bash') { + const command = (input as { command?: string } | null)?.command; + if (command && this.isBashCommandAllowed(command)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } + } else if (this.sessionAllowedTools.has(toolName)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } + + // 2. Pure read-only tools — never gated. Question tools are deliberately NOT + // in PTY_AUTO_ALLOW_TOOLS; they fall through to the web prompt in step 4. + if (PTY_AUTO_ALLOW_TOOLS.has(toolName)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } + + // 3. Mode-based auto-approval. bypassPermissions (the --yolo mapping) + // isn't covered by resolveToolAutoApprovalDecision, so handle it here. 
+ if (mode === 'bypassPermissions') { + return Promise.resolve({ permissionDecision: 'allow' }); + } + if (resolveToolAutoApprovalDecision(mode, toolName, toolUseId)) { + return Promise.resolve({ permissionDecision: 'allow' }); + } + + // 4. Ask the user via the web approval modal. + return new Promise((resolve, reject) => { + this.addPendingRequest(toolUseId, toolName, input, { resolve, reject }); + logger.debug(`[ptyPermission] Awaiting web approval for ${toolName} (${toolUseId})`); + }); + } + + /** Reject every in-flight request (deny) — call on session teardown/abort. */ + cancelAll(reason: string): void { + this.cancelPendingRequests({ + completedReason: reason, + rejectMessage: reason, + decision: 'denied' + }); + } + + /** + * Apply the web's answer to a pending request: record any session-level + * allows, propagate a mode switch, then resolve the pending promise with + * allow or deny. + */ + protected async handlePermissionResponse( + response: PermissionResponse, + pending: PendingPermissionRequest + ): Promise<PermissionCompletion> { + // Remember "allow for session" choices so we don't re-prompt. Bash comes + // command-qualified (Bash(<command>) / Bash(<prefix>:*)); other tools by name. + if (response.allowTools && response.allowTools.length > 0) { + for (const tool of response.allowTools) { + if (tool === 'Bash' || tool.startsWith('Bash(')) { + this.rememberBashPermission(tool); + } else { + this.sessionAllowedTools.add(tool); + } + } + } + + // A mode switch chosen alongside the approval (e.g. acceptEdits). + if (response.mode) { + this.options.onModeChange?.(response.mode); + } + + const completion: PermissionCompletion = { + status: response.approved ? 'approved' : 'denied', + reason: response.reason, + mode: response.mode, + allowTools: response.allowTools, + answers: response.answers + }; + + // Question tools: the user answered in the chat. Inject the picked + // answers into the tool's updatedInput so claude echoes them instead of + // re-prompting in its TUI (same trick the SDK canUseTool path uses). + if (isQuestionToolName(pending.toolName)) { + const answers = response.answers ?? 
{}; + const denyNoAnswers = (): PermissionCompletion => { + completion.status = 'denied'; + completion.reason = completion.reason ?? 'No answers were provided.'; + pending.resolve({ permissionDecision: 'deny', reason: 'No answers were provided.' }); + return completion; + }; + if (Object.keys(answers).length === 0) { + return denyNoAnswers(); + } + const updatedInput = isAskUserQuestionToolName(pending.toolName) + ? buildAskUserQuestionUpdatedInput(pending.input, answers) + : isRequestUserInputToolName(pending.toolName) + ? buildRequestUserInputUpdatedInput(pending.input, answers) + : (pending.input as Record); + // Never-stall guard: if the index->questionText mapping produced no + // usable answers (e.g. malformed/reordered questions), an `allow` with + // empty answers makes claude echo an empty "answered: ." result and + // lock the turn. Deny instead so the bridge never silently stalls. + if (isAskUserQuestionToolName(pending.toolName)) { + const mapped = (updatedInput as { answers?: unknown }).answers; + if (!mapped || typeof mapped !== 'object' || Object.keys(mapped as object).length === 0) { + return denyNoAnswers(); + } + } + pending.resolve({ permissionDecision: 'allow', updatedInput }); + return completion; + } + + if (response.approved) { + pending.resolve({ + permissionDecision: 'allow', + updatedInput: (pending.input as Record) ?? undefined + }); + } else { + pending.resolve({ + permissionDecision: 'deny', + reason: + response.reason || + "The user declined this tool use. The tool was NOT run. Stop and wait for the user to tell you how to proceed." 
+ }); + } + + return completion; + } + + protected handleMissingPendingResponse(response: PermissionResponse): void { + logger.debug(`[ptyPermission] No pending request for response ${response.id} (already resolved?)`); + } + + /** + * True when `command` is covered by a prior "allow for session" choice: a + * name-level "Bash" allow, an exact command literal, or a remembered prefix. + */ + private isBashCommandAllowed(command: string): boolean { + // rememberBashPermission stores a plain "Bash" allow in sessionAllowedTools, + // and requestDecision consults only this method for Bash calls — so the + // name-level allow has to be checked here or it is never honored. + if (this.sessionAllowedTools.has('Bash') || this.allowedBashLiterals.has(command)) { + return true; + } + for (const prefix of this.allowedBashPrefixes) { + if (command.startsWith(prefix)) { + return true; + } + } + return false; + } + + /** + * Record a Bash "allow for session" entry: plain "Bash" (name-level), + * "Bash(<command>)" (exact literal) or "Bash(<prefix>:*)" (prefix). Any other + * shape is ignored. + */ + private rememberBashPermission(permission: string): void { + // Plain "Bash" would allow every command — treat it as a name-level allow. + if (permission === 'Bash') { + this.sessionAllowedTools.add('Bash'); + return; + } + // `s` (dotAll) so a multi-line command still matches; without it `.` stops + // at the first newline and the allow would be silently dropped. + const match = permission.match(/^Bash\((.+?)\)$/s); + if (!match) { + return; + } + const command = match[1]; + if (command.endsWith(':*')) { + this.allowedBashPrefixes.add(command.slice(0, -2)); + } else { + this.allowedBashLiterals.add(command); + } + } +} diff --git a/cli/src/claude/utils/sessionHookForwarder.test.ts b/cli/src/claude/utils/sessionHookForwarder.test.ts new file mode 100644 index 000000000..d6974ad64 --- /dev/null +++ b/cli/src/claude/utils/sessionHookForwarder.test.ts @@ -0,0 +1,151 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { createServer, type Server } from 'node:http'; +import { + detectHookEventName, + buildPreToolUseStdout, + runSessionHookForwarder +} from './sessionHookForwarder'; + +describe('detectHookEventName', () => { + it('extracts the hook event name from a JSON payload', () => { + expect(detectHookEventName(JSON.stringify({ hook_event_name: 'PreToolUse' }))).toBe('PreToolUse'); + expect(detectHookEventName(Buffer.from(JSON.stringify({ hook_event_name: 'SessionStart' })))).toBe('SessionStart'); + }); + + it('returns null for non-JSON or missing event name', () => { + expect(detectHookEventName('not json')).toBeNull(); + expect(detectHookEventName(JSON.stringify({ session_id: 'x' }))).toBeNull(); + }); +}); + 
+describe('buildPreToolUseStdout', () => { + it('wraps an allow decision in claude hookSpecificOutput shape', () => { + const out = JSON.parse(buildPreToolUseStdout({ permissionDecision: 'allow' })); + expect(out).toEqual({ + hookSpecificOutput: { hookEventName: 'PreToolUse', permissionDecision: 'allow' } + }); + }); + + it('includes reason and updatedInput when present', () => { + const out = JSON.parse( + buildPreToolUseStdout({ permissionDecision: 'deny', reason: 'no', updatedInput: { a: 1 } }) + ); + expect(out.hookSpecificOutput.permissionDecisionReason).toBe('no'); + expect(out.hookSpecificOutput.updatedInput).toEqual({ a: 1 }); + }); +}); + +// --- integration: drive the forwarder against a stub hook server --- + +let server: Server | null = null; + +afterEach(async () => { + if (server) { + await new Promise((r) => server!.close(() => r())); + server = null; + } +}); + +function startStub(handler: (path: string, body: string) => { status: number; body: string }): Promise { + return new Promise((resolve) => { + server = createServer((req, res) => { + const chunks: Buffer[] = []; + req.on('data', (c) => chunks.push(c as Buffer)); + req.on('end', () => { + const { status, body } = handler(req.url || '', Buffer.concat(chunks).toString('utf-8')); + res.writeHead(status, { 'Content-Type': 'application/json' }).end(body); + }); + }); + server.listen(0, '127.0.0.1', () => { + const addr = server!.address(); + resolve(typeof addr === 'object' && addr ? addr.port : 0); + }); + }); +} + +function withStdin(payload: string, fn: () => Promise): Promise { + const original = process.stdin; + // Minimal async-iterable stdin stub. 
+ const fake = (async function* () { + yield Buffer.from(payload); + })(); + Object.defineProperty(process, 'stdin', { + value: Object.assign(fake, { resume: () => {} }), + configurable: true + }); + return fn().finally(() => { + Object.defineProperty(process, 'stdin', { value: original, configurable: true }); + }); +} + +function captureStdout(): { restore: () => void; get: () => string } { + const original = process.stdout.write.bind(process.stdout); + let captured = ''; + (process.stdout as unknown as { write: (s: string) => boolean }).write = (s: string) => { + captured += s; + return true; + }; + return { restore: () => { (process.stdout as unknown as { write: typeof original }).write = original; }, get: () => captured }; +} + +describe('runSessionHookForwarder — PreToolUse routing', () => { + it('POSTs PreToolUse to /hook/pre-tool-use and echoes the decision on stdout', async () => { + let hitPath = ''; + const port = await startStub((path) => { + hitPath = path; + return { status: 200, body: JSON.stringify({ permissionDecision: 'allow' }) }; + }); + + const out = captureStdout(); + try { + await withStdin( + JSON.stringify({ hook_event_name: 'PreToolUse', tool_name: 'Bash', tool_use_id: 'tc-1' }), + () => runSessionHookForwarder(['--port', String(port), '--token', 'tok']) + ); + } finally { + out.restore(); + } + + expect(hitPath).toBe('/hook/pre-tool-use'); + expect(JSON.parse(out.get())).toEqual({ + hookSpecificOutput: { hookEventName: 'PreToolUse', permissionDecision: 'allow' } + }); + }); + + it('fails closed (deny) when the bridge returns an error status', async () => { + const port = await startStub(() => ({ status: 500, body: 'boom' })); + + const out = captureStdout(); + try { + await withStdin( + JSON.stringify({ hook_event_name: 'PreToolUse', tool_name: 'Write', tool_use_id: 'tc-2' }), + () => runSessionHookForwarder(['--port', String(port), '--token', 'tok']) + ); + } finally { + out.restore(); + } + + 
expect(JSON.parse(out.get()).hookSpecificOutput.permissionDecision).toBe('deny'); + }); + + it('routes SessionStart to /hook/session-start and writes nothing to stdout', async () => { + let hitPath = ''; + const port = await startStub((path) => { + hitPath = path; + return { status: 200, body: 'ok' }; + }); + + const out = captureStdout(); + try { + await withStdin( + JSON.stringify({ hook_event_name: 'SessionStart', session_id: 's-1' }), + () => runSessionHookForwarder(['--port', String(port), '--token', 'tok']) + ); + } finally { + out.restore(); + } + + expect(hitPath).toBe('/hook/session-start'); + expect(out.get()).toBe(''); + }); +}); diff --git a/cli/src/claude/utils/sessionHookForwarder.ts b/cli/src/claude/utils/sessionHookForwarder.ts index 8cc206d30..64d6c3521 100644 --- a/cli/src/claude/utils/sessionHookForwarder.ts +++ b/cli/src/claude/utils/sessionHookForwarder.ts @@ -6,6 +6,84 @@ function logError(message: string, error?: unknown): void { process.stderr.write(`[hook-forwarder] ${message}${suffix}\n`); } +export type PreToolUseDecision = { + permissionDecision: 'allow' | 'deny'; + reason?: string; + updatedInput?: Record; +}; + +/** Read the hook event name from a hook stdin payload, or null if unparseable. */ +export function detectHookEventName(body: Buffer | string): string | null { + try { + const parsed = JSON.parse(typeof body === 'string' ? body : body.toString('utf-8')); + if (parsed && typeof parsed === 'object' && typeof parsed.hook_event_name === 'string') { + return parsed.hook_event_name; + } + } catch { + // Not JSON / no event name — caller falls back to the session-start path. + } + return null; +} + +/** + * Wrap a permission decision in the JSON shape claude's PreToolUse hook reads + * from stdout. `permissionDecision` is always allow/deny — never `ask` (which + * would make claude fall back to its own TUI prompt and stall the PTY). 
+ */ +export function buildPreToolUseStdout(decision: PreToolUseDecision): string { + const hookSpecificOutput: Record = { + hookEventName: 'PreToolUse', + permissionDecision: decision.permissionDecision + }; + if (decision.reason) { + hookSpecificOutput.permissionDecisionReason = decision.reason; + } + if (decision.updatedInput) { + hookSpecificOutput.updatedInput = decision.updatedInput; + } + return JSON.stringify({ hookSpecificOutput }); +} + +function postHook( + port: number, + token: string, + path: string, + body: Buffer +): Promise<{ statusCode?: number; body: string; error: boolean }> { + return new Promise((resolve) => { + const chunks: Buffer[] = []; + const req = request( + { + host: '127.0.0.1', + port, + method: 'POST', + path, + headers: { + 'Content-Type': 'application/json', + 'Content-Length': body.length, + 'x-hapi-hook-token': token + } + }, + (res) => { + res.on('data', (chunk) => chunks.push(chunk as Buffer)); + res.on('error', (error) => { + logError('Error reading hook server response', error); + resolve({ statusCode: res.statusCode, body: Buffer.concat(chunks).toString('utf-8'), error: true }); + }); + res.on('end', () => + resolve({ statusCode: res.statusCode, body: Buffer.concat(chunks).toString('utf-8'), error: false }) + ); + } + ); + + req.on('error', (error) => { + logError('Failed to send hook request', error); + resolve({ body: '', error: true }); + }); + req.end(body); + }); +} + function parsePort(value: string | undefined): number | null { if (!value) { return null; @@ -91,40 +169,42 @@ export async function runSessionHookForwarder(args: string[]): Promise { const body = Buffer.concat(chunks); - let hadError = false; - await new Promise((resolve) => { - const req = request({ - host: '127.0.0.1', - port, - method: 'POST', - path: '/hook/session-start', - headers: { - 'Content-Type': 'application/json', - 'Content-Length': body.length, - 'x-hapi-hook-token': token + // PTY-mode permission bridge: a PreToolUse hook must wait for 
the web + // decision and echo it on stdout (allow/deny). Everything else (chiefly + // SessionStart) keeps the original fire-and-forget behavior. + if (detectHookEventName(body) === 'PreToolUse') { + const response = await postHook(port, token, '/hook/pre-tool-use', body); + + // Fail closed: if the bridge is unreachable or replies oddly, deny the + // tool rather than silently letting it run. Always exit 0 with valid + // stdout so claude honors the decision instead of treating the hook as + // failed (which would fall back to its own TUI prompt). + let decision: PreToolUseDecision = { + permissionDecision: 'deny', + reason: 'Permission bridge unavailable.' + }; + if (!response.error && response.statusCode === 200) { + try { + const parsed = JSON.parse(response.body); + if (parsed?.permissionDecision === 'allow' || parsed?.permissionDecision === 'deny') { + decision = parsed as PreToolUseDecision; + } + } catch (parseError) { + logError('Failed to parse pre-tool-use decision', parseError); } - }, (res) => { - if (res.statusCode && res.statusCode >= 400) { - hadError = true; - logError(`Hook server responded with status ${res.statusCode}`); - } - res.on('error', (error) => { - hadError = true; - logError('Error reading hook server response', error); - resolve(); - }); - res.on('end', () => resolve()); - res.resume(); - }); - - req.on('error', (error) => { - hadError = true; - logError('Failed to send hook request', error); - resolve(); - }); - req.end(body); - }); - if (hadError) { + } else if (response.statusCode && response.statusCode >= 400) { + logError(`Pre-tool-use hook responded with status ${response.statusCode}`); + } + + process.stdout.write(buildPreToolUseStdout(decision)); + return; + } + + const response = await postHook(port, token, '/hook/session-start', body); + if (response.error || (response.statusCode && response.statusCode >= 400)) { + if (response.statusCode && response.statusCode >= 400) { + logError(`Hook server responded with status 
${response.statusCode}`); + } process.exitCode = 1; } } catch (error) { diff --git a/cli/src/claude/utils/startHookServer.test.ts b/cli/src/claude/utils/startHookServer.test.ts index 2e5b2d57d..3ecad254b 100644 --- a/cli/src/claude/utils/startHookServer.test.ts +++ b/cli/src/claude/utils/startHookServer.test.ts @@ -2,7 +2,7 @@ import { describe, it, expect } from 'vitest' import { request } from 'node:http' import { startHookServer, type SessionHookData } from './startHookServer' -const sendHookRequest = async (port: number, body: string, token?: string): Promise<{ statusCode?: number; body: string }> => { +const sendHookRequest = async (port: number, body: string, token?: string, path = '/hook/session-start'): Promise<{ statusCode?: number; body: string }> => { return await new Promise((resolve, reject) => { const headers: Record = { 'Content-Type': 'application/json', @@ -15,7 +15,7 @@ const sendHookRequest = async (port: number, body: string, token?: string): Prom const req = request({ host: '127.0.0.1', port, - path: '/hook/session-start', + path, method: 'POST', headers }, (res) => { @@ -114,4 +114,83 @@ describe('startHookServer', () => { expect(hookCalled).toBe(false) }) + + describe('pre-tool-use', () => { + const sendPreToolUse = (port: number, payload: unknown, token?: string) => + sendHookRequest(port, JSON.stringify(payload), token, '/hook/pre-tool-use') + + it('forwards the tool call to onPreToolUse and returns its decision', async () => { + let received: unknown = null + const server = await startHookServer({ + onSessionHook: () => {}, + onPreToolUse: async (data) => { + received = data + return { permissionDecision: 'deny', reason: 'not allowed' } + } + }) + + try { + const response = await sendPreToolUse( + server.port, + { tool_name: 'Bash', tool_input: { command: 'ls' }, tool_use_id: 'tc-1', hook_event_name: 'PreToolUse' }, + server.token + ) + expect(response.statusCode).toBe(200) + expect(JSON.parse(response.body)).toEqual({ permissionDecision: 
'deny', reason: 'not allowed' }) + } finally { + server.stop() + } + + expect((received as { tool_name?: string }).tool_name).toBe('Bash') + }) + + it('allows by default when no onPreToolUse handler is wired', async () => { + const server = await startHookServer({ onSessionHook: () => {} }) + try { + const response = await sendPreToolUse( + server.port, + { tool_name: 'Bash', tool_use_id: 'tc-2' }, + server.token + ) + expect(response.statusCode).toBe(200) + expect(JSON.parse(response.body)).toEqual({ permissionDecision: 'allow' }) + } finally { + server.stop() + } + }) + + it('fails closed (deny) when the handler throws', async () => { + const server = await startHookServer({ + onSessionHook: () => {}, + onPreToolUse: async () => { + throw new Error('bridge down') + } + }) + try { + const response = await sendPreToolUse(server.port, { tool_name: 'Write', tool_use_id: 'tc-3' }, server.token) + expect(response.statusCode).toBe(200) + expect(JSON.parse(response.body).permissionDecision).toBe('deny') + } finally { + server.stop() + } + }) + + it('returns 401 when the token is missing', async () => { + let called = false + const server = await startHookServer({ + onSessionHook: () => {}, + onPreToolUse: async () => { + called = true + return { permissionDecision: 'allow' } + } + }) + try { + const response = await sendPreToolUse(server.port, { tool_name: 'Bash' }) + expect(response.statusCode).toBe(401) + } finally { + server.stop() + } + expect(called).toBe(false) + }) + }) }) diff --git a/cli/src/claude/utils/startHookServer.ts b/cli/src/claude/utils/startHookServer.ts index 02073edfe..63aed9663 100644 --- a/cli/src/claude/utils/startHookServer.ts +++ b/cli/src/claude/utils/startHookServer.ts @@ -22,9 +22,37 @@ export interface SessionHookData { [key: string]: unknown; } +/** + * Data received from Claude's PreToolUse hook (PTY mode only). claude sends this + * before every tool call so we can bridge the approval to the web. 
+ */ +export interface PreToolUseHookData { + session_id?: string; + tool_name?: string; + tool_input?: unknown; + tool_use_id?: string; + permission_mode?: string; + cwd?: string; + hook_event_name?: string; + [key: string]: unknown; +} + +/** Decision returned to claude for a PreToolUse tool call. Never 'ask' (would stall the PTY). */ +export interface PreToolUseDecision { + permissionDecision: 'allow' | 'deny'; + reason?: string; + updatedInput?: Record; +} + export interface HookServerOptions { /** Called when a session hook is received with a valid session ID. */ onSessionHook: (sessionId: string, data: SessionHookData) => void; + /** + * Called for each PreToolUse hook (PTY mode). Resolves with the allow/deny + * decision once the user answers; may legitimately take minutes. When + * omitted, tool calls are allowed (no-op), matching --yolo behavior. + */ + onPreToolUse?: (data: PreToolUseHookData) => Promise; /** Optional token to require for hook requests. */ token?: string; } @@ -130,6 +158,61 @@ export async function startHookServer(options: HookServerOptions): Promise = { startedBy?: 'runner' | 'terminal' - startingMode?: 'local' | 'remote' + startingMode?: 'local' | 'remote' | 'pty' permissionMode?: TPermissionMode model?: string modelReasoningEffort?: string @@ -22,10 +22,10 @@ export function parseRemoteAgentCommandOptions { diff --git a/cli/src/gemini/runGemini.ts b/cli/src/gemini/runGemini.ts index 34b13026e..32ac0aeaa 100644 --- a/cli/src/gemini/runGemini.ts +++ b/cli/src/gemini/runGemini.ts @@ -18,7 +18,7 @@ import { getInvokedCwd } from '@/utils/invokedCwd'; export async function runGemini(opts: { startedBy?: 'runner' | 'terminal'; - startingMode?: 'local' | 'remote'; + startingMode?: 'local' | 'remote' | 'pty'; permissionMode?: PermissionMode; model?: string; resumeSessionId?: string; @@ -65,8 +65,8 @@ export async function runGemini(opts: { }); const { api, session } = bootstrap; - const startingMode: 'local' | 'remote' = opts.startingMode - ?? 
(startedBy === 'runner' ? 'remote' : 'local'); + const startingMode: 'local' | 'remote' = opts.startingMode === 'pty' ? 'remote' + : (opts.startingMode ?? (startedBy === 'runner' ? 'remote' : 'local')); setControlledByUser(session, startingMode); diff --git a/cli/src/modules/common/hooks/generateHookSettings.test.ts b/cli/src/modules/common/hooks/generateHookSettings.test.ts new file mode 100644 index 000000000..50f93b7cd --- /dev/null +++ b/cli/src/modules/common/hooks/generateHookSettings.test.ts @@ -0,0 +1,79 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { readFileSync, rmSync } from 'node:fs'; +import { generateHookSettingsFile } from './generateHookSettings'; + +type WrittenSettings = { + hooks: { + SessionStart: Array<{ matcher: string; hooks: Array<{ type: string; command: string; timeout?: number }> }>; + PreToolUse?: Array<{ matcher: string; hooks: Array<{ type: string; command: string; timeout?: number }> }>; + }; +}; + +const created: string[] = []; + +function readSettings(filepath: string): WrittenSettings { + created.push(filepath); + return JSON.parse(readFileSync(filepath, 'utf-8')) as WrittenSettings; +} + +afterEach(() => { + for (const filepath of created.splice(0)) { + try { + rmSync(filepath, { force: true }); + } catch { + // best effort + } + } +}); + +describe('generateHookSettingsFile', () => { + it('registers SessionStart with the hook-forwarder command', () => { + const settings = readSettings( + generateHookSettingsFile(45678, 'tok-abc', { + filenamePrefix: 'test-session-hook', + logLabel: 'test' + }) + ); + + expect(settings.hooks.SessionStart).toHaveLength(1); + const entry = settings.hooks.SessionStart[0]; + expect(entry.matcher).toBe('*'); + expect(entry.hooks[0].type).toBe('command'); + expect(entry.hooks[0].command).toContain('hook-forwarder'); + expect(entry.hooks[0].command).toContain('45678'); + expect(entry.hooks[0].command).toContain('tok-abc'); + }); + + it('does NOT register PreToolUse by default 
(SDK/local/remote modes)', () => { + const settings = readSettings( + generateHookSettingsFile(45678, 'tok-abc', { + filenamePrefix: 'test-session-hook', + logLabel: 'test' + }) + ); + + expect(settings.hooks.PreToolUse).toBeUndefined(); + }); + + it('registers PreToolUse only when includePreToolUse is set (PTY mode)', () => { + const settings = readSettings( + generateHookSettingsFile(45678, 'tok-abc', { + filenamePrefix: 'test-pty-hook', + logLabel: 'test', + includePreToolUse: true + }) + ); + + expect(settings.hooks.PreToolUse).toHaveLength(1); + const entry = settings.hooks.PreToolUse![0]; + // matcher '*' matches every tool name (claude's Ghz: !q || q==='*' → true) + expect(entry.matcher).toBe('*'); + expect(entry.hooks[0].type).toBe('command'); + // same forwarder command — it branches on stdin hook_event_name + expect(entry.hooks[0].command).toBe(settings.hooks.SessionStart[0].hooks[0].command); + // generous timeout so the blocking hook survives a slow phone approval + expect(entry.hooks[0].timeout).toBeGreaterThanOrEqual(600); + // SessionStart keeps claude's default (no explicit timeout) + expect(settings.hooks.SessionStart[0].hooks[0].timeout).toBeUndefined(); + }); +}); diff --git a/cli/src/modules/common/hooks/generateHookSettings.ts b/cli/src/modules/common/hooks/generateHookSettings.ts index b61280131..2ec111835 100644 --- a/cli/src/modules/common/hooks/generateHookSettings.ts +++ b/cli/src/modules/common/hooks/generateHookSettings.ts @@ -9,15 +9,26 @@ type HookCommandConfig = { hooks: Array<{ type: 'command'; command: string; + /** Per-command timeout in SECONDS (claude's hook schema). */ + timeout?: number; }>; }; +// PreToolUse bridges a tool approval to the web and blocks the (synchronous) +// hook until the user answers on their phone — which can take minutes. 
claude's +// default command-hook timeout is 60s; on timeout the decision is dropped and +// claude falls back to its own permission prompt (in PTY that renders in the TUI +// and stalls the chat flow). Give the PreToolUse hook a generous timeout so a +// human has time to respond. +const PRE_TOOL_USE_TIMEOUT_SECONDS = 3600; + type HookSettings = { hooksConfig?: { enabled?: boolean; }; hooks: { SessionStart: HookCommandConfig[]; + PreToolUse?: HookCommandConfig[]; }; }; @@ -25,6 +36,15 @@ export type HookSettingsOptions = { filenamePrefix: string; logLabel: string; hooksEnabled?: boolean; + /** + * Register a PreToolUse hook (PTY mode only). The SDK path routes tool + * approvals through the SDK's canUseTool callback, so it must NOT register + * PreToolUse or every tool would be double-handled. PTY sessions have no + * SDK callback, so they rely on this hook to bridge tool approvals to the + * web. The same forwarder command serves both events; it branches on the + * stdin `hook_event_name`. + */ + includePreToolUse?: boolean; }; function shellQuote(value: string): string { @@ -43,21 +63,29 @@ function shellJoin(parts: string[]): string { return parts.map(shellQuote).join(' '); } -function buildHookSettings(command: string, hooksEnabled?: boolean): HookSettings { +function buildHookSettings(command: string, hooksEnabled?: boolean, includePreToolUse?: boolean): HookSettings { const hooks: HookSettings['hooks'] = { SessionStart: [ { matcher: '*', - hooks: [ - { - type: 'command', - command - } - ] + hooks: [{ type: 'command', command }] } ] }; + if (includePreToolUse) { + // matcher '*' matches every tool name (claude's matcher: !q || q==='*' → all). + // The same forwarder command serves both events; it branches on the + // stdin hook_event_name. The long timeout keeps the (blocking) hook + // alive while the user approves on their phone. 
+ hooks.PreToolUse = [ + { + matcher: '*', + hooks: [{ type: 'command', command, timeout: PRE_TOOL_USE_TIMEOUT_SECONDS }] + } + ]; + } + const settings: HookSettings = { hooks }; if (hooksEnabled !== undefined) { settings.hooksConfig = { @@ -88,7 +116,7 @@ export function generateHookSettingsFile( ]); const hookCommand = shellJoin([command, ...args]); - const settings = buildHookSettings(hookCommand, options.hooksEnabled); + const settings = buildHookSettings(hookCommand, options.hooksEnabled, options.includePreToolUse); writeFileSync(filepath, JSON.stringify(settings, null, 4)); logger.debug(`[${options.logLabel}] Created hook settings file: ${filepath}`); diff --git a/cli/src/modules/common/rpcTypes.ts b/cli/src/modules/common/rpcTypes.ts index 5e243eb87..f1112b371 100644 --- a/cli/src/modules/common/rpcTypes.ts +++ b/cli/src/modules/common/rpcTypes.ts @@ -15,6 +15,7 @@ export interface SpawnSessionOptions { token?: string sessionType?: 'simple' | 'worktree' worktreeName?: string + startingMode?: 'remote' | 'pty' } export type SpawnSessionResult = diff --git a/cli/src/opencode/runOpencode.ts b/cli/src/opencode/runOpencode.ts index a78f93119..9c51ba746 100644 --- a/cli/src/opencode/runOpencode.ts +++ b/cli/src/opencode/runOpencode.ts @@ -19,7 +19,7 @@ import { resolveOpencodeSlashCommand } from './utils/slashCommands'; export async function runOpencode(opts: { startedBy?: 'runner' | 'terminal'; - startingMode?: 'local' | 'remote'; + startingMode?: 'local' | 'remote' | 'pty'; permissionMode?: PermissionMode; model?: string; modelReasoningEffort?: string | null; @@ -37,8 +37,8 @@ export async function runOpencode(opts: { opts.startingMode = 'remote'; } - const startingMode: 'local' | 'remote' = opts.startingMode - ?? (startedBy === 'runner' ? 'remote' : 'local'); + const startingMode: 'local' | 'remote' = opts.startingMode === 'pty' ? 'remote' + : (opts.startingMode ?? (startedBy === 'runner' ? 
'remote' : 'local')); if (opts.permissionMode === 'plan' && startingMode !== 'remote') { throw new Error('OpenCode plan mode is only supported in remote mode'); diff --git a/cli/src/runner/buildCliArgs.test.ts b/cli/src/runner/buildCliArgs.test.ts index 6d809b11b..a80e20f12 100644 --- a/cli/src/runner/buildCliArgs.test.ts +++ b/cli/src/runner/buildCliArgs.test.ts @@ -62,6 +62,52 @@ describe('buildCliArgs', () => { + it('passes --model and --effort through for claude in PTY mode (model/effort at start)', () => { + const args = buildCliArgs('claude', { + directory: '/tmp', + startingMode: 'pty', + model: 'opus', + effort: 'high', + }) + expect(args).toContain('--model') + expect(args[args.indexOf('--model') + 1]).toBe('opus') + expect(args).toContain('--effort') + expect(args[args.indexOf('--effort') + 1]).toBe('high') + expect(args).toContain('--hapi-starting-mode') + expect(args[args.indexOf('--hapi-starting-mode') + 1]).toBe('pty') + }) + + it('does NOT force --yolo for PTY mode (tool approvals are bridged via the PreToolUse hook)', () => { + const args = buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty' }) + expect(args).not.toContain('--yolo') + }) + + it('still honors explicit yolo (the new-session toggle) in PTY mode', () => { + const args = buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty' }, true) + expect(args).toContain('--yolo') + }) + + it('prefers an explicit --permission-mode over yolo in PTY mode', () => { + const args = buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty', permissionMode: 'plan' }, true) + expect(args).toContain('--permission-mode') + expect(args[args.indexOf('--permission-mode') + 1]).toBe('plan') + expect(args).not.toContain('--yolo') + }) + + it('does not add --effort for non-claude agents (claude-only flag)', () => { + const args = buildCliArgs('opencode', { + directory: '/tmp', + effort: 'high', + }) + expect(args).not.toContain('--effort') + }) + + it('omits --model/--effort when not 
specified', () => { + const args = buildCliArgs('claude', { directory: '/tmp', startingMode: 'pty' }) + expect(args).not.toContain('--model') + expect(args).not.toContain('--effort') + }) + it('passes --model-reasoning-effort through for opencode', () => { const args = buildCliArgs('opencode', { directory: '/tmp', diff --git a/cli/src/runner/run.ts b/cli/src/runner/run.ts index f49d3b15b..ca5ea6647 100644 --- a/cli/src/runner/run.ts +++ b/cli/src/runner/run.ts @@ -1112,7 +1112,8 @@ export function buildCliArgs( args.push('--resume', options.resumeSessionId); } } - args.push('--hapi-starting-mode', 'remote', '--started-by', 'runner'); + const startingMode = options.startingMode || 'remote'; + args.push('--hapi-starting-mode', startingMode, '--started-by', 'runner'); if (options.model) { args.push('--model', options.model); } @@ -1127,5 +1128,9 @@ export function buildCliArgs( } else if (yolo) { args.push('--yolo'); } + // PTY tool approvals are bridged from a PreToolUse hook to the web (see + // ptyPermissionHandler + generateHookSettings), so a default-mode PTY session + // prompts for permission just like the SDK path — no implicit bypass. Explicit + // YOLO (the new-session toggle) opts into --yolo via `yolo`. 
return args; } diff --git a/cli/src/ui/ink/ResumeSessionPicker.tsx b/cli/src/ui/ink/ResumeSessionPicker.tsx index 5ef9398ec..185d291f9 100644 --- a/cli/src/ui/ink/ResumeSessionPicker.tsx +++ b/cli/src/ui/ink/ResumeSessionPicker.tsx @@ -1,6 +1,6 @@ import React, { useMemo, useState } from 'react' import { Box, Text, useInput, useStdout } from 'ink' -import type { ResumableSession } from '@hapi/protocol' +import { getFlavorLabel, type ResumableSession } from '@hapi/protocol' import { filterResumeSessions, formatResumeSessionRelativeTime, @@ -85,7 +85,7 @@ function padEndColumns(value: string, width: number): string { function formatSessionLine(session: ResumableSession, width: number): string { const state = getResumeSessionState(session) const time = formatResumeSessionRelativeTime(session.updatedAt).padStart(10) - const prefix = `${time} ${session.flavor.padEnd(8)} ${state.padEnd(8)} ` + const prefix = `${time} ${getFlavorLabel(session.flavor).padEnd(12)} ${state.padEnd(8)} ` const nameBudget = Math.max(12, width - prefix.length) const name = truncateText(getResumeSessionName(session), nameBudget) return padEndColumns(`${prefix}${name}`, width) From 4c60683bfbfef920b4a70b908993d27177d5df59 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 06/11] feat(pty): stream the interactive terminal and structured chat to the web --- hub/src/socket/agentTerminalBuffer.ts | 42 ++++ .../handlers/cli/terminalHandlers.test.ts | 82 ++++++++ .../socket/handlers/cli/terminalHandlers.ts | 44 ++++ hub/src/socket/handlers/terminal.test.ts | 164 ++++++++++++++- hub/src/socket/handlers/terminal.ts | 109 ++++++++++ hub/src/socket/userTerminalBuffer.test.ts | 47 +++++ hub/src/socket/userTerminalBuffer.ts | 31 +++ hub/src/sync/rpcGateway.ts | 5 +- hub/src/sync/syncEngine.ts | 49 ++++- hub/src/web/routes/machines.ts | 4 +- shared/src/apiTypes.ts | 3 +- shared/src/schemas.ts | 7 + shared/src/socket.ts | 27 +++ .../AgentTerminal/AgentTerminalView.tsx 
| 156 +++++++++++++++ web/src/hooks/useAgentTerminalSocket.test.ts | 115 +++++++++++ web/src/hooks/useAgentTerminalSocket.ts | 189 ++++++++++++++++++ web/src/router.tsx | 2 +- web/src/routes/sessions/terminal.test.tsx | 27 ++- web/src/routes/sessions/terminal.tsx | 18 +- web/vite.config.ts | 21 +- 20 files changed, 1125 insertions(+), 17 deletions(-) create mode 100644 hub/src/socket/agentTerminalBuffer.ts create mode 100644 hub/src/socket/userTerminalBuffer.test.ts create mode 100644 hub/src/socket/userTerminalBuffer.ts create mode 100644 web/src/components/AgentTerminal/AgentTerminalView.tsx create mode 100644 web/src/hooks/useAgentTerminalSocket.test.ts create mode 100644 web/src/hooks/useAgentTerminalSocket.ts diff --git a/hub/src/socket/agentTerminalBuffer.ts b/hub/src/socket/agentTerminalBuffer.ts new file mode 100644 index 000000000..23cd52fb9 --- /dev/null +++ b/hub/src/socket/agentTerminalBuffer.ts @@ -0,0 +1,42 @@ +// Per-session scrollback buffer for the agent (PTY) terminal output. +// +// Claude's interactive TUI only emits output when something changes. A web +// client that subscribes while the TUI is idle therefore receives nothing and +// shows a black screen until the next keystroke forces a redraw. We keep a +// rolling buffer of recent raw output so a fresh subscriber can be replayed the +// current screen immediately. +// +// The buffer is a byte-bounded ring: the oldest bytes are dropped first. The +// most recent full-screen redraw sequence from the TUI is always preserved at +// the tail, so replaying the buffer reconstructs the current screen (older, +// possibly-truncated escape sequences at the head are overwritten by later +// redraws). + +const MAX_BUFFER_BYTES = 256 * 1024 + +const buffers = new Map() + +export function appendAgentTerminalOutput(sessionId: string, data: string): void { + if (!data) return + const next = (buffers.get(sessionId) ?? '') + data + buffers.set( + sessionId, + next.length > MAX_BUFFER_BYTES ? 
next.slice(next.length - MAX_BUFFER_BYTES) : next + ) +} + +// Replay variant: when a full-screen TUI exits (e.g. an archived alt-screen +// session) it emits an alt-screen-exit (`CSI ? 1049 l`) that restores the empty +// normal screen — so a raw replay would render black. If the buffer's LAST +// alt-screen toggle is an exit (the process ended without re-entering), drop it +// and everything after, leaving the final alt-screen frame visible. Live sessions +// stay in the alt screen (no trailing exit), so this is a no-op for them. +const TRAILING_ALT_EXIT = /\x1b\[\?1049l(?:(?!\x1b\[\?1049h)[\s\S])*$/ +export function getAgentTerminalReplay(sessionId: string): string { + const raw = buffers.get(sessionId) ?? '' + return raw.replace(TRAILING_ALT_EXIT, '') +} + +export function clearAgentTerminalBuffer(sessionId: string): void { + buffers.delete(sessionId) +} diff --git a/hub/src/socket/handlers/cli/terminalHandlers.test.ts b/hub/src/socket/handlers/cli/terminalHandlers.test.ts index 4983b6351..695a3a394 100644 --- a/hub/src/socket/handlers/cli/terminalHandlers.test.ts +++ b/hub/src/socket/handlers/cli/terminalHandlers.test.ts @@ -42,15 +42,97 @@ class FakeSocket { } } +type RoomEmit = { + room: string + event: string + data: unknown +} + class FakeNamespace { readonly sockets = new Map() + readonly roomEmits: RoomEmit[] = [] + + to(room: string): { emit: (event: string, data: unknown) => FakeNamespace } { + const self = this + return { + emit(event: string, data: unknown) { + self.roomEmits.push({ room, event, data }) + return self + } + } + } } function lastEmit(socket: FakeSocket, event: string): EmittedEvent | undefined { return [...socket.emitted].reverse().find((entry) => entry.event === event) } +function lastRoomEmit(namespace: FakeNamespace, event: string): RoomEmit | undefined { + return [...namespace.roomEmits].reverse().find((entry) => entry.event === event) +} + +function firstRoomEmit(namespace: FakeNamespace, event: string): RoomEmit | undefined { + 
return namespace.roomEmits.find((entry) => entry.event === event) +} + describe('cli terminal handlers', () => { + it('forwards agent-terminal:output to the agent-terminal room on terminal namespace', () => { + const cliSocket = new FakeSocket('cli-socket') + const terminalNamespace = new FakeNamespace() + const terminalRegistry = new TerminalRegistry({ idleTimeoutMs: 0 }) + + registerTerminalHandlers(cliSocket as unknown as CliSocketWithData, { + terminalRegistry, + terminalNamespace: terminalNamespace as never, + resolveSessionAccess: () => ({ ok: true, value: {} as StoredSession }), + emitAccessError: () => { + throw new Error('Unexpected access error') + } + }) + + cliSocket.trigger('agent-terminal:output', { + sessionId: 'session-1', + terminalId: 'agent', + data: '\x1b[32mhello\x1b[0m' + }) + + const emit = lastRoomEmit(terminalNamespace, 'agent-terminal:output') + expect(emit).toBeDefined() + expect(emit?.room).toBe('agent-session:session-1') + expect(emit?.data).toEqual({ + sessionId: 'session-1', + terminalId: 'agent', + data: '\x1b[32mhello\x1b[0m' + }) + }) + + it('rejects agent-terminal:output when session access is denied', () => { + const cliSocket = new FakeSocket('cli-socket') + const terminalNamespace = new FakeNamespace() + const terminalRegistry = new TerminalRegistry({ idleTimeoutMs: 0 }) + const accessErrors: { scope: string; id: string; reason: string }[] = [] + + registerTerminalHandlers(cliSocket as unknown as CliSocketWithData, { + terminalRegistry, + terminalNamespace: terminalNamespace as never, + resolveSessionAccess: () => ({ ok: false, reason: 'access-denied' }), + emitAccessError: (scope, id, reason) => { + accessErrors.push({ scope, id, reason }) + } + }) + + cliSocket.trigger('agent-terminal:output', { + sessionId: 'session-1', + terminalId: 'agent', + data: 'should not pass' + }) + + expect(terminalNamespace.roomEmits).toHaveLength(0) + expect(accessErrors).toEqual([ + { scope: 'session', id: 'session-1', reason: 'access-denied' } 
+ ]) + }) + it('removes stale registry entries after terminal errors', () => { const cliSocket = new FakeSocket('cli-socket') const terminalSocket = new FakeSocket('terminal-socket') diff --git a/hub/src/socket/handlers/cli/terminalHandlers.ts b/hub/src/socket/handlers/cli/terminalHandlers.ts index bf54f6df1..a83f20deb 100644 --- a/hub/src/socket/handlers/cli/terminalHandlers.ts +++ b/hub/src/socket/handlers/cli/terminalHandlers.ts @@ -1,3 +1,4 @@ +import { z } from 'zod' import { TerminalErrorPayloadSchema, TerminalExitPayloadSchema, @@ -8,6 +9,8 @@ import type { StoredSession } from '../../../store' import type { TerminalRegistry } from '../../terminalRegistry' import type { CliSocketWithData, SocketServer } from '../../socketTypes' import type { AccessErrorReason, AccessResult } from './types' +import { appendAgentTerminalOutput, clearAgentTerminalBuffer } from '../../agentTerminalBuffer' +import { appendUserTerminalOutput, clearUserTerminalBuffer } from '../../userTerminalBuffer' type ResolveSessionAccess = (sessionId: string) => AccessResult @@ -68,9 +71,47 @@ export function registerTerminalHandlers(socket: CliSocketWithData, deps: Termin return } terminalRegistry.markActivity(parsed.data.terminalId) + // Keep a scrollback buffer so reconnecting web clients see the + // current terminal content instead of a black screen. + appendUserTerminalOutput(parsed.data.sessionId, parsed.data.terminalId, parsed.data.data) forwardTerminalEvent('terminal:output', parsed.data) }) + socket.on('agent-terminal:output', (data: unknown) => { + const parsed = terminalOutputSchema.safeParse(data) + if (!parsed.success) { + return + } + const sessionAccess = resolveSessionAccess(parsed.data.sessionId) + if (!sessionAccess.ok) { + emitAccessError('session', parsed.data.sessionId, sessionAccess.reason) + return + } + // Keep a scrollback buffer so a web client that subscribes later can be + // replayed the current screen (avoids the black-screen-until-keystroke). 
+ appendAgentTerminalOutput(parsed.data.sessionId, parsed.data.data) + // Broadcast to the agent-terminal room (distinct from the user-terminal's + // `session:${id}` room) so only agent-terminal viewers receive PTY output + // and the streaming-teardown viewer count stays accurate. + terminalNamespace.to(`agent-session:${parsed.data.sessionId}`).emit('agent-terminal:output', parsed.data) + }) + + socket.on('agent-terminal:reset', (data: unknown) => { + const parsed = z.object({ sessionId: z.string().min(1) }).safeParse(data) + if (!parsed.success) { + return + } + const sessionAccess = resolveSessionAccess(parsed.data.sessionId) + if (!sessionAccess.ok) { + emitAccessError('session', parsed.data.sessionId, sessionAccess.reason) + return + } + // A fresh agent PTY spawned — drop the previous session's scrollback so a + // re-subscribing viewer doesn't replay stale (and alt-screen-corrupted) + // output from before the restart. + clearAgentTerminalBuffer(parsed.data.sessionId) + }) + socket.on('terminal:exit', (data: unknown) => { const parsed = terminalExitSchema.safeParse(data) if (!parsed.success) { @@ -81,6 +122,9 @@ export function registerTerminalHandlers(socket: CliSocketWithData, deps: Termin return } terminalRegistry.remove(parsed.data.terminalId) + // Drop the scrollback so a reconnecting viewer doesn't replay a dead + // terminal's output, and so the buffer doesn't leak for the session's life. 
+ clearUserTerminalBuffer(parsed.data.sessionId) const terminalSocket = terminalNamespace.sockets.get(entry.socketId) if (!terminalSocket) { return diff --git a/hub/src/socket/handlers/terminal.test.ts b/hub/src/socket/handlers/terminal.test.ts index 0a9cf2382..542114cad 100644 --- a/hub/src/socket/handlers/terminal.test.ts +++ b/hub/src/socket/handlers/terminal.test.ts @@ -1,6 +1,7 @@ -import { describe, expect, it } from 'bun:test' +import { beforeEach, describe, expect, it } from 'bun:test' import { registerTerminalHandlers } from './terminal' import { TerminalRegistry } from '../terminalRegistry' +import { appendAgentTerminalOutput, clearAgentTerminalBuffer, getAgentTerminalReplay } from '../agentTerminalBuffer' import type { SocketServer, SocketWithData } from '../socketTypes' type EmittedEvent = { @@ -12,6 +13,7 @@ class FakeSocket { readonly id: string readonly data: Record = {} readonly emitted: EmittedEvent[] = [] + readonly rooms = new Set() private readonly handlers = new Map void>() constructor(id: string) { @@ -28,6 +30,10 @@ class FakeSocket { return true } + join(room: string): void { + this.rooms.add(room) + } + trigger(event: string, data?: unknown): void { const handler = this.handlers.get(event) if (!handler) { @@ -69,6 +75,7 @@ type Harness = { function createHarness(options?: { sessionActive?: boolean + sessionNamespace?: string maxTerminalsPerSocket?: number maxTerminalsPerSession?: number }): Harness { @@ -80,7 +87,10 @@ function createHarness(options?: { registerTerminalHandlers(terminalSocket as unknown as SocketWithData, { io: io as unknown as SocketServer, - getSession: () => ({ active: options?.sessionActive ?? true, namespace: 'default' }), + getSession: () => ({ + active: options?.sessionActive ?? true, + namespace: options?.sessionNamespace ?? 'default' + }), terminalRegistry, maxTerminalsPerSocket: options?.maxTerminalsPerSocket ?? 4, maxTerminalsPerSession: options?.maxTerminalsPerSession ?? 
4 @@ -200,6 +210,156 @@ describe('terminal socket handlers', () => { expect(terminalRegistry.get('terminal-1')).toBeNull() }) + it('joins terminal socket to session room on create', () => { + const { terminalSocket, cliNamespace } = createHarness() + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('terminal:create', { + sessionId: 'session-1', + terminalId: 'terminal-1', + cols: 80, + rows: 24 + }) + + expect(terminalSocket.rooms.has('session:session-1')).toBe(true) + }) + + describe('agent-terminal:subscribe', () => { + beforeEach(() => { + clearAgentTerminalBuffer('session-1') + clearAgentTerminalBuffer('session-2') + }) + + it('replays buffered agent output on subscribe', () => { + const { terminalSocket } = createHarness() + appendAgentTerminalOutput('session-1', '\x1b[32mInitial output\x1b[0m\r\n') + appendAgentTerminalOutput('session-1', 'More output\r\n') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(true) + const replayEvent = lastEmit(terminalSocket, 'agent-terminal:output') + expect(replayEvent).toBeDefined() + expect(replayEvent?.data).toEqual({ + sessionId: 'session-1', + terminalId: 'agent', + data: '\x1b[32mInitial output\x1b[0m\r\nMore output\r\n' + }) + }) + + it('rejects subscribe to a session in another namespace (no join, no replay)', () => { + // A valid token for the 'default' namespace must not be able to + // subscribe to a session that belongs to a different namespace. 
+ const { terminalSocket } = createHarness({ sessionNamespace: 'other' }) + appendAgentTerminalOutput('session-1', 'secret-output-from-other-namespace') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(false) + expect(lastEmit(terminalSocket, 'agent-terminal:output')).toBeUndefined() + }) + + it('rejects subscribe when the session is inactive (no join, no replay)', () => { + const { terminalSocket } = createHarness({ sessionActive: false }) + appendAgentTerminalOutput('session-1', 'stale-output') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(false) + expect(lastEmit(terminalSocket, 'agent-terminal:output')).toBeUndefined() + }) + + it('joins a dedicated agent-terminal room (not the user-terminal session room)', () => { + const { terminalSocket } = createHarness() + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + + // Agent-terminal viewers must NOT land in the user-terminal `session:` room, + // otherwise the streaming-teardown viewer count counts the wrong sockets. + expect(terminalSocket.rooms.has('agent-session:session-1')).toBe(true) + expect(terminalSocket.rooms.has('session:session-1')).toBe(false) + const replayEvent = lastEmit(terminalSocket, 'agent-terminal:output') + expect(replayEvent).toBeUndefined() + }) + + it('strips a trailing alt-screen-exit so an exited TUI replays its last frame (not black)', () => { + clearAgentTerminalBuffer('session-3') + // alt-screen enter + a frame, then the process exits (alt-screen exit). 
+ appendAgentTerminalOutput('session-3', '\x1b[?1049h\x1b[HLAST FRAME') + appendAgentTerminalOutput('session-3', '\r\n\x1b[?1049l\x1b[?25h') + const replay = getAgentTerminalReplay('session-3') + expect(replay).toContain('LAST FRAME') + expect(replay).not.toContain('\x1b[?1049l') + clearAgentTerminalBuffer('session-3') + }) + + it('keeps alt-screen content intact for a live (still in alt-screen) TUI', () => { + clearAgentTerminalBuffer('session-4') + appendAgentTerminalOutput('session-4', '\x1b[?1049h\x1b[HLIVE FRAME') + const replay = getAgentTerminalReplay('session-4') + expect(replay).toBe('\x1b[?1049h\x1b[HLIVE FRAME') + clearAgentTerminalBuffer('session-4') + }) + + it('replays buffer per-session independently', () => { + const { terminalSocket } = createHarness() + appendAgentTerminalOutput('session-1', 'data-for-session-1') + appendAgentTerminalOutput('session-2', 'data-for-session-2') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-2' }) + + const replayEvent = lastEmit(terminalSocket, 'agent-terminal:output') + expect(replayEvent?.data).toEqual({ + sessionId: 'session-2', + terminalId: 'agent', + data: 'data-for-session-2' + }) + }) + + it('replays same buffer on repeated subscribe (no clear)', () => { + const { terminalSocket } = createHarness() + appendAgentTerminalOutput('session-1', 'persistent-data') + + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + const firstReplay = lastEmit(terminalSocket, 'agent-terminal:output') + expect(firstReplay).toBeDefined() + expect((firstReplay!.data as { data: string }).data).toBe('persistent-data') + + // Second subscribe gets the same buffer again (not cleared) + terminalSocket.emitted.length = 0 + terminalSocket.trigger('agent-terminal:subscribe', { sessionId: 'session-1' }) + const secondReplay = lastEmit(terminalSocket, 'agent-terminal:output') + expect(secondReplay).toBeDefined() + expect((secondReplay!.data as { data: string 
}).data).toBe('persistent-data') + }) + }) + + describe('agent-terminal:resize', () => { + it('forwards a resize to the CLI socket for an authorized active session', () => { + const { terminalSocket, cliNamespace } = createHarness() + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:resize', { sessionId: 'session-1', cols: 100, rows: 30 }) + + const resizeEvent = lastEmit(cliSocket, 'agent-terminal:resize') + expect(resizeEvent?.data).toEqual({ sessionId: 'session-1', cols: 100, rows: 30 }) + }) + + it('does not forward a resize when the session is inactive (guard, not just pickCliSocket)', () => { + // CLI socket IS connected in this socket's own namespace, so without + // the authorization guard pickCliSocketId would find it and emit. + const { terminalSocket, cliNamespace } = createHarness({ sessionActive: false }) + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:resize', { sessionId: 'session-1', cols: 100, rows: 30 }) + + expect(lastEmit(cliSocket, 'agent-terminal:resize')).toBeUndefined() + }) + }) + it('enforces per-socket terminal limits', () => { const { terminalSocket, cliNamespace } = createHarness({ maxTerminalsPerSocket: 1 }) const cliSocket = new FakeSocket('cli-socket-1') diff --git a/hub/src/socket/handlers/terminal.ts b/hub/src/socket/handlers/terminal.ts index 086f25df1..dc5350014 100644 --- a/hub/src/socket/handlers/terminal.ts +++ b/hub/src/socket/handlers/terminal.ts @@ -2,6 +2,8 @@ import { TerminalOpenPayloadSchema } from '@hapi/protocol' import { z } from 'zod' import type { TerminalRegistry, TerminalRegistryEntry } from '../terminalRegistry' import type { SocketServer, SocketWithData } from '../socketTypes' +import { getAgentTerminalReplay } from '../agentTerminalBuffer' +import { getUserTerminalBuffer } from '../userTerminalBuffer' const 
terminalCreateSchema = TerminalOpenPayloadSchema @@ -127,6 +129,8 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH return } + socket.join(`session:${sessionId}`) + cliSocket.emit('terminal:open', { sessionId, terminalId, @@ -134,6 +138,16 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH rows }) terminalRegistry.markActivity(terminalId) + + // Replay buffered output so the terminal shows scrollback immediately + // instead of staying black until the next output from CLI. + // The buffer is never explicitly cleared here: it persists so a client + // that navigates away and back (new socket, isReconnect=false) still + // sees the accumulated output. It is bounded to 256KB per session. + const buffered = getUserTerminalBuffer(sessionId) + if (buffered && !isReconnect) { + socket.emit('terminal:output', { terminalId, data: buffered }) + } }) socket.on('terminal:write', (data: unknown) => { @@ -201,10 +215,105 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH emitCloseToCli(entry) }) + const emitToCliForSession = (sessionId: string, event: 'agent-terminal:resize' | 'agent-terminal:refresh' | 'agent-terminal:idle', payload: Record): void => { + const cliSocketId = pickCliSocketId(sessionId) + if (!cliSocketId) return + const cliSocket = cliNamespace.sockets.get(cliSocketId) + if (!cliSocket || cliSocket.data.namespace !== namespace) return + cliSocket.emit(event, payload as never) + } + + // Sessions this socket is viewing the agent terminal for. When the last + // viewer of a session leaves (this socket unsubscribes or disconnects and the + // room empties), tell the CLI to stop streaming that PTY. 
+ // + // Agent-terminal viewers get their OWN room, distinct from the user-terminal's + // `session:${id}` room: the streaming-teardown count must reflect agent-terminal + // viewers only, otherwise a user-terminal viewer in `session:${id}` would keep + // the agent PTY streaming forever after every agent-terminal viewer has left. + const agentTerminalRoom = (sessionId: string): string => `agent-session:${sessionId}` + const subscribedAgentSessions = new Set() + // A valid token for one namespace must not be able to act on (subscribe to, + // replay, or drive) a session in another namespace. Same shape as the + // terminal:create guard (terminal.ts:95). Callers drop silently rather than + // emitting an error: surfacing "session inactive/unavailable" to an + // unauthorized caller would leak existence, and the only honest-client + // rejection path (a session that just went inactive) unmounts the terminal + // view anyway via canViewAgentTerminal, so there is no live viewer to inform. + const isAuthorizedSession = (sessionId: string): boolean => { + const session = getSession(sessionId) + return Boolean(namespace && session && session.namespace === namespace && session.active) + } + const tellCliIfNoViewers = (sessionId: string): void => { + const size = socket.nsp.adapter.rooms.get(agentTerminalRoom(sessionId))?.size ?? 0 + if (size === 0) { + emitToCliForSession(sessionId, 'agent-terminal:idle', { sessionId }) + } + } + + socket.on('agent-terminal:subscribe', (data: unknown) => { + const parsed = z.object({ sessionId: z.string().min(1) }).safeParse(data) + if (!parsed.success) { + return + } + const { sessionId } = parsed.data + if (!isAuthorizedSession(sessionId)) { + return + } + socket.join(agentTerminalRoom(sessionId)) + subscribedAgentSessions.add(sessionId) + // Replay recent output so the terminal renders the current screen + // immediately instead of staying black until the next keystroke. 
+ // terminalId must match the web client's filter ('agent'), not a + // synthetic id, otherwise the replayed data is silently dropped. + const buffered = getAgentTerminalReplay(sessionId) + if (buffered) { + socket.emit('agent-terminal:output', { sessionId, terminalId: 'agent', data: buffered }) + } + // Full-screen TUIs (e.g. claude's ink alt-screen) can't always + // be reconstructed from a byte-ring replay (truncated alt-screen enter, + // stale alt-screen-exit from a prior spawn). Ask the CLI to repaint the + // current screen so a freshly (re)subscribed viewer never sees black. + emitToCliForSession(sessionId, 'agent-terminal:refresh', { sessionId }) + }) + + socket.on('agent-terminal:unsubscribe', (data: unknown) => { + const parsed = z.object({ sessionId: z.string().min(1) }).safeParse(data) + if (!parsed.success) { + return + } + const { sessionId } = parsed.data + socket.leave(agentTerminalRoom(sessionId)) + subscribedAgentSessions.delete(sessionId) + tellCliIfNoViewers(sessionId) + }) + + socket.on('agent-terminal:resize', (data: unknown) => { + const parsed = z.object({ + sessionId: z.string().min(1), + cols: z.number().int().positive(), + rows: z.number().int().positive() + }).safeParse(data) + if (!parsed.success) { + return + } + const { sessionId, cols, rows } = parsed.data + if (!isAuthorizedSession(sessionId)) { + return + } + emitToCliForSession(sessionId, 'agent-terminal:resize', { sessionId, cols, rows }) + }) + socket.on('disconnect', () => { const removed = terminalRegistry.removeBySocket(socket.id) for (const entry of removed) { emitCloseToCli(entry) } + // On disconnect the socket has already left its rooms, so the room size + // now reflects the remaining viewers — tell the CLI to stop streaming any + // agent terminal this socket was the last viewer of. 
+ for (const sessionId of subscribedAgentSessions) { + tellCliIfNoViewers(sessionId) + } }) } diff --git a/hub/src/socket/userTerminalBuffer.test.ts b/hub/src/socket/userTerminalBuffer.test.ts new file mode 100644 index 000000000..898e6cbf2 --- /dev/null +++ b/hub/src/socket/userTerminalBuffer.test.ts @@ -0,0 +1,47 @@ +import { describe, it, expect } from 'bun:test' +import { appendUserTerminalOutput, getUserTerminalBuffer, clearUserTerminalBuffer } from './userTerminalBuffer' + +describe('userTerminalBuffer', () => { + it('stores and retrieves output per session', () => { + appendUserTerminalOutput('s1', 't1', 'hello ') + appendUserTerminalOutput('s1', 't1', 'world') + expect(getUserTerminalBuffer('s1')).toBe('hello world') + }) + + it('keeps sessions isolated', () => { + appendUserTerminalOutput('sa', 't1', 'alpha') + appendUserTerminalOutput('sb', 't1', 'beta') + expect(getUserTerminalBuffer('sa')).toBe('alpha') + expect(getUserTerminalBuffer('sb')).toBe('beta') + }) + + it('returns empty string for unknown session', () => { + expect(getUserTerminalBuffer('nonexistent')).toBe('') + }) + + it('ignores empty data', () => { + appendUserTerminalOutput('s3', 't1', 'keep') + appendUserTerminalOutput('s3', 't1', '') + expect(getUserTerminalBuffer('s3')).toBe('keep') + }) + + it('clears buffer on demand', () => { + appendUserTerminalOutput('s4', 't1', 'data') + clearUserTerminalBuffer('s4') + expect(getUserTerminalBuffer('s4')).toBe('') + }) + + it('rolls over at max size', () => { + const small = 'a'.repeat(100) + // Fill buffer to near capacity + for (let i = 0; i < 2600; i++) { + appendUserTerminalOutput('s5', 't1', small) + } + const buf = getUserTerminalBuffer('s5') + // Should be at most MAX_BUFFER_BYTES (256KB) + const MAX = 256 * 1024 + expect(buf.length).toBeLessThanOrEqual(MAX) + // Should contain the most recent data (tail preserved) + expect(buf.endsWith(small)).toBe(true) + }) +}) diff --git a/hub/src/socket/userTerminalBuffer.ts 
b/hub/src/socket/userTerminalBuffer.ts
new file mode 100644
index 000000000..0b1e4730a
--- /dev/null
+++ b/hub/src/socket/userTerminalBuffer.ts
@@ -0,0 +1,38 @@
+// Per-session scrollback buffer for the user (remote) terminal output.
+//
+// A web client that navigates away and back creates a new xterm.js instance
+// with a new terminalId, so the previous output is lost. We keep a rolling
+// buffer per session so a fresh subscriber can replay the current terminal
+// content immediately instead of showing a black screen until the next
+// keystroke or output.
+//
+// The buffer is keyed by sessionId only (not terminalId) because each
+// navigation creates a new terminalId for the same session.
+
+// Per-session cap. Despite the name it is compared against string.length,
+// i.e. UTF-16 code units, not encoded bytes.
+const MAX_BUFFER_BYTES = 256 * 1024
+
+const buffers = new Map<string, string>()
+
+// Appends data to the session's rolling buffer, keeping only the newest
+// MAX_BUFFER_BYTES code units. _terminalId is unused: the key is the session.
+export function appendUserTerminalOutput(sessionId: string, _terminalId: string, data: string): void {
+    if (!data) return
+    const next = (buffers.get(sessionId) ?? '') + data
+    let start = Math.max(0, next.length - MAX_BUFFER_BYTES)
+    // Never start the kept tail on the low half of a surrogate pair: a lone
+    // surrogate is ill-formed UTF-16 and has no valid UTF-8 encoding.
+    const first = next.charCodeAt(start)
+    if (start > 0 && first >= 0xdc00 && first <= 0xdfff) start += 1
+    buffers.set(sessionId, start > 0 ? next.slice(start) : next)
+}
+
+// Returns the buffered output for sessionId, or '' when nothing is stored.
+export function getUserTerminalBuffer(sessionId: string): string {
+    return buffers.get(sessionId) ??
'' +} + +export function clearUserTerminalBuffer(sessionId: string): void { + buffers.delete(sessionId) +} diff --git a/hub/src/sync/rpcGateway.ts b/hub/src/sync/rpcGateway.ts index 8e0389401..e16210f81 100644 --- a/hub/src/sync/rpcGateway.ts +++ b/hub/src/sync/rpcGateway.ts @@ -117,13 +117,14 @@ export class RpcGateway { worktreeName?: string, resumeSessionId?: string, effort?: string, - permissionMode?: PermissionMode + permissionMode?: PermissionMode, + startingMode?: 'remote' | 'pty' ): Promise<{ type: 'success'; sessionId: string } | { type: 'error'; message: string }> { try { const result = await this.machineRpc( machineId, RPC_METHODS.SpawnHappySession, - { type: 'spawn-in-directory', directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, resumeSessionId, effort, permissionMode } + { type: 'spawn-in-directory', directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, resumeSessionId, effort, permissionMode, startingMode } ) if (result && typeof result === 'object') { const obj = result as Record diff --git a/hub/src/sync/syncEngine.ts b/hub/src/sync/syncEngine.ts index a7f88c032..db2f296d0 100644 --- a/hub/src/sync/syncEngine.ts +++ b/hub/src/sync/syncEngine.ts @@ -15,6 +15,7 @@ import type { Server } from 'socket.io' import type { Store, CancelQueuedMessageResult } from '../store' import type { HapiSessionExportResult } from '@hapi/protocol/sessionExport' import type { RpcRegistry } from '../socket/rpcRegistry' +import { clearAgentTerminalBuffer } from '../socket/agentTerminalBuffer' import type { SSEManager } from '../sse/sseManager' import { CursorLegacyMigrator, type CursorLegacyMigratorOptions } from '../cursor/cursorLegacyMigrator' @@ -136,7 +137,7 @@ export class SyncEngine { constructor( private readonly store: Store, - io: Server, + private readonly io: Server, rpcRegistry: RpcRegistry, sseManager: SSEManager ) { @@ -319,6 +320,23 @@ export class SyncEngine { // Retry dedup now that this session is 
inactive — a prior dedup may have // skipped it because it was still active at the time. this.triggerDedupIfNeeded(payload.sid) + + // Notify agent-terminal subscribers so the web UI shows a clear + // termination message instead of staying "connected" with stale output. + // Targets the dedicated agent-terminal room (NOT the user-terminal + // `session:${id}` room), matching where agent viewers actually subscribe. + if (typeof this.io.of === 'function') { + this.io.of('/terminal').to(`agent-session:${payload.sid}`).emit('agent-terminal:output', { + sessionId: payload.sid, + terminalId: 'agent', + data: '\r\n[Session terminated]\r\n' + }) + } + // Release the PTY scrollback for a session that has ended (mirrors the + // userTerminalBuffer clear-on-`terminal:exit`); a fresh spawn would also + // reset it, but an ended-and-never-reopened session would otherwise leak + // its buffer for the hub process's lifetime. + clearAgentTerminalBuffer(payload.sid) } handleBackgroundTaskDelta(sessionId: string, delta: { started: number; completed: number }): void { @@ -675,9 +693,10 @@ export class SyncEngine { worktreeName?: string, resumeSessionId?: string, effort?: string, - permissionMode?: PermissionMode + permissionMode?: PermissionMode, + startingMode?: 'remote' | 'pty' ): Promise<{ type: 'success'; sessionId: string } | { type: 'error'; message: string }> { - return await this.rpcGateway.spawnSession( + const result = await this.rpcGateway.spawnSession( machineId, directory, agent, @@ -688,8 +707,20 @@ export class SyncEngine { worktreeName, resumeSessionId, effort, - permissionMode + permissionMode, + startingMode ) + // PTY sessions need the runner to attach the interactive terminal before + // the web client can connect; wait for the session to register active so a + // failed PTY spawn surfaces as an error instead of an empty terminal. Other + // start modes return as soon as the spawn RPC succeeds (legacy behavior). 
+ if (result.type === 'success' && startingMode === 'pty') { + const becameActive = await this.waitForSessionActive(result.sessionId) + if (!becameActive) { + return { type: 'error', message: 'Session spawned but failed to become active' } + } + } + return result } private resolveFlavor(session: Session): AgentFlavor { @@ -1130,6 +1161,13 @@ export class SyncEngine { const preferredPermissionMode = opts?.permissionMode ?? session.permissionMode ?? session.metadata?.preferredPermissionMode + // Restore the original launch mode. Without this a reopened PTY session + // would re-spawn in the default 'remote' (SDK) mode — no agent terminal, + // so the terminal view renders black. + const resumedStartingMode = + (session.agentState as { startingMode?: 'local' | 'remote' | 'pty' } | null)?.startingMode === 'pty' + ? 'pty' + : undefined const spawnResult = await this.rpcGateway.spawnSession( targetMachine.id, directory, @@ -1141,7 +1179,8 @@ export class SyncEngine { undefined, resumeToken, session.effort ?? 
undefined, - preferredPermissionMode + preferredPermissionMode, + resumedStartingMode ) if (spawnResult.type !== 'success') { diff --git a/hub/src/web/routes/machines.ts b/hub/src/web/routes/machines.ts index 7288a42de..01a53df8b 100644 --- a/hub/src/web/routes/machines.ts +++ b/hub/src/web/routes/machines.ts @@ -50,7 +50,9 @@ export function createMachinesRoutes(getSyncEngine: () => SyncEngine | null): Ho parsed.data.sessionType, parsed.data.worktreeName, undefined, - parsed.data.effort + parsed.data.effort, + undefined, + parsed.data.startingMode ) return c.json(result) }) diff --git a/shared/src/apiTypes.ts b/shared/src/apiTypes.ts index 604e1a901..f4037dbce 100644 --- a/shared/src/apiTypes.ts +++ b/shared/src/apiTypes.ts @@ -234,7 +234,8 @@ export const SpawnSessionRequestSchema = z.object({ modelReasoningEffort: z.string().optional(), yolo: z.boolean().optional(), sessionType: z.enum(['simple', 'worktree']).optional(), - worktreeName: z.string().optional() + worktreeName: z.string().optional(), + startingMode: z.enum(['remote', 'pty']).optional() }) export type SpawnSessionRequest = z.infer diff --git a/shared/src/schemas.ts b/shared/src/schemas.ts index 22a6fd511..8c5230d44 100644 --- a/shared/src/schemas.ts +++ b/shared/src/schemas.ts @@ -62,6 +62,9 @@ export const MetadataSchema = z.object({ archiveReason: z.string().optional(), preferredPermissionMode: PermissionModeSchema.optional(), flavor: z.string().nullish(), + // Launch mode, surfaced so the web can show the agent-terminal toggle only + // for PTY sessions (a 'remote'/SDK session has no agent PTY to view). + startingMode: z.enum(['local', 'remote', 'pty']).nullish(), capabilities: SessionCapabilitiesSchema.optional(), worktree: WorktreeMetadataSchema.optional() }) @@ -98,6 +101,10 @@ export type AgentStateCompletedRequest = z.infer +// Read-only agent-terminal viewer controls (no terminalId — the agent PTY is the +// session's single TUI, keyed by sessionId). 
`resize` repaints the agent TUI at a +// given size; `refresh` forces a repaint of the current screen so a freshly +// (re)subscribed viewer sees the live state instead of a stale/black buffer. +export const AgentTerminalResizePayloadSchema = z.object({ + sessionId: z.string().min(1), + cols: z.number().int().positive(), + rows: z.number().int().positive() +}) + +export type AgentTerminalResizePayload = z.infer + +export const AgentTerminalRefreshPayloadSchema = z.object({ + sessionId: z.string().min(1) +}) + +export type AgentTerminalRefreshPayload = z.infer + export const TerminalClosePayloadSchema = z.object({ sessionId: z.string().min(1), terminalId: z.string().min(1) @@ -196,6 +214,11 @@ export interface ServerToClientEvents { 'terminal:write': (data: TerminalWritePayload) => void 'terminal:resize': (data: TerminalResizePayload) => void 'terminal:close': (data: TerminalClosePayload) => void + 'agent-terminal:resize': (data: AgentTerminalResizePayload) => void + 'agent-terminal:refresh': (data: AgentTerminalRefreshPayload) => void + // Sent to the CLI when the last agent-terminal viewer leaves, so it stops + // streaming PTY output to the hub until someone subscribes again. + 'agent-terminal:idle': (data: AgentTerminalRefreshPayload) => void error: (data: { message: string; code?: SocketErrorReason; scope?: 'session' | 'machine'; id?: string }) => void } @@ -225,6 +248,10 @@ export interface ClientToServerEvents { 'terminal:output': (data: TerminalOutputPayload) => void 'terminal:exit': (data: TerminalExitPayload) => void 'terminal:error': (data: TerminalErrorPayload) => void + 'agent-terminal:output': (data: TerminalOutputPayload) => void + // Drop the hub's scrollback buffer for this session (a new agent PTY just + // spawned, e.g. after archive→restart, so old output must not replay). 
+ 'agent-terminal:reset': (data: { sessionId: string }) => void ping: (callback: () => void) => void 'usage-report': (data: unknown) => void } diff --git a/web/src/components/AgentTerminal/AgentTerminalView.tsx b/web/src/components/AgentTerminal/AgentTerminalView.tsx new file mode 100644 index 000000000..34648a1cb --- /dev/null +++ b/web/src/components/AgentTerminal/AgentTerminalView.tsx @@ -0,0 +1,156 @@ +import { useEffect, useRef } from 'react' +import { Terminal } from '@xterm/xterm' +import { FitAddon } from '@xterm/addon-fit' +import '@xterm/xterm/css/xterm.css' +import { useAgentTerminalSocket } from '@/hooks/useAgentTerminalSocket' +import { useAppContext } from '@/lib/app-context' + +function resolveThemeColors(): { background: string; foreground: string; selectionBackground: string } { + const styles = getComputedStyle(document.documentElement) + const background = styles.getPropertyValue('--app-bg').trim() || '#000000' + const foreground = styles.getPropertyValue('--app-fg').trim() || '#ffffff' + const selectionBackground = styles.getPropertyValue('--app-subtle-bg').trim() || 'rgba(255, 255, 255, 0.2)' + return { background, foreground, selectionBackground } +} + +type AgentTerminalViewProps = { + sessionId: string + visible: boolean + className?: string +} + +// Output-only view of the agent PTY. Input is handled by the shared chat +// composer (HappyComposer) so there is a single composer with correct IME +// handling — no separate terminal input bar. 
+export function AgentTerminalView(props: AgentTerminalViewProps) { + const { sessionId, visible, className } = props + const { token, baseUrl } = useAppContext() + const containerRef = useRef(null) + const terminalRef = useRef(null) + const fitAddonRef = useRef(null) + + const { + state, + connect, + disconnect, + resubscribe, + unsubscribe, + onOutput, + resize, + } = useAgentTerminalSocket({ + baseUrl, + token, + sessionId, + }) + + const onOutputRef = useRef(onOutput) + useEffect(() => { + onOutputRef.current = onOutput + }, [onOutput]) + + const resizeRef = useRef(resize) + useEffect(() => { + resizeRef.current = resize + }, [resize]) + + useEffect(() => { + const container = containerRef.current + if (!container) return + + const abortController = new AbortController() + const { background, foreground, selectionBackground } = resolveThemeColors() + + const terminal = new Terminal({ + cursorBlink: true, + fontSize: 13, + theme: { + background, + foreground, + cursor: foreground, + selectionBackground, + }, + convertEol: true, + customGlyphs: true, + cols: 80, + rows: 12, + }) + + const fitAddon = new FitAddon() + fitAddonRef.current = fitAddon + terminal.loadAddon(fitAddon) + terminal.open(container) + + const observer = new ResizeObserver(() => { + requestAnimationFrame(() => { + fitAddon.fit() + // Push the fitted size to the agent PTY so the TUI re-renders at + // the viewer's dimensions (and repaints — no black screen). 
+ resizeRef.current(terminal.cols, terminal.rows) + }) + }) + observer.observe(container) + + onOutputRef.current((data) => { + terminal.write(data) + }) + + abortController.signal.addEventListener('abort', () => { + observer.disconnect() + fitAddon.dispose() + terminal.dispose() + }) + + requestAnimationFrame(() => { + fitAddon.fit() + }) + terminalRef.current = terminal + + return () => abortController.abort() + }, []) + + useEffect(() => { + connect() + return () => disconnect() + }, [connect, disconnect]) + + useEffect(() => { + if (!visible) return + resubscribe() + requestAnimationFrame(() => { + fitAddonRef.current?.fit() + const terminal = terminalRef.current + if (terminal) { + // On (re)entry: sync size and trigger a repaint so the current + // screen shows instead of a stale/black buffer replay. + resizeRef.current(terminal.cols, terminal.rows) + } + }) + // Leaving the terminal view (hidden or unmounted) → unsubscribe so the + // CLI can stop streaming the PTY when no viewers remain. + return () => unsubscribe() + }, [visible, resubscribe, unsubscribe]) + + const statusColor = state.status === 'connected' + ? 'bg-emerald-500' + : state.status === 'connecting' + ? 'bg-amber-400 animate-pulse' + : state.status === 'error' + ? 'bg-red-500' + : 'bg-[var(--app-hint)]' + + return ( +
+
+ + + {state.status === 'connected' ? 'Agent terminal connected' : + state.status === 'connecting' ? 'Connecting...' : + state.status === 'error' ? `Error: ${state.error}` : + 'Disconnected'} + +
+ +
+
+ ) +} diff --git a/web/src/hooks/useAgentTerminalSocket.test.ts b/web/src/hooks/useAgentTerminalSocket.test.ts new file mode 100644 index 000000000..373094719 --- /dev/null +++ b/web/src/hooks/useAgentTerminalSocket.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { renderHook, act } from '@testing-library/react' + +// A minimal fake socket.io-client that records emits and lets the test drive +// lifecycle events ('connect', etc.). The hook calls `new Manager(url, opts)` +// then `manager.socket('/terminal', { auth })`. +class FakeSocket { + connected = false + auth: unknown + readonly emitted: Array<{ event: string; data: unknown }> = [] + private readonly handlers = new Map void>() + + constructor(auth: unknown) { + this.auth = auth + } + + on(event: string, handler: (arg?: unknown) => void): this { + this.handlers.set(event, handler) + return this + } + + emit(event: string, data: unknown): boolean { + this.emitted.push({ event, data }) + return true + } + + connect(): void { + this.connected = true + this.handlers.get('connect')?.() + } + + disconnect(): void { + this.connected = false + } + + removeAllListeners(): void { + this.handlers.clear() + } + + subscribeCount(): number { + return this.emitted.filter((e) => e.event === 'agent-terminal:subscribe').length + } +} + +let lastSocket: FakeSocket | null = null + +vi.mock('socket.io-client', () => ({ + Manager: class { + socket(_nsp: string, opts: { auth: unknown }): FakeSocket { + lastSocket = new FakeSocket(opts.auth) + return lastSocket + } + } +})) + +import { useAgentTerminalSocket } from './useAgentTerminalSocket' + +const options = { baseUrl: 'http://localhost:3000', token: 'tok', sessionId: 'session-1' } + +describe('useAgentTerminalSocket subscribe gating', () => { + beforeEach(() => { + lastSocket = null + }) + + it('does NOT subscribe on connect when the viewer never asked (hidden mount)', () => { + const { result } = renderHook(() => 
useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + // connect() created the socket and connected it synchronously. + expect(lastSocket).not.toBeNull() + expect(lastSocket!.subscribeCount()).toBe(0) + }) + + it('subscribes only after resubscribe(), and re-subscribes across reconnects', () => { + const { result } = renderHook(() => useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + expect(lastSocket!.subscribeCount()).toBe(0) + + // Becoming visible → resubscribe() emits the subscribe. + act(() => result.current.resubscribe()) + expect(lastSocket!.subscribeCount()).toBe(1) + + // A reconnect (e.g. network blip) must re-emit subscribe because the + // viewer is still watching. + act(() => lastSocket!.connect()) + expect(lastSocket!.subscribeCount()).toBe(2) + }) + + it('does not subscribe when connect() is called again on an already-connected socket', () => { + const { result } = renderHook(() => useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + // Second connect() hits the reuse branch (socket already exists + + // connected); it must not subscribe on its own — only resubscribe() does. + act(() => result.current.connect()) + expect(lastSocket!.subscribeCount()).toBe(0) + }) + + it('stops re-subscribing on reconnect after unsubscribe() (viewer left)', () => { + const { result } = renderHook(() => useAgentTerminalSocket(options)) + + act(() => result.current.connect()) + act(() => result.current.resubscribe()) + expect(lastSocket!.subscribeCount()).toBe(1) + + act(() => result.current.unsubscribe()) + // After leaving, a reconnect must NOT re-subscribe. 
+ act(() => lastSocket!.connect()) + expect(lastSocket!.subscribeCount()).toBe(1) + expect( + lastSocket!.emitted.some((e) => e.event === 'agent-terminal:unsubscribe') + ).toBe(true) + }) +}) diff --git a/web/src/hooks/useAgentTerminalSocket.ts b/web/src/hooks/useAgentTerminalSocket.ts new file mode 100644 index 000000000..da5cce2b2 --- /dev/null +++ b/web/src/hooks/useAgentTerminalSocket.ts @@ -0,0 +1,189 @@ +import { useCallback, useEffect, useRef, useState } from 'react' +import { Manager, type Socket } from 'socket.io-client' + +type AgentTerminalConnectionState = + | { status: 'idle' } + | { status: 'connecting' } + | { status: 'connected' } + | { status: 'error'; error: string } + +type UseAgentTerminalSocketOptions = { + baseUrl: string + token: string + sessionId: string +} + +type TerminalOutputPayload = { + terminalId: string + data: string +} + +export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): { + state: AgentTerminalConnectionState + connect: () => void + disconnect: () => void + resubscribe: () => void + unsubscribe: () => void + onOutput: (handler: (data: string) => void) => void + resize: (cols: number, rows: number) => void +} { + const [state, setState] = useState({ status: 'idle' }) + const socketRef = useRef(null) + const outputHandlerRef = useRef<(data: string) => void>(() => {}) + const sessionIdRef = useRef(options.sessionId) + const tokenRef = useRef(options.token) + const baseUrlRef = useRef(options.baseUrl) + // Whether the viewer currently wants the PTY streamed. Connecting alone must + // NOT subscribe — SessionChat mounts this hidden for every PTY session, and + // an unconditional subscribe-on-connect would stream the high-frequency raw + // TUI even when the terminal is never opened. Subscribe is gated on this so + // (re)connects only re-subscribe when the terminal is actually visible. 
+    const subscribedRef = useRef(false)
+
+    useEffect(() => {
+        sessionIdRef.current = options.sessionId
+        baseUrlRef.current = options.baseUrl
+    }, [options.sessionId, options.baseUrl])
+
+    useEffect(() => {
+        tokenRef.current = options.token
+        const socket = socketRef.current
+        if (!socket) {
+            return
+        }
+        if (!options.token) {
+            if (socket.connected) {
+                socket.disconnect()
+            }
+            return
+        }
+        socket.auth = { token: options.token }
+        if (socket.connected) {
+            socket.disconnect()
+            socket.connect()
+        }
+    }, [options.token])
+
+    const connect = useCallback(() => {
+        const token = tokenRef.current
+        const sessionId = sessionIdRef.current
+
+        if (!token || !sessionId) {
+            setState({ status: 'error', error: 'Missing terminal credentials.' })
+            return
+        }
+
+        if (socketRef.current) {
+            const socket = socketRef.current
+            socket.auth = { token }
+            if (socket.connected) {
+                setState({ status: 'connected' })
+            } else {
+                socket.connect()
+            }
+            return
+        }
+
+        const manager = new Manager(baseUrlRef.current, {
+            path: '/socket.io/',
+            reconnection: true,
+            reconnectionAttempts: Infinity,
+            reconnectionDelay: 1000,
+            reconnectionDelayMax: 5000,
+            transports: ['polling', 'websocket'],
+            autoConnect: false
+        })
+        const socket = manager.socket('/terminal', {
+            auth: { token }
+        })
+
+        socketRef.current = socket
+        setState({ status: 'connecting' })
+
+        socket.on('connect', () => {
+            // Re-subscribe across reconnects only if the viewer still wants it; read the ref (not the id captured when the socket was created) so this matches resubscribe()/unsubscribe()/resize().
+            if (subscribedRef.current && sessionIdRef.current) {
+                socket.emit('agent-terminal:subscribe', { sessionId: sessionIdRef.current })
+            }
+            setState({ status: 'connected' })
+        })
+
+        socket.on('agent-terminal:output', (payload: TerminalOutputPayload) => {
+            if (payload.terminalId !== 'agent') {
+                return
+            }
+            outputHandlerRef.current(payload.data)
+        })
+
+        socket.on('connect_error', (error) => {
+            const message = error instanceof Error ? error.message : 'Connection error'
+            setState({ status: 'error', error: message })
+        })
+
+        socket.on('disconnect', (reason) => {
+            if (reason === 'io client disconnect') {
+                setState({ status: 'idle' })
+                return
+            }
+            setState({ status: 'error', error: `Disconnected: ${reason}` })
+        })
+
+        socket.connect()
+    }, [])
+
+    const disconnect = useCallback(() => {
+        const socket = socketRef.current
+        if (!socket) {
+            return
+        }
+        socket.removeAllListeners()
+        socket.disconnect()
+        socketRef.current = null
+        setState({ status: 'idle' })
+    }, [])
+
+    const resubscribe = useCallback(() => {
+        subscribedRef.current = true
+        const socket = socketRef.current
+        const sessionId = sessionIdRef.current
+        if (socket?.connected && sessionId) {
+            socket.emit('agent-terminal:subscribe', { sessionId })
+        }
+    }, [])
+
+    // Tell the hub we're no longer viewing, so the CLI can stop streaming the PTY
+    // when no viewers remain. (Safe to miss — the runner keeps streaming until it
+    // hears this, never the other way around, so a missed unsubscribe never
+    // causes a black screen.)
+ const unsubscribe = useCallback(() => { + subscribedRef.current = false + const socket = socketRef.current + const sessionId = sessionIdRef.current + if (socket?.connected && sessionId) { + socket.emit('agent-terminal:unsubscribe', { sessionId }) + } + }, []) + + const resize = useCallback((cols: number, rows: number) => { + const socket = socketRef.current + const sessionId = sessionIdRef.current + if (!socket?.connected || !sessionId || cols < 1 || rows < 1) { + return + } + socket.emit('agent-terminal:resize', { sessionId, cols, rows }) + }, []) + + const onOutput = useCallback((handler: (data: string) => void) => { + outputHandlerRef.current = handler + }, []) + + return { + state, + connect, + disconnect, + resubscribe, + unsubscribe, + onOutput, + resize + } +} diff --git a/web/src/router.tsx b/web/src/router.tsx index 84e030a53..597272c73 100644 --- a/web/src/router.tsx +++ b/web/src/router.tsx @@ -828,7 +828,7 @@ function SessionDetailRoute() { return } navigate({ to: '/sessions', replace: true }) - }, [navigate, sessionNotFound]) + }, [navigate, sessionNotFound, sessionId]) if (sessionNotFound) { return ( diff --git a/web/src/routes/sessions/terminal.test.tsx b/web/src/routes/sessions/terminal.test.tsx index 4497873b8..531b33e65 100644 --- a/web/src/routes/sessions/terminal.test.tsx +++ b/web/src/routes/sessions/terminal.test.tsx @@ -51,8 +51,13 @@ vi.mock('@/hooks/queries/useSession', () => ({ }) })) +const capturedTerminalIds: string[] = [] + vi.mock('@/hooks/useTerminalSocket', () => ({ - useTerminalSocket: () => terminalSocketState + useTerminalSocket: (opts: { terminalId: string }) => { + capturedTerminalIds.push(opts.terminalId) + return terminalSocketState + } })) vi.mock('@/hooks/useLongPress', () => ({ @@ -112,6 +117,26 @@ describe('TerminalPage paste behavior', () => { }) }) +describe('TerminalPage terminal id', () => { + beforeEach(() => { + vi.clearAllMocks() + capturedTerminalIds.length = 0 + }) + + it('generates a unique terminal id per 
mount so concurrent viewers do not collide', () => { + // Two viewers (tabs/devices) of the SAME session must not share one + // terminal id: the hub registry would treat the second viewer's reused + // id as a stale reconnect and evict the first viewer's PTY ownership. + renderWithProviders() + renderWithProviders() + + const distinct = new Set(capturedTerminalIds) + expect(distinct.size).toBe(2) + // Each id still carries the session for debuggability/scoping. + expect([...distinct].every((id) => id.startsWith('term-session-1-'))).toBe(true) + }) +}) + describe('TerminalPage exit behavior', () => { beforeEach(() => { vi.clearAllMocks() diff --git a/web/src/routes/sessions/terminal.tsx b/web/src/routes/sessions/terminal.tsx index 0339145b7..a204d5f31 100644 --- a/web/src/routes/sessions/terminal.tsx +++ b/web/src/routes/sessions/terminal.tsx @@ -1,4 +1,4 @@ -import { useCallback, useEffect, useMemo, useRef, useState } from 'react' +import { useCallback, useEffect, useRef, useState } from 'react' import type { PointerEvent } from 'react' import { useParams } from '@tanstack/react-router' import type { Terminal } from '@xterm/xterm' @@ -8,7 +8,6 @@ import { useSession } from '@/hooks/queries/useSession' import { useTerminalSocket } from '@/hooks/useTerminalSocket' import { useLongPress } from '@/hooks/useLongPress' import { useTranslation } from '@/lib/use-translation' -import { randomId } from '@/lib/randomId' import { TerminalView } from '@/components/Terminal/TerminalView' import { LoadingState } from '@/components/LoadingState' import { Button } from '@/components/ui/button' @@ -189,7 +188,20 @@ export default function TerminalPage() { const goBack = useAppGoBack() const { session } = useSession(api, sessionId) const terminalSupported = isRemoteTerminalSupported(session?.metadata) - const terminalId = useMemo(() => randomId(), [sessionId]) + // A per-viewer-unique terminal id. 
Two browsers/tabs/devices viewing the
+    // same session must each drive their own shell: the hub registry evicts a
+    // reused id arriving from a different socket as a stale reconnect
+    // (terminalRegistry.ts), which would otherwise let a second viewer hijack
+    // the first viewer's PTY. The id is intentionally NOT derived from sessionId
+    // alone — scrollback survives navigation via the sessionId-keyed buffer (userTerminalBuffer.ts).
+    // Held in a ref: constant across re-renders and reconnects, regenerated only on a session switch.
+    const terminalIdRef = useRef<{ sessionId: string; id: string } | null>(null)
+    if (terminalIdRef.current?.sessionId !== sessionId) {
+        // crypto.randomUUID() is only exposed in secure contexts (HTTPS/localhost); fall back so a plain-HTTP origin does not throw during render.
+        const unique = typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function' ? crypto.randomUUID() : `${Date.now().toString(36)}-${Math.random().toString(36).slice(2)}`
+        terminalIdRef.current = { sessionId, id: `term-${sessionId}-${unique}` }
+    }
+    const terminalId = terminalIdRef.current.id
     const terminalRef = useRef(null)
     const inputDisposableRef = useRef<{ dispose: () => void } | null>(null)
     const connectOnceRef = useRef(false)
diff --git a/web/vite.config.ts b/web/vite.config.ts
index 58ab14226..f7675dffd 100644
--- a/web/vite.config.ts
+++ b/web/vite.config.ts
@@ -1,9 +1,26 @@
-import { defineConfig } from 'vite'
+import { defineConfig, type Plugin } from 'vite'
 import react from '@vitejs/plugin-react'
 import { VitePWA } from 'vite-plugin-pwa'
 import { readFileSync } from 'node:fs'
 import { resolve } from 'node:path'
 
+function spaFallback(): Plugin {
+    return {
+        name: 'spa-fallback',
+        configureServer(server) {
+            server.middlewares.use((req, _res, next) => {
+                const url = (req.url ??
'').split('?')[0] + if (url === '/' || url === '' || url.includes('.') || url.startsWith('/@') || url.startsWith('/api') || url.startsWith('/socket.io') || url.startsWith('/src/')) { + next() + return + } + req.url = '/index.html' + next() + }) + } + } +} + const base = process.env.VITE_BASE_URL || '/' const hubTarget = process.env.VITE_HUB_PROXY || 'http://127.0.0.1:3006' const appVersion = readAppVersion() @@ -45,6 +62,7 @@ function getVendorChunkName(id: string): string | undefined { } export default defineConfig({ + appType: 'spa', define: { __APP_VERSION__: JSON.stringify(appVersion), }, @@ -64,6 +82,7 @@ export default defineConfig({ }, plugins: [ react(), + spaFallback(), VitePWA({ registerType: 'autoUpdate', includeAssets: ['favicon.ico', 'apple-touch-icon-180x180.png', 'mask-icon.svg'], From 9d0a8a50e97cfc440fb3a06db13fadb9903c02ce Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Fri, 12 Jun 2026 17:28:16 +0900 Subject: [PATCH 07/11] feat(pty): add PTY-mode option to the web new-session flow --- web/src/api/client.ts | 5 +-- web/src/chat/normalizeAgent.ts | 1 + .../components/NewSession/AgentSelector.tsx | 4 +-- web/src/components/NewSession/index.tsx | 35 +++++++++++++++++-- .../NewSession/newSessionFormDraft.ts | 4 ++- web/src/components/SessionChat.tsx | 25 +++++++++++++ web/src/components/SessionHeader.tsx | 29 ++++++++++++++- web/src/hooks/mutations/useSpawnSession.ts | 4 ++- web/src/lib/locales/en.ts | 2 ++ web/src/lib/locales/zh-CN.ts | 2 ++ web/src/types/api.ts | 1 + 11 files changed, 103 insertions(+), 9 deletions(-) diff --git a/web/src/api/client.ts b/web/src/api/client.ts index 488e7ea24..a3b39efe7 100644 --- a/web/src/api/client.ts +++ b/web/src/api/client.ts @@ -589,11 +589,12 @@ export class ApiClient { yolo?: boolean, sessionType?: 'simple' | 'worktree', worktreeName?: string, - effort?: string + effort?: string, + startingMode?: 'remote' | 'pty' ): Promise { return await this.request(`/api/machines/${encodeURIComponent(machineId)}/spawn`, { 
method: 'POST', - body: JSON.stringify({ directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, effort }) + body: JSON.stringify({ directory, agent, model, modelReasoningEffort, yolo, sessionType, worktreeName, effort, startingMode }) }) } diff --git a/web/src/chat/normalizeAgent.ts b/web/src/chat/normalizeAgent.ts index 39de43c65..84a412ea5 100644 --- a/web/src/chat/normalizeAgent.ts +++ b/web/src/chat/normalizeAgent.ts @@ -518,6 +518,7 @@ export function normalizeAgentRecord( meta } } + return null } diff --git a/web/src/components/NewSession/AgentSelector.tsx b/web/src/components/NewSession/AgentSelector.tsx index 4146f3081..cf0c5f294 100644 --- a/web/src/components/NewSession/AgentSelector.tsx +++ b/web/src/components/NewSession/AgentSelector.tsx @@ -1,4 +1,4 @@ -import { AGENT_FLAVORS } from '@hapi/protocol' +import { AGENT_FLAVORS, getFlavorLabel } from '@hapi/protocol' import type { AgentType } from './types' import { useTranslation } from '@/lib/use-translation' @@ -29,7 +29,7 @@ export function AgentSelector(props: { disabled={props.isDisabled} className="accent-[var(--app-link)]" /> - {agentType} + {getFlavorLabel(agentType)} ))}
diff --git a/web/src/components/NewSession/index.tsx b/web/src/components/NewSession/index.tsx index f19e311a8..9f9c44447 100644 --- a/web/src/components/NewSession/index.tsx +++ b/web/src/components/NewSession/index.tsx @@ -77,6 +77,9 @@ export function NewSession(props: { const [effort, setEffort] = useState('auto') const [modelReasoningEffort, setModelReasoningEffort] = useState('default') const [yoloMode, setYoloMode] = useState(loadPreferredYoloMode) + // Default to 'remote' (the stable SDK path); PTY is an explicit opt-in via the + // checkbox below. + const [startingMode, setStartingMode] = useState<'remote' | 'pty'>('remote') const [sessionType, setSessionType] = useState('simple') const [worktreeName, setWorktreeName] = useState('') const [directoryCreationConfirmed, setDirectoryCreationConfirmed] = useState(false) @@ -102,6 +105,12 @@ export function NewSession(props: { savePreferredAgent(agent) }, [agent]) + useEffect(() => { + // Reset to the stable 'remote' path when switching agents; PTY stays an + // explicit opt-in via the checkbox. + setStartingMode('remote') + }, [agent]) + useEffect(() => { savePreferredYoloMode(yoloMode) }, [yoloMode]) @@ -147,6 +156,7 @@ export function NewSession(props: { setYoloMode(draft.yoloMode) setSessionType(draft.sessionType) setWorktreeName(draft.worktreeName) + setStartingMode(draft.startingMode ?? 'remote') clearNewSessionFormDraft() }, [ props.initialDirectory, @@ -343,6 +353,7 @@ export function NewSession(props: { cwdExists: deferredDirectoryExists, }) }) + useEffect(() => { // Auto-pick the OpenCode default model when discovery finishes, so the // form has a sensible value if the user hits Enter without scrolling. @@ -456,7 +467,8 @@ export function NewSession(props: { modelReasoningEffort, yoloMode, sessionType, - worktreeName + worktreeName, + startingMode: agent === 'claude' ? 
startingMode : undefined }) props.onChooseFolder({ machineId, directory: trimmedDirectory }) }, [ @@ -572,9 +584,11 @@ export function NewSession(props: { modelReasoningEffort: resolvedModelReasoningEffort, yolo: yoloMode, sessionType, - worktreeName: sessionType === 'worktree' ? (worktreeName.trim() || undefined) : undefined + worktreeName: sessionType === 'worktree' ? (worktreeName.trim() || undefined) : undefined, + startingMode: agent === 'claude' ? startingMode : undefined }) + if (result.type === 'success') { haptic.notification('success') clearNewSessionFormDraft() @@ -716,6 +730,23 @@ export function NewSession(props: { isDisabled={isFormDisabled} onEffortChange={setEffort} /> + {agent === 'claude' && ( +
+ + + {t('newSession.pty.desc')} + +
+ )} >(new Map()) const blocksByIdRef = useRef>(new Map()) const visibleGroupsRef = useRef([]) const [forceScrollToken, setForceScrollToken] = useState(0) const [outlineOpen, setOutlineOpen] = useState(false) + const [terminalVisible, setTerminalVisible] = useState(false) const [cursorSelectedBase, setCursorSelectedBase] = useState('auto') const lastSyncedCursorModelRef = useRef(undefined) const scratchlist = useScratchlist(props.session.id) @@ -972,6 +981,8 @@ function SessionChatInner(props: SessionChatProps) { onBack={props.onBack} onViewFiles={props.session.metadata?.path ? handleViewFiles : undefined} onOpenOutline={() => setOutlineOpen(true)} + onToggleTerminal={canViewAgentTerminal ? () => setTerminalVisible(v => !v) : undefined} + terminalActive={terminalVisible} api={props.api} onSessionDeleted={props.onBack} onSessionReopened={(newSessionId) => { @@ -985,6 +996,7 @@ function SessionChatInner(props: SessionChatProps) { +
{props.session.teamState && ( )} @@ -1001,6 +1013,17 @@ function SessionChatInner(props: SessionChatProps) {
+ {/* Terminal output replaces the message thread when toggled, + but the composer below stays shared. Only PTY sessions have + an agent terminal — don't mount (and connect) it otherwise. */} + {canViewAgentTerminal && ( + + )} +
+
{codexCollaborationModeSupported && codexModelsState.error ? (
@@ -1178,6 +1202,7 @@ function SessionChatInner(props: SessionChatProps) { />
+
{/* Voice session component - renders nothing but initializes voice backend */} {voice && ( diff --git a/web/src/components/SessionHeader.tsx b/web/src/components/SessionHeader.tsx index a65f84d0f..877edbea1 100644 --- a/web/src/components/SessionHeader.tsx +++ b/web/src/components/SessionHeader.tsx @@ -10,6 +10,7 @@ import { ConfirmDialog } from '@/components/ui/ConfirmDialog' import { formatReopenError } from '@/lib/reopenError' import { getSessionModelLabel } from '@/lib/sessionModelLabel' import { useTranslation } from '@/lib/use-translation' +import { getFlavorLabel } from '@hapi/protocol' import { AgentFlavorIcon } from '@/components/AgentFlavorIcon' function getSessionTitle(session: Session): string { @@ -70,6 +71,15 @@ function OutlineIcon(props: { className?: string }) { ) } +function TerminalIcon(props: { className?: string }) { + return ( + + + + + ) +} + function MoreVerticalIcon(props: { className?: string }) { return ( void onViewFiles?: () => void onOpenOutline?: () => void + onToggleTerminal?: () => void + terminalActive?: boolean api: ApiClient | null onSessionDeleted?: () => void onSessionReopened?: (newSessionId: string) => void @@ -181,7 +193,7 @@ export function SessionHeader(props: {
- {session.metadata?.flavor?.trim() || 'unknown'} + {getFlavorLabel(session.metadata?.flavor)} {modelLabel ? ( @@ -217,6 +229,21 @@ export function SessionHeader(props: { ) : null} + {props.onToggleTerminal ? ( + + ) : null} + + ) +} + +// Sticky-modifier state + a dispatcher that applies the modifiers and resets +// them after a real send. Shared by the quick-key buttons AND the terminal's +// raw onData path so toggling Ctrl then typing a letter sends the control code, +// exactly like a physical modifier key. Gating (when to disable) is the caller's +// concern — the quick-key buttons gate via their `disabled` prop, while the raw +// onData path is intentionally ungated. +export function useQuickKeyInput(opts: { onSend: (data: string) => void }): { + ctrlActive: boolean + altActive: boolean + dispatch: (sequence: string) => void + toggleModifier: (modifier: 'ctrl' | 'alt') => void + resetModifiers: () => void +} { + const [ctrlActive, setCtrlActive] = useState(false) + const [altActive, setAltActive] = useState(false) + // Read modifiers from a ref inside dispatch so the terminal onData closure + // (registered once) always sees the current state, never a stale snapshot. 
+ const modifierStateRef = useRef({ ctrl: false, alt: false }) + useEffect(() => { + modifierStateRef.current = { ctrl: ctrlActive, alt: altActive } + }, [ctrlActive, altActive]) + const onSendRef = useRef(opts.onSend) + useEffect(() => { + onSendRef.current = opts.onSend + }, [opts.onSend]) + + const resetModifiers = useCallback(() => { + setCtrlActive(false) + setAltActive(false) + }, []) + + const dispatch = useCallback((sequence: string) => { + const state = modifierStateRef.current + onSendRef.current(applyModifierState(sequence, state)) + if (shouldResetModifiers(sequence, state)) { + resetModifiers() + } + }, [resetModifiers]) + + const toggleModifier = useCallback((modifier: 'ctrl' | 'alt') => { + if (modifier === 'ctrl') { + setCtrlActive((value) => !value) + setAltActive(false) + } else { + setAltActive((value) => !value) + setCtrlActive(false) + } + }, []) + + return { ctrlActive, altActive, dispatch, toggleModifier, resetModifiers } +} + +// Presentational rows of quick-input keys. State/dispatch live in the caller +// (via useQuickKeyInput) so they can be shared with the terminal onData path. +export function QuickKeyRows(props: { + ctrlActive: boolean + altActive: boolean + disabled: boolean + onPress: (sequence: string) => void + onToggleModifier: (modifier: 'ctrl' | 'alt') => void +}) { + const { ctrlActive, altActive, disabled, onPress, onToggleModifier } = props + return ( + <> + {QUICK_INPUT_ROWS.map((row, rowIndex) => ( +
+ {row.map((input) => { + const modifier = input.modifier + const isCtrl = modifier === 'ctrl' + const isAlt = modifier === 'alt' + const isActive = (isCtrl && ctrlActive) || (isAlt && altActive) + return ( + + ) + })} +
+ ))} + + ) +} diff --git a/web/src/routes/sessions/terminal.tsx b/web/src/routes/sessions/terminal.tsx index a204d5f31..935d6d023 100644 --- a/web/src/routes/sessions/terminal.tsx +++ b/web/src/routes/sessions/terminal.tsx @@ -1,12 +1,11 @@ import { useCallback, useEffect, useRef, useState } from 'react' -import type { PointerEvent } from 'react' import { useParams } from '@tanstack/react-router' import type { Terminal } from '@xterm/xterm' import { useAppContext } from '@/lib/app-context' import { useAppGoBack } from '@/hooks/useAppGoBack' import { useSession } from '@/hooks/queries/useSession' import { useTerminalSocket } from '@/hooks/useTerminalSocket' -import { useLongPress } from '@/hooks/useLongPress' +import { useQuickKeyInput, QuickKeyRows } from '@/components/QuickKeys/QuickKeys' import { useTranslation } from '@/lib/use-translation' import { TerminalView } from '@/components/Terminal/TerminalView' import { LoadingState } from '@/components/LoadingState' @@ -54,133 +53,8 @@ function ConnectionIndicator(props: { status: 'idle' | 'connecting' | 'connected ) } -type QuickInput = { - label: string - sequence?: string - description: string - modifier?: 'ctrl' | 'alt' - popup?: { - label: string - sequence: string - description: string - } -} - -type ModifierState = { - ctrl: boolean - alt: boolean -} - -function applyModifierState(sequence: string, state: ModifierState): string { - let modified = sequence - if (state.alt) { - modified = `\u001b${modified}` - } - if (state.ctrl && modified.length === 1) { - const code = modified.toUpperCase().charCodeAt(0) - if (code >= 64 && code <= 95) { - modified = String.fromCharCode(code - 64) - } - } - return modified -} - -function shouldResetModifiers(sequence: string, state: ModifierState): boolean { - if (!sequence) { - return false - } - return state.ctrl || state.alt -} - const EXIT_NAVIGATION_DELAY_MS = 700 -const QUICK_INPUT_ROWS: QuickInput[][] = [ - [ - { label: 'Esc', sequence: '\u001b', description: 'Escape' 
}, - { - label: '/', - sequence: '/', - description: 'Forward slash', - popup: { label: '?', sequence: '?', description: 'Question mark' }, - }, - { - label: '-', - sequence: '-', - description: 'Hyphen', - popup: { label: '|', sequence: '|', description: 'Pipe' }, - }, - { label: 'Home', sequence: '\u001b[H', description: 'Home' }, - { label: '↑', sequence: '\u001b[A', description: 'Arrow up' }, - { label: 'End', sequence: '\u001b[F', description: 'End' }, - { label: 'PgUp', sequence: '\u001b[5~', description: 'Page up' }, - ], - [ - { label: 'Tab', sequence: '\t', description: 'Tab' }, - { label: 'Ctrl', description: 'Control', modifier: 'ctrl' }, - { label: 'Alt', description: 'Alternate', modifier: 'alt' }, - { label: '←', sequence: '\u001b[D', description: 'Arrow left' }, - { label: '↓', sequence: '\u001b[B', description: 'Arrow down' }, - { label: '→', sequence: '\u001b[C', description: 'Arrow right' }, - { label: 'PgDn', sequence: '\u001b[6~', description: 'Page down' }, - ], -] - -function QuickKeyButton(props: { - input: QuickInput - disabled: boolean - isActive: boolean - onPress: (sequence: string) => void - onToggleModifier: (modifier: 'ctrl' | 'alt') => void -}) { - const { input, disabled, isActive, onPress, onToggleModifier } = props - const modifier = input.modifier - const popupSequence = input.popup?.sequence - const popupDescription = input.popup?.description - const hasPopup = Boolean(popupSequence) - const longPressDisabled = disabled || Boolean(modifier) || !hasPopup - - const handleClick = useCallback(() => { - if (modifier) { - onToggleModifier(modifier) - return - } - onPress(input.sequence ?? 
'') - }, [modifier, onToggleModifier, onPress, input.sequence]) - - const handlePointerDown = useCallback((event: PointerEvent) => { - if (event.pointerType === 'touch') { - event.preventDefault() - } - }, []) - - const longPressHandlers = useLongPress({ - onLongPress: () => { - if (popupSequence && !modifier) { - onPress(popupSequence) - } - }, - onClick: handleClick, - disabled: longPressDisabled, - }) - - return ( - - ) -} - export default function TerminalPage() { const { t } = useTranslation() const { sessionId } = useParams({ from: '/sessions/$sessionId/terminal' }) @@ -206,11 +80,8 @@ export default function TerminalPage() { const inputDisposableRef = useRef<{ dispose: () => void } | null>(null) const connectOnceRef = useRef(false) const lastSizeRef = useRef<{ cols: number; rows: number } | null>(null) - const modifierStateRef = useRef({ ctrl: false, alt: false }) const exitNavTimerRef = useRef | null>(null) const [exitInfo, setExitInfo] = useState<{ code: number | null; signal: string | null } | null>(null) - const [ctrlActive, setCtrlActive] = useState(false) - const [altActive, setAltActive] = useState(false) const [pasteDialogOpen, setPasteDialogOpen] = useState(false) const [manualPasteText, setManualPasteText] = useState('') @@ -249,36 +120,21 @@ export default function TerminalPage() { }) }, [onExit, goBack]) - useEffect(() => { - modifierStateRef.current = { ctrl: ctrlActive, alt: altActive } - }, [ctrlActive, altActive]) - - const resetModifiers = useCallback(() => { - setCtrlActive(false) - setAltActive(false) - }, []) - - const dispatchSequence = useCallback( - (sequence: string, modifierState: ModifierState) => { - write(applyModifierState(sequence, modifierState)) - if (shouldResetModifiers(sequence, modifierState)) { - resetModifiers() - } - }, - [write, resetModifiers] - ) + // Raw terminal input AND the quick-key buttons share one sticky-modifier + // state via the dispatcher, so toggling Ctrl then typing sends the control + // code. 
onData is intentionally ungated; the buttons gate via `disabled`. + const { ctrlActive, altActive, dispatch, toggleModifier, resetModifiers } = useQuickKeyInput({ onSend: write }) const handleTerminalMount = useCallback( (terminal: Terminal) => { terminalRef.current = terminal inputDisposableRef.current?.dispose() inputDisposableRef.current = terminal.onData((data) => { - const modifierState = modifierStateRef.current - dispatchSequence(data, modifierState) + dispatch(data) }) terminal.focus() }, - [dispatchSequence] + [dispatch] ) const handleResize = useCallback( @@ -399,11 +255,10 @@ export default function TerminalPage() { if (quickInputDisabled) { return } - const modifierState = { ctrl: ctrlActive, alt: altActive } - dispatchSequence(sequence, modifierState) + dispatch(sequence) terminalRef.current?.focus() }, - [quickInputDisabled, ctrlActive, altActive, dispatchSequence] + [quickInputDisabled, dispatch] ) const handleModifierToggle = useCallback( @@ -411,16 +266,10 @@ export default function TerminalPage() { if (quickInputDisabled) { return } - if (modifier === 'ctrl') { - setCtrlActive((value) => !value) - setAltActive(false) - } else { - setAltActive((value) => !value) - setCtrlActive(false) - } + toggleModifier(modifier) terminalRef.current?.focus() }, - [quickInputDisabled] + [quickInputDisabled, toggleModifier] ) if (!session) { @@ -508,29 +357,13 @@ export default function TerminalPage() { > {t('button.paste')} - {QUICK_INPUT_ROWS.map((row, rowIndex) => ( -
- {row.map((input) => { - const modifier = input.modifier - const isCtrl = modifier === 'ctrl' - const isAlt = modifier === 'alt' - const isActive = (isCtrl && ctrlActive) || (isAlt && altActive) - return ( - - ) - })} -
- ))} +
From f2865d188fd7d79a596e879e8a965c4f1cd182d5 Mon Sep 17 00:00:00 2001 From: Junmo Kim Date: Sun, 14 Jun 2026 10:10:22 +0900 Subject: [PATCH 11/11] feat(pty): make the agent terminal interactive (raw keys + quick keys) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent-terminal view was output-only, so a remote viewer could not navigate TUI screens the structured chat composer cannot express — escape a /usage screen, answer a /model dialog, or send Ctrl-C. Add an agent-terminal:input path (web emit -> hub relay -> CLI -> agent PTY) so the xterm onData and a shared QuickKey bar drive the live TUI. The CLI now stores the launcher's sendKeys control, which setAgentTerminalControls previously dropped. The relay reuses the same authorization guard as resize (authorized viewer of an active session in the namespace). The chat composer remains the primary way to send messages. --- cli/src/api/apiSession.ts | 17 +++++-- hub/src/socket/handlers/terminal.test.ts | 46 +++++++++++++++++++ hub/src/socket/handlers/terminal.ts | 20 +++++++- shared/src/socket.ts | 14 ++++++ .../AgentTerminal/AgentTerminalView.tsx | 44 ++++++++++++++++-- web/src/hooks/useAgentTerminalSocket.ts | 13 +++++- 6 files changed, 146 insertions(+), 8 deletions(-) diff --git a/cli/src/api/apiSession.ts b/cli/src/api/apiSession.ts index b0e6f60fa..cda456e12 100644 --- a/cli/src/api/apiSession.ts +++ b/cli/src/api/apiSession.ts @@ -13,6 +13,7 @@ import { AGENT_MESSAGE_PAYLOAD_TYPE } from "@hapi/protocol" import type { SessionEndReason } from '@hapi/protocol' import type { ClientToServerEvents, ServerToClientEvents, TerminalOutputPayload, Update } from '@hapi/protocol' import { + AgentTerminalInputPayloadSchema, AgentTerminalRefreshPayloadSchema, AgentTerminalResizePayloadSchema, TerminalClosePayloadSchema, @@ -180,6 +181,9 @@ export class ApiSessionClient extends EventEmitter { readonly rpcHandlerManager: RpcHandlerManager private readonly terminalManager: 
TerminalManager private agentTerminalResize: ((cols: number, rows: number) => void) | null = null + // Writes raw keystroke(s) from a web viewer into the agent PTY (interactive + // TUI navigation). Null until the agent is spawned and after it exits. + private agentTerminalSendKeys: ((data: string) => void) | null = null private lastAgentTerminalSize: { cols: number; rows: number } | null = null // The agent PTY emits a high-frequency byte stream (spinners ~10Hz, full // redraws). Only forward it to the hub while a viewer is actually subscribed @@ -304,14 +308,20 @@ export class ApiSessionClient extends EventEmitter { this.terminalManager.close(payload.terminalId) })) - // Read-only agent-terminal viewer: resize the agent PTY to the viewer's - // size, and force a repaint when a viewer (re)subscribes so it sees the - // live screen instead of a stale/black buffer replay. + // Agent-terminal viewer: resize the agent PTY to the viewer's size, and + // force a repaint when a viewer (re)subscribes so it sees the live screen + // instead of a stale/black buffer replay. this.socket.on('agent-terminal:resize', handleTerminalEvent(AgentTerminalResizePayloadSchema, (payload) => { this.lastAgentTerminalSize = { cols: payload.cols, rows: payload.rows } this.agentTerminalResize?.(payload.cols, payload.rows) })) + // Raw keystroke(s) typed by a viewer → write into the agent PTY. No-op + // (controls null) before the agent is spawned or after it exits. 
+ this.socket.on('agent-terminal:input', handleTerminalEvent(AgentTerminalInputPayloadSchema, (payload) => { + this.agentTerminalSendKeys?.(payload.data) + })) + this.socket.on('agent-terminal:refresh', handleTerminalEvent(AgentTerminalRefreshPayloadSchema, () => { // A viewer is subscribed → start streaming (enable BEFORE replay so // the bytes flow), replay the locally-captured current screen (works @@ -659,6 +669,7 @@ export class ApiSessionClient extends EventEmitter { */ setAgentTerminalControls(controls: { resize: (cols: number, rows: number) => void; sendKeys: (data: string) => void } | null): void { this.agentTerminalResize = controls?.resize ?? null + this.agentTerminalSendKeys = controls?.sendKeys ?? null } // Force the agent TUI to repaint its current screen. A plain same-size resize diff --git a/hub/src/socket/handlers/terminal.test.ts b/hub/src/socket/handlers/terminal.test.ts index 542114cad..b0708e14d 100644 --- a/hub/src/socket/handlers/terminal.test.ts +++ b/hub/src/socket/handlers/terminal.test.ts @@ -360,6 +360,52 @@ describe('terminal socket handlers', () => { }) }) + describe('agent-terminal:input', () => { + it('forwards raw keystrokes to the CLI socket for an authorized active session', () => { + const { terminalSocket, cliNamespace } = createHarness() + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:input', { sessionId: 'session-1', data: '\u001b' }) + + const inputEvent = lastEmit(cliSocket, 'agent-terminal:input') + expect(inputEvent?.data).toEqual({ sessionId: 'session-1', data: '\u001b' }) + }) + + it('does not forward input when the session is inactive (same guard as resize)', () => { + const { terminalSocket, cliNamespace } = createHarness({ sessionActive: false }) + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:input', { sessionId: 
'session-1', data: 'a' }) + + expect(lastEmit(cliSocket, 'agent-terminal:input')).toBeUndefined() + }) + + it('drops malformed input (empty data) without emitting', () => { + const { terminalSocket, cliNamespace } = createHarness() + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:input', { sessionId: 'session-1', data: '' }) + + expect(lastEmit(cliSocket, 'agent-terminal:input')).toBeUndefined() + }) + + it('does not forward input to a session in another namespace (no cross-namespace keystroke injection)', () => { + // The socket's namespace is 'default'; the session belongs to 'other'. + // A CLI socket IS connected, so without the namespace guard the relay + // would inject keystrokes into another namespace's live agent PTY. + const { terminalSocket, cliNamespace } = createHarness({ sessionNamespace: 'other' }) + const cliSocket = new FakeSocket('cli-socket-1') + connectCliSocket(cliNamespace, cliSocket, 'session-1') + + terminalSocket.trigger('agent-terminal:input', { sessionId: 'session-1', data: 'a' }) + + expect(lastEmit(cliSocket, 'agent-terminal:input')).toBeUndefined() + }) + }) + it('enforces per-socket terminal limits', () => { const { terminalSocket, cliNamespace } = createHarness({ maxTerminalsPerSocket: 1 }) const cliSocket = new FakeSocket('cli-socket-1') diff --git a/hub/src/socket/handlers/terminal.ts b/hub/src/socket/handlers/terminal.ts index dc5350014..ebfdf6002 100644 --- a/hub/src/socket/handlers/terminal.ts +++ b/hub/src/socket/handlers/terminal.ts @@ -215,7 +215,7 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH emitCloseToCli(entry) }) - const emitToCliForSession = (sessionId: string, event: 'agent-terminal:resize' | 'agent-terminal:refresh' | 'agent-terminal:idle', payload: Record): void => { + const emitToCliForSession = (sessionId: string, event: 'agent-terminal:resize' | 'agent-terminal:refresh' | 
'agent-terminal:idle' | 'agent-terminal:input', payload: Record): void => { const cliSocketId = pickCliSocketId(sessionId) if (!cliSocketId) return const cliSocket = cliNamespace.sockets.get(cliSocketId) @@ -304,6 +304,24 @@ export function registerTerminalHandlers(socket: SocketWithData, deps: TerminalH emitToCliForSession(sessionId, 'agent-terminal:resize', { sessionId, cols, rows }) }) + // Raw keystroke(s) from a viewer → relay to the CLI to write into the agent + // PTY. Same authorization guard as resize: only an authorized viewer of an + // active session in this namespace may drive its TUI. + socket.on('agent-terminal:input', (data: unknown) => { + const parsed = z.object({ + sessionId: z.string().min(1), + data: z.string().min(1) + }).safeParse(data) + if (!parsed.success) { + return + } + const { sessionId, data: keys } = parsed.data + if (!isAuthorizedSession(sessionId)) { + return + } + emitToCliForSession(sessionId, 'agent-terminal:input', { sessionId, data: keys }) + }) + socket.on('disconnect', () => { const removed = terminalRegistry.removeBySocket(socket.id) for (const entry of removed) { diff --git a/shared/src/socket.ts b/shared/src/socket.ts index d0ee3701b..93dd5bb31 100644 --- a/shared/src/socket.ts +++ b/shared/src/socket.ts @@ -49,6 +49,17 @@ export const AgentTerminalRefreshPayloadSchema = z.object({ export type AgentTerminalRefreshPayload = z.infer +// Raw keystroke(s) typed into the agent TUI from a web viewer — the agent PTY is +// the session's single TUI, keyed by sessionId (no terminalId). Lets a remote +// viewer navigate TUI screens (e.g. answer/escape a /usage or /model dialog) that +// the structured chat composer cannot express. 
+export const AgentTerminalInputPayloadSchema = z.object({ + sessionId: z.string().min(1), + data: z.string().min(1) +}) + +export type AgentTerminalInputPayload = z.infer + export const TerminalClosePayloadSchema = z.object({ sessionId: z.string().min(1), terminalId: z.string().min(1) @@ -216,6 +227,9 @@ export interface ServerToClientEvents { 'terminal:close': (data: TerminalClosePayload) => void 'agent-terminal:resize': (data: AgentTerminalResizePayload) => void 'agent-terminal:refresh': (data: AgentTerminalRefreshPayload) => void + // Raw keystroke(s) from a web viewer, relayed to the CLI to write into the + // agent PTY (interactive TUI navigation; see AgentTerminalInputPayload). + 'agent-terminal:input': (data: AgentTerminalInputPayload) => void // Sent to the CLI when the last agent-terminal viewer leaves, so it stops // streaming PTY output to the hub until someone subscribes again. 'agent-terminal:idle': (data: AgentTerminalRefreshPayload) => void diff --git a/web/src/components/AgentTerminal/AgentTerminalView.tsx b/web/src/components/AgentTerminal/AgentTerminalView.tsx index 34648a1cb..f6a5e591e 100644 --- a/web/src/components/AgentTerminal/AgentTerminalView.tsx +++ b/web/src/components/AgentTerminal/AgentTerminalView.tsx @@ -3,6 +3,7 @@ import { Terminal } from '@xterm/xterm' import { FitAddon } from '@xterm/addon-fit' import '@xterm/xterm/css/xterm.css' import { useAgentTerminalSocket } from '@/hooks/useAgentTerminalSocket' +import { useQuickKeyInput, QuickKeyRows } from '@/components/QuickKeys/QuickKeys' import { useAppContext } from '@/lib/app-context' function resolveThemeColors(): { background: string; foreground: string; selectionBackground: string } { @@ -19,9 +20,10 @@ type AgentTerminalViewProps = { className?: string } -// Output-only view of the agent PTY. Input is handled by the shared chat -// composer (HappyComposer) so there is a single composer with correct IME -// handling — no separate terminal input bar. 
+// Interactive view of the agent PTY. The chat composer remains the primary way +// to send messages (multiline, IME, mobile), but this terminal also accepts raw +// keystrokes + quick keys so a viewer can drive TUI screens the composer cannot +// express (escape a /usage screen, answer a /model dialog, send Ctrl-C). export function AgentTerminalView(props: AgentTerminalViewProps) { const { sessionId, visible, className } = props const { token, baseUrl } = useAppContext() @@ -37,12 +39,17 @@ export function AgentTerminalView(props: AgentTerminalViewProps) { unsubscribe, onOutput, resize, + sendInput, } = useAgentTerminalSocket({ baseUrl, token, sessionId, }) + // Raw keystrokes (terminal typing AND quick keys) share one sticky-modifier + // state, then go to the agent PTY via sendInput. + const { ctrlActive, altActive, dispatch, toggleModifier } = useQuickKeyInput({ onSend: sendInput }) + const onOutputRef = useRef(onOutput) useEffect(() => { onOutputRef.current = onOutput @@ -53,6 +60,13 @@ export function AgentTerminalView(props: AgentTerminalViewProps) { resizeRef.current = resize }, [resize]) + // Dispatch is stable, but the terminal is created once (mount effect), so + // read it through a ref to avoid re-creating the terminal on identity change. + const dispatchRef = useRef(dispatch) + useEffect(() => { + dispatchRef.current = dispatch + }, [dispatch]) + useEffect(() => { const container = containerRef.current if (!container) return @@ -94,8 +108,16 @@ export function AgentTerminalView(props: AgentTerminalViewProps) { terminal.write(data) }) + // Interactive: forward typed keystrokes to the agent PTY (with sticky + // modifiers applied) so a viewer can drive TUI screens the chat composer + // cannot express (e.g. escape a /usage screen, answer a dialog). 
+ const inputDisposable = terminal.onData((data) => { + dispatchRef.current(data) + }) + abortController.signal.addEventListener('abort', () => { observer.disconnect() + inputDisposable.dispose() fitAddon.dispose() terminal.dispose() }) @@ -151,6 +173,22 @@ export function AgentTerminalView(props: AgentTerminalViewProps) {
+ +
+ { + dispatch(sequence) + terminalRef.current?.focus() + }} + onToggleModifier={(modifier) => { + toggleModifier(modifier) + terminalRef.current?.focus() + }} + /> +
) } diff --git a/web/src/hooks/useAgentTerminalSocket.ts b/web/src/hooks/useAgentTerminalSocket.ts index da5cce2b2..0a560fb4c 100644 --- a/web/src/hooks/useAgentTerminalSocket.ts +++ b/web/src/hooks/useAgentTerminalSocket.ts @@ -26,6 +26,7 @@ export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): unsubscribe: () => void onOutput: (handler: (data: string) => void) => void resize: (cols: number, rows: number) => void + sendInput: (data: string) => void } { const [state, setState] = useState({ status: 'idle' }) const socketRef = useRef(null) @@ -177,6 +178,15 @@ export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): outputHandlerRef.current = handler }, []) + const sendInput = useCallback((data: string) => { + const socket = socketRef.current + const sessionId = sessionIdRef.current + if (!socket?.connected || !sessionId || !data) { + return + } + socket.emit('agent-terminal:input', { sessionId, data }) + }, []) + return { state, connect, @@ -184,6 +194,7 @@ export function useAgentTerminalSocket(options: UseAgentTerminalSocketOptions): resubscribe, unsubscribe, onOutput, - resize + resize, + sendInput } }