diff --git a/frontend/src/components/Settings.tsx b/frontend/src/components/Settings.tsx index a02c2b4..4f21a27 100644 --- a/frontend/src/components/Settings.tsx +++ b/frontend/src/components/Settings.tsx @@ -52,6 +52,8 @@ export function Settings({ isOpen, onClose, initialSection }: SettingsProps) { const [claudeExecutablePath, setClaudeExecutablePath] = useState(''); const [autoCheckUpdates, setAutoCheckUpdates] = useState(true); const [devMode, setDevMode] = useState(false); + const [usePtyHost, setUsePtyHost] = useState(false); + const [initialUsePtyHost, setInitialUsePtyHost] = useState(false); const [additionalPathsText, setAdditionalPathsText] = useState(''); const [platform, setPlatform] = useState('darwin'); const [enableCommitFooter, setEnableCommitFooter] = useState(true); @@ -142,6 +144,8 @@ export function Settings({ isOpen, onClose, initialSection }: SettingsProps) { setVerbose(data.verbose || false); setAutoCheckUpdates(data.autoCheckUpdates !== false); // Default to true setDevMode(data.devMode || false); + setUsePtyHost(data.usePtyHost === true); + setInitialUsePtyHost(data.usePtyHost === true); setClaudeExecutablePath(data.claudeExecutablePath || ''); setEnableCommitFooter(data.enableCommitFooter !== false); // Default to true setUiScale(data.uiScale || 1.0); @@ -213,6 +217,7 @@ export function Settings({ isOpen, onClose, initialSection }: SettingsProps) { verbose, autoCheckUpdates, devMode, + usePtyHost, claudeExecutablePath, enableCommitFooter, uiScale, @@ -1084,6 +1089,22 @@ export function Settings({ isOpen, onClose, initialSection }: SettingsProps) { Adds a "Messages" tab to each pane showing raw JSON responses from Claude Code. Useful for debugging and development.

+          <div>
+            <input
+              type="checkbox"
+              checked={usePtyHost}
+              onChange={(e) => setUsePtyHost(e.target.checked)}
+            />
+            <p>
+              Run terminal processes in a separate utility process for better crash isolation and fixes for Claude Code v2.1.113+ on macOS. Requires app restart; existing terminals keep their current backend, and new terminals after restart use the selected PTY mode.
+            </p>
+            {usePtyHost !== initialUsePtyHost && (
+              <p>
+                Restart Pane for this change to take effect.
+              </p>
+            )}
+          </div>
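How a main-side call site consumes this flag: the supervisor is forked once at startup, and `getPtyHostSupervisor()` (added in `main/src/index.ts` below) returns null when the setting is off or the fork failed. A minimal sketch of a spawn site under that contract; `spawnShell` is hypothetical, and the options shape is inferred from `PtyHostSpawnOpts` as used in `ptyHostMain.ts`:

```typescript
import * as pty from '@lydell/node-pty';
import { getPtyHostSupervisor } from '../index';

// Hypothetical helper inside a manager. Callers must handle the null
// supervisor and fall back to the legacy in-main spawn path.
async function spawnShell(shell: string, cwd: string) {
  const supervisor = getPtyHostSupervisor();
  if (supervisor) {
    // Resolves to a PtyHandle, the IPty-compatible shim defined in
    // ptyHostSupervisor.ts; onData/onExit wiring stays shape-compatible.
    return supervisor.spawn({
      shell,
      args: [],
      cwd,
      cols: 80,
      rows: 24,
      env: process.env as Record<string, string>,
    });
  }
  // Setting off or supervisor fork failed: legacy path, unchanged.
  return pty.spawn(shell, [], {
    name: 'xterm-256color',
    cwd,
    cols: 80,
    rows: 24,
    env: process.env as Record<string, string>,
  });
}
```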
diff --git a/frontend/src/components/TerminalPanel.tsx b/frontend/src/components/TerminalPanel.tsx
--- a/frontend/src/components/TerminalPanel.tsx
+++ b/frontend/src/components/TerminalPanel.tsx
@@ export const TerminalPanel: React.FC<TerminalPanelProps>
= React.memo(({ panel, const isCliPanel = !!terminalState?.isCliPanel; const [isCliReady, setIsCliReady] = useState(!!terminalState?.isCliReady); + // ptyId for the current PTY behind this panel, delivered via + // `terminal:ptyReady` when spawned through the ptyHost UtilityProcess. + // Null under the legacy `pty.spawn` path. Re-fires with a new value on + // auto-reattach after a supervisor restart, which re-subscribes the data + // listener below. + const [ptyId, setPtyId] = useState(null); + + // Ref holding the terminal output consumer installed by the main init effect. + // The data-subscription effect below reads from this ref so it can swap the + // subscription source (legacy `terminal:output` vs `electronAPI.ptyHost.onData`) + // without re-running the full terminal init. + const outputConsumerRef = useRef<{ + write: (data: string) => void; + } | null>(null); + + // Mirror of `ptyId` so the ack-flush closure (captured inside the init effect) + // can read the current value without re-creating. Updated by the effect below + // whenever `ptyId` changes (spawn, auto-reattach, or unmount). + const currentPtyIdRef = useRef(null); + useEffect(() => { + currentPtyIdRef.current = ptyId; + }, [ptyId]); + // Sync isCliReady from panel prop when it changes (e.g. backend persisted isCliReady // before this component subscribed to the IPC event, or panel state was updated externally) useEffect(() => { @@ -108,6 +131,32 @@ export const TerminalPanel: React.FC = React.memo(({ panel, return cleanup; }, [panel.id, isCliPanel, isCliReady]); + // Listen for the ptyHost ptyId assignment. The main process fires this + // once per spawn when the `usePtyHost` setting is on; fires again on auto-reattach + // after a supervisor restart with a new ptyId. Updating state triggers the + // data-subscription effect below to tear down and re-subscribe. + useEffect(() => { + const cleanup = window.electronAPI.events.onTerminalPtyReady((data) => { + if (data.panelId === panel.id) { + setPtyId(data.ptyId); + } + }); + return cleanup; + }, [panel.id]); + + // Subscribe to the ptyHost MessagePort data stream for this panel when we + // have a `ptyId`. Flag-off panels keep the legacy `terminal:output` IPC + // subscription installed inside the main init effect and skip this effect + // entirely. Re-subscribes when `ptyId` changes (auto-reattach after a + // supervisor restart). + useEffect(() => { + if (!ptyId) return; + const unsubData = window.electronAPI.ptyHost.onData(ptyId, (data: string) => { + outputConsumerRef.current?.write(data); + }); + return unsubData; + }, [ptyId]); + // Get session data from context using the safe hook const sessionContext = useSession(); const sessionId = sessionContext?.sessionId; @@ -641,7 +690,7 @@ export const TerminalPanel: React.FC = React.memo(({ panel, }); // Ack batching for flow control - const ACK_BATCH_SIZE = 10_000; // 10KB + const ACK_BATCH_SIZE = 5_000; // 5KB - aligned with main LOW_WATERMARK per VS Code FlowControlConstants const ACK_BATCH_INTERVAL = 100; // ms let pendingAckBytes = 0; let ackFlushTimer: ReturnType | null = null; @@ -654,7 +703,16 @@ export const TerminalPanel: React.FC = React.memo(({ panel, if (pendingAckBytes > 0) { const bytes = pendingAckBytes; pendingAckBytes = 0; - window.electronAPI.invoke('terminal:ack', panel.id, bytes); + // Under the ptyHost flag, ack over the per-window MessagePort so it + // bypasses the main IPC invoke queue. Flag-off keeps the legacy + // IPC path. 
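+          // Acks must ride the same transport that delivered the bytes:
+          // legacy flow control decrements on 'terminal:ack', while ptyHost
+          // flow control decrements via acknowledgePtyHostBytes when the
+          // port ack frame reaches main (see index.ts below).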
`currentPtyIdRef` is a ref because the ptyId can change + // across auto-reattach after a supervisor restart. + const activePtyId = currentPtyIdRef.current; + if (activePtyId) { + window.electronAPI.ptyHost.ack(activePtyId, bytes); + } else { + window.electronAPI.invoke('terminal:ack', panel.id, bytes); + } } }; @@ -882,34 +940,49 @@ export const TerminalPanel: React.FC = React.memo(({ panel, setIsInitialized(true); console.log('[TerminalPanel] Terminal initialization complete, isInitialized set to true'); - // Set up IPC communication for terminal I/O - const outputHandler = (data: { panelId?: string; sessionId?: string; output?: string } | unknown) => { - // Check if this is panel terminal output (has panelId) vs session terminal output (has sessionId) + // Core write-and-ack: consume a raw output chunk (already filtered by + // source/panelId on the dispatcher side). Installed into a ref so the + // `ptyId` effect below can swap subscription sources (legacy + // `terminal:output` IPC vs `electronAPI.ptyHost.onData` port) without + // re-running the full terminal init. + const writeAndAck = (output: string) => { + if (!terminal || disposed) return; + const outputLength = output.length; + terminal.write(output, () => { + if (disposed) return; + // Ack AFTER xterm has rendered the data — proper backpressure + pendingAckBytes += outputLength; + if (pendingAckBytes >= ACK_BATCH_SIZE) { + flushAck(); + } else if (!ackFlushTimer) { + ackFlushTimer = setTimeout(flushAck, ACK_BATCH_INTERVAL); + } + // Read scroll position LIVE after render, not before write — + // avoids stale shouldSnap=true yanking user back to bottom + if (isNearBottomRef.current && terminal) { + terminal.scrollToBottom(); + } + }); + }; + outputConsumerRef.current = { write: writeAndAck }; + + // Legacy `terminal:output` IPC subscription. Stays the primary source + // for flag-off panels (which never receive a `ptyId`). Under flag-on + // main also tees bytes through the ptyHost MessagePort; to avoid + // double-delivery to xterm, this handler short-circuits once the + // panel's `ptyId` is populated and the dedicated effect below takes + // over as the single byte source. 
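+      // e.g. once `terminal:ptyReady` has delivered a ptyId for this panel,
+      // a late legacy { panelId, output } frame is dropped by the guard
+      // below instead of being written to xterm a second time.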
+ const legacyOutputHandler = (data: unknown) => { + if (currentPtyIdRef.current) return; if (data && typeof data === 'object' && 'panelId' in data && data.panelId && 'output' in data) { const typedData = data as { panelId: string; output: string }; - if (typedData.panelId === panel.id && terminal && !disposed && isActiveRef.current) { - const outputLength = typedData.output.length; - terminal.write(typedData.output, () => { - if (disposed) return; - // Ack AFTER xterm has rendered the data — proper backpressure - pendingAckBytes += outputLength; - if (pendingAckBytes >= ACK_BATCH_SIZE) { - flushAck(); - } else if (!ackFlushTimer) { - ackFlushTimer = setTimeout(flushAck, ACK_BATCH_INTERVAL); - } - // Read scroll position LIVE after render, not before write — - // avoids stale shouldSnap=true yanking user back to bottom - if (isNearBottomRef.current && terminal) { - terminal.scrollToBottom(); - } - }); + if (typedData.panelId === panel.id) { + outputConsumerRef.current?.write(typedData.output); } } // Ignore session terminal output (has sessionId instead of panelId) }; - - const unsubscribeOutput = window.electronAPI.events.onTerminalOutput(outputHandler); + const unsubscribeOutput = window.electronAPI.events.onTerminalOutput(legacyOutputHandler); console.log('[TerminalPanel] Subscribed to terminal output events for panel:', panel.id); // Detect full-screen TUI apps (vim, htop, etc.) via alternate screen buffer. @@ -1107,6 +1180,7 @@ export const TerminalPanel: React.FC = React.memo(({ panel, disposed = true; interceptor.dispose(); interceptorRef.current = null; + outputConsumerRef.current = null; flushAck(); if (ackFlushTimer) clearTimeout(ackFlushTimer); resizeObserver.disconnect(); diff --git a/frontend/src/types/config.ts b/frontend/src/types/config.ts index 33f2fd6..0c1e274 100644 --- a/frontend/src/types/config.ts +++ b/frontend/src/types/config.ts @@ -30,6 +30,9 @@ export interface AppConfig { enabled: boolean; }; devMode?: boolean; + // Route PTY spawns through an isolated ptyHost UtilityProcess for crash + // isolation. Off by default. Requires app restart to take effect. + usePtyHost?: boolean; sessionCreationPreferences?: { sessionCount?: number; toolType?: 'claude' | 'none'; diff --git a/frontend/src/types/electron.d.ts b/frontend/src/types/electron.d.ts index 05a0c47..1fb5a80 100644 --- a/frontend/src/types/electron.d.ts +++ b/frontend/src/types/electron.d.ts @@ -277,6 +277,13 @@ interface ElectronAPI { onTerminalCliReady: (callback: (data: { panelId: string }) => void) => () => void; onTerminalExited: (callback: (data: { sessionId: string; panelId: string; exitCode: number; signal: number | null }) => void) => () => void; onTerminalAlternateScreen: (callback: (data: { panelId: string; active: boolean }) => void) => () => void; + /** + * Fired when a terminal panel is spawned via the ptyHost UtilityProcess. + * Carries the host-allocated `ptyId` so TerminalPanel.tsx can subscribe to + * `electronAPI.ptyHost.onData(ptyId, cb)` when the `usePtyHost` setting is on. + * Re-fires on auto-reattach after a supervisor restart with a new ptyId. 
+ */ + onTerminalPtyReady: (callback: (data: { sessionId: string; panelId: string; ptyId: string }) => void) => () => void; onUncleanShutdownDetected: (callback: () => void) => () => void; onMainLog: (callback: (level: string, message: string) => void) => () => void; onVersionUpdateAvailable: (callback: (versionInfo: VersionUpdateInfo) => void) => () => void; @@ -403,6 +410,25 @@ interface ElectronAPI { window: { isFocused: () => Promise; }; + + // ptyHost: typed wrapper over the per-window MessagePort installed by the + // preload script. The raw port never crosses contextBridge — these + // functions are the only surface. Chunk D will switch TerminalPanel.tsx + // over to these; Chunk C ships the plumbing so renderer code can start + // subscribing when the `usePtyHost` setting is on. + ptyHost: { + /** Subscribe to PTY byte output for a given ptyId. Returns unsubscribe. */ + onData: (ptyId: string, cb: (data: string) => void) => () => void; + /** Subscribe to PTY exit for a given ptyId. Returns unsubscribe. */ + onExit: ( + ptyId: string, + cb: (exitCode: number | null, signal: number | null) => void, + ) => () => void; + /** Ack `bytes` bytes back over the port for flow-control bookkeeping. */ + ack: (ptyId: string, bytes: number) => void; + /** Write `data` over the port without round-tripping through IPC invoke. */ + write: (ptyId: string, data: string) => void; + }; } interface CloudVmState { diff --git a/main/src/index.ts b/main/src/index.ts index 7541553..f320633 100644 --- a/main/src/index.ts +++ b/main/src/index.ts @@ -54,6 +54,7 @@ import { terminalPanelManager } from './services/terminalPanelManager'; import { panelManager } from './services/panelManager'; import { TerminalPanelState } from '../../shared/types/panels'; import { worktreePoolManager } from './services/worktreePoolManager'; +import { PtyHostSupervisor } from './ptyHost/ptyHostSupervisor'; export let mainWindow: BrowserWindow | null = null; @@ -115,6 +116,23 @@ let archiveProgressManager: ArchiveProgressManager; let analyticsManager: AnalyticsManager; let spotlightManager: SpotlightManager; +// ptyHost supervisor — forked as an Electron UtilityProcess on app ready, +// but only when the `usePtyHost` setting is enabled (default: off). When +// disabled, the supervisor is never forked and every manager transparently +// falls through to the legacy in-main `pty.spawn` path. +let ptyHostSupervisor: PtyHostSupervisor | null = null; + +/** + * Getter for the ptyHost supervisor. Managers route spawn/write/resize/kill + * through this when `configManager.getUsePtyHost()` returns true and the + * supervisor fork succeeded. Returns null when the setting is off OR when + * fork failed — callers must handle the null case and fall back to the + * legacy `pty.spawn` path. + */ +export function getPtyHostSupervisor(): PtyHostSupervisor | null { + return ptyHostSupervisor; +} + // Store app start time for session duration tracking let appStartTime: number; @@ -861,6 +879,15 @@ async function createWindow() { } resourceMonitorService.handleVisibilityChange(false); }); + + // Hand the renderer its per-window ptyHost data port once the preload + // listener is guaranteed to be installed. Chunk C: the port is a + // passthrough; Chunk D switches `TerminalPanel.tsx` to subscribe on it. 
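+  // The renderer end is received in preload via
+  // `ipcRenderer.on('ptyHost-port', ...)`; see main/src/preload.ts.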
+ mainWindow.webContents.once('did-finish-load', () => { + if (ptyHostSupervisor && mainWindow) { + ptyHostSupervisor.attachWindow(mainWindow.webContents); + } + }); } async function initializeServices() { @@ -1070,6 +1097,56 @@ app.whenReady().then(async () => { console.log('[Main] App is ready, initializing services...'); await initializeServices(); console.log('[Main] Services initialized, creating window...'); + + // Start the ptyHost supervisor before the window opens so the renderer's + // preload listener for 'ptyHost-port' has a port to receive when the window + // finishes loading. Gated on the `usePtyHost` setting: when off (default), + // the supervisor is never forked and every spawn site falls through to the + // legacy in-main `pty.spawn` path with zero ptyHost code executing. + if (configManager.getUsePtyHost()) { + try { + ptyHostSupervisor = new PtyHostSupervisor(); + await ptyHostSupervisor.start(); + + ptyHostSupervisor.on('renderer-ack', (ptyId: string, bytes: number) => { + terminalPanelManager.acknowledgePtyHostBytes(ptyId, bytes); + }); + + // Auto-reattach: on ptyHost restart, every manager re-enters the spawn + // path for its live panels. Order (per plan Task 6b / gotcha line 825): + // rejectPendingRpcs → keep manager maps → await nextReady → respawnAll + // The supervisor keeps manager maps intact; the `ready-after-restart` + // event marks "await nextReady" complete so we can drive respawnAll across + // every manager in parallel. + ptyHostSupervisor.on('ready-after-restart', () => { + console.log('[ptyHost] ready-after-restart; fanning respawnAll across managers'); + const respawnTasks: Array> = [ + terminalPanelManager.respawnAll(), + ]; + if (cliManagerFactory) { + const managers = cliManagerFactory.getAllManagers(); + for (const manager of managers) { + respawnTasks.push(manager.respawnAll()); + } + } + if (runCommandManager) { + respawnTasks.push(runCommandManager.respawnAll()); + } + if (sessionManager) { + respawnTasks.push(sessionManager.respawnAll()); + } + Promise.all(respawnTasks) + .then(() => console.log('[ptyHost] respawnAll fan-out complete')) + .catch((err) => console.error('[ptyHost] respawnAll fan-out error:', err)); + }); + } catch (error) { + console.error('[ptyHost] supervisor failed to start; legacy pty.spawn path will be used', error); + ptyHostSupervisor = null; + } + } else { + console.log('[ptyHost] usePtyHost setting is disabled; skipping supervisor fork'); + } + await createWindow(); console.log('[Main] Window created successfully'); diff --git a/main/src/preload.ts b/main/src/preload.ts index 3e33eb9..08d0ef3 100644 --- a/main/src/preload.ts +++ b/main/src/preload.ts @@ -90,6 +90,84 @@ interface UpdaterInfo { // Increase max listeners for ipcRenderer to prevent warnings when many components listen to events ipcRenderer.setMaxListeners(50); +// ptyHost data port wiring. +// +// Main posts `webContents.postMessage('ptyHost-port', null, [rendererPort])` +// after `did-finish-load`. The renderer end is a DOM-style `MessagePort` — +// use `.onmessage`, not `.on('message')`. The port CANNOT cross contextBridge +// directly, so we keep it in this preload-scoped closure and expose a typed +// function surface on `window.electronAPI.ptyHost` below. +// +// Chunk C: ptyHost does not yet tee bytes to this port; the RPC path on the +// main side continues to deliver bytes via the existing `terminal:output` +// channel. Subscribers are still wired so Chunk D can flip the byte path over +// without further preload changes. 
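+//
+// Example inbound frames, matching the types below:
+//   { type: 'data', ptyId: '…', data: 'ls -la\r\n' }
+//   { type: 'exit', ptyId: '…', exitCode: 0, signal: null }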
+type PtyHostDataFrame = { type: 'data'; ptyId: string; data: string }; +type PtyHostExitFrame = { + type: 'exit'; + ptyId: string; + exitCode: number | null; + signal: number | null; +}; +type PtyHostInboundFrame = PtyHostDataFrame | PtyHostExitFrame; + +type PtyDataCallback = (data: string) => void; +type PtyExitCallback = (exitCode: number | null, signal: number | null) => void; + +let ptyHostPort: MessagePort | null = null; +const ptyDataSubscribers = new Map>(); +const ptyExitSubscribers = new Map>(); + +function isPtyHostInboundFrame(frame: unknown): frame is PtyHostInboundFrame { + if (typeof frame !== 'object' || frame === null) return false; + const f = frame as { type?: unknown; ptyId?: unknown }; + if (typeof f.ptyId !== 'string') return false; + return f.type === 'data' || f.type === 'exit'; +} + +ipcRenderer.on('ptyHost-port', (event) => { + const [port] = event.ports; + if (!port) { + console.error('[ptyHost] ptyHost-port IPC arrived with no port'); + return; + } + // Renderer-world MessagePort is DOM-style: use .start() + .onmessage. + port.start(); + port.onmessage = (e: MessageEvent) => { + const frame = e.data as unknown; + if (!isPtyHostInboundFrame(frame)) { + return; + } + if (frame.type === 'data') { + const subs = ptyDataSubscribers.get(frame.ptyId); + if (subs) { + for (const cb of subs) { + try { + cb(frame.data); + } catch (err) { + console.error('[ptyHost] data subscriber threw', err); + } + } + } + return; + } + if (frame.type === 'exit') { + const subs = ptyExitSubscribers.get(frame.ptyId); + if (subs) { + for (const cb of subs) { + try { + cb(frame.exitCode, frame.signal); + } catch (err) { + console.error('[ptyHost] exit subscriber threw', err); + } + } + } + return; + } + }; + ptyHostPort = port; +}); + // Bridge panel events from main process to renderer window as DOM CustomEvents // This allows React components to listen with `window.addEventListener('panel:event', ...)` try { @@ -606,6 +684,17 @@ contextBridge.exposeInMainWorld('electronAPI', { return () => ipcRenderer.removeListener('terminal:alternateScreen', wrappedCallback); }, + // Fired once per terminal spawn when the `usePtyHost` setting is on and + // the ptyHost supervisor is live. Carries the host-allocated `ptyId` so + // `TerminalPanel.tsx` can subscribe to `electronAPI.ptyHost.onData(ptyId, ...)` + // instead of the legacy `terminal:output` channel. Fires again on auto-reattach + // after a supervisor restart so the renderer re-subscribes to the new ptyId. + onTerminalPtyReady: (callback: (data: { sessionId: string; panelId: string; ptyId: string }) => void) => { + const wrappedCallback = (_event: Electron.IpcRendererEvent, data: { sessionId: string; panelId: string; ptyId: string }) => callback(data); + ipcRenderer.on('terminal:ptyReady', wrappedCallback); + return () => ipcRenderer.removeListener('terminal:ptyReady', wrappedCallback); + }, + // Spotlight events onSpotlightStatusChanged: (callback: (data: { sessionId: string; projectId: number; active: boolean }) => void) => { const wrappedCallback = (_event: Electron.IpcRendererEvent, data: { sessionId: string; projectId: number; active: boolean }) => callback(data); @@ -790,6 +879,58 @@ contextBridge.exposeInMainWorld('electronAPI', { window: { isFocused: (): Promise => ipcRenderer.invoke('window:is-focused') as Promise, }, + + // ptyHost: typed wrapper over the per-window MessagePort. The raw port is + // kept in preload scope; only these functions cross the contextBridge. 
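+  // Dispatch is per-ptyId: frames whose ptyId has no registered subscriber
+  // are dropped silently by the port handler above.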
+ // + // `onData` / `onExit` return an unsubscribe function matching the existing + // event-subscription convention elsewhere on `electronAPI.events`. Chunks + // D/E switch `TerminalPanel.tsx` over to these. + // + // `write` / `ack` post frames back over the port; Chunk D wires these in + // main-side via `PtyHostSupervisor.onRendererMessage` when they land. + ptyHost: { + onData: (ptyId: string, cb: PtyDataCallback): (() => void) => { + let set = ptyDataSubscribers.get(ptyId); + if (!set) { + set = new Set(); + ptyDataSubscribers.set(ptyId, set); + } + set.add(cb); + return () => { + const current = ptyDataSubscribers.get(ptyId); + if (!current) return; + current.delete(cb); + if (current.size === 0) { + ptyDataSubscribers.delete(ptyId); + } + }; + }, + onExit: (ptyId: string, cb: PtyExitCallback): (() => void) => { + let set = ptyExitSubscribers.get(ptyId); + if (!set) { + set = new Set(); + ptyExitSubscribers.set(ptyId, set); + } + set.add(cb); + return () => { + const current = ptyExitSubscribers.get(ptyId); + if (!current) return; + current.delete(cb); + if (current.size === 0) { + ptyExitSubscribers.delete(ptyId); + } + }; + }, + ack: (ptyId: string, bytes: number): void => { + if (!ptyHostPort) return; + ptyHostPort.postMessage({ type: 'ack', ptyId, bytes }); + }, + write: (ptyId: string, data: string): void => { + if (!ptyHostPort) return; + ptyHostPort.postMessage({ type: 'write', ptyId, data }); + }, + }, }); // Expose electron event listeners and utilities for permission requests diff --git a/main/src/ptyHost/flowControl.test.ts b/main/src/ptyHost/flowControl.test.ts new file mode 100644 index 0000000..248efb3 --- /dev/null +++ b/main/src/ptyHost/flowControl.test.ts @@ -0,0 +1,95 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { + HIGH_WATERMARK, + LOW_WATERMARK, + PAUSE_SAFETY_TIMEOUT, + createFlowControlRecord, + disposeFlowControlRecord, + onAck, + onPtyBytes, +} from './flowControl'; + +async function flushMicrotasks(): Promise { + await Promise.resolve(); + await Promise.resolve(); + await Promise.resolve(); +} + +describe('ptyHost flowControl', () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it('pauses after high watermark and resumes after low watermark ack', async () => { + const record = createFlowControlRecord(); + const pause = vi.fn().mockResolvedValue(undefined); + const resume = vi.fn(); + + onPtyBytes(record, HIGH_WATERMARK + 1, pause, resume); + + expect(record.isPaused).toBe(true); + expect(record.pauseRpcInFlight).toBe(true); + expect(pause).toHaveBeenCalledTimes(1); + + await flushMicrotasks(); + + expect(record.pauseRpcInFlight).toBe(false); + expect(record.pauseSafetyTimer).not.toBeNull(); + + onAck(record, HIGH_WATERMARK + 1 - LOW_WATERMARK, resume); + + expect(record.pendingBytes).toBe(LOW_WATERMARK); + expect(record.isPaused).toBe(false); + expect(record.pauseSafetyTimer).toBeNull(); + expect(resume).toHaveBeenCalledTimes(1); + + disposeFlowControlRecord(record); + }); + + it('force-resumes if a paused terminal receives no acks', async () => { + const record = createFlowControlRecord(); + const pause = vi.fn().mockResolvedValue(undefined); + const resume = vi.fn(); + + onPtyBytes(record, HIGH_WATERMARK + 1, pause, resume); + await flushMicrotasks(); + + await vi.advanceTimersByTimeAsync(PAUSE_SAFETY_TIMEOUT); + + expect(record.isPaused).toBe(false); + expect(record.pauseSafetyTimer).toBeNull(); + expect(resume).toHaveBeenCalledTimes(1); + + 
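+    // At this point the safety timer has fired once and disarmed itself.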
disposeFlowControlRecord(record); + }); + + it('does not arm safety timer until async pause resolves', async () => { + const record = createFlowControlRecord(); + let resolvePause: (() => void) | undefined; + const pause = vi.fn(() => new Promise((resolve) => { + resolvePause = resolve; + })); + const resume = vi.fn(); + + onPtyBytes(record, HIGH_WATERMARK + 1, pause, resume); + + expect(record.isPaused).toBe(true); + expect(record.pauseRpcInFlight).toBe(true); + expect(record.pauseSafetyTimer).toBeNull(); + + vi.advanceTimersByTime(PAUSE_SAFETY_TIMEOUT); + expect(resume).not.toHaveBeenCalled(); + + resolvePause?.(); + await flushMicrotasks(); + + expect(record.pauseRpcInFlight).toBe(false); + expect(record.pauseSafetyTimer).not.toBeNull(); + + disposeFlowControlRecord(record); + }); +}); diff --git a/main/src/ptyHost/flowControl.ts b/main/src/ptyHost/flowControl.ts new file mode 100644 index 0000000..bac8bb1 --- /dev/null +++ b/main/src/ptyHost/flowControl.ts @@ -0,0 +1,193 @@ +/** + * Per-ptyId flow-control bookkeeping across the ptyHost async seam. + * + * Encodes the VS Code `FlowControlConstants` triple (100 000 / 5 000 / 5 000) + * that the plan has decided to adopt. Note: the existing main-side constant + * at `terminalPanelManager.ts:14` is currently `LOW_WATERMARK = 10_000`; this + * module uses the new aligned value of `5_000` (matching the renderer's + * `ACK_BATCH_SIZE`). Task 5 in the plan migrates `terminalPanelManager.ts` + * and `TerminalPanel.tsx` to this value in lockstep. + * + * The key subtlety is the `pauseRpcInFlight` gate: when we send a pause RPC + * to the host we don't know when it actually lands, so a saturated main event + * loop could fire the safety-resume timer and force-resume before pause even + * applies. To defeat this, the safety timer is armed ONLY after the pause RPC + * resolves (plan lines 619-624), and the safety-resume callback is a no-op + * while the pause RPC is still in flight (plan gotcha at line 762). + */ + +/** Pause the host PTY when pending bytes reach this watermark. */ +export const HIGH_WATERMARK = 100_000; + +/** + * Resume the host PTY when pending bytes drop to this watermark. + * + * NOTE: main's `terminalPanelManager.ts` still has `LOW_WATERMARK = 10_000` + * at the time this module is introduced. This module uses the new aligned + * value; Task 5 updates the main-side constant and the renderer's + * `ACK_BATCH_SIZE` together so the three numbers match. + */ +export const LOW_WATERMARK = 5_000; + +/** + * Force-resume a stuck pause after this many ms if no acks arrive. + * + * Matches the existing `PAUSE_SAFETY_TIMEOUT` at `terminalPanelManager.ts:17`. + * Only runs while `pauseRpcInFlight` is false; a pause RPC that is still in + * flight must be allowed to land first. + */ +export const PAUSE_SAFETY_TIMEOUT = 5_000; + +/** + * Per-ptyId flow-control state. One record per live PTY on the main side. + * + * Fields mirror the shape of `TerminalProcess` at `terminalPanelManager.ts:21-43` + * (`pendingBytes`, `isPaused`, `pauseSafetyTimer`) plus the new + * `pauseRpcInFlight` gate required by the async seam. + */ +export interface FlowControlRecord { + pendingBytes: number; + isPaused: boolean; + pauseSafetyTimer: ReturnType | null; + /** + * True between posting a `pause` RPC and receiving its response. While this + * is true, the safety timer is NOT armed, and any already-armed timer's + * resume callback must short-circuit. Prevents a stalled main event loop + * from force-resuming a PTY we just tried to pause. 
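+   * Concretely: `onPtyBytes` sets this true before invoking the pause RPC
+   * and clears it in the RPC's `finally` handler, which is also the only
+   * place the safety timer is armed.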
+ */ + pauseRpcInFlight: boolean; +} + +/** + * Construct a fresh flow-control record in the default (running) state. + */ +export function createFlowControlRecord(): FlowControlRecord { + return { + pendingBytes: 0, + isPaused: false, + pauseSafetyTimer: null, + pauseRpcInFlight: false, + }; +} + +/** + * Callback invoked when the high watermark is first crossed. + * + * The caller awaits this; typically it posts a `pause` RPC to the ptyHost and + * returns the resulting promise. `onPtyBytes` uses the promise's resolution to + * know when to arm the safety timer (see `handleBytes` implementation). + */ +export type PauseCallback = () => Promise; + +/** + * Callback invoked when the low watermark is crossed with bytes still paused. + * + * Like `PauseCallback`, typically posts a `resume` RPC to the ptyHost. The + * caller is not required to await it; the record flips `isPaused` synchronously + * so subsequent `onAck` calls don't double-resume. + */ +export type ResumeCallback = () => void; + +/** + * Called when `length` new bytes arrive from the host for a given ptyId. + * + * Increments `pendingBytes`. If we cross the high watermark and aren't already + * paused, marks the record paused, sets `pauseRpcInFlight = true`, invokes + * `onPause()`, and arms the safety timer only after the pause RPC resolves - + * per plan lines 619-624. + * + * Any error thrown by `onPause` is swallowed: the record stays marked paused, + * `pauseRpcInFlight` clears, and the safety timer is armed as a fallback so a + * stuck pause still gets force-resumed eventually. + */ +export function onPtyBytes( + record: FlowControlRecord, + length: number, + onPause: PauseCallback, + onResume: ResumeCallback, +): void { + record.pendingBytes += length; + if (record.pendingBytes < HIGH_WATERMARK || record.isPaused) { + return; + } + + record.isPaused = true; + record.pauseRpcInFlight = true; + + // Arm the safety timer only after the pause RPC lands (or fails). While the + // RPC is in flight, any pre-existing timer's callback short-circuits because + // `pauseRpcInFlight` is true (see `armSafetyTimer`). + onPause() + .catch(() => { + // Swallow; treat as "pause couldn't be delivered". The safety timer + // below will fire eventually and force-resume. + }) + .finally(() => { + record.pauseRpcInFlight = false; + armSafetyTimer(record, onResume); + }); +} + +/** + * Called when the renderer acks `bytes` bytes of processed output. + * + * Decrements `pendingBytes`. If we drop to or below the low watermark and the + * record is paused, clears the safety timer and invokes `onResume()`. + * Clearing `isPaused` is synchronous so a subsequent `onAck` doesn't race + * another resume. + */ +export function onAck( + record: FlowControlRecord, + bytes: number, + onResume: ResumeCallback, +): void { + record.pendingBytes = Math.max(0, record.pendingBytes - bytes); + if (record.pendingBytes > LOW_WATERMARK || !record.isPaused) { + return; + } + if (record.pauseSafetyTimer) { + clearTimeout(record.pauseSafetyTimer); + record.pauseSafetyTimer = null; + } + record.isPaused = false; + onResume(); +} + +/** + * Clean up a record's timers. Call when the PTY exits so stale safety timers + * don't keep the event loop alive. 
+ */ +export function disposeFlowControlRecord(record: FlowControlRecord): void { + if (record.pauseSafetyTimer) { + clearTimeout(record.pauseSafetyTimer); + record.pauseSafetyTimer = null; + } + record.isPaused = false; + record.pauseRpcInFlight = false; + record.pendingBytes = 0; +} + +/** + * Arm the 5-second safety timer that force-resumes a stuck pause. + * + * The timer's callback checks `pauseRpcInFlight` first and short-circuits if + * a pause RPC has been re-issued in the meantime; this is the guard that + * defeats the race described in the module header. + */ +function armSafetyTimer(record: FlowControlRecord, onResume: ResumeCallback): void { + if (record.pauseSafetyTimer) { + clearTimeout(record.pauseSafetyTimer); + } + record.pauseSafetyTimer = setTimeout(() => { + record.pauseSafetyTimer = null; + // Suppress force-resume while another pause RPC is still in flight. + if (record.pauseRpcInFlight) { + return; + } + if (!record.isPaused) { + return; + } + record.isPaused = false; + onResume(); + }, PAUSE_SAFETY_TIMEOUT); +} diff --git a/main/src/ptyHost/ptyHostMain.ts b/main/src/ptyHost/ptyHostMain.ts new file mode 100644 index 0000000..8a024dd --- /dev/null +++ b/main/src/ptyHost/ptyHostMain.ts @@ -0,0 +1,437 @@ +/** + * UtilityProcess entry for the ptyHost. + * + * This file runs inside an Electron `UtilityProcess` (forked by the main-side + * `PtyHostSupervisor` in Chunk C). It owns the PTY handles and executes spawn, + * write, resize, kill, pause, and resume requests on behalf of main. Bytes and + * exit events stream back over the same port as unsolicited `PtyHostEvent` + * frames. + * + * Self-contained by design: + * - No imports from Pane's main services (Logger, ConfigManager, shell + * detection, wslUtils). The UtilityProcess knows nothing about Pane's + * configuration; all policy stays on the main side. + * - `@lydell/node-pty` loads with the same ABI as main because UtilityProcess + * shares Electron's Node runtime (`package.json:55, 115-121`). + * + * Wire protocol (matches `types.ts`): + * - The main side sends a single `{ type: 'init' }` message on Electron's `parentPort` + * with one `MessagePortMain` attached via `transferList`. That port is the + * sole bidirectional channel thereafter. + * - Inbound frames on the port are either: + * - `{ type: 'heartbeat-ping' }` (raw event, no `id`) — reply with + * `{ type: 'heartbeat-pong' }`. + * - `PtyHostRequest` (has numeric `id` + string `method`) — dispatch and + * reply with a matching `PtyHostResponse`. + * - Outbound frames are `PtyHostResponse` (with `id`) or `PtyHostEvent` + * (without `id`). + * + * Port lifetime: + * - The port is stored in a module-scoped variable. Closure locals would let + * GC close the channel (see plan gotchas line 323, 736, 963). + */ + +import { randomUUID } from 'node:crypto'; + +// node-pty's CommonJS export shape matches what main uses at +// `terminalPanelManager.ts:1`. We use a typed `require` here because this +// UtilityProcess entry is deliberately self-contained; importing via +// `import * as pty` would couple to main's module graph. +// eslint-disable-next-line @typescript-eslint/no-require-imports +const pty = require('@lydell/node-pty') as typeof import('@lydell/node-pty'); + +// Electron exposes UtilityProcess parentPort on `process.parentPort` in this +// runtime. Keep a fallback to `require('electron').parentPort` for forward +// compatibility without depending on a named export that Electron's main +// process type surface does not expose. 
+// eslint-disable-next-line @typescript-eslint/no-require-imports +const electron = require('electron') as { parentPort?: unknown }; + +import type { + PtyHostRequest, + PtyHostResponse, + PtyHostSpawnError, + PtyHostSpawnOpts, +} from './types'; + +/** + * Minimal subset of `IPty` we use. Lifted from `@lydell/node-pty`'s + * `node-pty.d.ts` so this file does not have to type-require the whole module + * surface. Matches the methods called in `terminalPanelManager.ts`. + */ +interface HostPty { + readonly pid: number; + onData(callback: (data: string) => void): { dispose(): void }; + onExit(callback: (event: { exitCode: number; signal?: number }) => void): { dispose(): void }; + write(data: string): void; + resize(columns: number, rows: number): void; + kill(signal?: string): void; + pause(): void; + resume(): void; +} + +/** Port handoff message shape; the supervisor posts exactly this on init. */ +interface InitMessage { + type: 'init'; +} + +/** Raw heartbeat frame; NOT an RPC request (no `id`, no `method`). */ +interface HeartbeatPing { + type: 'heartbeat-ping'; +} + +/** + * Minimal typing for the MessagePortMain we receive from Electron's `parentPort`. + * We intentionally avoid importing `electron`'s types here so this module can + * be reasoned about in isolation. Electron's `MessagePortMain` extends + * `NodeEventEmitter` and exposes `.start()`, `.postMessage()`, and + * `.on('message', listener)` — exactly the shape below. + */ +interface HostPort { + start(): void; + postMessage(message: unknown): void; + on(event: 'message', listener: (event: { data: unknown; ports: unknown[] }) => void): void; +} + +// Module-scoped port and PTY map. Both MUST live at module scope: the port +// would otherwise be collected and close the channel; the PTY map must +// survive across every inbound RPC. +let rpcPort: HostPort | null = null; +const ptyMap = new Map(); + +/** + * Top-level bootstrap. + * + * Electron's `parentPort` is provided by the UtilityProcess runtime. We register a + * one-shot listener for the init message; after that, all traffic flows over + * the attached `MessagePortMain`. + */ +function bootstrap(): void { + const parent = ( + (process as unknown as { parentPort?: unknown }).parentPort ?? electron.parentPort + ) as (HostPort & { once: HostPort['on'] }) | undefined; + if (!parent) { + console.error('[ptyHost] parentPort is not available; exiting'); + process.exit(1); + return; + } + + parent.once('message', (event: { data: unknown; ports: unknown[] }) => { + if (!isInitMessage(event.data)) { + console.error('[ptyHost] first parentPort message was not { type: "init" }; exiting', event.data); + process.exit(1); + return; + } + const [port] = event.ports as HostPort[]; + if (!port) { + console.error('[ptyHost] init message arrived without a MessagePort; exiting'); + process.exit(1); + return; + } + rpcPort = port; + // MessagePortMain queues inbound messages until `.start()` is called; the + // first miss manifests as silent hang, so we must call it before adding + // the listener to minimize the chance of a missed early frame. + port.start(); + port.on('message', (message) => { + handleInboundFrame(message.data); + }); + parent.postMessage({ type: 'host-ready' }); + console.log('[ptyHost] ready; RPC port attached'); + }); +} + +/** + * Route an inbound port frame to the right handler. + * + * Heartbeat frames are distinguished by `type === 'heartbeat-ping'`; RPC + * requests carry `id` (number) and `method` (string). Everything else is + * logged and dropped. 
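+ * Dropping instead of throwing keeps one malformed frame from taking down
+ * every PTY owned by this host.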
+ */ +function handleInboundFrame(frame: unknown): void { + if (isHeartbeatPing(frame)) { + if (rpcPort) { + rpcPort.postMessage({ type: 'heartbeat-pong' }); + } + return; + } + if (isPtyHostRequest(frame)) { + handleRequest(frame).catch((err: unknown) => { + console.error('[ptyHost] unhandled error in request dispatch', err); + }); + return; + } + console.error('[ptyHost] received unknown frame, dropping', frame); +} + +/** + * Dispatch one RPC request. Never throws; always posts a response. + */ +async function handleRequest(request: PtyHostRequest): Promise { + switch (request.method) { + case 'spawn': + handleSpawn(request.id, request.args); + return; + case 'write': + handleWrite(request.id, request.args.ptyId, request.args.data); + return; + case 'resize': + handleResize(request.id, request.args.ptyId, request.args.cols, request.args.rows); + return; + case 'kill': + handleKill(request.id, request.args.ptyId, request.args.signal); + return; + case 'ack': + // Flow-control + pause/resume live on the main side; `ack` is a stub on + // the host. Reply `ok` so callers resolve. + respondOk(request.id, undefined); + return; + case 'pause': + handlePause(request.id, request.args.ptyId); + return; + case 'resume': + handleResume(request.id, request.args.ptyId); + return; + default: { + // Exhaustiveness guard: if a new method is added to `PtyHostRequest` + // without updating this switch, TypeScript will flag `_unreachable`. + const _unreachable: never = request; + void _unreachable; + return; + } + } +} + +function handleSpawn(id: number, opts: PtyHostSpawnOpts): void { + const ptyId = randomUUID(); + let ptyProcess: HostPty; + try { + // Cast via `unknown` to keep the HostPty shape narrow; node-pty's own + // `IPty` is a superset we don't need here. + ptyProcess = pty.spawn(opts.shell, opts.args, { + name: opts.name ?? 'xterm-256color', + cwd: opts.cwd, + cols: opts.cols, + rows: opts.rows, + env: opts.env, + }) as unknown as HostPty; + } catch (err) { + const classified = classifySpawnError(err); + console.error(`[ptyHost] spawn failed: code=${classified.code} message=${classified.message}`); + respondErr(id, classified); + return; + } + + ptyMap.set(ptyId, ptyProcess); + + ptyProcess.onData((data) => { + if (!rpcPort) { + return; + } + rpcPort.postMessage({ type: 'data', ptyId, data }); + }); + + ptyProcess.onExit(({ exitCode, signal }) => { + if (rpcPort) { + // Forward `{exitCode, signal}` verbatim including undefined → null. + // `AbstractCliManager.ts:781-795` branches on the raw signal number so + // we must not normalize to a string name. + rpcPort.postMessage({ + type: 'exit', + ptyId, + exitCode: exitCode ?? null, + signal: signal ?? 
null, + }); + } + ptyMap.delete(ptyId); + }); + + console.log(`[ptyHost] spawned ptyId=${ptyId} pid=${ptyProcess.pid} shell=${opts.shell}`); + respondOk(id, { ptyId, pid: ptyProcess.pid }); +} + +function handleWrite(id: number, ptyId: string, data: string): void { + const p = ptyMap.get(ptyId); + if (!p) { + respondErr(id, { code: 'OTHER', message: `unknown ptyId: ${ptyId}` }); + return; + } + try { + p.write(data); + respondOk(id, undefined); + } catch (err) { + respondErr(id, { code: 'OTHER', message: errorMessage(err) }); + } +} + +function handleResize(id: number, ptyId: string, cols: number, rows: number): void { + const p = ptyMap.get(ptyId); + if (!p) { + respondErr(id, { code: 'OTHER', message: `unknown ptyId: ${ptyId}` }); + return; + } + try { + p.resize(cols, rows); + respondOk(id, undefined); + } catch (err) { + respondErr(id, { code: 'OTHER', message: errorMessage(err) }); + } +} + +function handleKill(id: number, ptyId: string, signal: NodeJS.Signals | undefined): void { + const p = ptyMap.get(ptyId); + if (!p) { + respondErr(id, { code: 'OTHER', message: `unknown ptyId: ${ptyId}` }); + return; + } + try { + // Do NOT pre-delete from the map; `onExit` is the single source of truth + // for removal (plan task 2 gotcha). + p.kill(signal); + respondOk(id, undefined); + } catch (err) { + respondErr(id, { code: 'OTHER', message: errorMessage(err) }); + } +} + +function handlePause(id: number, ptyId: string): void { + const p = ptyMap.get(ptyId); + if (!p) { + respondErr(id, { code: 'OTHER', message: `unknown ptyId: ${ptyId}` }); + return; + } + try { + p.pause(); + respondOk(id, undefined); + } catch (err) { + respondErr(id, { code: 'OTHER', message: errorMessage(err) }); + } +} + +function handleResume(id: number, ptyId: string): void { + const p = ptyMap.get(ptyId); + if (!p) { + respondErr(id, { code: 'OTHER', message: `unknown ptyId: ${ptyId}` }); + return; + } + try { + p.resume(); + respondOk(id, undefined); + } catch (err) { + respondErr(id, { code: 'OTHER', message: errorMessage(err) }); + } +} + +/** + * Classify a spawn error into one of the four buckets main's Node-fallback + * path at `AbstractCliManager.ts:604-717, 781-795` recognizes. Mirrors the + * substring checks at lines 717-722. + */ +function classifySpawnError(err: unknown): PtyHostSpawnError { + const message = errorMessage(err); + const errObj = err as { code?: unknown; errno?: unknown } | undefined; + const code = typeof errObj?.code === 'string' ? errObj.code : undefined; + + // Windows error 193 ("not a valid Win32 application"): `code === 'UNKNOWN'` + // with `errno === -4094` historically, but the classifier in AbstractCli + // matches on message substrings, so we do the same. + if ( + message.includes('error code: 193') || + message.includes('not a valid Win32 application') || + (code === 'UNKNOWN' && (errObj?.errno === 193 || errObj?.errno === -193)) + ) { + return { code: 'E193', message }; + } + + // ENOENT: binary not found on PATH. Covers both Node's native `code` field + // and the message-string variants surfaced by conpty / node-pty. + if ( + code === 'ENOENT' || + message.includes('ENOENT') || + message.includes('No such file or directory') || + message.includes('is not recognized') + ) { + return { code: 'ENOENT', message }; + } + + // Shebang / posix_spawn interpreter failures. The message substrings here + // match the shebang case at `AbstractCliManager.ts:717-722` (`env: node:` + // etc.) 
plus the `posix_spawn` text surfaced by darwin when the kernel + // can't read the interpreter line. + if ( + message.includes('posix_spawn') || + message.includes('env: node:') || + message.toLowerCase().includes('shebang') + ) { + return { code: 'SHEBANG', message }; + } + + return { code: 'OTHER', message }; +} + +function respondOk(id: number, result: { ptyId: string; pid: number } | undefined): void { + if (!rpcPort) { + return; + } + const frame: PtyHostResponse = result + ? { id, ok: true, result } + : { id, ok: true, result: undefined }; + rpcPort.postMessage(frame); +} + +function respondErr(id: number, error: PtyHostSpawnError): void { + if (!rpcPort) { + return; + } + const frame: PtyHostResponse = { id, ok: false, error }; + rpcPort.postMessage(frame); +} + +function errorMessage(err: unknown): string { + if (err instanceof Error) { + return err.message; + } + if (typeof err === 'string') { + return err; + } + try { + return JSON.stringify(err); + } catch { + return String(err); + } +} + +function isInitMessage(frame: unknown): frame is InitMessage { + if (typeof frame !== 'object' || frame === null) { + return false; + } + const candidate = frame as { type?: unknown }; + return candidate.type === 'init'; +} + +function isHeartbeatPing(frame: unknown): frame is HeartbeatPing { + if (typeof frame !== 'object' || frame === null) { + return false; + } + const candidate = frame as { type?: unknown }; + return candidate.type === 'heartbeat-ping'; +} + +/** + * Cheap structural check; mirrors the shape guard in `rpc.ts` but lives here + * so this file stays self-contained relative to any main-side helpers. + */ +function isPtyHostRequest(frame: unknown): frame is PtyHostRequest { + if (typeof frame !== 'object' || frame === null) { + return false; + } + const candidate = frame as { id?: unknown; method?: unknown; args?: unknown }; + return ( + typeof candidate.id === 'number' && + typeof candidate.method === 'string' && + typeof candidate.args === 'object' && + candidate.args !== null + ); +} + +bootstrap(); diff --git a/main/src/ptyHost/ptyHostSupervisor.ts b/main/src/ptyHost/ptyHostSupervisor.ts new file mode 100644 index 0000000..82f1cb0 --- /dev/null +++ b/main/src/ptyHost/ptyHostSupervisor.ts @@ -0,0 +1,711 @@ +/** + * Main-side supervisor for the ptyHost UtilityProcess. + * + * Owns: + * - the `UtilityProcess` handle and its lifecycle (start, exit, backoff restart) + * - the main-side end of the RPC `MessageChannelMain` to `ptyHostMain.ts` + * - a per-BrowserWindow `MessageChannelMain` pair for direct renderer data flow + * (delivered to the renderer via `webContents.postMessage('ptyHost-port', ...)`) + * - live `PtyHandle` shims keyed by `ptyId`, used by the managers as an + * `IPty`-compatible surface + * + * Chunk C scope: + * - wire the RPC channel and fan data events to `PtyHandle`s (for SQLite / + * sync-block strip / alt-screen detection in main) + * - stand up the per-window renderer port as a passthrough: the supervisor + * creates the channel, retains both ends, and posts one to the renderer + * - heartbeat + restart with manager-state-preserving respawn + exponential backoff + * + * Out of scope for Chunk C: true two-port tee from ptyHost to (main + renderer). + * Chunk D/E/F will extend `ptyHostMain.ts` with an `attach-renderer` port so + * bytes can flow directly to the renderer without traversing main. 
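+ * Until then, only exit frames are mirrored onto the per-window renderer
+ * ports (see `onRpcMessage`); data frames reach renderers exclusively
+ * through each `PtyHandle`'s main-side listeners.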
+ * + * Wire constraints enforced here (see plan gotchas lines 320-340, 734-743): + * - Every `MessagePortMain` is stored as a class field; closure locals would + * let GC close the channel. + * - `.start()` is called on every port before `.on('message', ...)` or + * `.postMessage(...)`. + * - The init frame is exactly `{ type: 'init' }` with the RPC port in the + * `transfer` array (see `ptyHostMain.ts:110-132`). + * - The host sends `{ type: 'host-ready' }` on UtilityProcess `parentPort` + * after it attaches the transferred RPC port. `start()` does not resolve + * before this handshake lands. + * - Heartbeat frames are raw `{ type: 'heartbeat-ping' | 'heartbeat-pong' }`; + * NOT RPC-framed. + * - No renderer-facing reconnect banner; log-only per locked decision. + */ + +import { EventEmitter } from 'events'; +import * as path from 'path'; +import { + MessageChannelMain, + utilityProcess, + type MessagePortMain, + type UtilityProcess, + type WebContents, +} from 'electron'; + +import { + isPtyHostResponse, + RpcDispatcher, +} from './rpc'; +import type { + PtyHostEvent, + PtyHostRequest, + PtyHostSpawnOpts, +} from './types'; + +/** Max restart attempts before we give up and leave the host down. */ +const MAX_RESTART_ATTEMPTS = 5; +/** Heartbeat ping interval in ms. */ +const HEARTBEAT_INTERVAL_MS = 10_000; +/** Max time (ms) with no pong before we kill the host and let restart fire. */ +const HEARTBEAT_DEAD_MS = 30_000; +/** Reset the restart counter only after the host has stayed up this long. */ +const RESTART_STABLE_MS = 60_000; +/** Fail startup if the child does not confirm the RPC port was attached. */ +const STARTUP_READY_TIMEOUT_MS = 5_000; + +/** + * Narrow shape of inbound frames on the RPC port. The supervisor multiplexes + * four kinds: the two heartbeat events, data/exit events, and RPC responses. + * The rest is handled in `onRpcMessage` via the shared `isPtyHostResponse`. + */ +function isHeartbeatPong(frame: unknown): frame is { type: 'heartbeat-pong' } { + return typeof frame === 'object' && frame !== null && (frame as { type?: unknown }).type === 'heartbeat-pong'; +} + +function isHostReady(frame: unknown): frame is { type: 'host-ready' } { + return typeof frame === 'object' && frame !== null && (frame as { type?: unknown }).type === 'host-ready'; +} + +function isDataEvent(frame: unknown): frame is Extract { + if (typeof frame !== 'object' || frame === null) return false; + const f = frame as { type?: unknown; ptyId?: unknown; data?: unknown }; + return f.type === 'data' && typeof f.ptyId === 'string' && typeof f.data === 'string'; +} + +function isExitEvent(frame: unknown): frame is Extract { + if (typeof frame !== 'object' || frame === null) return false; + const f = frame as { type?: unknown; ptyId?: unknown; exitCode?: unknown; signal?: unknown }; + return ( + f.type === 'exit' && + typeof f.ptyId === 'string' && + (typeof f.exitCode === 'number' || f.exitCode === null) && + (typeof f.signal === 'number' || f.signal === null) + ); +} + +/** Listener signatures kept explicit so `PtyHandle` type stays analyzable. */ +type DataListener = (data: string) => void; +type ExitListener = (exitCode: number | null, signal: number | null) => void; + +/** + * Thin `IPty`-compatible shim. + * + * The managers (`terminalPanelManager`, `AbstractCliManager`, ...) treat this + * as a stand-in for `pty.IPty` so their existing `onData` / `onExit` wiring + * continues to work across the async seam. 
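+ * e.g. existing manager wiring keeps its shape (appendOutput/handleExit are
+ * illustrative names):
+ *   const sub = handle.onData((chunk) => this.appendOutput(chunk));
+ *   handle.onExit((code, signal) => this.handleExit(code, signal));
+ *   // later: sub.dispose();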
`pid` is cached from the spawn + * response so synchronous `.pid` reads in the three `killProcessTree` + * implementations keep working. + * + * Chunks D/E replace the direct `pty.IPty` with this shim in each manager. + */ +export class PtyHandle { + readonly id: string; + readonly pid: number; + private readonly dataListeners = new Set(); + private readonly exitListeners = new Set(); + private exited = false; + + constructor( + id: string, + pid: number, + private readonly supervisor: PtyHostSupervisor, + ) { + this.id = id; + this.pid = pid; + } + + /** + * Subscribe to PTY byte output. Returns an `IDisposable`-shaped object so + * callers that previously used `pty.onData(...).dispose()` keep working. + */ + onData(listener: DataListener): { dispose(): void } { + this.dataListeners.add(listener); + return { + dispose: () => { + this.dataListeners.delete(listener); + }, + }; + } + + /** + * Subscribe to PTY exit. Callback receives `{exitCode, signal}` verbatim + * from the host; `signal` is the raw number so SIGSEGV/SIGABRT/SIGBUS + * detection at `AbstractCliManager.ts:781-795` keeps working. + */ + onExit(listener: ExitListener): { dispose(): void } { + this.exitListeners.add(listener); + return { + dispose: () => { + this.exitListeners.delete(listener); + }, + }; + } + + /** Used by the supervisor to fan incoming data frames to listeners. */ + emitData(data: string): void { + for (const listener of this.dataListeners) { + try { + listener(data); + } catch (err) { + console.error('[ptyHost] onData listener threw', err); + } + } + } + + /** + * Used by the supervisor to fan incoming real exit frames. + * Idempotent: once `exited` is set, subsequent calls are no-ops so a real + * repeated host exit frames don't double-fire. + */ + emitExit(exitCode: number | null, signal: number | null): void { + if (this.exited) return; + this.exited = true; + for (const listener of this.exitListeners) { + try { + listener(exitCode, signal); + } catch (err) { + console.error('[ptyHost] onExit listener threw', err); + } + } + } + + async write(data: string): Promise { + await this.supervisor.write(this.id, data); + } + + async resize(cols: number, rows: number): Promise { + await this.supervisor.resize(this.id, cols, rows); + } + + async kill(signal?: NodeJS.Signals): Promise { + await this.supervisor.kill(this.id, signal); + } + + async pause(): Promise { + await this.supervisor.pause(this.id); + } + + async resume(): Promise { + await this.supervisor.resume(this.id); + } +} + +/** + * Per-window MessageChannel pair. Both ends are retained on the supervisor + * because port GC closes the channel (plan gotcha line 323). + */ +interface WindowPortPair { + mainPort: MessagePortMain; + rendererPort: MessagePortMain; +} + +/** + * Supervisor events. Consumed by Chunk E (`respawnAll`) and future telemetry. + * + * - `restart`: emitted when the UtilityProcess exits and a restart is scheduled. + * - `ready`: emitted on every successful `start()` (initial AND restart). + * - `ready-after-restart`: emitted ONLY after a restart completes; callers use + * this to drive per-manager `respawnAll()` without firing on the initial boot. + */ +export interface PtyHostSupervisorEvents { + restart: () => void; + ready: () => void; + 'ready-after-restart': () => void; + 'renderer-ack': (ptyId: string, bytes: number) => void; +} + +export class PtyHostSupervisor extends EventEmitter { + private proc: UtilityProcess | null = null; + /** Main-side end of the RPC channel to `ptyHostMain.ts`. Field, not local. 
*/ + private rpcPort: MessagePortMain | null = null; + /** Per-BrowserWindow data port pairs keyed by `webContents.id`. */ + private readonly windowPorts = new Map(); + /** Live PTY shims keyed by host-allocated `ptyId`. */ + private readonly liveHandles = new Map(); + private readonly dispatcher = new RpcDispatcher(); + private restartCount = 0; + private heartbeatTimer: NodeJS.Timeout | null = null; + private pongTimer: NodeJS.Timeout | null = null; + private stableTimer: NodeJS.Timeout | null = null; + /** Resolves on the first successful `start()`. Replaced on every restart. */ + private readyResolved = false; + private readyPromise: Promise; + private readyResolve: (() => void) | null = null; + private readyReject: ((err: Error) => void) | null = null; + private hostReadyResolve: (() => void) | null = null; + private hostReadyReject: ((err: Error) => void) | null = null; + + constructor() { + super(); + // Seed the first ready promise; `start()` replaces these on restart. + this.readyPromise = new Promise((resolve, reject) => { + this.readyResolve = resolve; + this.readyReject = reject; + }); + } + + /** + * Promise that resolves on the first successful `start()`. Managers await + * this before posting their first RPC so the port is guaranteed live. + */ + ready(): Promise { + return this.readyPromise; + } + + /** + * Fork the UtilityProcess and wire the RPC channel. On restart this is + * called again from `onProcExit`; we reset `proc`/`rpcPort` and rebuild. + */ + async start(): Promise { + // After TypeScript build, supervisor lives at + // main/dist/main/src/ptyHost/ptyHostSupervisor.js + // (per `main/tsconfig.json` preserved-source layout + `main/package.json:5`). + // `ptyHostMain.js` is its sibling. Do NOT hardcode `main/dist/ptyHost/...`. + const entry = path.join(__dirname, 'ptyHostMain.js'); + + const { port1, port2 } = new MessageChannelMain(); + this.rpcPort = port1; + + const execArgv = process.env.PANE_PTY_HOST_DEBUG ? ['--inspect-brk=9230'] : []; + this.proc = utilityProcess.fork(entry, [], { + serviceName: 'pane-pty-host', + stdio: 'pipe', + execArgv, + }); + + const hostReadyPromise = new Promise((resolve, reject) => { + this.hostReadyResolve = resolve; + this.hostReadyReject = reject; + }); + const startupTimeout = setTimeout(() => { + console.error(`[ptyHost] host did not signal ready within ${STARTUP_READY_TIMEOUT_MS}ms; killing host`); + this.hostReadyReject?.(new Error('PTY_HOST_READY_TIMEOUT')); + this.proc?.kill(); + }, STARTUP_READY_TIMEOUT_MS); + + // Pipe UtilityProcess stdout/stderr into main's console with a `[ptyHost]` + // prefix so operators can correlate host logs with main (plan line 423). + this.proc.stdout?.on('data', (chunk: Buffer) => { + process.stdout.write(`[ptyHost] ${chunk.toString()}`); + }); + this.proc.stderr?.on('data', (chunk: Buffer) => { + process.stderr.write(`[ptyHost] ${chunk.toString()}`); + }); + + // Post the init frame with port2 in the transfer array. Shape is fixed by + // `ptyHostMain.ts:67-69, 110-132`: `{ type: 'init' }` + `[port2]`. + this.proc.postMessage({ type: 'init' }, [port2]); + + // `.start()` must be called before adding the listener, otherwise any + // frame that arrives before start() is silently queued until start and + // can interleave with the listener install (plan gotchas line 322, 738). 
+ this.rpcPort.start(); + this.rpcPort.on('message', (event: Electron.MessageEvent) => { + this.onRpcMessage(event.data); + }); + + this.proc.on('message', (message: unknown) => { + if (isHostReady(message)) { + this.hostReadyResolve?.(); + this.hostReadyResolve = null; + this.hostReadyReject = null; + } + }); + + this.proc.on('exit', (code: number | null) => { + this.onProcExit(code); + }); + + try { + await hostReadyPromise; + } finally { + clearTimeout(startupTimeout); + } + + this.startHeartbeat(); + + console.log( + `[ptyHost] started (flag=${process.env.PANE_USE_PTY_HOST ?? 'unset'}, pid=${this.proc.pid})`, + ); + + if (!this.readyResolved) { + this.readyResolved = true; + this.readyResolve?.(); + } + + // Capture whether this was a restart before arming the stability timer. The + // `ready-after-restart` event is the hook the index.ts listener uses to + // drive per-manager `respawnAll()` — firing it on the initial boot would + // re-enter empty maps and waste work. + const wasRestart = this.restartCount > 0; + + // Reset the restart counter only after the host stays healthy. Resetting + // immediately on fork lets a fast crash loop restart forever. + if (this.stableTimer) { + clearTimeout(this.stableTimer); + } + this.stableTimer = setTimeout(() => { + if (this.restartCount > 0) { + console.log('[ptyHost] host stayed up; resetting restart counter'); + } + this.restartCount = 0; + this.stableTimer = null; + }, RESTART_STABLE_MS); + + // `ready` fires on every successful start (initial + restart) for + // diagnostic listeners. `ready-after-restart` fires only when we're + // recovering from a crash; that's what Task 6b's respawn wiring binds to. + this.emit('ready'); + if (wasRestart) { + this.emit('ready-after-restart'); + } + } + + /** + * Route an inbound frame on the RPC port. + * + * Four kinds: + * - `heartbeat-pong` → clear pong timer + * - `data` event → fan to `PtyHandle.emitData` + * - `exit` event → fan to `PtyHandle.emitExit`, drop from `liveHandles` + * - `PtyHostResponse` → dispatch via `RpcDispatcher` + */ + private onRpcMessage(data: unknown): void { + if (isHeartbeatPong(data)) { + if (this.pongTimer) { + clearTimeout(this.pongTimer); + this.pongTimer = null; + } + return; + } + + if (isDataEvent(data)) { + const handle = this.liveHandles.get(data.ptyId); + if (handle) { + handle.emitData(data.data); + } + // Data frames are NOT auto-teed to renderers here. Main-side managers + // (`terminalPanelManager.setupTerminalHandlers`) subscribe via the + // `PtyHandle` and run `filterSyncBlockClears` / alt-screen detection + // BEFORE forwarding filtered bytes to renderers. Raw broadcasting here + // would re-introduce the clear-screen scroll yank in xterm.js. + return; + } + + if (isExitEvent(data)) { + const handle = this.liveHandles.get(data.ptyId); + if (handle) { + handle.emitExit(data.exitCode, data.signal); + this.liveHandles.delete(data.ptyId); + } + // Exit frames ARE mirrored to renderers so `electronAPI.ptyHost.onExit` + // subscribers fire. No main-side filtering is required for exit frames; + // the preload dispatches by ptyId. Stale listeners (e.g. for a ptyId + // whose panel already unmounted) drop the frame on the floor. + this.broadcastToRenderers(data); + return; + } + + if (isPtyHostResponse(data)) { + this.dispatcher.handleResponse(data); + return; + } + + console.log('[ptyHost] unknown message frame, dropping', data); + } + + /** + * UtilityProcess exited (crash, kill, or graceful exit). 
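A heartbeat
+   * timeout funnels in here too: when no pong arrives within
+   * `HEARTBEAT_DEAD_MS`, `startHeartbeat` kills the host and that kill
+   * surfaces as this same exit event.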
Clean up every + * piece of state that referenced the dead process, then schedule a restart + * unless we've exceeded `MAX_RESTART_ATTEMPTS`. + * + * Order: + * 1. Reject every pending RPC so `withLock` callers release. + * 2. Drop dead handles but do NOT emit synthetic exits. Manager maps must + * remain intact so `ready-after-restart` can snapshot and respawn them. + * 3. Tear down heartbeat timers. + * 4. Emit `'restart'` (log-only; no renderer banner per locked decision). + * 5. If over the cap, log and give up. + * 6. Otherwise schedule `start()` with exponential backoff. + */ + private onProcExit(code: number | null): void { + console.warn(`[ptyHost] UtilityProcess exited (code=${code ?? 'null'})`); + + this.dispatcher.rejectAll(new Error('PTY_HOST_RESTARTED')); + + this.liveHandles.clear(); + + if (this.heartbeatTimer) { + clearInterval(this.heartbeatTimer); + this.heartbeatTimer = null; + } + if (this.pongTimer) { + clearTimeout(this.pongTimer); + this.pongTimer = null; + } + if (this.stableTimer) { + clearTimeout(this.stableTimer); + this.stableTimer = null; + } + + this.proc = null; + this.rpcPort = null; + this.hostReadyReject?.(new Error('PTY_HOST_EXITED_BEFORE_READY')); + this.hostReadyResolve = null; + this.hostReadyReject = null; + + if (!this.readyResolved && this.restartCount === 0) { + this.readyReject?.(new Error('PTY_HOST_EXITED_BEFORE_READY')); + return; + } + + this.emit('restart'); + + if (this.restartCount >= MAX_RESTART_ATTEMPTS) { + console.error( + `[ptyHost] giving up after ${MAX_RESTART_ATTEMPTS} restart attempts; PTY operations will fail until app restart`, + ); + if (!this.readyResolved) { + this.readyReject?.(new Error('PTY_HOST_GAVE_UP')); + } + return; + } + + const backoff = Math.min(2000, 200 * 2 ** this.restartCount); + this.restartCount += 1; + console.log(`[ptyHost] scheduling restart #${this.restartCount} in ${backoff}ms`); + + // Reset the ready promise so awaiters block until the new start succeeds. + this.readyResolved = false; + this.readyPromise = new Promise((resolve, reject) => { + this.readyResolve = resolve; + this.readyReject = reject; + }); + + setTimeout(() => { + this.start().catch((err) => { + console.error('[ptyHost] restart failed', err); + }); + }, backoff); + } + + /** + * Arm the heartbeat loop. Every `HEARTBEAT_INTERVAL_MS` we post a ping and + * (if not already armed) set a pong timer. Arrival of any pong clears the + * pong timer; failure to pong within `HEARTBEAT_DEAD_MS` kills the host, + * which triggers `onProcExit` and the restart path. + */ + private startHeartbeat(): void { + this.heartbeatTimer = setInterval(() => { + this.rpcPort?.postMessage({ type: 'heartbeat-ping' }); + if (!this.pongTimer) { + this.pongTimer = setTimeout(() => { + console.warn(`[ptyHost] no pong within ${HEARTBEAT_DEAD_MS}ms; killing host`); + this.pongTimer = null; + this.proc?.kill(); + }, HEARTBEAT_DEAD_MS); + } + }, HEARTBEAT_INTERVAL_MS); + } + + /** + * Spawn a PTY. Awaits `ready()` so callers don't have to worry about + * posting before the host is attached. Registers a `PtyHandle` on success + * so subsequent `data` / `exit` events can route by `ptyId`. 
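+   *
+   * Illustrative call (values are examples only; option fields mirror
+   * `PtyHostSpawnOpts`):
+   *
+   *   const { ptyId, pid } = await supervisor.spawn({
+   *     shell: '/bin/zsh', args: ['-il'], cwd: '/repo/worktree',
+   *     cols: 80, rows: 30, env: { TERM: 'xterm-256color' },
+   *   });
+   *   supervisor.getHandle(ptyId)?.onData((d) => process.stdout.write(d));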
+   */
+  async spawn(opts: PtyHostSpawnOpts): Promise<{ ptyId: string; pid: number }> {
+    await this.readyPromise;
+    if (!this.rpcPort) {
+      throw new Error('PTY_HOST_NOT_READY');
+    }
+    const req: Omit<PtyHostRequest, 'id'> = { method: 'spawn', args: opts };
+    const result = await this.dispatcher.send(this.rpcPort, req);
+    const spawned = result as { ptyId: string; pid: number };
+    const handle = new PtyHandle(spawned.ptyId, spawned.pid, this);
+    this.liveHandles.set(spawned.ptyId, handle);
+    return spawned;
+  }
+
+  async write(ptyId: string, data: string): Promise<void> {
+    await this.readyPromise;
+    if (!this.rpcPort) throw new Error('PTY_HOST_NOT_READY');
+    const req: Omit<PtyHostRequest, 'id'> = { method: 'write', args: { ptyId, data } };
+    await this.dispatcher.send(this.rpcPort, req);
+  }
+
+  async resize(ptyId: string, cols: number, rows: number): Promise<void> {
+    await this.readyPromise;
+    if (!this.rpcPort) throw new Error('PTY_HOST_NOT_READY');
+    const req: Omit<PtyHostRequest, 'id'> = { method: 'resize', args: { ptyId, cols, rows } };
+    await this.dispatcher.send(this.rpcPort, req);
+  }
+
+  async kill(ptyId: string, signal?: NodeJS.Signals): Promise<void> {
+    await this.readyPromise;
+    if (!this.rpcPort) throw new Error('PTY_HOST_NOT_READY');
+    const req: Omit<PtyHostRequest, 'id'> = { method: 'kill', args: { ptyId, signal } };
+    await this.dispatcher.send(this.rpcPort, req);
+  }
+
+  async pause(ptyId: string): Promise<void> {
+    await this.readyPromise;
+    if (!this.rpcPort) throw new Error('PTY_HOST_NOT_READY');
+    const req: Omit<PtyHostRequest, 'id'> = { method: 'pause', args: { ptyId } };
+    await this.dispatcher.send(this.rpcPort, req);
+  }
+
+  async resume(ptyId: string): Promise<void> {
+    await this.readyPromise;
+    if (!this.rpcPort) throw new Error('PTY_HOST_NOT_READY');
+    const req: Omit<PtyHostRequest, 'id'> = { method: 'resume', args: { ptyId } };
+    await this.dispatcher.send(this.rpcPort, req);
+  }
+
+  async ack(ptyId: string, bytes: number): Promise<void> {
+    await this.readyPromise;
+    if (!this.rpcPort) throw new Error('PTY_HOST_NOT_READY');
+    const req: Omit<PtyHostRequest, 'id'> = { method: 'ack', args: { ptyId, bytes } };
+    await this.dispatcher.send(this.rpcPort, req);
+  }
+
+  /**
+   * Look up a live handle by id. Used by managers that stored the id as part
+   * of their panel state and need the shim back.
+   */
+  getHandle(ptyId: string): PtyHandle | undefined {
+    return this.liveHandles.get(ptyId);
+  }
+
+  /**
+   * Stand up the per-BrowserWindow data port pair and deliver the renderer
+   * end to the window. Called from `index.ts` on `did-finish-load`.
+   *
+   * Chunk C scope: the renderer port is a passthrough. Bytes still flow from
+   * ptyHost → supervisor → `PtyHandle.emitData` and from there to main-side
+   * code (SQLite, sync-block strip). `TerminalPanel.tsx` continues to receive
+   * bytes via the existing `terminal:output` IPC path. Chunk D switches the
+   * renderer to subscribe on this port, and future work may extend ptyHost to
+   * tee bytes directly to the renderer end.
+   *
+   * Both ports are retained on `windowPorts` — port GC would otherwise close
+   * the channel (plan gotcha line 323).
+   */
+  attachWindow(webContents: WebContents): void {
+    // Guard: ignore if we've already attached this window.
+    if (this.windowPorts.has(webContents.id)) {
+      return;
+    }
+
+    const { port1: mainPort, port2: rendererPort } = new MessageChannelMain();
+    this.windowPorts.set(webContents.id, { mainPort, rendererPort });
+
+    // Start the main-side end before listening. This end will carry ack/write
+    // frames from the renderer in Chunk D. 
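+    // Frame shapes the renderer will post on its end (handled by
+    // `onRendererMessage` below; the ptyId value here is illustrative):
+    //   { type: 'ack',   ptyId: 'pty-42', bytes: 5_000 }
+    //   { type: 'write', ptyId: 'pty-42', data: 'ls\n' }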
+ mainPort.start(); + mainPort.on('message', (event: Electron.MessageEvent) => { + this.onRendererMessage(webContents.id, event.data); + }); + + // Hand the renderer end to the window. The preload listener for + // 'ptyHost-port' takes `event.ports[0]` and stores it. + webContents.postMessage('ptyHost-port', null, [rendererPort]); + + // Clean up on window destroy so the map doesn't retain dead entries. + // Both ports become unreferenced and GC closes the channel. + webContents.once('destroyed', () => { + this.windowPorts.delete(webContents.id); + }); + + console.log(`[ptyHost] attached window webContentsId=${webContents.id}`); + } + + /** + * Inbound message from a renderer over its data port. Chunk D: handles + * ack frames (`{type: 'ack', ptyId, bytes}`) so the renderer can ack + * bytes back over the MessagePort instead of round-tripping through + * `ipcRenderer.invoke('terminal:ack', ...)`. Ack is forwarded to the + * supervisor's `ack()` RPC which the host currently treats as a no-op; + * managers track flow-control state on the main side via + * `acknowledgeBytes()` elsewhere. + */ + private onRendererMessage(webContentsId: number, data: unknown): void { + void webContentsId; + if (typeof data !== 'object' || data === null) return; + const frame = data as { type?: unknown; ptyId?: unknown; bytes?: unknown; data?: unknown }; + + if (frame.type === 'ack' && typeof frame.ptyId === 'string' && typeof frame.bytes === 'number') { + // Managers still track flow control on the main side. Emit first so + // TerminalPanelManager can decrement pending bytes and resume the PTY. + this.emit('renderer-ack', frame.ptyId, frame.bytes); + + // Forward as a no-op RPC for forward compat. Silently swallow errors + // because an ack failure after a supervisor restart is not actionable. + this.ack(frame.ptyId, frame.bytes).catch(() => { + /* ignore; stale ack after host restart */ + }); + return; + } + + if (frame.type === 'write' && typeof frame.ptyId === 'string' && typeof frame.data === 'string') { + this.write(frame.ptyId, frame.data).catch(() => { + /* ignore; stale write after host restart */ + }); + return; + } + } + + /** + * Post `frame` to every attached renderer's data port. Preload routes the + * frame to subscribers registered via `electronAPI.ptyHost.onData` / + * `onExit` by `ptyId`; windows that never registered a subscriber for + * `frame.ptyId` drop the frame on the floor. + * + * Kept narrow: only `data` and `exit` frames flow this way. Heartbeat and + * RPC-response frames stay on the main-side RPC port. + */ + private broadcastToRenderers(frame: PtyHostEvent): void { + for (const { mainPort } of this.windowPorts.values()) { + try { + mainPort.postMessage(frame); + } catch (err) { + // A destroyed window's port can throw; safe to drop the frame. + console.warn('[ptyHost] failed to post renderer frame', err); + } + } + } + + /** + * Post a FILTERED `data` frame to every attached renderer's data port. + * Called by main-side managers (e.g. `terminalPanelManager.flushOutputBuffer`) + * AFTER running `filterSyncBlockClears` and alt-screen detection on the raw + * bytes. This is the hand-off for flag-on renderer subscriptions via + * `electronAPI.ptyHost.onData(ptyId, cb)`. + * + * Kept separate from `broadcastToRenderers` so the intent is explicit: + * supervisor never auto-broadcasts raw data bytes. + */ + postDataToRenderers(ptyId: string, data: string): void { + this.broadcastToRenderers({ type: 'data', ptyId, data }); + } + + /** + * Snapshot of live handle ids. 
Used by Chunk E's `respawnAll` wiring to + * figure out which panels need re-spawning after a restart. + */ + getLiveHandleIds(): string[] { + return Array.from(this.liveHandles.keys()); + } +} diff --git a/main/src/ptyHost/rpc.ts b/main/src/ptyHost/rpc.ts new file mode 100644 index 0000000..886ccea --- /dev/null +++ b/main/src/ptyHost/rpc.ts @@ -0,0 +1,168 @@ +/** + * Transport-agnostic framed-RPC helpers for the ptyHost boundary. + * + * This module intentionally does NOT import from `electron`; it works with + * either `MessagePortMain` (Node-style `.on('message', ...)`) on the main side + * or the renderer-side `MessagePort` (DOM-style `.onmessage`). The supervisor + * (Chunk C) and `ptyHostMain` (Chunk B) wire the transport in. + * + * Responsibilities: + * - Allocate monotonically-increasing request ids. + * - Track pending `send()` promises in a `Map`. + * - Resolve / reject pending promises when a `PtyHostResponse` lands via + * `handleResponse()`. + * - Surface a minimal "poster" interface that the caller's transport can + * satisfy from either port flavour. + */ + +import type { + PtyHostRequest, + PtyHostResponse, + PtyHostSpawnError, +} from './types'; + +/** + * Minimal poster contract; anything with a `postMessage(msg)` method fits. + * + * `MessagePortMain.postMessage(message: unknown)` and the renderer-side + * `MessagePort.postMessage(message: unknown)` both satisfy this shape, as does + * Electron's `UtilityProcess.postMessage(message, transfer?)` (the optional + * `transfer` argument is compatible with a single-argument call). + */ +export interface RpcPoster { + postMessage(message: unknown): void; +} + +/** + * Allocator for monotonically-increasing RPC correlation ids. + * + * The counter is per-instance so a supervisor and a host-side responder can + * each own their own stream of ids without collision. Starts at 1 so `0` can + * remain a sentinel for "not yet assigned" in callers that need one. + */ +export class RpcIdAllocator { + private next = 1; + + allocate(): number { + const id = this.next; + this.next += 1; + return id; + } +} + +/** + * Resolver for a single pending RPC request. + * + * Returned by `trackPending()` so callers that need to handle cancellation + * (e.g., supervisor restart rejecting every in-flight promise with + * `PTY_HOST_RESTARTED`) can walk the map and reject entries without racing + * the response handler. + */ +export type PendingResolver = { + resolve: (result: unknown) => void; + reject: (error: Error) => void; +}; + +/** + * Tracks pending RPC requests and dispatches responses. + * + * Intentionally does not know about the transport; `send()` takes any poster + * and `handleResponse()` takes already-decoded `PtyHostResponse` payloads. + * Callers wire the transport: listen on the port, route inbound frames to + * `handleResponse()` or to their own event-dispatch function based on shape. + */ +export class RpcDispatcher { + private readonly pending = new Map(); + private readonly ids = new RpcIdAllocator(); + + /** + * Send a request and return a promise that resolves with the success payload + * or rejects with an `Error` wrapping the `PtyHostSpawnError`. + * + * The caller provides the request minus its `id`; the dispatcher allocates + * an id, stores the promise resolvers, and posts the fully-formed frame. 
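+   *
+   * Round-trip sketch (assumes `port` satisfies `RpcPoster` and the caller
+   * routes inbound frames as described above):
+   *
+   *   const dispatcher = new RpcDispatcher();
+   *   const reply = await dispatcher.send(port, {
+   *     method: 'resize',
+   *     args: { ptyId: 'pty-42', cols: 120, rows: 40 },
+   *   });
+   *   // on every inbound frame:
+   *   if (isPtyHostResponse(frame)) dispatcher.handleResponse(frame);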
+   */
+  send<Req extends Omit<PtyHostRequest, 'id'>>(
+    poster: RpcPoster,
+    request: Req,
+  ): Promise<unknown> {
+    return new Promise<unknown>((resolve, reject) => {
+      const id = this.ids.allocate();
+      this.pending.set(id, { resolve, reject });
+      const framed = { id, ...request } as PtyHostRequest;
+      try {
+        poster.postMessage(framed);
+      } catch (err) {
+        this.pending.delete(id);
+        reject(err instanceof Error ? err : new Error(String(err)));
+      }
+    });
+  }
+
+  /**
+   * Dispatch an inbound `PtyHostResponse` to its waiting promise.
+   *
+   * Unknown ids are silently dropped; this can happen if the supervisor
+   * rejected the pending map during restart and a late response lands.
+   */
+  handleResponse(response: PtyHostResponse): void {
+    const entry = this.pending.get(response.id);
+    if (!entry) {
+      return;
+    }
+    this.pending.delete(response.id);
+    if (response.ok) {
+      entry.resolve(response.result);
+    } else {
+      entry.reject(toError(response.error));
+    }
+  }
+
+  /**
+   * Reject every pending request with the supplied error and clear the map.
+   * Used by the supervisor on UtilityProcess exit / restart so any
+   * `withLock()`-protected callers release their locks promptly.
+   */
+  rejectAll(error: Error): void {
+    for (const [, entry] of this.pending) {
+      entry.reject(error);
+    }
+    this.pending.clear();
+  }
+
+  /**
+   * Inspect-only view of how many RPCs are in flight. Useful for telemetry
+   * and tests; not part of the hot path.
+   */
+  pendingCount(): number {
+    return this.pending.size;
+  }
+}
+
+/**
+ * Shape-guard: narrow an arbitrary inbound frame into a `PtyHostResponse`.
+ *
+ * The supervisor may multiplex responses and events on the same port, so
+ * callers need a cheap way to discriminate before routing. Returns `true` if
+ * the frame has a numeric `id` and an `ok` boolean; the minimum shape a
+ * response must satisfy.
+ */
+export function isPtyHostResponse(frame: unknown): frame is PtyHostResponse {
+  if (typeof frame !== 'object' || frame === null) {
+    return false;
+  }
+  const candidate = frame as { id?: unknown; ok?: unknown };
+  return typeof candidate.id === 'number' && typeof candidate.ok === 'boolean';
+}
+
+/**
+ * Convert a classified `PtyHostSpawnError` into a regular `Error`, preserving
+ * the `code` on a `.code` property so main's Node-fallback classifier at
+ * `AbstractCliManager.ts:604-717` can branch on it without re-parsing the
+ * message string.
+ */
+function toError(spawnError: PtyHostSpawnError): Error {
+  const err = new Error(spawnError.message) as Error & { code: PtyHostSpawnError['code'] };
+  err.code = spawnError.code;
+  return err;
+}
diff --git a/main/src/ptyHost/types.ts b/main/src/ptyHost/types.ts
new file mode 100644
index 0000000..19693f4
--- /dev/null
+++ b/main/src/ptyHost/types.ts
@@ -0,0 +1,91 @@
+/**
+ * Main-internal RPC DTOs for the ptyHost UtilityProcess boundary.
+ *
+ * These types are NOT re-exported via `shared/types/panels.ts` because the
+ * renderer only interacts with the ptyHost through the typed `electronAPI.ptyHost`
+ * surface (added in Chunk C). Only the main process and the UtilityProcess entry
+ * (`ptyHostMain.ts`) share structural access to these shapes.
+ *
+ * Locked decisions this file encodes:
+ * - `cwd: string | undefined` is intentional; preserves the WSL contract at
+ *   `terminalPanelManager.ts:194-198` where `wsl.exe` ignores cwd and the `cd`
+ *   is baked into the bash command. 
+ * - `exit` events MUST carry `exitCode: number | null` and `signal: number | null` + * verbatim; SIGSEGV/SIGABRT/SIGBUS detection at `AbstractCliManager.ts:781-795` + * depends on the raw signal number surviving RPC serialization. + */ + +export interface PtyHostSpawnOpts { + shell: string; + args: string[]; + /** + * Working directory. May be `undefined` for WSL spawns; `wsl.exe` ignores cwd + * and the `cd` is baked into the bash command. Do not default to a truthy value. + */ + cwd: string | undefined; + cols: number; + rows: number; + env: Record; + /** Terminal type name; defaults to `xterm-256color` in the host when omitted. */ + name?: string; +} + +/** + * Discriminated union over the seven RPC methods. Every request carries a + * monotonically-increasing `id` so the caller can correlate the matching + * `PtyHostResponse`. + * + * `pause` / `resume` are the async across-seam counterparts of + * `IPty.pause()` / `IPty.resume()`. Main's flow-control bookkeeping + * (`flowControl.ts`) posts these when the high / low watermarks are crossed. + * `ack` is retained in the union for forward-compat but is a no-op on the + * host side — flow-control state lives entirely on the main side. + */ +export type PtyHostRequest = + | { id: number; method: 'spawn'; args: PtyHostSpawnOpts } + | { id: number; method: 'write'; args: { ptyId: string; data: string } } + | { id: number; method: 'resize'; args: { ptyId: string; cols: number; rows: number } } + | { id: number; method: 'kill'; args: { ptyId: string; signal?: NodeJS.Signals } } + | { id: number; method: 'ack'; args: { ptyId: string; bytes: number } } + | { id: number; method: 'pause'; args: { ptyId: string } } + | { id: number; method: 'resume'; args: { ptyId: string } }; + +/** + * Classified spawn error. + * + * Main's Node-fallback loop at `AbstractCliManager.ts:604-717` branches on + * `code` to decide whether to retry with the Node-executable path: + * - `ENOENT`: binary not found on PATH. + * - `E193`: Windows error 193; npm-bin-stub (non-PE-exec) case. + * - `SHEBANG`: spawn failed because the kernel couldn't exec the shebang line. + * - `OTHER`: classifier did not recognize the failure; no retry. + */ +export type PtyHostSpawnError = { + code: 'ENOENT' | 'E193' | 'SHEBANG' | 'OTHER'; + message: string; +}; + +/** + * Response to a `PtyHostRequest`, discriminated on `ok`. + * + * Success shape depends on the method: + * - `spawn` resolves to `{ ptyId: string; pid: number }`. + * - `write` / `resize` / `kill` / `ack` resolve to `void`. + * + * Failure always carries a `PtyHostSpawnError`. + */ +export type PtyHostResponse = + | { id: number; ok: true; result: { ptyId: string; pid: number } | void } + | { id: number; ok: false; error: PtyHostSpawnError }; + +/** + * Unsolicited events emitted by the ptyHost (no `id` correlation). + * + * `exit.signal` is intentionally typed as `number | null` rather than a + * `NodeJS.Signals` name; preserving the raw numeric form is required for + * SIGSEGV/SIGABRT/SIGBUS detection at `AbstractCliManager.ts:781-795`. 
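+ *
+ * Example frames (values illustrative; signal 11 is SIGSEGV):
+ *   { type: 'data', ptyId: 'pty-42', data: '$ ' }
+ *   { type: 'exit', ptyId: 'pty-42', exitCode: null, signal: 11 }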
+ */ +export type PtyHostEvent = + | { type: 'data'; ptyId: string; data: string } + | { type: 'exit'; ptyId: string; exitCode: number | null; signal: number | null } + | { type: 'heartbeat-pong' }; diff --git a/main/src/services/cliManagerFactory.ts b/main/src/services/cliManagerFactory.ts index 023ebf6..cc92df3 100644 --- a/main/src/services/cliManagerFactory.ts +++ b/main/src/services/cliManagerFactory.ts @@ -157,6 +157,14 @@ export class CliManagerFactory { return totalCount; } + /** + * Get every registered CLI manager instance. Used by the ptyHost supervisor's + * `ready-after-restart` hook in `index.ts` to fan out `respawnAll()` calls. + */ + public getAllManagers(): AbstractCliManager[] { + return this.registry.getAllManagers(); + } + /** * Shutdown all managers */ diff --git a/main/src/services/configManager.ts b/main/src/services/configManager.ts index b6d27c1..8fd4b66 100644 --- a/main/src/services/configManager.ts +++ b/main/src/services/configManager.ts @@ -262,6 +262,16 @@ export class ConfigManager extends EventEmitter { return this.config.verbose || false; } + /** + * Whether PTY spawns should be routed through the isolated ptyHost + * `UtilityProcess`. Off by default. The `PANE_USE_PTY_HOST=1` env var is + * honored as a dev override so testing doesn't require flipping the config. + */ + getUsePtyHost(): boolean { + if (process.env.PANE_USE_PTY_HOST === '1') return true; + return this.config.usePtyHost === true; + } + getDatabasePath(): string { return path.join(this.configDir, 'sessions.db'); } diff --git a/main/src/services/panels/claude/claudeCodeManager.ts b/main/src/services/panels/claude/claudeCodeManager.ts index 03dfb44..8515fd3 100644 --- a/main/src/services/panels/claude/claudeCodeManager.ts +++ b/main/src/services/panels/claude/claudeCodeManager.ts @@ -552,6 +552,51 @@ export class ClaudeCodeManager extends AbstractCliManager { return { cmd: '/bin/sh', args: ['-c', `exec ${line}`], env }; } + /** + * Claude respawn on ptyHost restart. + * + * We rely on the base class's `respawnPanel` → `this.spawnCliProcess(options)` + * dispatch, which invokes Claude's own `spawnCliProcess` override. That + * override already wraps the spawn in `withLock('claude-spawn-')` + * at line 440, so no additional lock-wrapping is needed here. The base + * implementation is sufficient and preserves the serialization contract. + * + * This comment exists so future editors don't add a redundant override; + * doing so would deadlock on the same lock name. + */ + // (intentionally no override of respawnPanel — base implementation is correct) + + /** + * Plan Task 6b: skip panels whose Claude process had not finished its + * interactive-init handshake at restart time. `isCliReady` is persisted on + * panel state by the spawn flow (see `terminalPanelManager.ts:566-569` and + * the `system:init` branch in `parseCliOutput`) so we can read it back here. + */ + protected override isSpawnInProgress(panelId: string): boolean { + // Use synchronous cache-hit path; panelManager.getPanel is sync. + // eslint-disable-next-line @typescript-eslint/no-require-imports + const { panelManager } = require('../../panelManager') as typeof import('../../panelManager'); + const panel = panelManager.getPanel(panelId); + if (!panel) return false; + const cs = (panel.state.customState || {}) as { isCliReady?: boolean; isCliPanel?: boolean }; + // Only consider "in progress" if this is a CLI panel but the CLI has not + // reported ready yet. 
Non-CLI callers shouldn't hit this (Claude entries + // are always CLI) but guard anyway. + if (cs.isCliPanel === false) return false; + return cs.isCliReady === false; + } + + /** + * Plan Task 6b: skip respawn when the original spawn was non-interactive + * `-p` mode. Claude's options carry `isInteractive`; absence means headless + * (i.e. `-p` will be injected by `buildCommandArgs`). We can't reproduce the + * original prompt meaningfully after a mid-spawn crash. + */ + protected override isNonInteractiveSpawn(options: import('../cli/AbstractCliManager').CliSpawnOptions): boolean { + const opts = options as ClaudeSpawnOptions; + return opts.isInteractive !== true; + } + // Implementation of abstract methods from AbstractCliManager async startPanel(panelId: string, sessionId: string, worktreePath: string, prompt: string, permissionMode?: 'approve' | 'ignore', model?: string): Promise { diff --git a/main/src/services/panels/cli/AbstractCliManager.ts b/main/src/services/panels/cli/AbstractCliManager.ts index 678ba37..eaf3788 100644 --- a/main/src/services/panels/cli/AbstractCliManager.ts +++ b/main/src/services/panels/cli/AbstractCliManager.ts @@ -10,14 +10,49 @@ import type { ConversationMessage } from '../../../database/models'; import { getShellPath, findExecutableInPath } from '../../../utils/shellPath'; import { findNodeExecutable } from '../../../utils/nodeFinder'; import { GIT_ATTRIBUTION_ENV } from '../../../utils/attribution'; +import { getPtyHostSupervisor } from '../../../index'; +import type { PtyHandle } from '../../../ptyHost/ptyHostSupervisor'; const LAST_OUTPUT_TAIL_BYTES = 16 * 1024; +/** + * Minimal shim of the `pty.IPty` surface actually touched by this base class + * and the three `killProcessTree` implementations in the codebase. + * + * Two implementers satisfy this at runtime: + * - `pty.IPty` directly (legacy path, flag off or supervisor null). + * - A `PtyHandle`-backed wrapper produced by `spawnViaHost()` (ptyHost path). + * + * Synchronous reads of `.pid` in `killProcess` / `getSessionPids` / the + * `killProcessTree` branch are preserved because the ptyHost spawn response + * carries the pid and the shim caches it before `spawnViaHost()` resolves. + * + * Mutation methods widen the IPty-sync `void` return to `void | Promise` + * so the `PtyHandle`-backed wrapper (whose methods are async) satisfies the + * same interface. All existing call sites ignore the return value. + */ +interface PtyLike { + readonly pid: number; + write(data: string | Buffer): void | Promise; + resize(columns: number, rows: number): void | Promise; + kill(signal?: string): void | Promise; + pause(): void | Promise; + resume(): void | Promise; + onData(listener: (data: string) => void): { dispose(): void }; + onExit(listener: (event: { exitCode: number; signal?: number }) => void): { dispose(): void }; +} + interface CliProcess { - process: pty.IPty; + process: PtyLike; panelId: string; sessionId: string; worktreePath: string; + /** + * Snapshot of the spawn options used for this process. Required so + * `respawnAll()` can re-enter `spawnCliProcess` after a ptyHost restart + * without guessing prompt/isResume/model/permissionMode. Plan Task 6b. 
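+   * (Held by reference, not deep-copied: the respawn re-sends exactly the
+   * object the original caller passed to `spawnCliProcess`.)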
+ */ + spawnOptions: CliSpawnOptions; } interface AvailabilityCache { @@ -25,7 +60,7 @@ interface AvailabilityCache { timestamp: number; } -interface CliSpawnOptions { +export interface CliSpawnOptions { panelId: string; sessionId: string; worktreePath: string; @@ -198,12 +233,15 @@ export abstract class AbstractCliManager extends EventEmitter { // Spawn the process const ptyProcess = await this.spawnPtyProcess(finalCmd, finalArgs, worktreePath, finalEnv); - // Create process record + // Create process record. We snapshot `options` verbatim so `respawnAll()` + // can re-enter `spawnCliProcess` after a ptyHost restart without + // reconstructing prompt/isResume/model from panel state guesses. const cliProcess: CliProcess = { process: ptyProcess, panelId, sessionId, - worktreePath + worktreePath, + spawnOptions: options }; this.processes.set(panelId, cliProcess); @@ -617,9 +655,19 @@ export abstract class AbstractCliManager extends EventEmitter { /** * Spawn PTY process with error handling and Node.js fallback * This handles the common case where CLI tools are Node.js scripts with shebangs - * that may not work correctly on all systems + * that may not work correctly on all systems. + * + * When the `usePtyHost` setting is on (and a live supervisor), both spawn + * attempts are routed through `spawnViaHost`. The Node-fallback classifier below still + * runs in main: ptyHost rejects with a `.code`-tagged error (see + * `PtyHostSpawnError` / `rpc.ts:toError`) AND preserves the original message, + * so the substring-based branching at lines ~717 continues to match. + * `withLock('claude-spawn-')` at `claudeCodeManager.ts:438-439` + * wraps this call; on supervisor restart mid-RPC, the reject with + * `PTY_HOST_RESTARTED` propagates through and the lock's try/finally + * releases normally. */ - protected async spawnPtyProcess(command: string, args: string[], cwd: string, env: { [key: string]: string }): Promise { + protected async spawnPtyProcess(command: string, args: string[], cwd: string, env: { [key: string]: string }): Promise { if (!pty) { throw new Error('node-pty not available'); } @@ -628,7 +676,7 @@ export abstract class AbstractCliManager extends EventEmitter { this.logger?.verbose(`Executing ${this.getCliToolName()} command: ${fullCommand}`); this.logger?.verbose(`Working directory: ${cwd}`); - let ptyProcess: pty.IPty; + let ptyProcess: PtyLike; let spawnAttempt = 0; let lastError: unknown; const toolName = this.getCliToolName().toLowerCase(); @@ -652,22 +700,32 @@ export abstract class AbstractCliManager extends EventEmitter { await new Promise(resolve => setTimeout(resolve, 500)); } + const supervisor = getPtyHostSupervisor(); + const useHost = (this.configManager?.getUsePtyHost() ?? false) && supervisor !== null; + if (spawnAttempt === 0 && !(global as typeof global & Record)[needsNodeFallbackKey]) { // First attempt: normal spawn - ptyProcess = pty.spawn(command, args, { - name: 'xterm-color', - cols: 80, - rows: 30, - cwd, - env - }); + ptyProcess = useHost + ? await this.spawnViaHost(command, args, cwd, env, 80, 30) + : pty.spawn(command, args, { + name: 'xterm-color', + cols: 80, + rows: 30, + cwd, + env + }); } else { - // Second attempt or if we know we need Node.js: use Node.js directly + // Second attempt or if we know we need Node.js: use Node.js directly. + // The Phase 0 `wrapSpawnArgs` hook fires ONCE in `spawnCliProcess` before + // entering this loop, so the Node-fallback branch operates on the + // unwrapped `(nodePath, nodeArgs)` pair. 
Claude v2.1.113+ is a native + // binary that never trips the fallback; v1.x Node-based Claude still + // works because sh exec'd the Node wrapper transparently on attempt 1. this.logger?.verbose(`[${this.getCliToolName()}] Using Node.js fallback for execution`); // Try to find the CLI script (for npm-installed tools) let scriptPath = command; - + // For tools installed via npm, the command might be a symlink to a script // Try using the nodeFinder utility to locate the actual script try { @@ -687,17 +745,19 @@ export abstract class AbstractCliManager extends EventEmitter { this.logger?.verbose(`[${this.getCliToolName()}] Using Node.js: ${nodePath}`); // Spawn with Node.js directly - const nodeArgs = scriptPath === command + const nodeArgs = scriptPath === command ? [command, ...args] // Command might be a direct script path : ['--no-warnings', '--enable-source-maps', scriptPath, ...args]; // Found script path - - ptyProcess = pty.spawn(nodePath, nodeArgs, { - name: 'xterm-color', - cols: 80, - rows: 30, - cwd, - env - }); + + ptyProcess = useHost + ? await this.spawnViaHost(nodePath, nodeArgs, cwd, env, 80, 30) + : pty.spawn(nodePath, nodeArgs, { + name: 'xterm-color', + cols: 80, + rows: 30, + cwd, + env + }); } const spawnTime = Date.now() - startTime; @@ -735,10 +795,208 @@ export abstract class AbstractCliManager extends EventEmitter { throw new Error(`Failed to spawn ${this.getCliToolName()}: ${errorMsg}`); } + /** + * Spawn the CLI binary through the ptyHost `UtilityProcess` and return a + * `PtyLike`-compatible shim. The shim caches `pid` from the spawn response + * so synchronous `.pid` reads (killProcess, getSessionPids, killProcessTree) + * remain synchronous. + * + * Error mapping: on reject, the supervisor's `RpcDispatcher` converts the + * host's `PtyHostSpawnError` into an `Error` with the original `message` + * preserved verbatim AND a `.code` property set to one of + * `ENOENT | E193 | SHEBANG | OTHER` (see `rpc.ts:toError`). The two-attempt + * classifier above branches on the message substring, so the existing + * shebang / ENOENT / Windows-193 checks continue to match transparently. + * The `.code` property is preserved on the thrown error for any future + * callers that want to branch on it directly. + */ + private async spawnViaHost( + cmd: string, + args: string[], + cwd: string, + env: { [key: string]: string }, + cols: number, + rows: number + ): Promise { + const supervisor = getPtyHostSupervisor(); + if (!supervisor) { + // Guard: caller must have checked already; if we got here without one, + // surface a classifier-agnostic OTHER to avoid accidental fallback. + const err = new Error('ptyHost supervisor not available') as Error & { code?: string }; + err.code = 'OTHER'; + throw err; + } + + const { ptyId, pid } = await supervisor.spawn({ + shell: cmd, + args, + cwd, + cols, + rows, + env, + name: 'xterm-256color', + }); + + const handle = supervisor.getHandle(ptyId); + if (!handle) { + // This shouldn't happen: supervisor.spawn() registers the handle before + // resolving. If it does, treat as OTHER so the fallback path doesn't fire. + const err = new Error(`ptyHost returned ptyId ${ptyId} but no handle`) as Error & { code?: string }; + err.code = 'OTHER'; + throw err; + } + + return this.wrapPtyHandle(handle, pid); + } + + /** + * Adapt a `PtyHandle` to the `PtyLike` surface this base class uses. 
The + * adapter is strictly local; it converts `PtyHandle.onExit`'s positional + * `(exitCode, signal)` shape to the `{exitCode, signal}` object shape the + * rest of this file (and `pty.IPty`) expects. + */ + private wrapPtyHandle(handle: PtyHandle, pid: number): PtyLike { + return { + pid, + write(data: string | Buffer): Promise { + return handle.write(typeof data === 'string' ? data : data.toString('utf8')); + }, + resize(columns: number, rowsValue: number): Promise { + return handle.resize(columns, rowsValue); + }, + kill(signal?: string): Promise { + return handle.kill(signal as NodeJS.Signals | undefined); + }, + pause(): Promise { + return handle.pause(); + }, + resume(): Promise { + return handle.resume(); + }, + onData(listener: (data: string) => void) { + return handle.onData(listener); + }, + onExit(listener: (event: { exitCode: number; signal?: number }) => void) { + // Preserve the raw signal number; SIGSEGV/SIGABRT/SIGBUS detection at + // `setupProcessHandlers` depends on it. Widen `exitCode: number | null` + // to `number` by passing it through; downstream code already tolerates + // null via `exitCode ?? 0` / `exitCode !== 0`. + return handle.onExit((exitCode, signal) => { + listener({ + exitCode: (exitCode ?? 0), + signal: signal ?? undefined, + }); + }); + }, + }; + } + + /** + * Re-spawn every live CLI process after a ptyHost `UtilityProcess` restart. + * + * Order inside the supervisor (see `ptyHostSupervisor.onProcExit`): + * rejectPendingRpcs → keep manager maps → await nextReady → respawnAll + * + * The supervisor intentionally does not emit synthetic exits on host crash. + * A synthetic exit would run this file's `onExit` cleanup and delete the + * process records we need to snapshot here. + * + * Per-panel work is delegated to `respawnPanel()` so subclasses (Claude) + * can wrap each re-entry in their `withLock('claude-spawn-')` + * envelope without duplicating the iteration logic here. + * + * Plan Task 6b: "Run per-panel respawns in parallel via Promise.all." + */ + async respawnAll(): Promise { + const entries = Array.from(this.processes.entries()); + if (entries.length === 0) { + this.logger?.info(`[ptyHost] ${this.getCliToolName()} respawnAll: no live panels`); + return; + } + + this.logger?.info(`[ptyHost] ${this.getCliToolName()} respawnAll: ${entries.length} panels`); + + const results = await Promise.all(entries.map(async ([panelId, cliProcess]) => { + // Snapshot the options BEFORE clearing the stale entry; the entry's + // `spawnOptions` still points at the pre-restart invocation. + const snapshot = cliProcess.spawnOptions; + + // Clear the stale entry so the re-entry's `spawnCliProcess` duplicate + // check doesn't throw. + this.processes.delete(panelId); + + // Skip panels whose CLI never finished spawning. The pending RPC + // rejection (`PTY_HOST_RESTARTED`) already propagated the error up to + // the original caller's `spawnCliProcess` promise, whose error-handler + // runs its own cleanup. Re-spawning here would duplicate work. + if (this.isSpawnInProgress(panelId)) { + this.logger?.warn(`[ptyHost] ${this.getCliToolName()} skipping respawn for ${panelId}: spawn in progress at restart`); + return { panelId, skipped: 'in-progress' as const }; + } + + // Skip non-interactive `-p` spawns. The prompt is not recoverable from + // panel state alone and re-running the original prompt after a mid-flight + // crash risks double-executing work the user already saw partially complete. 
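+      // (For Claude this predicate is `opts.isInteractive !== true`, so a
+      //  snapshot that never set `isInteractive: true` is treated as a
+      //  headless `-p` invocation; see `claudeCodeManager.isNonInteractiveSpawn`.)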
+ if (this.isNonInteractiveSpawn(snapshot)) { + this.logger?.warn(`[ptyHost] ${this.getCliToolName()} skipping respawn for ${panelId}: non-interactive -p spawn interrupted`); + this.emit('output', { + panelId, + sessionId: cliProcess.sessionId, + type: 'stderr', + data: `\n[${this.getCliToolName()}] ptyHost restarted mid-spawn; non-interactive prompt lost. Start a new message to continue.\n`, + timestamp: new Date() + } as CliOutputEvent); + return { panelId, skipped: 'non-interactive' as const }; + } + + try { + await this.respawnPanel(snapshot); + return { panelId, skipped: false as const }; + } catch (err) { + this.logger?.error(`[ptyHost] ${this.getCliToolName()} respawn failed for ${panelId}:`, err instanceof Error ? err : undefined); + return { panelId, skipped: 'error' as const }; + } + })); + + const respawned = results.filter(r => r.skipped === false).length; + const skipped = results.length - respawned; + this.logger?.info(`[ptyHost] ${this.getCliToolName()} respawn complete: ${respawned} respawned, ${skipped} skipped`); + } + + /** + * Re-enter the spawn flow for a single panel. Base implementation calls + * `spawnCliProcess(options)` directly. Subclasses (e.g. `ClaudeCodeManager`) + * override to wrap in their per-panel lock so a concurrent user-initiated + * spawn cannot race with the restart-driven respawn. + * + * Not a public entry point — only called from `respawnAll`. + */ + protected async respawnPanel(options: CliSpawnOptions): Promise { + await this.spawnCliProcess(options); + } + + /** + * Hook for subclasses to tell respawnAll whether the pre-restart spawn had + * completed enough that re-entering makes sense. Default: always respawn. + * `ClaudeCodeManager` overrides to check `isCliReady` on the panel state. + */ + protected isSpawnInProgress(_panelId: string): boolean { + return false; + } + + /** + * Hook for subclasses to tell respawnAll whether the pre-restart spawn was + * a non-interactive `-p` invocation that can't be reproduced after a crash. + * Default: false. `ClaudeCodeManager` overrides to check `isInteractive`. + */ + protected isNonInteractiveSpawn(_options: CliSpawnOptions): boolean { + return false; + } + /** * Set up event handlers for a PTY process */ - protected setupProcessHandlers(ptyProcess: pty.IPty, panelId: string, sessionId: string): void { + protected setupProcessHandlers(ptyProcess: PtyLike, panelId: string, sessionId: string): void { let hasReceivedOutput = false; let lastOutput = ''; let buffer = ''; @@ -1093,4 +1351,4 @@ export abstract class AbstractCliManager extends EventEmitter { return success; } -} \ No newline at end of file +} diff --git a/main/src/services/runCommandManager.ts b/main/src/services/runCommandManager.ts index 748166e..eab9c0d 100644 --- a/main/src/services/runCommandManager.ts +++ b/main/src/services/runCommandManager.ts @@ -8,13 +8,105 @@ import { ShellDetector } from '../utils/shellDetector'; import * as os from 'os'; import { exec } from 'child_process'; import { promisify } from 'util'; -import { configManager } from '../index'; +import { configManager, getPtyHostSupervisor } from '../index'; import { GIT_ATTRIBUTION_ENV } from '../utils/attribution'; +import type { PtyHandle, PtyHostSupervisor } from '../ptyHost/ptyHostSupervisor'; + +/** + * IPty-compatible shim over a ptyHost `PtyHandle`. + * + * Mirrors `PtyHandleShim` in `terminalPanelManager.ts`. 
Kept file-local because + * the other shim is not exported and the surfaces each manager touches diverge + * slightly over time; a tiny amount of duplication is acceptable to keep the + * two call-sites independent. + * + * Critical: `pid` is cached synchronously from the spawn response so the + * synchronous `.pid` reads in `killProcessTree` (lines ~306-432) keep working + * after we route through the async RPC seam. + */ +class RunCommandPtyShim implements pty.IPty { + readonly pid: number; + cols: number; + rows: number; + readonly process = 'ptyHost'; + handleFlowControl = false; + readonly ptyId: string; + private readonly handle: PtyHandle; + + constructor(handle: PtyHandle, cols: number, rows: number) { + this.handle = handle; + this.ptyId = handle.id; + this.pid = handle.pid; + this.cols = cols; + this.rows = rows; + } + + readonly onData = (listener: (data: string) => void): pty.IDisposable => { + return this.handle.onData(listener); + }; + + readonly onExit = ( + listener: (e: { exitCode: number; signal?: number }) => void, + ): pty.IDisposable => { + return this.handle.onExit((exitCode, signal) => { + listener({ + exitCode: exitCode ?? 0, + signal: signal === null ? undefined : signal, + }); + }); + }; + + resize(columns: number, rows: number): void { + this.cols = columns; + this.rows = rows; + this.handle.resize(columns, rows).catch((err: unknown) => { + console.warn('[ptyHost] run-command resize failed', err); + }); + } + + clear(): void { + // No-op; ptyHost does not expose a clear RPC. + } + + write(data: string | Buffer): void { + const str = typeof data === 'string' ? data : data.toString(); + this.handle.write(str).catch((err: unknown) => { + console.warn('[ptyHost] run-command write failed', err); + }); + } + + kill(signal?: string): void { + this.handle.kill(signal as NodeJS.Signals | undefined).catch((err: unknown) => { + console.warn('[ptyHost] run-command kill failed', err); + }); + } + + pause(): void { + this.handle.pause().catch((err: unknown) => { + console.warn('[ptyHost] run-command pause failed', err); + }); + } + + resume(): void { + this.handle.resume().catch((err: unknown) => { + console.warn('[ptyHost] run-command resume failed', err); + }); + } +} interface RunProcess { process: pty.IPty; + /** Host-allocated PTY id when routed through ptyHost; undefined on legacy path. */ + ptyId?: string; + /** True when `process` is a `RunCommandPtyShim` wrapping a ptyHost handle. */ + isPtyHost: boolean; command: ProjectRunCommand; sessionId: string; + /** + * Worktree path captured at spawn time so `respawnAll()` can re-enter + * `startRunCommands` without consulting the session manager. + */ + worktreePath: string; } export class RunCommandManager extends EventEmitter { @@ -101,23 +193,74 @@ export class RunCommandManager extends EventEmitter { this.logger?.verbose(`Using shell: ${shell}`); this.logger?.verbose(`Full command: ${commandWithEnv}`); - // Spawn the shell process with the enhanced environment - // IMPORTANT: We don't use 'detached' here because node-pty already creates a new session - const ptyProcess = pty.spawn(shell, shellArgs, { - name: 'xterm-color', - cols: 80, - rows: 30, - cwd: worktreePath, - env: env - }); + // Spawn the shell process with the enhanced environment. + // IMPORTANT: We don't use 'detached' here because node-pty already creates a new session. 
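+      // (Note: the shim above fire-and-forgets its async writes/kills and
+      // only logs failures, because `pty.IPty` fixes those signatures as
+      // synchronous `void`.)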
+ // When the `usePtyHost` setting is on (with a live supervisor) the + // spawn is routed through the ptyHost `UtilityProcess`; otherwise + // we fall back to the legacy in-main `pty.spawn`. Behavior is + // byte-identical under setting-off or when the supervisor is + // unavailable. + const useFlag = configManager.getUsePtyHost(); + let supervisor: PtyHostSupervisor | null = null; + if (useFlag) { + supervisor = getPtyHostSupervisor(); + if (!supervisor) { + this.logger?.warn('[ptyHost] supervisor unavailable, falling back to legacy pty.spawn for run-command'); + } + } + const usePtyHost = !!supervisor; + + const spawnCols = 80; + const spawnRows = 30; + + let ptyProcess: pty.IPty; + let ptyHostId: string | undefined; + + if (usePtyHost && supervisor) { + // Drop `undefined` values from env so the RPC DTO (`Record`) + // stays well-formed. `process.env` keys can legally be undefined. + const envStr: Record = {}; + for (const [key, value] of Object.entries(env)) { + if (typeof value === 'string') { + envStr[key] = value; + } + } + const spawned = await supervisor.spawn({ + shell, + args: shellArgs, + cwd: worktreePath, + cols: spawnCols, + rows: spawnRows, + env: envStr, + name: 'xterm-color', + }); + const handle = supervisor.getHandle(spawned.ptyId); + if (!handle) { + throw new Error(`[ptyHost] supervisor returned ptyId=${spawned.ptyId} but getHandle() was undefined`); + } + ptyProcess = new RunCommandPtyShim(handle, spawnCols, spawnRows); + ptyHostId = spawned.ptyId; + } else { + ptyProcess = pty.spawn(shell, shellArgs, { + name: 'xterm-color', + cols: spawnCols, + rows: spawnRows, + cwd: worktreePath, + env: env + }); + } const runProcess: RunProcess = { process: ptyProcess, + ptyId: ptyHostId, + isPtyHost: usePtyHost, command, - sessionId + sessionId, + worktreePath }; - // Store the process immediately so it can be stopped if needed + // Store the process AFTER spawn response lands (pid is cached on + // the shim) so it can be stopped if needed. const currentProcesses = this.processes.get(sessionId) || []; currentProcesses.push(runProcess); this.processes.set(sessionId, currentProcesses); @@ -497,4 +640,73 @@ export class RunCommandManager extends EventEmitter { this.logger?.error('Error killing escaped processes:', error as Error); } } -} \ No newline at end of file + + /** + * Re-spawn every live run-command process after a ptyHost `UtilityProcess` + * restart. + * + * Order in the supervisor (see `ptyHostSupervisor.onProcExit`): + * rejectPendingRpcs → keep manager maps → await nextReady → respawnAll + * + * The supervisor intentionally does not emit synthetic exits on host crash. + * A synthetic exit would remove the run-process entries before this method + * can snapshot and restart the command sequence. + * + * Skip rules: + * - Legacy (non-ptyHost) run-commands: supervisor restart is irrelevant + * to them; their `pty.IPty` is still alive. + * - Sessions whose snapshots only held legacy processes contribute nothing. + * + * Per-line exitCode tracking is not preserved across restart; re-running + * the same command sequence from scratch is the least-surprising behavior + * and matches what users see today when a shell crashes mid-run. + */ + async respawnAll(): Promise { + // Snapshot by session before we mutate `this.processes`. Keyed to avoid + // restarting the same sequence multiple times if a session had several + // live run-commands at restart (which today can only happen across + // sequential executions, but the defensive shape stays cheap). 
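+    // Snapshot shape, illustratively: one entry per session, e.g.
+    //   'session-1' -> { sessionId: 'session-1', projectId, worktreePath }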
+ const snapshots = new Map(); + + for (const [sessionId, processes] of this.processes) { + for (const runProcess of processes) { + if (!runProcess.isPtyHost || !runProcess.ptyId) continue; + if (!snapshots.has(sessionId)) { + snapshots.set(sessionId, { + sessionId, + projectId: runProcess.command.project_id, + worktreePath: runProcess.worktreePath, + }); + } + } + } + + if (snapshots.size === 0) { + this.logger?.info('[ptyHost] RunCommandManager respawnAll: no ptyHost-backed run-commands to restart'); + return; + } + + // Drop stale entries before re-entering `startRunCommands`. + for (const sessionId of snapshots.keys()) { + this.processes.delete(sessionId); + } + + this.logger?.info(`[ptyHost] RunCommandManager respawnAll: ${snapshots.size} sessions`); + + const results = await Promise.all( + Array.from(snapshots.values()).map(async ({ sessionId, projectId, worktreePath }) => { + try { + await this.startRunCommands(sessionId, projectId, worktreePath); + return { sessionId, ok: true as const }; + } catch (err) { + this.logger?.error(`[ptyHost] respawnAll: startRunCommands failed for session ${sessionId}`, err instanceof Error ? err : undefined); + return { sessionId, ok: false as const }; + } + }), + ); + + const ok = results.filter(r => r.ok).length; + const failed = results.length - ok; + this.logger?.info(`[ptyHost] RunCommandManager respawn complete: ${ok} sessions (${failed} failed)`); + } +} diff --git a/main/src/services/sessionManager.ts b/main/src/services/sessionManager.ts index a74e5f5..779a48e 100644 --- a/main/src/services/sessionManager.ts +++ b/main/src/services/sessionManager.ts @@ -1637,6 +1637,16 @@ export class SessionManager extends EventEmitter { await this.terminalSessionManager.cleanup(); } + /** + * Re-spawn every live PTY-backed terminal-session after a ptyHost + * `UtilityProcess` restart. Delegates to the owned `TerminalSessionManager`. + * Today this is a no-op stub; Chunk F (Task 7) fills it in when + * `terminalSessionManager.ts:40` is routed through ptyHost. 
+ */ + async respawnAll(): Promise { + await this.terminalSessionManager.respawnAll(); + } + async runTerminalCommand(sessionId: string, command: string): Promise { // Add log entry for terminal command addSessionLog(sessionId, 'info', `Running terminal command: ${command}`, 'Terminal'); diff --git a/main/src/services/terminalPanelManager.ts b/main/src/services/terminalPanelManager.ts index fee685b..bbe9994 100644 --- a/main/src/services/terminalPanelManager.ts +++ b/main/src/services/terminalPanelManager.ts @@ -1,7 +1,7 @@ import * as pty from '@lydell/node-pty'; import { ToolPanel, TerminalPanelState, PanelEventType } from '../../../shared/types/panels'; import { panelManager } from './panelManager'; -import { mainWindow, configManager } from '../index'; +import { mainWindow, configManager, getPtyHostSupervisor } from '../index'; import * as os from 'os'; import * as path from 'path'; import { getShellPath } from '../utils/shellPath'; @@ -9,22 +9,113 @@ import { ShellDetector } from '../utils/shellDetector'; import type { AnalyticsManager } from './analyticsManager'; import { getWSLShellSpawn, buildWSLENV, WSLContext } from '../utils/wslUtils'; import { GIT_ATTRIBUTION_ENV } from '../utils/attribution'; +import type { PtyHandle, PtyHostSupervisor } from '../ptyHost/ptyHostSupervisor'; +import { + type FlowControlRecord, + createFlowControlRecord, + disposeFlowControlRecord, + onAck as flowControlOnAck, + onPtyBytes as flowControlOnPtyBytes, +} from '../ptyHost/flowControl'; -const HIGH_WATERMARK = 100_000; // 100KB — pause PTY when pending exceeds this (visible panels) -const HIGH_WATERMARK_HIDDEN = 300_000; // 300KB — raised watermark for hidden panels so the slower 250ms cadence doesn't trigger pause/resume churn on verbose builds -const LOW_WATERMARK = 10_000; // 10KB — resume PTY when pending drops below this const OUTPUT_BATCH_INTERVAL = 32; // ms (~30fps) — wider window reduces TUI flicker const OUTPUT_BATCH_INTERVAL_HIDDEN = 250; // ms — background / hidden cadence to cut IPC wake-up cost const OUTPUT_BATCH_SIZE = 131072; // 128KB — timer-based flush preferred; size trigger is safety net -const OUTPUT_BATCH_SIZE_HIDDEN = 80_000; // 80KB — cap per-flush size on hidden panels below HIGH_WATERMARK so a single flush can't trip backpressure; high-throughput jobs degrade to size-driven flushes -const PAUSE_SAFETY_TIMEOUT = 5_000; // 5s — auto-resume PTY if no acks arrive (prevents permanent stall) +const OUTPUT_BATCH_SIZE_HIDDEN = 80_000; // 80KB — cap hidden flush size to avoid foreground backpressure churn const MAX_CONCURRENT_SPAWNS = 3; const IDLE_THRESHOLD_MS = 30_000; // 30s — mark panel idle after no PTY output const MAX_SCROLLBACK_BUFFER_SIZE = 500_000; // 500KB of normal shell history const MAX_ALTERNATE_SCREEN_BUFFER_SIZE = 100_000; // 100KB of recent TUI redraw state +/** + * IPty-compatible shim over a ptyHost `PtyHandle`. + * + * When the `usePtyHost` setting is on and the supervisor is available, + * terminal spawns route through the ptyHost UtilityProcess and we get back a + * `PtyHandle` whose methods are async. The managers treat the + * `TerminalProcess.pty` field as a sync `IPty`; this shim preserves that + * assumption by fire-and-forgetting the async calls (errors logged, not + * awaited at call sites) and exposing `pid`/`cols`/`rows` synchronously. + * + * Critical: `pid` is cached from the spawn response so the synchronous + * `.pid` reads in `getSessionPids()` and `killProcessTree` keep working. 
+ */ +class PtyHandleShim implements pty.IPty { + readonly pid: number; + cols: number; + rows: number; + readonly process = 'ptyHost'; + handleFlowControl = false; + readonly ptyId: string; + private readonly handle: PtyHandle; + + constructor(handle: PtyHandle, cols: number, rows: number) { + this.handle = handle; + this.ptyId = handle.id; + this.pid = handle.pid; + this.cols = cols; + this.rows = rows; + } + + readonly onData = (listener: (data: string) => void): pty.IDisposable => { + return this.handle.onData(listener); + }; + + readonly onExit = ( + listener: (e: { exitCode: number; signal?: number }) => void, + ): pty.IDisposable => { + return this.handle.onExit((exitCode, signal) => { + listener({ + exitCode: exitCode ?? 0, + signal: signal === null ? undefined : signal, + }); + }); + }; + + resize(columns: number, rows: number): void { + this.cols = columns; + this.rows = rows; + this.handle.resize(columns, rows).catch((err: unknown) => { + console.warn('[ptyHost] resize failed', err); + }); + } + + clear(): void { + // No-op on non-Windows; ptyHost does not currently expose a clear RPC. + } + + write(data: string | Buffer): void { + const str = typeof data === 'string' ? data : data.toString(); + this.handle.write(str).catch((err: unknown) => { + console.warn('[ptyHost] write failed', err); + }); + } + + kill(signal?: string): void { + this.handle.kill(signal as NodeJS.Signals | undefined).catch((err: unknown) => { + console.warn('[ptyHost] kill failed', err); + }); + } + + pause(): void { + this.handle.pause().catch((err: unknown) => { + console.warn('[ptyHost] pause failed', err); + }); + } + + resume(): void { + this.handle.resume().catch((err: unknown) => { + console.warn('[ptyHost] resume failed', err); + }); + } +} + interface TerminalProcess { pty: pty.IPty; + /** Host-allocated PTY id when routed through ptyHost; undefined under legacy `pty.spawn`. */ + ptyId?: string; + /** True when `pty` is a `PtyHandleShim` wrapping a ptyHost handle. */ + isPtyHost: boolean; panelId: string; sessionId: string; scrollbackBuffer: string; @@ -33,10 +124,19 @@ interface TerminalProcess { currentCommand: string; lastActivity: Date; isWSL?: boolean; - // Flow control - pendingBytes: number; - isPaused: boolean; - pauseSafetyTimer: ReturnType | null; + /** + * WSL context captured at spawn time. Stored so `respawnAll` can re-inject + * the same distro / user / WSLENV propagation after a ptyHost supervisor + * restart without needing to reconstruct it from project state. + */ + wslContext: WSLContext | null; + /** + * Flow-control bookkeeping (pending bytes, pause state, safety timer, + * `pauseRpcInFlight` gate). Lives on the shared `FlowControlRecord` so the + * same state-machine semantics apply to both the legacy `pty.spawn` path + * and the ptyHost `usePtyHost` path. + */ + flowControl: FlowControlRecord; // Output batching outputBuffer: string; outputFlushTimer: ReturnType | null; @@ -160,10 +260,11 @@ export class TerminalPanelManager { return; } - // Track pending bytes for flow control - terminal.pendingBytes += data.length; - - // Send batched output to renderer + // Send batched output to renderer. Legacy path: IPC send via + // `terminal:output`. Flag-on ptyHost path: post the filtered bytes over + // the per-window MessagePort so `electronAPI.ptyHost.onData` subscribers + // fire. Both paths continue to run; the renderer short-circuits the + // legacy handler once a `ptyId` is set to avoid double-delivery. 
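+    // The MessagePort copy goes out below as a `{ type: 'data', ptyId, data }`
+    // frame via `postDataToRenderers`; exit frames reach the renderer through
+    // the supervisor's own broadcast instead.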
 if (mainWindow) {
 mainWindow.webContents.send('terminal:output', {
 sessionId: terminal.sessionId,
@@ -171,40 +272,76 @@
 output: data
 });
 }
+ if (terminal.isPtyHost && terminal.ptyId) {
+ const supervisor = getPtyHostSupervisor();
+ supervisor?.postDataToRenderers(terminal.ptyId, data);
+ }
- // Apply backpressure if watermark exceeded. Hidden panels get a higher
- // watermark to absorb the 250ms cadence without constant pause/resume.
- const watermark = terminal.isVisible ? HIGH_WATERMARK : HIGH_WATERMARK_HIDDEN;
- if (terminal.pendingBytes > watermark && !terminal.isPaused) {
- terminal.isPaused = true;
- terminal.pty.pause();
-
- // Safety valve: auto-resume if no acks arrive (e.g., renderer unmounted)
- if (terminal.pauseSafetyTimer) clearTimeout(terminal.pauseSafetyTimer);
- terminal.pauseSafetyTimer = setTimeout(() => {
- if (terminal.isPaused) {
- terminal.isPaused = false;
- terminal.pendingBytes = 0;
- terminal.pty.resume();
- }
- terminal.pauseSafetyTimer = null;
- }, PAUSE_SAFETY_TIMEOUT);
+ // Update flow-control bookkeeping with the bytes just flushed. The record
+ // owns the HIGH/LOW watermark check, the `pauseRpcInFlight` gate, and the
+ // 5s safety timer; both the legacy and ptyHost paths go through the same
+ // state machine (see `main/src/ptyHost/flowControl.ts`).
+ flowControlOnPtyBytes(
+ terminal.flowControl,
+ data.length,
+ () => this.pausePty(terminal),
+ () => this.resumePty(terminal),
+ );
+ }
+
+ /**
+ * Pause the underlying PTY. Under the ptyHost flag, routes the RPC directly
+ * through the supervisor; flag-off uses the legacy `pty.IPty.pause()` path.
+ *
+ * Returns a promise so the flow-control state machine can defer arming its
+ * safety timer until the pause RPC actually lands (plan lines 619-624).
+ * The legacy path resolves immediately; the ptyHost path resolves when the
+ * RPC response returns.
+ */
+ private pausePty(terminal: TerminalProcess): Promise<void> {
+ if (terminal.isPtyHost && terminal.ptyId) {
+ const supervisor = getPtyHostSupervisor();
+ if (supervisor) {
+ return supervisor.pause(terminal.ptyId).catch((err: unknown) => {
+ console.warn('[TerminalPanelManager] ptyHost pause failed', err);
+ });
+ }
 }
+ terminal.pty.pause();
+ return Promise.resolve();
+ }
+
+ /**
+ * Resume the underlying PTY. Mirror of `pausePty` for the resume side.
+ */
+ private resumePty(terminal: TerminalProcess): void {
+ if (terminal.isPtyHost && terminal.ptyId) {
+ const supervisor = getPtyHostSupervisor();
+ if (supervisor) {
+ supervisor.resume(terminal.ptyId).catch((err: unknown) => {
+ console.warn('[TerminalPanelManager] ptyHost resume failed', err);
+ });
+ return;
+ }
+ }
+ terminal.pty.resume();
 }
 acknowledgeBytes(panelId: string, bytesConsumed: number): void {
 const terminal = this.terminals.get(panelId);
 if (!terminal) return;
- terminal.pendingBytes = Math.max(0, terminal.pendingBytes - bytesConsumed);
+ // Delegate to the shared flow-control helper. It decrements `pendingBytes`,
+ // clears the safety timer, and invokes the resume callback only when the
+ // record is actually paused and bytes drop below `LOW_WATERMARK`.
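+ // Illustrative shape of that helper (the real implementation lives in
+ // main/src/ptyHost/flowControl.ts, outside this diff; `clearSafetyTimer`
+ // is a hypothetical internal detail, and the body mirrors the legacy
+ // inline logic being deleted below):
+ //   export function onAck(rec: FlowControlRecord, bytes: number, resume: () => void): void {
+ //     rec.pendingBytes = Math.max(0, rec.pendingBytes - bytes);
+ //     clearSafetyTimer(rec); // normal ack flow is working
+ //     if (rec.isPaused && rec.pendingBytes < LOW_WATERMARK) {
+ //       rec.isPaused = false;
+ //       resume();
+ //     }
+ //   }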
+ flowControlOnAck(terminal.flowControl, bytesConsumed, () => this.resumePty(terminal));
+ }
- if (terminal.isPaused && terminal.pendingBytes < LOW_WATERMARK) {
- terminal.isPaused = false;
- terminal.pty.resume();
- // Cancel safety timer — normal ack flow is working
- if (terminal.pauseSafetyTimer) {
- clearTimeout(terminal.pauseSafetyTimer);
- terminal.pauseSafetyTimer = null;
+ acknowledgePtyHostBytes(ptyId: string, bytesConsumed: number): void {
+ for (const [panelId, terminal] of this.terminals) {
+ if (terminal.ptyId === ptyId) {
+ this.acknowledgeBytes(panelId, bytesConsumed);
+ return;
 }
 }
 }
@@ -220,14 +357,10 @@ export class TerminalPanelManager {
 // Once hidden, renderer ACKs stop. Do not leave a visible-mode pause
 // pending against bytes the renderer may never acknowledge.
 terminal.outputBuffer = '';
- terminal.pendingBytes = 0;
- if (terminal.pauseSafetyTimer) {
- clearTimeout(terminal.pauseSafetyTimer);
- terminal.pauseSafetyTimer = null;
- }
- if (terminal.isPaused) {
- terminal.isPaused = false;
- terminal.pty.resume();
+ const wasPaused = terminal.flowControl.isPaused;
+ disposeFlowControlRecord(terminal.flowControl);
+ if (wasPaused) {
+ this.resumePty(terminal);
 }
 } else {
 // Hidden output is already present in scrollbackBuffer. Drop any stale
@@ -243,19 +376,13 @@
 console.log(`[TerminalPanelManager] Resetting flow control for panel ${panelId}`);
- // Clear any pending safety timer
- if (terminal.pauseSafetyTimer) {
- clearTimeout(terminal.pauseSafetyTimer);
- terminal.pauseSafetyTimer = null;
- }
-
- // Reset flow control state
- terminal.pendingBytes = 0;
+ const wasPaused = terminal.flowControl.isPaused;
+ // Dispose clears timers, paused state, and pending bytes on the record.
+ disposeFlowControlRecord(terminal.flowControl);
- // Resume PTY if paused
- if (terminal.isPaused) {
- terminal.isPaused = false;
- terminal.pty.resume();
+ // If we interrupted a paused PTY, explicitly resume so bytes flow again.
+ if (wasPaused) {
+ this.resumePty(terminal);
 }
 }
@@ -316,7 +443,7 @@ export class TerminalPanelManager {
 * PANE_* var) silently disappear inside WSL terminals.
 */
 const isWSL = !!wslContext && process.platform === 'win32';
- const wslEnvVars = isWSL
+ const wslEnvVars: Record<string, string> = isWSL
 ? {
 WSLENV: buildWSLENV([
 'GIT_COMMITTER_NAME',
@@ -330,31 +457,87 @@
 }
 : {};
- // Create PTY process with enhanced environment
- const ptyProcess = pty.spawn(shellPath, shellArgs, {
- name: 'xterm-256color',
- cols: initialDimensions?.cols || 80,
- rows: initialDimensions?.rows || 30,
- cwd: spawnCwd,
- env: {
- ...process.env,
- ...GIT_ATTRIBUTION_ENV,
- PATH: enhancedPath,
- TERM: 'xterm-256color',
- COLORTERM: 'truecolor',
- LANG: process.env.LANG || 'en_US.UTF-8',
- WORKTREE_PATH: cwd,
- PANE_SESSION_ID: panel.sessionId,
- PANE_PANEL_ID: panel.id,
- PANE_PORT: String(panePort),
- PANE_WORKSPACE_PATH: cwd,
- ...wslEnvVars,
+ // Build spawn env once so legacy and ptyHost paths receive identical values.
+ const spawnCols = initialDimensions?.cols || 80;
+ const spawnRows = initialDimensions?.rows || 30;
+
+ // `process.env` is `NodeJS.ProcessEnv` which allows `undefined` values; the
+ // ptyHost RPC DTO requires `Record<string, string>`. Drop undefined keys so
+ // both the legacy `pty.spawn` path and the ptyHost path see the same shape.
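+ // e.g. (illustrative values) { PATH: '/usr/bin', npm_config_foo: undefined }
+ // narrows to { PATH: '/usr/bin' } before crossing the RPC boundary.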
+ const baseEnv: Record<string, string> = {};
+ for (const [key, value] of Object.entries(process.env)) {
+ if (typeof value === 'string') {
+ baseEnv[key] = value;
 }
- });
-
+ }
+ const spawnEnv: Record<string, string> = {
+ ...baseEnv,
+ ...GIT_ATTRIBUTION_ENV,
+ PATH: enhancedPath,
+ TERM: 'xterm-256color',
+ COLORTERM: 'truecolor',
+ LANG: process.env.LANG || 'en_US.UTF-8',
+ WORKTREE_PATH: cwd,
+ PANE_SESSION_ID: panel.sessionId,
+ PANE_PANEL_ID: panel.id,
+ PANE_PORT: String(panePort),
+ PANE_WORKSPACE_PATH: cwd,
+ ...wslEnvVars,
+ };
+
+ // Read the setting once per spawn so we don't scatter config reads.
+ // `getPtyHostSupervisor()` returns null when the setting is off or when
+ // supervisor startup failed; in either case we transparently fall back to
+ // the legacy `pty.spawn` path.
+ const useFlag = configManager.getUsePtyHost();
+ let supervisor: PtyHostSupervisor | null = null;
+ if (useFlag) {
+ supervisor = getPtyHostSupervisor();
+ if (!supervisor) {
+ console.warn('[ptyHost] supervisor unavailable, falling back to legacy pty.spawn');
+ }
+ }
+ const usePtyHost = !!supervisor;
+
+ let ptyProcess: pty.IPty;
+ let ptyHostId: string | undefined;
+
+ if (usePtyHost && supervisor) {
+ // Flag-on path: spawn via ptyHost UtilityProcess. Critical invariant:
+ // `this.terminals.set(...)` happens only AFTER the spawn response lands
+ // so synchronous `.pid` readers (getSessionPids, killProcessTree) never
+ // observe a pid-less handle.
+ const spawned = await supervisor.spawn({
+ shell: shellPath,
+ args: shellArgs,
+ cwd: spawnCwd,
+ cols: spawnCols,
+ rows: spawnRows,
+ env: spawnEnv,
+ name: 'xterm-256color',
+ });
+ const handle = supervisor.getHandle(spawned.ptyId);
+ if (!handle) {
+ throw new Error(`[ptyHost] supervisor returned ptyId=${spawned.ptyId} but getHandle() returned undefined`);
+ }
+ ptyProcess = new PtyHandleShim(handle, spawnCols, spawnRows);
+ ptyHostId = spawned.ptyId;
+ } else {
+ // Flag-off path: legacy direct pty.spawn. Unchanged behavior.
+ ptyProcess = pty.spawn(shellPath, shellArgs, {
+ name: 'xterm-256color',
+ cols: spawnCols,
+ rows: spawnRows,
+ cwd: spawnCwd,
+ env: spawnEnv,
+ });
+ }
+
 // Create terminal process object
 const terminalProcess: TerminalProcess = {
 pty: ptyProcess,
+ ptyId: ptyHostId,
+ isPtyHost: usePtyHost,
 panelId: panel.id,
 sessionId: panel.sessionId,
 scrollbackBuffer: '',
@@ -363,9 +546,11 @@
 currentCommand: '',
 lastActivity: new Date(),
 isWSL: !!(wslContext && process.platform === 'win32'),
- pendingBytes: 0,
- isPaused: false,
- pauseSafetyTimer: null,
+ // Capture wslContext so `respawnAll` can re-inject the same WSLENV /
+ // distro / user settings after a ptyHost supervisor restart without
+ // having to reconstruct it from project state.
+ wslContext: wslContext ?? null,
+ flowControl: createFlowControlRecord(),
 outputBuffer: '',
 outputFlushTimer: null,
 isVisible: true,
@@ -374,9 +559,21 @@
 idleTimer: null,
 inSyncBlock: false
 };
-
- // Store in map
+
+ // Store in map (ptyHost path: pid is already populated on the shim).
 this.terminals.set(panel.id, terminalProcess);
+
+ // Tell the renderer which `ptyId` to subscribe to for this panel so
+ // `TerminalPanel.tsx` can use `electronAPI.ptyHost.onData(ptyId, ...)`
+ // under the flag. Flag-off path skips this: the renderer keeps using
+ // the legacy `terminal:output` channel.
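+ // Assumed contract for this handshake, mirroring the payload sent below:
+ //   main:     webContents.send('terminal:ptyReady', { sessionId, panelId, ptyId })
+ //   renderer: electronAPI.ptyHost.onData(ptyId, write)  // per-ptyId data stream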
+ if (usePtyHost && ptyHostId && mainWindow) {
+ mainWindow.webContents.send('terminal:ptyReady', {
+ sessionId: panel.sessionId,
+ panelId: panel.id,
+ ptyId: ptyHostId,
+ });
+ }
 // Get initialCommand from existing state before updating
 const existingState = panel.state.customState as TerminalPanelState | undefined;
@@ -872,13 +1069,21 @@
 const restorationMsg = `\r\n[Session Restored from ${state.lastActivityTime || 'previous session'}]\r\n`;
 terminal.pty.write(restorationMsg);
- // Send scrollback to frontend
+ // Send scrollback to frontend. Dual-path mirrors `flushOutputBuffer`:
+ // `terminal:output` IPC for legacy subscribers, ptyHost port for flag-on.
 if (mainWindow && state.scrollbackBuffer) {
+ const output = typeof state.scrollbackBuffer === 'string'
+ ? state.scrollbackBuffer + restorationMsg
+ : state.scrollbackBuffer.join('\n') + restorationMsg;
 mainWindow.webContents.send('terminal:output', {
 sessionId: panel.sessionId,
 panelId: panel.id,
- output: state.scrollbackBuffer + restorationMsg
+ output,
 });
+ if (terminal.isPtyHost && terminal.ptyId) {
+ const supervisor = getPtyHostSupervisor();
+ supervisor?.postDataToRenderers(terminal.ptyId, output);
+ }
 }
 }
@@ -925,10 +1130,7 @@
 clearTimeout(terminal.outputFlushTimer);
 terminal.outputFlushTimer = null;
 }
- if (terminal.pauseSafetyTimer) {
- clearTimeout(terminal.pauseSafetyTimer);
- terminal.pauseSafetyTimer = null;
- }
+ disposeFlowControlRecord(terminal.flowControl);
 if (terminal.idleTimer) {
 clearTimeout(terminal.idleTimer);
 terminal.idleTimer = null;
@@ -991,6 +1193,109 @@
 }
 }
+ /**
+ * Re-spawn every live terminal panel after a ptyHost `UtilityProcess` restart.
+ *
+ * Order in the supervisor (see `ptyHostSupervisor.onProcExit`):
+ * rejectPendingRpcs → keep manager maps → await nextReady → respawnAll
+ *
+ * The supervisor intentionally does not emit synthetic exits on host crash:
+ * doing so would run `setupTerminalHandlers.onExit` and delete the state this
+ * method needs to respawn. Entries here reference stale `PtyHandleShim`s and
+ * are replaced in-place.
+ *
+ * Skip rules:
+ * - Legacy (non-ptyHost) terminals: supervisor restart is irrelevant to them.
+ * Their underlying `pty.IPty` is still alive; do not touch.
+ * - Panels where spawn never finished (`ptyId` absent): no live PTY to revive.
+ *
+ * Plan Task 6b: run per-panel respawns in parallel via Promise.all.
+ */
+ async respawnAll(): Promise<void> {
+ // Snapshot entries up-front so we can mutate `this.terminals` (delete
+ // stale shims) while iterating without affecting the working set.
+ const snapshots: Array<{
+ panelId: string;
+ sessionId: string;
+ panel: ToolPanel;
+ cwd: string;
+ dimensions: { cols: number; rows: number };
+ wslContext: WSLContext | null;
+ }> = [];
+
+ for (const [panelId, terminal] of this.terminals) {
+ // Only ptyHost-backed terminals participate in supervisor restart.
+ // Legacy `pty.spawn` processes survive the ptyHost crash untouched.
+ if (!terminal.isPtyHost || !terminal.ptyId) {
+ continue;
+ }
+
+ const panel = panelManager.getPanel(panelId);
+ if (!panel) {
+ console.warn(`[ptyHost] respawnAll: panel ${panelId} no longer exists, skipping`);
+ this.terminals.delete(panelId);
+ continue;
+ }
+
+ // Read state for respawn: cwd is persisted on panel state by
+ // `initializeTerminal` (see lines 616-622). Dimensions likewise.
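+ // Illustrative persisted shape (field names from TerminalPanelState;
+ // the values are invented):
+ //   { cwd: '/repos/app/worktrees/feat-x', dimensions: { cols: 120, rows: 32 }, ... }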
+ const cs = (panel.state.customState || {}) as TerminalPanelState; + const cwd = cs.cwd || process.cwd(); + const dimensions = cs.dimensions || { cols: 80, rows: 30 }; + + snapshots.push({ + panelId, + sessionId: terminal.sessionId, + panel, + cwd, + dimensions, + // Carry the original wslContext through so WSL panels get the same + // WSLENV / distro / user propagation on respawn. Without this, WSL + // terminals lose GIT_COMMITTER_* and PANE_* after a supervisor restart. + wslContext: terminal.wslContext, + }); + + // Clear the stale entry so `initializeTerminal`'s duplicate-check at + // `:304` doesn't early-return on the stub we're replacing. + // We also clear any active timers on the stale entry to prevent + // zombie callbacks firing against the new process. + if (terminal.outputFlushTimer) { + clearTimeout(terminal.outputFlushTimer); + terminal.outputFlushTimer = null; + } + disposeFlowControlRecord(terminal.flowControl); + if (terminal.idleTimer) { + clearTimeout(terminal.idleTimer); + terminal.idleTimer = null; + } + this.terminals.delete(panelId); + } + + if (snapshots.length === 0) { + console.log('[ptyHost] TerminalPanelManager respawnAll: no ptyHost-backed panels to restart'); + return; + } + + console.log(`[ptyHost] TerminalPanelManager respawnAll: ${snapshots.length} terminal panels`); + + // Run respawns in parallel. Individual failures don't cancel siblings. + // wslContext is the one captured at original spawn time (Option A); see + // snapshot construction above. + const results = await Promise.all(snapshots.map(async ({ panel, cwd, dimensions, panelId, wslContext }) => { + try { + await this.initializeTerminal(panel, cwd, wslContext, 1, dimensions); + return { panelId, ok: true as const }; + } catch (err) { + console.error(`[ptyHost] respawnAll: initializeTerminal failed for panel ${panelId}:`, err); + return { panelId, ok: false as const }; + } + })); + + const ok = results.filter(r => r.ok).length; + const failed = results.length - ok; + console.log(`[ptyHost] respawn complete: ${ok} terminal panels (${failed} failed)`); + } + /** * Get scrollback buffer for a specific terminal. * Returns null if terminal not found. @@ -1064,10 +1369,7 @@ export class TerminalPanelManager { clearTimeout(terminal.outputFlushTimer); terminal.outputFlushTimer = null; } - if (terminal.pauseSafetyTimer) { - clearTimeout(terminal.pauseSafetyTimer); - terminal.pauseSafetyTimer = null; - } + disposeFlowControlRecord(terminal.flowControl); if (terminal.idleTimer) { clearTimeout(terminal.idleTimer); terminal.idleTimer = null; diff --git a/main/src/services/terminalSessionManager.ts b/main/src/services/terminalSessionManager.ts index 3c1207e..19a2fe3 100644 --- a/main/src/services/terminalSessionManager.ts +++ b/main/src/services/terminalSessionManager.ts @@ -6,9 +6,96 @@ import * as os from 'os'; import { exec } from 'child_process'; import { promisify } from 'util'; import { GIT_ATTRIBUTION_ENV } from '../utils/attribution'; +import { configManager, getPtyHostSupervisor } from '../index'; +import type { PtyHandle, PtyHostSupervisor } from '../ptyHost/ptyHostSupervisor'; + +/** + * IPty-compatible shim over a ptyHost `PtyHandle`. + * + * Mirrors `PtyHandleShim` in `terminalPanelManager.ts`. Kept file-local + * because the other shim is not exported; a tiny amount of duplication is + * acceptable to keep each manager's surface decoupled. 
+ * + * Critical: `pid` is cached from the spawn response so synchronous `.pid` + * reads in `killProcessTree` (lines ~203-311) and the `session.pty.pid` + * read at `:109` keep working after we route through the async RPC seam. + */ +class TerminalSessionPtyShim implements pty.IPty { + readonly pid: number; + cols: number; + rows: number; + readonly process = 'ptyHost'; + handleFlowControl = false; + readonly ptyId: string; + private readonly handle: PtyHandle; + + constructor(handle: PtyHandle, cols: number, rows: number) { + this.handle = handle; + this.ptyId = handle.id; + this.pid = handle.pid; + this.cols = cols; + this.rows = rows; + } + + readonly onData = (listener: (data: string) => void): pty.IDisposable => { + return this.handle.onData(listener); + }; + + readonly onExit = ( + listener: (e: { exitCode: number; signal?: number }) => void, + ): pty.IDisposable => { + return this.handle.onExit((exitCode, signal) => { + listener({ + exitCode: exitCode ?? 0, + signal: signal === null ? undefined : signal, + }); + }); + }; + + resize(columns: number, rows: number): void { + this.cols = columns; + this.rows = rows; + this.handle.resize(columns, rows).catch((err: unknown) => { + console.warn('[ptyHost] terminal-session resize failed', err); + }); + } + + clear(): void { + // No-op; ptyHost does not expose a clear RPC. + } + + write(data: string | Buffer): void { + const str = typeof data === 'string' ? data : data.toString(); + this.handle.write(str).catch((err: unknown) => { + console.warn('[ptyHost] terminal-session write failed', err); + }); + } + + kill(signal?: string): void { + this.handle.kill(signal as NodeJS.Signals | undefined).catch((err: unknown) => { + console.warn('[ptyHost] terminal-session kill failed', err); + }); + } + + pause(): void { + this.handle.pause().catch((err: unknown) => { + console.warn('[ptyHost] terminal-session pause failed', err); + }); + } + + resume(): void { + this.handle.resume().catch((err: unknown) => { + console.warn('[ptyHost] terminal-session resume failed', err); + }); + } +} interface TerminalSession { pty: pty.IPty; + /** Host-allocated PTY id when routed through ptyHost; undefined on legacy path. */ + ptyId?: string; + /** True when `pty` is a `TerminalSessionPtyShim` wrapping a ptyHost handle. */ + isPtyHost: boolean; sessionId: string; cwd: string; } @@ -36,26 +123,78 @@ export class TerminalSessionManager extends EventEmitter { const shellInfo = ShellDetector.getDefaultShell(); console.log(`Using shell: ${shellInfo.path} (${shellInfo.name})`); - // Create a new PTY instance with proper terminal settings - const ptyProcess = pty.spawn(shellInfo.path, shellInfo.args || [], { - name: 'xterm-256color', // Better terminal emulation - cwd: worktreePath, - cols: 80, - rows: 24, - env: { - ...process.env, - ...GIT_ATTRIBUTION_ENV, - PATH: shellPath, - WORKTREE_PATH: worktreePath, - TERM: 'xterm-256color', // Ensure TERM is set for color support - COLORTERM: 'truecolor', // Enable 24-bit color - LANG: process.env.LANG || 'en_US.UTF-8', // Set locale for proper character handling - }, - }); + // Build spawn env once so both paths see identical values. 
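+ // Later spreads win: GIT_ATTRIBUTION_ENV and the explicit PATH / TERM /
+ // COLORTERM entries below shadow anything inherited from process.env
+ // (e.g. an inherited TERM=dumb, an illustrative value, still comes out
+ // as xterm-256color).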
+ const rawEnv: Record<string, string | undefined> = {
+ ...process.env,
+ ...GIT_ATTRIBUTION_ENV,
+ PATH: shellPath,
+ WORKTREE_PATH: worktreePath,
+ TERM: 'xterm-256color', // Ensure TERM is set for color support
+ COLORTERM: 'truecolor', // Enable 24-bit color
+ LANG: process.env.LANG || 'en_US.UTF-8', // Set locale for proper character handling
+ };
+
+ const spawnCols = 80;
+ const spawnRows = 24;
+ const shellArgs = shellInfo.args || [];
+
+ // When the `usePtyHost` setting is on (with a live supervisor) the spawn
+ // is routed through the ptyHost `UtilityProcess`; otherwise fall back to
+ // the legacy in-main `pty.spawn`. Under setting-off or when the
+ // supervisor is unavailable, behavior is byte-identical.
+ const useFlag = configManager.getUsePtyHost();
+ let supervisor: PtyHostSupervisor | null = null;
+ if (useFlag) {
+ supervisor = getPtyHostSupervisor();
+ if (!supervisor) {
+ console.warn('[ptyHost] supervisor unavailable, falling back to legacy pty.spawn for terminal-session');
+ }
+ }
+ const usePtyHost = !!supervisor;
+
+ let ptyProcess: pty.IPty;
+ let ptyHostId: string | undefined;
+
+ if (usePtyHost && supervisor) {
+ // RPC DTO requires `Record<string, string>`; drop undefined keys.
+ const envStr: Record<string, string> = {};
+ for (const [key, value] of Object.entries(rawEnv)) {
+ if (typeof value === 'string') {
+ envStr[key] = value;
+ }
+ }
+ const spawned = await supervisor.spawn({
+ shell: shellInfo.path,
+ args: shellArgs,
+ cwd: worktreePath,
+ cols: spawnCols,
+ rows: spawnRows,
+ env: envStr,
+ name: 'xterm-256color',
+ });
+ const handle = supervisor.getHandle(spawned.ptyId);
+ if (!handle) {
+ throw new Error(`[ptyHost] supervisor returned ptyId=${spawned.ptyId} but getHandle() returned undefined`);
+ }
+ ptyProcess = new TerminalSessionPtyShim(handle, spawnCols, spawnRows);
+ ptyHostId = spawned.ptyId;
+ } else {
+ // Legacy path: direct in-main pty.spawn. Unchanged behavior.
+ ptyProcess = pty.spawn(shellInfo.path, shellArgs, {
+ name: 'xterm-256color',
+ cwd: worktreePath,
+ cols: spawnCols,
+ rows: spawnRows,
+ env: rawEnv as { [key: string]: string },
+ });
+ }
- // Store the session
+ // Store the session. Pid is already cached on the shim for the ptyHost path
+ // so `session.pty.pid` reads inside `closeTerminalSession` stay synchronous.
 this.terminalSessions.set(sessionId, {
 pty: ptyProcess,
+ ptyId: ptyHostId,
+ isPtyHost: usePtyHost,
 sessionId,
 cwd: worktreePath,
 });
@@ -306,7 +445,62 @@ export class TerminalSessionManager extends EventEmitter {
 console.error('Error in killProcessTree:', error);
 success = false;
 }
-
+
 return success;
 }
-}
\ No newline at end of file
+
+ /**
+ * Re-spawn every live terminal-session after a ptyHost `UtilityProcess`
+ * restart.
+ *
+ * Order in the supervisor (see `ptyHostSupervisor.onProcExit`):
+ * rejectPendingRpcs → keep manager maps → await nextReady → respawnAll
+ *
+ * The supervisor intentionally does not emit synthetic exits on host crash.
+ * A synthetic exit would run `createTerminalSession`'s `onExit` cleanup and
+ * delete the session records this method needs to respawn.
+ *
+ * Skip rules:
+ * - Legacy (non-ptyHost) sessions: supervisor restart is irrelevant;
+ * their `pty.IPty` is still alive.
+ * - Shell scrollback is NOT preserved; users see a fresh shell prompt.
+ * That matches the terminal-panel behavior documented in Task 6b.
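+ *
+ * Illustrative supervisor-side sequence on host exit (a sketch only; the
+ * handler itself is `ptyHostSupervisor.onProcExit`, outside this diff):
+ *   this.rejectPendingRpcs(err);               // in-flight RPCs fail fast
+ *   await this.nextReady();                    // fresh UtilityProcess + handshake
+ *   await terminalSessionManager.respawnAll();
+ *   await terminalPanelManager.respawnAll();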
+ */
+ async respawnAll(): Promise<void> {
+ const snapshots: Array<{ sessionId: string; cwd: string }> = [];
+
+ for (const [sessionId, session] of this.terminalSessions) {
+ if (!session.isPtyHost || !session.ptyId) continue;
+ snapshots.push({ sessionId, cwd: session.cwd });
+ }
+
+ if (snapshots.length === 0) {
+ console.log('[ptyHost] TerminalSessionManager respawnAll: no ptyHost-backed sessions to restart');
+ return;
+ }
+
+ // Drop stale entries so `createTerminalSession`'s duplicate-check
+ // (`this.terminalSessions.has(sessionId)`) doesn't early-return.
+ for (const { sessionId } of snapshots) {
+ this.terminalSessions.delete(sessionId);
+ }
+
+ console.log(`[ptyHost] TerminalSessionManager respawnAll: ${snapshots.length} sessions`);
+
+ const results = await Promise.all(
+ snapshots.map(async ({ sessionId, cwd }) => {
+ try {
+ await this.createTerminalSession(sessionId, cwd);
+ return { sessionId, ok: true as const };
+ } catch (err) {
+ console.error(`[ptyHost] respawnAll: createTerminalSession failed for ${sessionId}:`, err);
+ return { sessionId, ok: false as const };
+ }
+ }),
+ );
+
+ const ok = results.filter(r => r.ok).length;
+ const failed = results.length - ok;
+ console.log(`[ptyHost] TerminalSessionManager respawn complete: ${ok} sessions (${failed} failed)`);
+ }
+}
diff --git a/main/src/types/config.ts b/main/src/types/config.ts
index b5485f1..40890f8 100644
--- a/main/src/types/config.ts
+++ b/main/src/types/config.ts
@@ -65,6 +65,9 @@ export interface AppConfig {
 enableCommitFooter?: boolean;
 // Use interactive mode for Claude CLI (persistent process with stdin instead of spawn-per-message)
 useInteractiveMode?: boolean;
+ // Route PTY spawns through an isolated ptyHost UtilityProcess for crash isolation.
+ // Off by default. Requires app restart; the supervisor is forked once at `app.whenReady`.
+ usePtyHost?: boolean;
 // PostHog analytics settings
 analytics?: {
 enabled: boolean;
@@ -122,6 +125,9 @@ export interface UpdateConfigRequest {
 disableCommitFooter?: boolean;
 // Use interactive mode for Claude CLI (persistent process with stdin instead of spawn-per-message)
 useInteractiveMode?: boolean;
+ // Route PTY spawns through an isolated ptyHost UtilityProcess for crash isolation.
+ // Off by default. Requires app restart to take effect.
+ usePtyHost?: boolean;
 // PostHog analytics settings
 analytics?: {
 enabled: boolean;
diff --git a/package.json b/package.json
index 943de39..a2312fc 100644
--- a/package.json
+++ b/package.json
@@ -117,7 +117,8 @@
 "!node_modules/@anthropic-ai/claude-code/vendor/**",
 "main/dist/services/mcpPermissionBridge.js",
 "main/dist/services/mcpPermissionBridgeStandalone.js",
- "main/dist/services/**/*.js"
+ "main/dist/services/**/*.js",
+ "main/dist/main/src/ptyHost/**/*.js"
 ],
 "afterSign": "build/afterSign.js",
 "mac": {