Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion packages/engine/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,12 @@ export interface EngineConfig {
ffmpegEncodeTimeout: number;
/** Timeout for FFmpeg mux/faststart processes (ms). Default: 300_000 */
ffmpegProcessTimeout: number;
/** Timeout for FFmpeg streaming encode (ms). Default: 600_000 */
/**
* Inactivity timeout for FFmpeg streaming encode (ms). The timer resets on
* every successful `writeFrame` call, so this caps the duration of a
* single "no frame arrived" gap (capture hang, dead Chrome), not the total
* render time. Default: 600_000 (10 minutes without any frame = dead).
*/
ffmpegStreamingTimeout: number;

// ── HDR ──────────────────────────────────────────────────────────────
Expand Down
67 changes: 67 additions & 0 deletions packages/engine/src/services/streamingEncoder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -572,4 +572,71 @@ describe("spawnStreamingEncoder lifecycle and cleanup", () => {
controller.abort();
expect(proc.kill).not.toHaveBeenCalled();
});

it("inactivity timeout fires only after a no-frame gap exceeds ffmpegStreamingTimeout", async () => {
vi.useFakeTimers();
try {
const { spawn, calls } = createSpawnSpy();
vi.resetModules();
vi.doMock("child_process", () => ({ spawn }));

const { spawnStreamingEncoder } = await import("./streamingEncoder.js");
const dir = mkdtempSync(join(tmpdir(), "se-heartbeat-"));
const encoder = await spawnStreamingEncoder(join(dir, "out.mp4"), baseOptions, undefined, {
ffmpegStreamingTimeout: 1000,
});

const proc = calls[0]!.proc;

// Frames every 900ms — under the 1000ms inactivity threshold — should
// keep resetting the timer. After 9× 900ms = 8.1s of "slow but
// progressing" capture the encoder must still be alive. The old total-
// render timeout would have fired SIGTERM at ~1000ms.
for (let i = 0; i < 9; i++) {
encoder.writeFrame(Buffer.from([i]));
vi.advanceTimersByTime(900);
}
expect(proc.kill).not.toHaveBeenCalled();

// Now stall — no writeFrame for longer than the threshold. SIGTERM fires.
vi.advanceTimersByTime(1100);
expect(proc.kill).toHaveBeenCalledWith("SIGTERM");
} finally {
vi.useRealTimers();
}
});

it("inactivity timeout still fires when stdin is backpressured (stalled ffmpeg, live producer)", async () => {
vi.useFakeTimers();
try {
// Simulate the FFmpeg-hangs-but-Chrome-keeps-producing case: stdin.write
// always returns false (Node has to buffer because ffmpeg isn't draining
// the pipe). The heartbeat must NOT reset on those buffered writes —
// otherwise a hung ffmpeg with a steady frame producer would never
// SIGTERM and we'd grow Node's stdin buffer until OOM.
const { spawn, calls } = createSpawnSpy();
vi.resetModules();
vi.doMock("child_process", () => ({ spawn }));

const { spawnStreamingEncoder } = await import("./streamingEncoder.js");
const dir = mkdtempSync(join(tmpdir(), "se-backpressure-"));
const encoder = await spawnStreamingEncoder(join(dir, "out.mp4"), baseOptions, undefined, {
ffmpegStreamingTimeout: 1000,
});

const proc = calls[0]!.proc;
proc.stdin.write = (_chunk: Buffer) => false;

// Pump 9 frames at 900ms intervals — all returning false. The reset
// should NOT fire (every write was buffered, not accepted), so the
// 1000ms timer (last reset on spawn) elapses near the start.
for (let i = 0; i < 9; i++) {
encoder.writeFrame(Buffer.from([i]));
vi.advanceTimersByTime(900);
}
expect(proc.kill).toHaveBeenCalledWith("SIGTERM");
} finally {
vi.useRealTimers();
}
});
});
47 changes: 36 additions & 11 deletions packages/engine/src/services/streamingEncoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -416,13 +416,26 @@ export async function spawnStreamingEncoder(
}
}

// Timeout safety
// Inactivity timeout: fires only when no frame has been written for
// `ffmpegStreamingTimeout` ms. A slow-but-progressing capture (e.g. a CI
// runner under load) keeps resetting the timer on each writeFrame, so total
// wall-clock render time is unbounded — only a true hang (Chrome dead,
// capture stuck, no frames arriving) trips SIGTERM. The 600s default was
// previously a total-render cap, which intermittently killed legitimate
// slow renders mid-encode (FFmpeg got SIGTERM after most frames were sent;
// libx264 printed its summary and exited 255, observable as
// "Streaming encode failed: FFmpeg exited with code 255" with audio:0kB).
const streamingTimeout = config?.ffmpegStreamingTimeout ?? DEFAULT_CONFIG.ffmpegStreamingTimeout;
const timer = setTimeout(() => {
if (exitStatus === "running") {
ffmpeg.kill("SIGTERM");
}
}, streamingTimeout);
let timer: NodeJS.Timeout | null = null;
const resetTimer = () => {
if (timer) clearTimeout(timer);
timer = setTimeout(() => {
if (exitStatus === "running") {
ffmpeg.kill("SIGTERM");
}
}, streamingTimeout);
};
resetTimer();

const encoder: StreamingEncoder = {
writeFrame: (buffer: Buffer): boolean => {
Expand All @@ -433,11 +446,20 @@ export async function spawnStreamingEncoder(
// provided buffer and drain it asynchronously. The HDR path's compositor
// reuses pre-allocated transOutput/normalCanvas buffers across frames,
// so without this copy the pipe would read partially-overwritten data
// and flicker. The SDR path doesn't invoke writeFrame at all (it pipes
// PNG files via encodeFramesFromDir), so the memcpy here is HDR-only
// and justified by correctness.
// and flicker.
const copy = Buffer.from(buffer);
return ffmpeg.stdin.write(copy);
const accepted = ffmpeg.stdin.write(copy);
// Reset inactivity timer ONLY on `accepted === true`. `true` means the
// write went through to the kernel pipe without buffering in Node —
// proof FFmpeg is actually consuming. `false` means Node's writable
// stream had to buffer (FFmpeg hasn't drained the pipe yet); we deliberately
// don't reset on `false` so a hung FFmpeg with a still-producing Chrome
// can't keep us alive forever while Node's stdin buffer grows to OOM. In
// steady state with a slower-but-alive FFmpeg, writes alternate between
// true and false as the buffer drains and refills; the trues are enough
// to keep the heartbeat ticking.
if (accepted) resetTimer();
return accepted;
},

close: async (): Promise<StreamingEncoderResult> => {
Expand All @@ -455,7 +477,10 @@ export async function spawnStreamingEncoder(
// repeated calls. If you change this method, preserve idempotency or
// a regression here will silently double-close ffmpeg and produce
// harder-to-trace errors at the orchestrator layer.
clearTimeout(timer);
if (timer) {
clearTimeout(timer);
timer = null;
}
if (signal) signal.removeEventListener("abort", onAbort);

const stdin = ffmpeg.stdin;
Expand Down
Loading