From 8869c95848558be3d657002e9fa413393d6f0a77 Mon Sep 17 00:00:00 2001
From: GeneCodeSavvy
Date: Wed, 21 Jan 2026 14:14:44 +0530
Subject: [PATCH] fix(read): enhance file validation with comprehensive magic byte checking

Improve validation against file signatures from Wikipedia's file signature
reference to be more robust:
- PNG: Validate full 8-byte signature (89 50 4E 47 0D 0A 1A 0A)
- JPEG: Validate 3-byte start marker (FF D8 FF)
- GIF: Check for GIF87a or GIF89a (6-byte format)
- WebP: Validate RIFF...WEBP structure (12-byte check)
- PDF: Validate PDF header (25 50 44 46)

Add comprehensive test coverage for invalid GIF and PDF files to ensure the
robust validation catches misnamed/corrupted files across all formats.
---
 packages/opencode/src/tool/read.ts       | 72 +++++++++++++++++++++++
 packages/opencode/test/tool/read.test.ts | 74 ++++++++++++++++++++++++
 2 files changed, 146 insertions(+)

diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts
index 3b1484cbc0f..83a8a6a83f3 100644
--- a/packages/opencode/src/tool/read.ts
+++ b/packages/opencode/src/tool/read.ts
@@ -64,6 +64,13 @@ export const ReadTool = Tool.define("read", {
       file.type.startsWith("image/") && file.type !== "image/svg+xml" && file.type !== "image/vnd.fastbidsheet"
     const isPdf = file.type === "application/pdf"
     if (isImage || isPdf) {
+      const isValid = await isValidFile(filepath, file, file.type)
+      if (!isValid) {
+        const fileType = isPdf ? "PDF" : "image"
+        throw new Error(
+          `Invalid ${fileType} file: ${filepath}\n\nThe file has a ${file.type} extension but does not contain valid ${fileType} data. This usually happens when a non-${fileType} file is saved with a ${fileType} extension.`,
+        )
+      }
       const mime = file.type
       const msg = `${isImage ? "Image" : "PDF"} read successfully`
       return {
@@ -144,6 +151,71 @@ export const ReadTool = Tool.define("read", {
   },
 })
 
+async function isValidFile(filepath: string, file: Bun.BunFile, detectedMime: string): Promise<boolean> {
+  // Read the first 12 bytes to check magic numbers
+  const buffer = await file.slice(0, 12).arrayBuffer()
+  if (buffer.byteLength === 0) return false
+
+  const bytes = new Uint8Array(buffer)
+
+  // Check magic bytes for common image formats
+  // PNG: 89 50 4E 47 0D 0A 1A 0A (full 8-byte signature)
+  if (detectedMime === "image/png") {
+    return (
+      bytes.length >= 8 &&
+      bytes[0] === 0x89 &&
+      bytes[1] === 0x50 &&
+      bytes[2] === 0x4e &&
+      bytes[3] === 0x47 &&
+      bytes[4] === 0x0d &&
+      bytes[5] === 0x0a &&
+      bytes[6] === 0x1a &&
+      bytes[7] === 0x0a
+    )
+  }
+
+  // JPEG: FF D8 FF (start marker, typically followed by FF E0 or FF E1)
+  if (detectedMime === "image/jpeg") {
+    return bytes.length >= 3 && bytes[0] === 0xff && bytes[1] === 0xd8 && bytes[2] === 0xff
+  }
+
+  // GIF: GIF87a or GIF89a (47 49 46 38 37/39 61)
+  if (detectedMime === "image/gif") {
+    if (bytes.length >= 6) {
+      const isGif87 =
+        bytes[0] === 0x47 && bytes[1] === 0x49 && bytes[2] === 0x46 && bytes[3] === 0x38 && bytes[4] === 0x37 && bytes[5] === 0x61
+      const isGif89 =
+        bytes[0] === 0x47 && bytes[1] === 0x49 && bytes[2] === 0x46 && bytes[3] === 0x38 && bytes[4] === 0x39 && bytes[5] === 0x61
+      return isGif87 || isGif89
+    }
+    // Fallback: at least check for "GIF" prefix
+    return bytes.length >= 3 && bytes[0] === 0x47 && bytes[1] === 0x49 && bytes[2] === 0x46
+  }
+
+  // WebP: RIFF ... WEBP
+  if (detectedMime === "image/webp") {
+    return (
+      bytes.length >= 12 &&
+      bytes[0] === 0x52 &&
+      bytes[1] === 0x49 &&
+      bytes[2] === 0x46 &&
+      bytes[3] === 0x46 &&
+      bytes[8] === 0x57 &&
+      bytes[9] === 0x45 &&
+      bytes[10] === 0x42 &&
+      bytes[11] === 0x50
+    )
+  }
+
+  // PDF: 25 50 44 46 (% P D F), typically followed by version or dash
+  if (detectedMime === "application/pdf") {
+    return bytes.length >= 4 && bytes[0] === 0x25 && bytes[1] === 0x50 && bytes[2] === 0x44 && bytes[3] === 0x46
+  }
+
+  // If we expect an image/PDF but couldn't verify magic bytes, it's likely not valid
+  return false
+}
+
 async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
   const ext = path.extname(filepath).toLowerCase()
   // binary check for common non-text extensions
diff --git a/packages/opencode/test/tool/read.test.ts b/packages/opencode/test/tool/read.test.ts
index 7250bd2fd1e..0a9db6a0c11 100644
--- a/packages/opencode/test/tool/read.test.ts
+++ b/packages/opencode/test/tool/read.test.ts
@@ -329,4 +329,78 @@ root_type Monster;`
       },
     })
   })
+
+  test("detects and rejects invalid image files (text saved with image extension)", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        // Create a text file with .jpeg extension
+        await Bun.write(path.join(dir, "fake-image.jpeg"), "hello world, this is not an image")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        const error = await read.execute({ filePath: path.join(tmp.path, "fake-image.jpeg") }, ctx).catch((e) => e)
+        expect(error).toBeInstanceOf(Error)
+        expect(error.message).toContain("Invalid image file")
+        expect(error.message).toContain("does not contain valid image data")
+      },
+    })
+  })
+
+  test("detects and rejects invalid PNG files", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        // Create a file with .png extension but no PNG header
+        await Bun.write(path.join(dir, "fake-png.png"), "This is not a valid PNG file")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        const error = await read.execute({ filePath: path.join(tmp.path, "fake-png.png") }, ctx).catch((e) => e)
+        expect(error).toBeInstanceOf(Error)
+        expect(error.message).toContain("Invalid image file")
+      },
+    })
+  })
+
+  test("detects and rejects invalid GIF files", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        // Create a file with .gif extension but not a valid GIF
+        await Bun.write(path.join(dir, "fake-gif.gif"), "Not a GIF file at all")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        const error = await read.execute({ filePath: path.join(tmp.path, "fake-gif.gif") }, ctx).catch((e) => e)
+        expect(error).toBeInstanceOf(Error)
+        expect(error.message).toContain("Invalid image file")
+      },
+    })
+  })
+
+  test("detects and rejects invalid PDF files", async () => {
+    await using tmp = await tmpdir({
+      init: async (dir) => {
+        // Create a file with .pdf extension but not valid PDF
+        await Bun.write(path.join(dir, "fake-pdf.pdf"), "This is not a PDF document")
+      },
+    })
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const read = await ReadTool.init()
+        const error = await read.execute({ filePath: path.join(tmp.path, "fake-pdf.pdf") }, ctx).catch((e) => e)
+        expect(error).toBeInstanceOf(Error)
+        expect(error.message).toContain("Invalid PDF file")
+        expect(error.message).toContain("does not contain valid PDF data")
+      },
+    })
+  })
 })