Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions packages/opencode/src/tool/read.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ export const ReadTool = Tool.define("read", {
file.type.startsWith("image/") && file.type !== "image/svg+xml" && file.type !== "image/vnd.fastbidsheet"
const isPdf = file.type === "application/pdf"
if (isImage || isPdf) {
const isValid = await isValidFile(filepath, file, file.type)
if (!isValid) {
const fileType = isPdf ? "PDF" : "image"
throw new Error(
`Invalid ${fileType} file: ${filepath}\n\nThe file has a ${file.type} extension but does not contain valid ${fileType} data. This usually happens when a non-${fileType} file is saved with a ${fileType} extension.`,
)
}
const mime = file.type
const msg = `${isImage ? "Image" : "PDF"} read successfully`
return {
Expand Down Expand Up @@ -144,6 +151,71 @@ export const ReadTool = Tool.define("read", {
},
})

/**
 * Checks whether the leading bytes of `file` match the magic number expected
 * for `detectedMime`.
 *
 * Only the first 12 bytes are read. Signatures are known for PNG, JPEG, GIF,
 * WebP and PDF. Any other MIME type cannot be verified here and is accepted
 * (as long as the file is not empty) so that legitimate formats without a
 * registered signature (for example BMP or TIFF) are not reported as corrupt.
 *
 * @param filepath - Path of the file being checked. Not used by the check
 *   itself; kept so the signature stays stable for callers.
 * @param file - Handle whose first bytes are inspected via `slice().arrayBuffer()`.
 * @param detectedMime - MIME type the caller derived for the file.
 * @returns `false` when the file is empty or its header contradicts a known
 *   signature for `detectedMime`; `true` otherwise.
 */
async function isValidFile(filepath: string, file: Bun.BunFile, detectedMime: string): Promise<boolean> {
  // 12 bytes is enough for the longest check below (WebP: "RIFF" + size + "WEBP").
  const buffer = await file.slice(0, 12).arrayBuffer()
  // An empty file is never a valid image or PDF, whatever its type.
  if (buffer.byteLength === 0) return false

  const bytes = new Uint8Array(buffer)

  // True when `signature` appears in the header starting at `offset`.
  const hasBytesAt = (signature: readonly number[], offset = 0): boolean =>
    bytes.length >= offset + signature.length && signature.every((value, index) => bytes[offset + index] === value)

  switch (detectedMime) {
    case "image/png":
      // Full 8-byte PNG signature: 89 "PNG" CR LF 1A LF
      return hasBytesAt([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])

    case "image/jpeg":
      // Start-of-image marker FF D8 followed by the first segment marker byte FF
      return hasBytesAt([0xff, 0xd8, 0xff])

    case "image/gif":
      if (bytes.length >= 6) {
        // "GIF87a" or "GIF89a"
        return (
          hasBytesAt([0x47, 0x49, 0x46, 0x38, 0x37, 0x61]) || hasBytesAt([0x47, 0x49, 0x46, 0x38, 0x39, 0x61])
        )
      }
      // Fewer than 6 bytes available: fall back to checking just the "GIF" prefix.
      return hasBytesAt([0x47, 0x49, 0x46])

    case "image/webp":
      // "RIFF" at offset 0, 4-byte chunk size (not checked), then "WEBP" at offset 8
      return hasBytesAt([0x52, 0x49, 0x46, 0x46]) && hasBytesAt([0x57, 0x45, 0x42, 0x50], 8)

    case "application/pdf":
      // "%PDF"
      return hasBytesAt([0x25, 0x50, 0x44, 0x46])

    default:
      // No signature is known for this type, so there is nothing to contradict
      // the detected MIME type. Accept rather than reject a possibly valid file.
      return true
  }
}

async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
const ext = path.extname(filepath).toLowerCase()
// binary check for common non-text extensions
Expand Down
74 changes: 74 additions & 0 deletions packages/opencode/test/tool/read.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -329,4 +329,78 @@ root_type Monster;`
},
})
})

test("detects and rejects invalid image files (text saved with image extension)", async () => {
  // Fixture: plain text stored under a .jpeg file name.
  await using fixture = await tmpdir({
    init: async (root) => {
      const target = path.join(root, "fake-image.jpeg")
      await Bun.write(target, "hello world, this is not an image")
    },
  })
  await Instance.provide({
    directory: fixture.path,
    fn: async () => {
      const tool = await ReadTool.init()
      const target = path.join(fixture.path, "fake-image.jpeg")
      // Turn the expected rejection into a value so it can be inspected.
      const outcome = await tool.execute({ filePath: target }, ctx).catch((err) => err)
      expect(outcome).toBeInstanceOf(Error)
      expect(outcome.message).toContain("Invalid image file")
      expect(outcome.message).toContain("does not contain valid image data")
    },
  })
})

test("detects and rejects invalid PNG files", async () => {
  // Fixture: a .png file name whose content is plain text, so it has no PNG header.
  await using fixture = await tmpdir({
    init: async (root) => {
      const target = path.join(root, "fake-png.png")
      await Bun.write(target, "This is not a valid PNG file")
    },
  })
  await Instance.provide({
    directory: fixture.path,
    fn: async () => {
      const tool = await ReadTool.init()
      const target = path.join(fixture.path, "fake-png.png")
      // Turn the expected rejection into a value so it can be inspected.
      const outcome = await tool.execute({ filePath: target }, ctx).catch((err) => err)
      expect(outcome).toBeInstanceOf(Error)
      expect(outcome.message).toContain("Invalid image file")
    },
  })
})

test("detects and rejects invalid GIF files", async () => {
  // Fixture: a .gif file name whose content is plain text rather than GIF data.
  await using fixture = await tmpdir({
    init: async (root) => {
      const target = path.join(root, "fake-gif.gif")
      await Bun.write(target, "Not a GIF file at all")
    },
  })
  await Instance.provide({
    directory: fixture.path,
    fn: async () => {
      const tool = await ReadTool.init()
      const target = path.join(fixture.path, "fake-gif.gif")
      // Turn the expected rejection into a value so it can be inspected.
      const outcome = await tool.execute({ filePath: target }, ctx).catch((err) => err)
      expect(outcome).toBeInstanceOf(Error)
      expect(outcome.message).toContain("Invalid image file")
    },
  })
})

test("detects and rejects invalid PDF files", async () => {
  // Fixture: a .pdf file name whose content is plain text rather than PDF data.
  await using fixture = await tmpdir({
    init: async (root) => {
      const target = path.join(root, "fake-pdf.pdf")
      await Bun.write(target, "This is not a PDF document")
    },
  })
  await Instance.provide({
    directory: fixture.path,
    fn: async () => {
      const tool = await ReadTool.init()
      const target = path.join(fixture.path, "fake-pdf.pdf")
      // Turn the expected rejection into a value so it can be inspected.
      const outcome = await tool.execute({ filePath: target }, ctx).catch((err) => err)
      expect(outcome).toBeInstanceOf(Error)
      expect(outcome.message).toContain("Invalid PDF file")
      expect(outcome.message).toContain("does not contain valid PDF data")
    },
  })
})
})