Skip to content

Commit b82987c

Browse files
aayush-kapoor and gr2m authored
feat(openai): support retrieval for code interpreter generated files (#9899)
## Background

The OpenAI Code Interpreter tool generates files in response to the user's question/query, but our current implementation of `annotations` did not support `container_file_citation`, so the generated files were getting lost. See #9175.

## Summary

Updated the spec to include `container_file_citation` and `file_path`, as specified in the [OAI spec here](https://platform.openai.com/docs/api-reference/responses/object#responses-object-output-output_message-content-output_text-annotations).

## Manual Verification

Ran and updated the examples `examples/ai-core/src/generate-text/openai-responses-code-interpreter.ts` and `examples/ai-core/src/generate-text/openai-code-interpreter-tool.ts`.

## Checklist

- [x] Tests have been added / updated (for bug fixes / features)
- [ ] Documentation has been added / updated (for bug fixes / features)
- [x] A _patch_ changeset for relevant packages has been added (for bug fixes / features - run `pnpm changeset` in the project root)

## Future Work

- Update the docs.
- Try integrating it with a UI example so that the files can be downloaded.

## Related Issues

Fixes #9175

---------

Co-authored-by: Gregor Martynus <[email protected]>
1 parent 35e445d commit b82987c

File tree

12 files changed

+1164
-77
lines changed

12 files changed

+1164
-77
lines changed

.changeset/pink-garlics-care.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ai-sdk/openai': patch
3+
---
4+
5+
feat(openai): support openai code-interpreter annotations

examples/ai-core/src/generate-text/openai-code-interpreter-tool.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ run(async () => {
99
code_interpreter: openai.tools.codeInterpreter(),
1010
},
1111
prompt:
12-
'Simulate rolling two dice 10000 times and and return the sum all the results.',
12+
'Simulate rolling two dice 10000 times and, return the sum of all the results, and upload the result to a file.',
1313
});
1414

1515
console.dir(result.content, { depth: Infinity });

examples/ai-core/src/generate-text/openai-responses-code-interpreter.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ async function main() {
77
const basicResult = await generateText({
88
model: openai.responses('gpt-4.1-mini'),
99
prompt:
10-
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results.',
10+
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
1111
tools: {
1212
code_interpreter: openai.tools.codeInterpreter({}),
1313
},
@@ -18,6 +18,15 @@ async function main() {
1818
console.log('\n=== Other Outputs ===');
1919
console.log(basicResult.toolCalls);
2020
console.log(basicResult.toolResults);
21+
console.log('\n=== Code Interpreter Annotations ===');
22+
for (const part of basicResult.content) {
23+
if (part.type === 'text') {
24+
const annotations = part.providerMetadata?.openai?.annotations;
25+
if (annotations) {
26+
console.dir(annotations);
27+
}
28+
}
29+
}
2130
}
2231

2332
main().catch(console.error);
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import { openai, OpenAIResponsesProviderOptions } from '@ai-sdk/openai';
2+
import {
3+
convertToModelMessages,
4+
InferUITools,
5+
streamText,
6+
ToolSet,
7+
UIDataTypes,
8+
UIMessage,
9+
validateUIMessages,
10+
} from 'ai';
11+
12+
/** Tool set exposed to the model by this route: only the OpenAI code interpreter. */
const tools = {
  code_interpreter: openai.tools.codeInterpreter(),
} satisfies ToolSet;

/** One downloadable file: the name to show and the URL to fetch it from. */
type DownloadLink = {
  filename: string;
  url: string;
};

/** Message-level metadata; `downloadLinks` is absent when there is nothing to download. */
type OpenAICodeInterpreterMetadata = {
  downloadLinks?: Array<DownloadLink>;
};

/**
 * UI message type for this chat route: carries the optional download-link
 * metadata and the tool part types inferred from `tools`.
 */
export type OpenAICodeInterpreterMessage = UIMessage<
  OpenAICodeInterpreterMetadata,
  UIDataTypes,
  InferUITools<typeof tools>
>;
26+
27+
/**
 * Chat endpoint that streams a model response with the code interpreter tool
 * enabled and, once the stream finishes, attaches download links for every
 * container file cited in the sources of the finished steps.
 *
 * @param req Incoming request whose JSON body contains a `messages` array.
 * @returns The UI message stream response produced by `streamText`.
 */
export async function POST(req: Request) {
  const { messages } = await req.json();
  const uiMessages = await validateUIMessages({ messages });

  type ContainerFile = {
    containerId: string;
    fileId: string;
    filename: string;
  };

  // Filled in by onStepFinish while the response streams; read again when the
  // 'finish' part is emitted.
  const collectedFiles: ContainerFile[] = [];

  // Records a file unless the same container/file pair was already recorded.
  const rememberFile = (candidate: ContainerFile): void => {
    const alreadyKnown = collectedFiles.some(
      known =>
        known.containerId === candidate.containerId &&
        known.fileId === candidate.fileId,
    );
    if (!alreadyKnown) {
      collectedFiles.push(candidate);
    }
  };

  const result = streamText({
    model: openai('gpt-5-nano'),
    tools,
    messages: convertToModelMessages(uiMessages),
    onStepFinish: async ({ sources, request }) => {
      console.log(JSON.stringify(request.body, null, 2));

      for (const source of sources) {
        // Only document sources that carry both OpenAI identifiers qualify.
        if (source.sourceType !== 'document') {
          continue;
        }
        const openaiMetadata = source.providerMetadata?.openai;
        if (!openaiMetadata?.containerId || !openaiMetadata?.fileId) {
          continue;
        }

        rememberFile({
          containerId: String(openaiMetadata.containerId),
          fileId: String(openaiMetadata.fileId),
          filename: source.filename || source.title || 'file',
        });
      }
    },
    providerOptions: {
      openai: {
        store: true,
      } satisfies OpenAIResponsesProviderOptions,
    },
  });

  return result.toUIMessageStreamResponse({
    originalMessages: uiMessages,
    messageMetadata: ({ part }) => {
      // Metadata is only produced on the final part, and only when at least
      // one container file was collected.
      if (part.type !== 'finish' || collectedFiles.length === 0) {
        return undefined;
      }

      const downloadLinks = collectedFiles.map(file => {
        const query = [
          `container_id=${encodeURIComponent(file.containerId)}`,
          `file_id=${encodeURIComponent(file.fileId)}`,
          `filename=${encodeURIComponent(file.filename)}`,
        ].join('&');

        return {
          filename: file.filename,
          url: `/api/download-container-file?${query}`,
        };
      });

      return { downloadLinks };
    },
  });
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/**
 * Characters accepted in `container_id` / `file_id`. Both values are placed
 * into the path of an authenticated upstream request, so anything that could
 * alter the path (`/`, `.`, `?`, `#`, `%`, …) is rejected outright.
 * NOTE(review): assumes OpenAI identifiers (e.g. `cntr_…`, `cfile_…`) only use
 * letters, digits, `_` and `-` — confirm against the API reference.
 */
const OPENAI_ID_PATTERN = /^[A-Za-z0-9_-]+$/;

/**
 * Builds a `Content-Disposition: attachment` header value for `filename`.
 *
 * The quoted `filename` parameter is restricted to printable ASCII without
 * `"` or `\` so it can neither break out of the quoted string nor make the
 * `Headers` constructor throw. When that sanitising changed the name, the
 * exact name is additionally sent percent-encoded as `filename*`.
 */
function buildContentDisposition(filename: string): string {
  const asciiFallback = filename
    .replace(/[^\x20-\x7E]/g, '_')
    .replace(/["\\]/g, '_');

  if (asciiFallback === filename) {
    return `attachment; filename="${filename}"`;
  }

  // encodeURIComponent leaves ' ( ) * untouched; they are not valid in an
  // extended header parameter value, so escape them as well.
  const encoded = encodeURIComponent(filename).replace(
    /['()*]/g,
    ch => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`,
  );

  return `attachment; filename="${asciiFallback}"; filename*=UTF-8''${encoded}`;
}

/**
 * Downloads the content of a container file from the OpenAI API using the
 * server-side `OPENAI_API_KEY` and returns it to the caller as an attachment.
 *
 * Query parameters:
 * - `container_id` (required): container that holds the file.
 * - `file_id` (required): file inside that container.
 * - `filename` (optional): name offered for the download, defaults to `file`.
 *
 * @returns 400 when an id is missing or contains characters outside
 * `[A-Za-z0-9_-]`, 500 when the API key is not configured or the download
 * throws, the upstream status when the upstream request is not ok, and
 * otherwise the file bytes with the upstream content type.
 */
export async function GET(req: Request) {
  const { searchParams } = new URL(req.url);
  const containerId = searchParams.get('container_id');
  const fileId = searchParams.get('file_id');
  const filename = searchParams.get('filename') || 'file';

  if (!containerId || !fileId) {
    return new Response('Missing container_id or file_id', { status: 400 });
  }

  // The ids come from the query string (untrusted). Without this check a value
  // such as `..` or `a/b?x=` would redirect the authenticated request below to
  // a different OpenAI endpoint.
  if (!OPENAI_ID_PATTERN.test(containerId) || !OPENAI_ID_PATTERN.test(fileId)) {
    return new Response('Invalid container_id or file_id', { status: 400 });
  }

  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    return new Response('OPENAI_API_KEY not configured', { status: 500 });
  }

  try {
    const response = await fetch(
      `https://api.openai.com/v1/containers/${containerId}/files/${fileId}/content`,
      {
        headers: {
          Authorization: `Bearer ${apiKey}`,
        },
      },
    );

    if (!response.ok) {
      return new Response(`Failed to fetch file: ${response.statusText}`, {
        status: response.status,
      });
    }

    const arrayBuffer = await response.arrayBuffer();
    const contentType =
      response.headers.get('content-type') || 'application/octet-stream';

    return new Response(arrayBuffer, {
      headers: {
        'Content-Type': contentType,
        'Content-Disposition': buildContentDisposition(filename),
      },
    });
  } catch (error) {
    console.error('Error downloading file:', error);
    return new Response('Error downloading file', { status: 500 });
  }
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
'use client';
2+
3+
import { useChat } from '@ai-sdk/react';
4+
import { DefaultChatTransport } from 'ai';
5+
import ChatInput from '@/components/chat-input';
6+
import { OpenAICodeInterpreterMessage } from '@/app/api/chat-openai-code-interpreter-annotation-download/route';
7+
import CodeInterpreterView from '@/components/tool/openai-code-interpreter-view';
8+
9+
/**
 * Chat page wired to the
 * `/api/chat-openai-code-interpreter-annotation-download` route. Renders text
 * parts and code interpreter tool parts of each message, followed by any
 * download links found in the message metadata.
 */
export default function TestOpenAIWebSearch() {
  const chat = useChat<OpenAICodeInterpreterMessage>({
    transport: new DefaultChatTransport({
      api: '/api/chat-openai-code-interpreter-annotation-download',
    }),
  });
  const { status, sendMessage, messages } = chat;

  return (
    <div className="flex flex-col py-24 mx-auto w-full max-w-md stretch">
      <h1 className="mb-4 text-xl font-bold">OpenAI Code Interpreter Test</h1>

      {messages.map(message => {
        const speaker = message.role === 'user' ? 'User: ' : 'AI: ';
        // Missing metadata and an empty list are both treated as "no links".
        const links = message.metadata?.downloadLinks ?? [];

        return (
          <div key={message.id} className="whitespace-pre-wrap">
            {speaker}
            {message.parts.map((part, partIndex) => {
              if (part.type === 'text') {
                return <div key={partIndex}>{part.text}</div>;
              }
              if (part.type === 'tool-code_interpreter') {
                return <CodeInterpreterView key={partIndex} invocation={part} />;
              }
              // Every other part type renders nothing.
              return undefined;
            })}
            {links.length > 0 ? (
              <div className="mt-2 space-y-1">
                {links.map((link, linkIndex) => (
                  <a
                    key={linkIndex}
                    href={link.url}
                    download={link.filename}
                    className="text-blue-600 hover:underline block"
                  >
                    📥 Download {link.filename}
                  </a>
                ))}
              </div>
            ) : null}
          </div>
        );
      })}

      <ChatInput status={status} onSubmit={text => sendMessage({ text })} />
    </div>
  );
}

packages/azure/src/__snapshots__/azure-openai-provider.test.ts.snap

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,14 @@ providers and models, and which ones are available in the AI SDK.",
151151
{
152152
"providerMetadata": {
153153
"openai": {
154+
"annotations": [
155+
{
156+
"file_id": "file-Ebzhf8H4DPGPr9pUhr7n7v",
157+
"filename": "ai.pdf",
158+
"index": 350,
159+
"type": "file_citation",
160+
},
161+
],
154162
"itemId": "msg_0365d26c32c64c650068cabb0e66b081949f66f61dacef39f3",
155163
},
156164
},
@@ -219,6 +227,14 @@ exports[`responses > doGenerate > file search tool > without results include > s
219227
{
220228
"providerMetadata": {
221229
"openai": {
230+
"annotations": [
231+
{
232+
"file_id": "file-Ebzhf8H4DPGPr9pUhr7n7v",
233+
"filename": "ai.pdf",
234+
"index": 438,
235+
"type": "file_citation",
236+
},
237+
],
222238
"itemId": "msg_0a098396a8feca410068caae457c508196b2fcd079d1d3ec74",
223239
},
224240
},

packages/openai/src/responses/__fixtures__/openai-code-interpreter-tool.1.json

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
{
2-
"id": "resp_68c2e2c6f7208190b9a439ac98147eb40b24aae9c6c01e4f",
2+
"id": "resp_024ee52fc1900767006903bf276a60819395647a8a01d4a3d8",
33
"object": "response",
4-
"created_at": 1757602503,
4+
"created_at": 1761853223,
55
"status": "completed",
66
"background": false,
7+
"billing": {
8+
"payer": "developer"
9+
},
710
"error": null,
811
"incomplete_details": null,
912
"instructions": null,
@@ -12,56 +15,78 @@
1215
"model": "gpt-5-nano-2025-08-07",
1316
"output": [
1417
{
15-
"id": "rs_68c2e2cbc0a08190bc754d734af0940f0b24aae9c6c01e4f",
18+
"id": "rs_024ee52fc1900767006903bf2cf8348193be2e9dedeedfd7eb",
1619
"type": "reasoning",
1720
"summary": []
1821
},
1922
{
20-
"id": "ci_68c2e2cf522c81908f3e2c1bccd1493b0b24aae9c6c01e4f",
23+
"id": "ci_024ee52fc1900767006903bf34e2b08193a689f71dcc3724f7",
2124
"type": "code_interpreter_call",
2225
"status": "completed",
23-
"code": "import random\nrandom.seed() # system time\nn=10000\nsums=[(random.randint(1,6)+random.randint(1,6)) for _ in range(n)]\ntotal=sum(sums)\nmean=total/n\nfrom collections import Counter\ncounts=Counter(sums)\ncounts_sorted=[(sum_val, counts[sum_val]) for sum_val in range(2,13)]\ntotal, mean, counts_sorted[:5], counts_sorted[-5:]",
24-
"container_id": "cntr_68c2e2caf50c8193b4069276f4c75ef301e63bd5fd6a4d4d",
26+
"code": "import random\r\n\r\ntrials = 10000\r\nsums = []\r\nfor _ in range(trials):\r\n die1 = random.randint(1,6)\r\n die2 = random.randint(1,6)\r\n sums.append(die1 + die2)\r\ntotal_sum = sum(sums)\r\nlen(sums), total_sum\n",
27+
"container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
2528
"outputs": [
2629
{
2730
"type": "logs",
28-
"logs": "(70141,\n 7.0141,\n [(2, 269), (3, 511), (4, 865), (5, 1130), (6, 1353)],\n [(8, 1387), (9, 1110), (10, 828), (11, 538), (12, 299)])"
31+
"logs": "(10000, 70024)"
2932
}
3033
]
3134
},
3235
{
33-
"id": "rs_68c2e2d67f60819097167827b0e29b4f0b24aae9c6c01e4f",
36+
"id": "rs_024ee52fc1900767006903bf381cec8193a48068baa82e17a3",
37+
"type": "reasoning",
38+
"summary": []
39+
},
40+
{
41+
"id": "ci_024ee52fc1900767006903bf38f1f08193a0b46ddc935fa028",
42+
"type": "code_interpreter_call",
43+
"status": "completed",
44+
"code": "filename = \"/mnt/data/two_dice_sums_10000.txt\"\r\nwith open(filename, \"w\") as f:\r\n for s in sums:\r\n f.write(str(s) + \"\\n\")\r\n f.write(\"TOTAL: \" + str(total_sum) + \"\\n\")\r\nfilename, os.path.getsize(filename)\n",
45+
"container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
46+
"outputs": []
47+
},
48+
{
49+
"id": "rs_024ee52fc1900767006903bf3ddb0881939e1af28db8fb9f17",
3450
"type": "reasoning",
3551
"summary": []
3652
},
3753
{
38-
"id": "ci_68c2e2d75fe08190b27c6e44213143010b24aae9c6c01e4f",
54+
"id": "ci_024ee52fc1900767006903bf3e05b48193bbb2367cbc9a299e",
3955
"type": "code_interpreter_call",
4056
"status": "completed",
41-
"code": "counts_sorted_full = [(s,c) for s,c in sorted(counts.items())]\ncounts_sorted_full",
42-
"container_id": "cntr_68c2e2caf50c8193b4069276f4c75ef301e63bd5fd6a4d4d",
57+
"code": "import os\r\nfilename = \"/mnt/data/two_dice_sums_10000.txt\"\r\nwith open(filename, \"w\") as f:\r\n for s in sums:\r\n f.write(str(s) + \"\\n\")\r\n f.write(\"TOTAL: \" + str(total_sum) + \"\\n\")\r\nos.path.getsize(filename), filename\n",
58+
"container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
4359
"outputs": [
4460
{
4561
"type": "logs",
46-
"logs": "[(2, 269),\n (3, 511),\n (4, 865),\n (5, 1130),\n (6, 1353),\n (7, 1710),\n (8, 1387),\n (9, 1110),\n (10, 828),\n (11, 538),\n (12, 299)]"
62+
"logs": "(21680, '/mnt/data/two_dice_sums_10000.txt')"
4763
}
4864
]
4965
},
5066
{
51-
"id": "rs_68c2e2dab6508190987f0d3db143b0580b24aae9c6c01e4f",
67+
"id": "rs_024ee52fc1900767006903bf40cd488193b3a36868bf31054a",
5268
"type": "reasoning",
5369
"summary": []
5470
},
5571
{
56-
"id": "msg_68c2e2e513c88190a72cefb37140d19a0b24aae9c6c01e4f",
72+
"id": "msg_024ee52fc1900767006903bf43b66081939b669e0ce1deb286",
5773
"type": "message",
5874
"status": "completed",
5975
"content": [
6076
{
6177
"type": "output_text",
62-
"annotations": [],
78+
"annotations": [
79+
{
80+
"type": "container_file_citation",
81+
"container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
82+
"end_index": 236,
83+
"file_id": "cfile_6903bf45e3288191af3d56e6d23c3a4d",
84+
"filename": "two_dice_sums_10000.txt",
85+
"start_index": 195
86+
}
87+
],
6388
"logprobs": [],
64-
"text": "Here’s a quick result from simulating 10,000 rolls of two fair six-sided dice (sum per trial, then total across all trials):\n\n- Total sum of all 10,000 trial sums: 70141\n- Average sum per trial: 7.0141\n\nDistribution of the per-trial sums (2–12):\n- 2: 269 (2.69%)\n- 3: 511 (5.11%)\n- 4: 865 (8.65%)\n- 5: 1130 (11.30%)\n- 6: 1353 (13.53%)\n- 7: 1710 (17.10%)\n- 8: 1387 (13.87%)\n- 9: 1110 (11.10%)\n- 10: 828 (8.28%)\n- 11: 538 (5.38%)\n- 12: 299 (2.99%)\n\nNotes:\n- The total is around 7,0000 since the expected sum per trial is 7, so 10,000 trials ≈ 70000. Your exact total will vary with each run unless you fix a random seed.\n- If you’d like, I can provide the per-trial sums as a CSV file or share a reproducible run with a fixed seed."
89+
"text": "I ran 10,000 trials of rolling two dice and summed the results.\n\n- Total sum across all 10,000 rolls: 70024\n- Per-trial sums were saved to a file. You can download it here:\n [Download the file](sandbox:/mnt/data/two_dice_sums_10000.txt)\n\nThe file contains 10,000 lines (one sum per line) followed by a final line with the total, e.g., \"TOTAL: 70024\".\n\nIf you’d like the file in a different format (CSV, JSON) or with only the total, I can adjust and re-upload."
6590
}
6691
],
6792
"role": "assistant"
@@ -97,16 +122,16 @@
97122
"top_p": 1,
98123
"truncation": "disabled",
99124
"usage": {
100-
"input_tokens": 4071,
125+
"input_tokens": 2283,
101126
"input_tokens_details": {
102127
"cached_tokens": 0
103128
},
104-
"output_tokens": 2456,
129+
"output_tokens": 1928,
105130
"output_tokens_details": {
106-
"reasoning_tokens": 2176
131+
"reasoning_tokens": 1792
107132
},
108-
"total_tokens": 6527
133+
"total_tokens": 4211
109134
},
110135
"user": null,
111136
"metadata": {}
112-
}
137+
}

0 commit comments

Comments
 (0)