feat(openai): support retrieval for code interpreter generated files (#9899)

aayush-kapoor · gr2m · web-flow · commit b82987ce29ff · 2025-11-03T15:08:01.000-05:00
## Background The OpenAI Code Interpreter tool generates files based on the user's question/query, but our current implementation of `annotations` didn't support `container_file_citation`. Hence, generated files were getting lost. See #9175. ## Summary Updated the spec to include `container_file_citation` and `file_path` as specified in the [OAI spec here](https://platform.openai.com/docs/api-reference/responses/object#responses-object-output-output_message-content-output_text-annotations) ## Manual Verification Ran + updated examples `examples/ai-core/src/generate-text/openai-responses-code-interpreter.ts` & `examples/ai-core/src/generate-text/openai-code-interpreter-tool.ts` ## Checklist - [x] Tests have been added / updated (for bug fixes / features) - [ ] Documentation has been added / updated (for bug fixes / features) - [x] A _patch_ changeset for relevant packages has been added (for bug fixes / features - run `pnpm changeset` in the project root) ## Future Work - update docs - try integrating it with a UI example so that the files can be downloaded. ## Related Issues Fixes #9175 --------- Co-authored-by: Gregor Martynus <39992+gr2m@users.noreply.github.com>
diff --git a/.changeset/pink-garlics-care.md b/.changeset/pink-garlics-care.md
@@ -0,0 +1,5 @@
+---
+'@ai-sdk/openai': patch
+---
+
+feat(openai): support openai code-interpreter annotations
diff --git a/examples/ai-core/src/generate-text/openai-code-interpreter-tool.ts b/examples/ai-core/src/generate-text/openai-code-interpreter-tool.ts
@@ -9,7 +9,7 @@ run(async () => {
       code_interpreter: openai.tools.codeInterpreter(),
     },
     prompt:
-      'Simulate rolling two dice 10000 times and and return the sum all the results.',
+      'Simulate rolling two dice 10000 times and, return the sum of all the results, and upload the result to a file.',
   });
 
   console.dir(result.content, { depth: Infinity });
diff --git a/examples/ai-core/src/generate-text/openai-responses-code-interpreter.ts b/examples/ai-core/src/generate-text/openai-responses-code-interpreter.ts
@@ -7,7 +7,7 @@ async function main() {
   const basicResult = await generateText({
     model: openai.responses('gpt-4.1-mini'),
     prompt:
-      'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results.',
+      'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
     tools: {
       code_interpreter: openai.tools.codeInterpreter({}),
     },
@@ -18,6 +18,15 @@ async function main() {
   console.log('\n=== Other Outputs ===');
   console.log(basicResult.toolCalls);
   console.log(basicResult.toolResults);
+  console.log('\n=== Code Interpreter Annotations ===');
+  for (const part of basicResult.content) {
+    if (part.type === 'text') {
+      const annotations = part.providerMetadata?.openai?.annotations;
+      if (annotations) {
+        console.dir(annotations);
+      }
+    }
+  }
 }
 
 main().catch(console.error);
diff --git a/examples/next-openai/app/api/chat-openai-code-interpreter-annotation-download/route.ts b/examples/next-openai/app/api/chat-openai-code-interpreter-annotation-download/route.ts
@@ -0,0 +1,91 @@
+import { openai, OpenAIResponsesProviderOptions } from '@ai-sdk/openai';
+import {
+  convertToModelMessages,
+  InferUITools,
+  streamText,
+  ToolSet,
+  UIDataTypes,
+  UIMessage,
+  validateUIMessages,
+} from 'ai';
+
+/** Tool set passed to `streamText`: only the OpenAI code interpreter tool. */
+const tools = {
+  code_interpreter: openai.tools.codeInterpreter(),
+} satisfies ToolSet;
+
+/** One downloadable file: the name to show and the URL to fetch it from. */
+type DownloadLink = { filename: string; url: string };
+
+/**
+ * UI message shape for this example. Message metadata may carry
+ * `downloadLinks`; tool parts are inferred from `tools`.
+ */
+export type OpenAICodeInterpreterMessage = UIMessage<
+  { downloadLinks?: DownloadLink[] },
+  UIDataTypes,
+  InferUITools<typeof tools>
+>;
+
+/**
+ * Chat endpoint for the code interpreter download example.
+ *
+ * Streams a model response and, while doing so, records every document
+ * source that carries an OpenAI `containerId` and `fileId`. When the stream
+ * finishes, the recorded files are attached to the message metadata as
+ * `downloadLinks` pointing at `/api/download-container-file`.
+ */
+export async function POST(req: Request) {
+  const body = await req.json();
+  const uiMessages = await validateUIMessages({ messages: body.messages });
+
+  // Filled in by onStepFinish, read by messageMetadata once streaming ends.
+  const containerFileSources: Array<{
+    containerId: string;
+    fileId: string;
+    filename: string;
+  }> = [];
+
+  const result = streamText({
+    model: openai('gpt-5-nano'),
+    tools,
+    messages: convertToModelMessages(uiMessages),
+    onStepFinish: async ({ sources, request }) => {
+      console.log(JSON.stringify(request.body, null, 2));
+
+      for (const source of sources) {
+        // Only document sources can reference a container file.
+        if (source.sourceType !== 'document') continue;
+
+        const openaiMetadata = source.providerMetadata?.openai;
+        const rawContainerId = openaiMetadata?.containerId;
+        const rawFileId = openaiMetadata?.fileId;
+        if (!rawContainerId || !rawFileId) continue;
+
+        const containerId = String(rawContainerId);
+        const fileId = String(rawFileId);
+        const filename = source.filename || source.title || 'file';
+
+        // Skip files that were already recorded in an earlier source/step.
+        const alreadyCollected = containerFileSources.some(
+          known => known.containerId === containerId && known.fileId === fileId,
+        );
+        if (alreadyCollected) continue;
+
+        containerFileSources.push({ containerId, fileId, filename });
+      }
+    },
+    providerOptions: {
+      openai: {
+        store: true,
+      } satisfies OpenAIResponsesProviderOptions,
+    },
+  });
+
+  return result.toUIMessageStreamResponse({
+    originalMessages: uiMessages,
+    messageMetadata: ({ part }) => {
+      // Metadata is only produced on the finish part, and only when at
+      // least one container file was collected.
+      if (part.type !== 'finish' || containerFileSources.length === 0) {
+        return undefined;
+      }
+
+      return {
+        downloadLinks: containerFileSources.map(
+          ({ containerId, fileId, filename }) => ({
+            filename,
+            url: `/api/download-container-file?container_id=${encodeURIComponent(containerId)}&file_id=${encodeURIComponent(fileId)}&filename=${encodeURIComponent(filename)}`,
+          }),
+        ),
+      };
+    },
+  });
+}
diff --git a/examples/next-openai/app/api/download-container-file/route.ts b/examples/next-openai/app/api/download-container-file/route.ts
@@ -0,0 +1,46 @@
+/**
+ * Identifiers accepted for `container_id` and `file_id`: letters, digits,
+ * `_` and `-` only. The ids in this example (`cntr_…`, `cfile_…`) fit this
+ * shape; anything else (slashes, dot segments, query characters) is rejected
+ * so it cannot alter the upstream request path.
+ */
+const SAFE_ID_PATTERN = /^[A-Za-z0-9_-]+$/;
+
+/**
+ * Builds a `Content-Disposition` header value for an attachment.
+ *
+ * Emits an ASCII-only quoted `filename` fallback (quotes, backslashes and
+ * non-printable / non-ASCII characters replaced by `_`) plus an RFC 5987
+ * `filename*` parameter carrying the full UTF-8 name. This keeps the header
+ * well-formed for any user-supplied filename.
+ */
+function buildAttachmentDisposition(filename: string): string {
+  const asciiFallback = filename.replace(/[^\x20-\x7e]|["\\]/g, '_');
+  // encodeURIComponent leaves ' ( ) * unescaped, but RFC 5987 does not allow
+  // them unescaped in an extended value.
+  const encoded = encodeURIComponent(filename).replace(
+    /['()*]/g,
+    char => `%${char.charCodeAt(0).toString(16).toUpperCase()}`,
+  );
+  return `attachment; filename="${asciiFallback}"; filename*=UTF-8''${encoded}`;
+}
+
+/**
+ * Downloads a container file from the OpenAI API and returns it to the
+ * caller as an attachment.
+ *
+ * Query parameters:
+ * - `container_id` (required): container that holds the file.
+ * - `file_id` (required): file inside the container.
+ * - `filename` (optional): download name, defaults to `file`.
+ *
+ * Responses:
+ * - 400 when an id is missing or contains unexpected characters.
+ * - 500 when `OPENAI_API_KEY` is not set or the download throws.
+ * - The upstream status when the OpenAI API answers with a non-OK status.
+ */
+export async function GET(req: Request) {
+  const { searchParams } = new URL(req.url);
+  const containerId = searchParams.get('container_id');
+  const fileId = searchParams.get('file_id');
+  const filename = searchParams.get('filename') || 'file';
+
+  if (!containerId || !fileId) {
+    return new Response('Missing container_id or file_id', { status: 400 });
+  }
+
+  // The ids come straight from the query string and end up in the path of a
+  // request that carries the API key, so validate them before use.
+  if (!SAFE_ID_PATTERN.test(containerId) || !SAFE_ID_PATTERN.test(fileId)) {
+    return new Response('Invalid container_id or file_id', { status: 400 });
+  }
+
+  const apiKey = process.env.OPENAI_API_KEY;
+  if (!apiKey) {
+    return new Response('OPENAI_API_KEY not configured', { status: 500 });
+  }
+
+  try {
+    const response = await fetch(
+      `https://api.openai.com/v1/containers/${encodeURIComponent(containerId)}/files/${encodeURIComponent(fileId)}/content`,
+      {
+        headers: {
+          Authorization: `Bearer ${apiKey}`,
+        },
+      },
+    );
+
+    if (!response.ok) {
+      return new Response(`Failed to fetch file: ${response.statusText}`, {
+        status: response.status,
+      });
+    }
+
+    const arrayBuffer = await response.arrayBuffer();
+    const contentType =
+      response.headers.get('content-type') || 'application/octet-stream';
+
+    return new Response(arrayBuffer, {
+      headers: {
+        'Content-Type': contentType,
+        'Content-Disposition': buildAttachmentDisposition(filename),
+      },
+    });
+  } catch (error) {
+    console.error('Error downloading file:', error);
+    return new Response('Error downloading file', { status: 500 });
+  }
+}
diff --git a/examples/next-openai/app/test-openai-code-interpreter-annotation-download/page.tsx b/examples/next-openai/app/test-openai-code-interpreter-annotation-download/page.tsx
@@ -0,0 +1,53 @@
+'use client';
+
+import { useChat } from '@ai-sdk/react';
+import { DefaultChatTransport } from 'ai';
+import ChatInput from '@/components/chat-input';
+import { OpenAICodeInterpreterMessage } from '@/app/api/chat-openai-code-interpreter-annotation-download/route';
+import CodeInterpreterView from '@/components/tool/openai-code-interpreter-view';
+
+/**
+ * Test page for the code interpreter download example.
+ *
+ * Renders the chat transcript (text parts and code interpreter tool parts)
+ * and, for messages whose metadata contains `downloadLinks`, a list of
+ * download anchors below the message.
+ */
+export default function TestOpenAIWebSearch() {
+  const transport = new DefaultChatTransport({
+    api: '/api/chat-openai-code-interpreter-annotation-download',
+  });
+  const { status, sendMessage, messages } =
+    useChat<OpenAICodeInterpreterMessage>({ transport });
+
+  return (
+    <div className="flex flex-col py-24 mx-auto w-full max-w-md stretch">
+      <h1 className="mb-4 text-xl font-bold">OpenAI Code Interpreter Test</h1>
+
+      {messages.map(message => {
+        const speaker = message.role === 'user' ? 'User: ' : 'AI: ';
+        // Missing metadata is treated like an empty list: nothing is rendered.
+        const links = message.metadata?.downloadLinks ?? [];
+
+        return (
+          <div key={message.id} className="whitespace-pre-wrap">
+            {speaker}
+            {message.parts.map((part, position) => {
+              if (part.type === 'text') {
+                return <div key={position}>{part.text}</div>;
+              }
+              if (part.type === 'tool-code_interpreter') {
+                return <CodeInterpreterView key={position} invocation={part} />;
+              }
+              // Other part types are not rendered.
+              return undefined;
+            })}
+            {links.length > 0 && (
+              <div className="mt-2 space-y-1">
+                {links.map((link, position) => (
+                  <a
+                    key={position}
+                    href={link.url}
+                    download={link.filename}
+                    className="text-blue-600 hover:underline block"
+                  >
+                    📥 Download {link.filename}
+                  </a>
+                ))}
+              </div>
+            )}
+          </div>
+        );
+      })}
+
+      <ChatInput status={status} onSubmit={text => sendMessage({ text })} />
+    </div>
+  );
+}
diff --git a/packages/azure/src/__snapshots__/azure-openai-provider.test.ts.snap b/packages/azure/src/__snapshots__/azure-openai-provider.test.ts.snap
@@ -151,6 +151,14 @@ providers and models, and which ones are available in the AI SDK.",
   {
     "providerMetadata": {
       "openai": {
+        "annotations": [
+          {
+            "file_id": "file-Ebzhf8H4DPGPr9pUhr7n7v",
+            "filename": "ai.pdf",
+            "index": 350,
+            "type": "file_citation",
+          },
+        ],
         "itemId": "msg_0365d26c32c64c650068cabb0e66b081949f66f61dacef39f3",
       },
     },
@@ -219,6 +227,14 @@ exports[`responses > doGenerate > file search tool > without results include > s
   {
     "providerMetadata": {
       "openai": {
+        "annotations": [
+          {
+            "file_id": "file-Ebzhf8H4DPGPr9pUhr7n7v",
+            "filename": "ai.pdf",
+            "index": 438,
+            "type": "file_citation",
+          },
+        ],
         "itemId": "msg_0a098396a8feca410068caae457c508196b2fcd079d1d3ec74",
       },
     },
diff --git a/packages/openai/src/responses/__fixtures__/openai-code-interpreter-tool.1.json b/packages/openai/src/responses/__fixtures__/openai-code-interpreter-tool.1.json
@@ -1,9 +1,12 @@
 {
-  "id": "resp_68c2e2c6f7208190b9a439ac98147eb40b24aae9c6c01e4f",
+  "id": "resp_024ee52fc1900767006903bf276a60819395647a8a01d4a3d8",
   "object": "response",
-  "created_at": 1757602503,
+  "created_at": 1761853223,
   "status": "completed",
   "background": false,
+  "billing": {
+    "payer": "developer"
+  },
   "error": null,
   "incomplete_details": null,
   "instructions": null,
@@ -12,56 +15,78 @@
   "model": "gpt-5-nano-2025-08-07",
   "output": [
     {
-      "id": "rs_68c2e2cbc0a08190bc754d734af0940f0b24aae9c6c01e4f",
+      "id": "rs_024ee52fc1900767006903bf2cf8348193be2e9dedeedfd7eb",
       "type": "reasoning",
       "summary": []
     },
     {
-      "id": "ci_68c2e2cf522c81908f3e2c1bccd1493b0b24aae9c6c01e4f",
+      "id": "ci_024ee52fc1900767006903bf34e2b08193a689f71dcc3724f7",
       "type": "code_interpreter_call",
       "status": "completed",
-      "code": "import random\nrandom.seed()  # system time\nn=10000\nsums=[(random.randint(1,6)+random.randint(1,6)) for _ in range(n)]\ntotal=sum(sums)\nmean=total/n\nfrom collections import Counter\ncounts=Counter(sums)\ncounts_sorted=[(sum_val, counts[sum_val]) for sum_val in range(2,13)]\ntotal, mean, counts_sorted[:5], counts_sorted[-5:]",
-      "container_id": "cntr_68c2e2caf50c8193b4069276f4c75ef301e63bd5fd6a4d4d",
+      "code": "import random\r\n\r\ntrials = 10000\r\nsums = []\r\nfor _ in range(trials):\r\n    die1 = random.randint(1,6)\r\n    die2 = random.randint(1,6)\r\n    sums.append(die1 + die2)\r\ntotal_sum = sum(sums)\r\nlen(sums), total_sum\n",
+      "container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
       "outputs": [
         {
           "type": "logs",
-          "logs": "(70141,\n 7.0141,\n [(2, 269), (3, 511), (4, 865), (5, 1130), (6, 1353)],\n [(8, 1387), (9, 1110), (10, 828), (11, 538), (12, 299)])"
+          "logs": "(10000, 70024)"
         }
       ]
     },
     {
-      "id": "rs_68c2e2d67f60819097167827b0e29b4f0b24aae9c6c01e4f",
+      "id": "rs_024ee52fc1900767006903bf381cec8193a48068baa82e17a3",
+      "type": "reasoning",
+      "summary": []
+    },
+    {
+      "id": "ci_024ee52fc1900767006903bf38f1f08193a0b46ddc935fa028",
+      "type": "code_interpreter_call",
+      "status": "completed",
+      "code": "filename = \"/mnt/data/two_dice_sums_10000.txt\"\r\nwith open(filename, \"w\") as f:\r\n    for s in sums:\r\n        f.write(str(s) + \"\\n\")\r\n    f.write(\"TOTAL: \" + str(total_sum) + \"\\n\")\r\nfilename, os.path.getsize(filename)\n",
+      "container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
+      "outputs": []
+    },
+    {
+      "id": "rs_024ee52fc1900767006903bf3ddb0881939e1af28db8fb9f17",
       "type": "reasoning",
       "summary": []
     },
     {
-      "id": "ci_68c2e2d75fe08190b27c6e44213143010b24aae9c6c01e4f",
+      "id": "ci_024ee52fc1900767006903bf3e05b48193bbb2367cbc9a299e",
       "type": "code_interpreter_call",
       "status": "completed",
-      "code": "counts_sorted_full = [(s,c) for s,c in sorted(counts.items())]\ncounts_sorted_full",
-      "container_id": "cntr_68c2e2caf50c8193b4069276f4c75ef301e63bd5fd6a4d4d",
+      "code": "import os\r\nfilename = \"/mnt/data/two_dice_sums_10000.txt\"\r\nwith open(filename, \"w\") as f:\r\n    for s in sums:\r\n        f.write(str(s) + \"\\n\")\r\n    f.write(\"TOTAL: \" + str(total_sum) + \"\\n\")\r\nos.path.getsize(filename), filename\n",
+      "container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
       "outputs": [
         {
           "type": "logs",
-          "logs": "[(2, 269),\n (3, 511),\n (4, 865),\n (5, 1130),\n (6, 1353),\n (7, 1710),\n (8, 1387),\n (9, 1110),\n (10, 828),\n (11, 538),\n (12, 299)]"
+          "logs": "(21680, '/mnt/data/two_dice_sums_10000.txt')"
         }
       ]
     },
     {
-      "id": "rs_68c2e2dab6508190987f0d3db143b0580b24aae9c6c01e4f",
+      "id": "rs_024ee52fc1900767006903bf40cd488193b3a36868bf31054a",
       "type": "reasoning",
       "summary": []
     },
     {
-      "id": "msg_68c2e2e513c88190a72cefb37140d19a0b24aae9c6c01e4f",
+      "id": "msg_024ee52fc1900767006903bf43b66081939b669e0ce1deb286",
       "type": "message",
       "status": "completed",
       "content": [
         {
           "type": "output_text",
-          "annotations": [],
+          "annotations": [
+            {
+              "type": "container_file_citation",
+              "container_id": "cntr_6903bf2c0470819090b2b1e63e0b66800c139a5d654a42ec",
+              "end_index": 236,
+              "file_id": "cfile_6903bf45e3288191af3d56e6d23c3a4d",
+              "filename": "two_dice_sums_10000.txt",
+              "start_index": 195
+            }
+          ],
           "logprobs": [],
-          "text": "Here’s a quick result from simulating 10,000 rolls of two fair six-sided dice (sum per trial, then total across all trials):\n\n- Total sum of all 10,000 trial sums: 70141\n- Average sum per trial: 7.0141\n\nDistribution of the per-trial sums (2–12):\n- 2: 269 (2.69%)\n- 3: 511 (5.11%)\n- 4: 865 (8.65%)\n- 5: 1130 (11.30%)\n- 6: 1353 (13.53%)\n- 7: 1710 (17.10%)\n- 8: 1387 (13.87%)\n- 9: 1110 (11.10%)\n- 10: 828 (8.28%)\n- 11: 538 (5.38%)\n- 12: 299 (2.99%)\n\nNotes:\n- The total is around 7,0000 since the expected sum per trial is 7, so 10,000 trials ≈ 70000. Your exact total will vary with each run unless you fix a random seed.\n- If you’d like, I can provide the per-trial sums as a CSV file or share a reproducible run with a fixed seed."
+          "text": "I ran 10,000 trials of rolling two dice and summed the results.\n\n- Total sum across all 10,000 rolls: 70024\n- Per-trial sums were saved to a file. You can download it here:\n  [Download the file](sandbox:/mnt/data/two_dice_sums_10000.txt)\n\nThe file contains 10,000 lines (one sum per line) followed by a final line with the total, e.g., \"TOTAL: 70024\".\n\nIf you’d like the file in a different format (CSV, JSON) or with only the total, I can adjust and re-upload."
         }
       ],
       "role": "assistant"
@@ -97,16 +122,16 @@
   "top_p": 1,
   "truncation": "disabled",
   "usage": {
-    "input_tokens": 4071,
+    "input_tokens": 2283,
     "input_tokens_details": {
       "cached_tokens": 0
     },
-    "output_tokens": 2456,
+    "output_tokens": 1928,
     "output_tokens_details": {
-      "reasoning_tokens": 2176
+      "reasoning_tokens": 1792
     },
-    "total_tokens": 6527
+    "total_tokens": 4211
   },
   "user": null,
   "metadata": {}
-}
+}
diff --git a/packages/openai/src/responses/__snapshots__/openai-responses-language-model.test.ts.snap b/packages/openai/src/responses/__snapshots__/openai-responses-language-model.test.ts.snap
diff --git a/packages/openai/src/responses/openai-responses-api.ts b/packages/openai/src/responses/openai-responses-api.ts
diff --git a/packages/openai/src/responses/openai-responses-language-model.test.ts b/packages/openai/src/responses/openai-responses-language-model.test.ts
diff --git a/packages/openai/src/responses/openai-responses-language-model.ts b/packages/openai/src/responses/openai-responses-language-model.ts

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +'@ai-sdk/openai': patch
 +---
++
 +feat(openai): support openai code-interpreter annotations