Commit 6ab4e13

feat(ai): Add support for Thinking Budget (#9156)

1 parent 47b154c commit 6ab4e13

File tree

10 files changed: +118 additions, −0 deletions


.changeset/hip-impalas-divide.md

Lines changed: 6 additions & 0 deletions

```diff
@@ -0,0 +1,6 @@
+---
+'firebase': minor
+'@firebase/ai': minor
+---
+
+Add support for Thinking Budget.
```

common/api-review/ai.api.md

Lines changed: 7 additions & 0 deletions

```diff
@@ -381,6 +381,7 @@ export interface GenerationConfig {
     stopSequences?: string[];
     // (undocumented)
     temperature?: number;
+    thinkingConfig?: ThinkingConfig;
     // (undocumented)
     topK?: number;
     // (undocumented)
@@ -925,6 +926,11 @@ export interface TextPart {
     text: string;
 }
 
+// @public
+export interface ThinkingConfig {
+    thinkingBudget?: number;
+}
+
 // @public
 export type Tool = FunctionDeclarationsTool | GoogleSearchTool;
 
@@ -947,6 +953,7 @@ export interface UsageMetadata {
     promptTokenCount: number;
     // (undocumented)
     promptTokensDetails?: ModalityTokenCount[];
+    thoughtsTokenCount?: number;
     // (undocumented)
     totalTokenCount: number;
 }
```

docs-devsite/_toc.yaml

Lines changed: 2 additions & 0 deletions

```diff
@@ -148,6 +148,8 @@ toc:
     path: /docs/reference/js/ai.stringschema.md
   - title: TextPart
     path: /docs/reference/js/ai.textpart.md
+  - title: ThinkingConfig
+    path: /docs/reference/js/ai.thinkingconfig.md
   - title: ToolConfig
     path: /docs/reference/js/ai.toolconfig.md
   - title: UsageMetadata
```

docs-devsite/ai.generationconfig.md

Lines changed: 11 additions & 0 deletions

````diff
@@ -31,6 +31,7 @@ export interface GenerationConfig
 | [responseSchema](./ai.generationconfig.md#generationconfigresponseschema) | [TypedSchema](./ai.md#typedschema) \| [SchemaRequest](./ai.schemarequest.md#schemarequest_interface) | Output response schema of the generated candidate text. This value can be a class generated with a [Schema](./ai.schema.md#schema_class) static method like <code>Schema.string()</code> or <code>Schema.object()</code> or it can be a plain JS object matching the [SchemaRequest](./ai.schemarequest.md#schemarequest_interface) interface. <br/>Note: This only applies when the specified <code>responseMIMEType</code> supports a schema; currently this is limited to <code>application/json</code> and <code>text/x.enum</code>. |
 | [stopSequences](./ai.generationconfig.md#generationconfigstopsequences) | string\[\] | |
 | [temperature](./ai.generationconfig.md#generationconfigtemperature) | number | |
+| [thinkingConfig](./ai.generationconfig.md#generationconfigthinkingconfig) | [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models. |
 | [topK](./ai.generationconfig.md#generationconfigtopk) | number | |
 | [topP](./ai.generationconfig.md#generationconfigtopp) | number | |
 
@@ -117,6 +118,16 @@ stopSequences?: string[];
 temperature?: number;
 ```
 
+## GenerationConfig.thinkingConfig
+
+Configuration for "thinking" behavior of compatible Gemini models.
+
+<b>Signature:</b>
+
+```typescript
+thinkingConfig?: ThinkingConfig;
+```
+
 ## GenerationConfig.topK
 
 <b>Signature:</b>
````

docs-devsite/ai.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -124,6 +124,7 @@ The Firebase AI Web SDK.
 | [Segment](./ai.segment.md#segment_interface) | Represents a specific segment within a [Content](./ai.content.md#content_interface) object, often used to pinpoint the exact location of text or data that grounding information refers to. |
 | [StartChatParams](./ai.startchatparams.md#startchatparams_interface) | Params for [GenerativeModel.startChat()](./ai.generativemodel.md#generativemodelstartchat)<!-- -->. |
 | [TextPart](./ai.textpart.md#textpart_interface) | Content part interface if the part represents a text string. |
+| [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models.<!-- -->Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer. |
 | [ToolConfig](./ai.toolconfig.md#toolconfig_interface) | Tool config. This config is shared for all tools provided in the request. |
 | [UsageMetadata](./ai.usagemetadata.md#usagemetadata_interface) | Usage metadata about a [GenerateContentResponse](./ai.generatecontentresponse.md#generatecontentresponse_interface)<!-- -->. |
 | [VertexAIOptions](./ai.vertexaioptions.md#vertexaioptions_interface) | Options when initializing the Firebase AI SDK. |
```

docs-devsite/ai.thinkingconfig.md

Lines changed: 43 additions & 0 deletions

````diff
@@ -0,0 +1,43 @@
+Project: /docs/reference/js/_project.yaml
+Book: /docs/reference/_book.yaml
+page_type: reference
+
+{% comment %}
+DO NOT EDIT THIS FILE!
+This is generated by the JS SDK team, and any local changes will be
+overwritten. Changes should be made in the source code at
+https://github.com/firebase/firebase-js-sdk
+{% endcomment %}
+
+# ThinkingConfig interface
+Configuration for "thinking" behavior of compatible Gemini models.
+
+Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer.
+
+<b>Signature:</b>
+
+```typescript
+export interface ThinkingConfig
+```
+
+## Properties
+
+| Property | Type | Description |
+| --- | --- | --- |
+| [thinkingBudget](./ai.thinkingconfig.md#thinkingconfigthinkingbudget) | number | The thinking budget, in tokens.<!-- -->This parameter sets an upper limit on the number of tokens the model can use for its internal "thinking" process. A higher budget may result in higher quality responses for complex tasks but can also increase latency and cost.<!-- -->If you don't specify a budget, the model will determine the appropriate amount of thinking based on the complexity of the prompt.<!-- -->An error will be thrown if you set a thinking budget for a model that does not support this feature or if the specified budget is not within the model's supported range. |
+
+## ThinkingConfig.thinkingBudget
+
+The thinking budget, in tokens.
+
+This parameter sets an upper limit on the number of tokens the model can use for its internal "thinking" process. A higher budget may result in higher quality responses for complex tasks but can also increase latency and cost.
+
+If you don't specify a budget, the model will determine the appropriate amount of thinking based on the complexity of the prompt.
+
+An error will be thrown if you set a thinking budget for a model that does not support this feature or if the specified budget is not within the model's supported range.
+
+<b>Signature:</b>
+
+```typescript
+thinkingBudget?: number;
+```
````
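
To make the new option concrete, here is a short usage sketch. It is illustrative only: the interfaces are restated locally so the snippet stands alone, whereas in an app you would import them from `firebase/ai` and pass the config to `getGenerativeModel()`.

```typescript
// Illustrative sketch: these interfaces restate the shapes added in this
// commit so the snippet is self-contained.
interface ThinkingConfig {
  // Upper limit, in tokens, for the model's internal "thinking" process.
  thinkingBudget?: number;
}

interface GenerationConfig {
  temperature?: number;
  thinkingConfig?: ThinkingConfig;
}

// Cap thinking at 1024 tokens. Omitting thinkingConfig entirely lets the
// model choose its own budget based on prompt complexity.
const generationConfig: GenerationConfig = {
  temperature: 0.7,
  thinkingConfig: { thinkingBudget: 1024 }
};

console.log(generationConfig.thinkingConfig?.thinkingBudget); // 1024
```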

docs-devsite/ai.usagemetadata.md

Lines changed: 11 additions & 0 deletions

````diff
@@ -26,6 +26,7 @@ export interface UsageMetadata
 | [candidatesTokensDetails](./ai.usagemetadata.md#usagemetadatacandidatestokensdetails) | [ModalityTokenCount](./ai.modalitytokencount.md#modalitytokencount_interface)<!-- -->\[\] | |
 | [promptTokenCount](./ai.usagemetadata.md#usagemetadataprompttokencount) | number | |
 | [promptTokensDetails](./ai.usagemetadata.md#usagemetadataprompttokensdetails) | [ModalityTokenCount](./ai.modalitytokencount.md#modalitytokencount_interface)<!-- -->\[\] | |
+| [thoughtsTokenCount](./ai.usagemetadata.md#usagemetadatathoughtstokencount) | number | The number of tokens used by the model's internal "thinking" process. |
 | [totalTokenCount](./ai.usagemetadata.md#usagemetadatatotaltokencount) | number | |
 
 ## UsageMetadata.candidatesTokenCount
@@ -60,6 +61,16 @@ promptTokenCount: number;
 promptTokensDetails?: ModalityTokenCount[];
 ```
 
+## UsageMetadata.thoughtsTokenCount
+
+The number of tokens used by the model's internal "thinking" process.
+
+<b>Signature:</b>
+
+```typescript
+thoughtsTokenCount?: number;
+```
+
 ## UsageMetadata.totalTokenCount
 
 <b>Signature:</b>
````
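
A sketch of reading the new field from a response's usage metadata. The token counts below are hypothetical, and `UsageMetadata` is restated locally so the snippet is self-contained; in an app this object would come from a `GenerateContentResponse`.

```typescript
// Local restatement of the UsageMetadata shape from this commit.
interface UsageMetadata {
  promptTokenCount: number;
  candidatesTokenCount: number;
  thoughtsTokenCount?: number;
  totalTokenCount: number;
}

// Hypothetical numbers, shaped like the integration test's expectations.
const usage: UsageMetadata = {
  promptTokenCount: 10,
  candidatesTokenCount: 15,
  thoughtsTokenCount: 30,
  totalTokenCount: 55
};

// thoughtsTokenCount is optional: models without a thinking process omit it.
const thinking = usage.thoughtsTokenCount ?? 0;
console.log(`${thinking} of ${usage.totalTokenCount} tokens spent thinking`);
```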

packages/ai/integration/generate-content.test.ts

Lines changed: 4 additions & 0 deletions

```diff
@@ -91,6 +91,10 @@ describe('Generate Content', () => {
         2,
         TOKEN_COUNT_DELTA
       );
+      expect(response.usageMetadata!.thoughtsTokenCount).to.be.closeTo(
+        30,
+        TOKEN_COUNT_DELTA * 2
+      );
       expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
         55,
         TOKEN_COUNT_DELTA * 2
```
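
Chai's `closeTo` matcher, used above, passes when |actual − expected| ≤ delta. A minimal standalone equivalent, with `TOKEN_COUNT_DELTA` given an assumed placeholder value (its real value is defined elsewhere in the test suite):

```typescript
// Assumed placeholder; the real constant lives elsewhere in the suite.
const TOKEN_COUNT_DELTA = 5;

// Re-implementation of the tolerance check chai's closeTo performs.
function closeTo(actual: number, expected: number, delta: number): boolean {
  return Math.abs(actual - expected) <= delta;
}

console.log(closeTo(32, 30, TOKEN_COUNT_DELTA * 2)); // true
console.log(closeTo(55, 30, TOKEN_COUNT_DELTA * 2)); // false
```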

packages/ai/src/types/requests.ts

Lines changed: 29 additions & 0 deletions

```diff
@@ -113,6 +113,10 @@ export interface GenerationConfig {
    * @beta
    */
   responseModalities?: ResponseModality[];
+  /**
+   * Configuration for "thinking" behavior of compatible Gemini models.
+   */
+  thinkingConfig?: ThinkingConfig;
 }
 
 /**
@@ -266,3 +270,28 @@ export interface FunctionCallingConfig {
   mode?: FunctionCallingMode;
   allowedFunctionNames?: string[];
 }
+
+/**
+ * Configuration for "thinking" behavior of compatible Gemini models.
+ *
+ * Certain models utilize a thinking process before generating a response. This allows them to
+ * reason through complex problems and plan a more coherent and accurate answer.
+ *
+ * @public
+ */
+export interface ThinkingConfig {
+  /**
+   * The thinking budget, in tokens.
+   *
+   * This parameter sets an upper limit on the number of tokens the model can use for its internal
+   * "thinking" process. A higher budget may result in higher quality responses for complex tasks
+   * but can also increase latency and cost.
+   *
+   * If you don't specify a budget, the model will determine the appropriate amount
+   * of thinking based on the complexity of the prompt.
+   *
+   * An error will be thrown if you set a thinking budget for a model that does not support this
+   * feature or if the specified budget is not within the model's supported range.
+   */
+  thinkingBudget?: number;
+}
```
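
The doc comment promises an error for unsupported models or out-of-range budgets. That check is performed by the backend, not by this SDK code, but the contract can be sketched as a hypothetical client-side validator; the function name, the range representation, and the range values below are illustrative, not part of the SDK.

```typescript
interface ThinkingConfig {
  thinkingBudget?: number;
}

// Hypothetical validator illustrating the documented error contract.
// null means the model has no thinking support; the tuple bounds are
// illustrative placeholders, not real per-model limits.
function validateThinkingBudget(
  config: ThinkingConfig,
  supportedRange: [min: number, max: number] | null
): void {
  if (config.thinkingBudget === undefined) {
    return; // no budget set: the model decides how much to think
  }
  if (supportedRange === null) {
    throw new Error('This model does not support a thinking budget.');
  }
  const [min, max] = supportedRange;
  if (config.thinkingBudget < min || config.thinkingBudget > max) {
    throw new Error(
      `thinkingBudget ${config.thinkingBudget} is outside the supported range [${min}, ${max}].`
    );
  }
}

validateThinkingBudget({ thinkingBudget: 1024 }, [0, 24576]); // passes
```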

packages/ai/src/types/responses.ts

Lines changed: 4 additions & 0 deletions

```diff
@@ -92,6 +92,10 @@ export interface GenerateContentResponse {
 export interface UsageMetadata {
   promptTokenCount: number;
   candidatesTokenCount: number;
+  /**
+   * The number of tokens used by the model's internal "thinking" process.
+   */
+  thoughtsTokenCount?: number;
   totalTokenCount: number;
   promptTokensDetails?: ModalityTokenCount[];
   candidatesTokensDetails?: ModalityTokenCount[];
```
