diff --git a/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap b/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap index b38db39a09..82972f4f6c 100644 --- a/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap +++ b/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap @@ -3370,6 +3370,92 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, + "gpt-4.1-nano-2025-04-14:azure": { + "context": 1047576, + "crossRegion": false, + "maxTokens": 32768, + "modelId": "gpt-4.1-nano-2025-04-14", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "azure", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4.1-nano-2025-04-14:helicone": { + "context": 128000, + "crossRegion": false, + "maxTokens": 8192, + "modelId": "pa/gt-4.1-n-2025", + "parameters": [ + "max_tokens", + "stop", + "temperature", + "top_p", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4.1-nano-2025-04-14:openai": { + "context": 1047576, + "crossRegion": false, + "maxTokens": 32768, + "modelId": "gpt-4.1-nano-2025-04-14", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4.1-nano-2025-04-14:openrouter": { + "context": 1047576, + "crossRegion": false, + "maxTokens": 32768, + "modelId": "openai/gpt-4.1-nano-2025-04-14", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, "gpt-4.1-nano:azure": { 
"context": 1047576, "crossRegion": false, @@ -3592,6 +3678,94 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, + "gpt-4o-mini-search-preview-2025-03-11:azure": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "gpt-4o-mini-search-preview-2025-03-11", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "azure", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4o-mini-search-preview-2025-03-11:helicone": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "pa/gt-4p-m-sp", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "stop", + "temperature", + "top_p", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4o-mini-search-preview-2025-03-11:openai": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "gpt-4o-mini-search-preview-2025-03-11", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4o-mini-search-preview-2025-03-11:openrouter": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "openai/gpt-4o-mini-search-preview-2025-03-11", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, "gpt-4o-mini:azure": { "context": 128000, "crossRegion": false, @@ -3680,6 +3854,94 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, + 
"gpt-4o-search-preview-2025-03-11:azure": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "gpt-4o-search-preview-2025-03-11", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "azure", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4o-search-preview-2025-03-11:helicone": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "pa/gt-4p-sp", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "stop", + "temperature", + "top_p", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4o-search-preview-2025-03-11:openai": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "gpt-4o-search-preview-2025-03-11", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-4o-search-preview-2025-03-11:openrouter": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "openai/gpt-4o-search-preview-2025-03-11", + "parameters": [ + "frequency_penalty", + "max_tokens", + "presence_penalty", + "response_format", + "seed", + "stop", + "temperature", + "tool_choice", + "tools", + "top_p", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, "gpt-4o:azure": { "context": 128000, "crossRegion": false, @@ -3770,6 +4032,156 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` }, }, "openai/gpt-5": { + "gpt-5-2025-08-07:azure": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + 
"seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "azure", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-2025-08-07:helicone": { + "context": 128000, + "crossRegion": false, + "maxTokens": 32768, + "modelId": "pa/gpt-5-2025-08-07", + "parameters": [ + "max_completion_tokens", + "stop", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-2025-08-07:openai": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-2025-08-07:openrouter": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "openai/gpt-5-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-chat-2025-08-07:azure": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "gpt-5-chat-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "azure", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-chat-2025-08-07:helicone": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "pa/gpt-5-chat-2025-08-07", + "parameters": [ + "max_completion_tokens", + "stop", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-chat-2025-08-07:openai": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "gpt-5-chat-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + 
"tool_choice", + "tools", + "verbosity", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-chat-2025-08-07:openrouter": { + "context": 128000, + "crossRegion": false, + "maxTokens": 16384, + "modelId": "openai/gpt-5-chat-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, "gpt-5-chat-latest:helicone": { "context": 128000, "crossRegion": false, @@ -3814,21 +4226,168 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "max_completion_tokens", "response_format", "seed", - "stop", + "stop", + "tool_choice", + "tools", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-codex:helicone": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "pa/gpt-5-codex", + "parameters": [ + "max_completion_tokens", + "stop", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-codex:openai": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5-codex", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-codex:openrouter": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "openai/gpt-5-codex", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-mini-2025-08-07:azure": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5-mini-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + 
"tool_choice", + "tools", + "verbosity", + ], + "provider": "azure", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-mini-2025-08-07:helicone": { + "context": 128000, + "crossRegion": false, + "maxTokens": 32768, + "modelId": "pa/gpt-5-mini-2025-08-07", + "parameters": [ + "max_completion_tokens", + "stop", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-mini-2025-08-07:openai": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5-mini-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-mini-2025-08-07:openrouter": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "openai/gpt-5-mini-2025-08-07", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5-mini:azure": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5-mini", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "structured_outputs", "tool_choice", "tools", ], - "provider": "openrouter", + "provider": "azure", "ptbEnabled": true, "regions": [ "*", ], }, - "gpt-5-codex:helicone": { - "context": 400000, + "gpt-5-mini:helicone": { + "context": 128000, "crossRegion": false, - "maxTokens": 128000, - "modelId": "pa/gpt-5-codex", + "maxTokens": 16384, + "modelId": "pa/gpt-5-mini", "parameters": [ "max_completion_tokens", "stop", @@ -3839,11 +4398,11 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, - "gpt-5-codex:openai": { + "gpt-5-mini:openai": { "context": 400000, "crossRegion": false, "maxTokens": 128000, - "modelId": "gpt-5-codex", + 
"modelId": "gpt-5-mini", "parameters": [ "max_completion_tokens", "response_format", @@ -3851,6 +4410,7 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "stop", "tool_choice", "tools", + "verbosity", ], "provider": "openai", "ptbEnabled": true, @@ -3858,11 +4418,11 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, - "gpt-5-codex:openrouter": { + "gpt-5-mini:openrouter": { "context": 400000, "crossRegion": false, "maxTokens": 128000, - "modelId": "openai/gpt-5-codex", + "modelId": "openai/gpt-5-mini", "parameters": [ "max_completion_tokens", "response_format", @@ -3877,18 +4437,19 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, - "gpt-5-mini:azure": { + "gpt-5-nano-2025-08-07:azure": { "context": 400000, "crossRegion": false, "maxTokens": 128000, - "modelId": "gpt-5-mini", + "modelId": "gpt-5-nano-2025-08-07", "parameters": [ "max_completion_tokens", "response_format", "seed", - "structured_outputs", + "stop", "tool_choice", "tools", + "verbosity", ], "provider": "azure", "ptbEnabled": true, @@ -3896,11 +4457,11 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, - "gpt-5-mini:helicone": { + "gpt-5-nano-2025-08-07:helicone": { "context": 128000, "crossRegion": false, - "maxTokens": 16384, - "modelId": "pa/gpt-5-mini", + "maxTokens": 32768, + "modelId": "pa/gpt-5-nano-2025-08-07", "parameters": [ "max_completion_tokens", "stop", @@ -3911,11 +4472,11 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, - "gpt-5-mini:openai": { + "gpt-5-nano-2025-08-07:openai": { "context": 400000, "crossRegion": false, "maxTokens": 128000, - "modelId": "gpt-5-mini", + "modelId": "gpt-5-nano-2025-08-07", "parameters": [ "max_completion_tokens", "response_format", @@ -3931,11 +4492,11 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, - "gpt-5-mini:openrouter": { + "gpt-5-nano-2025-08-07:openrouter": { 
"context": 400000, "crossRegion": false, "maxTokens": 128000, - "modelId": "openai/gpt-5-mini", + "modelId": "openai/gpt-5-nano-2025-08-07", "parameters": [ "max_completion_tokens", "response_format", @@ -3943,6 +4504,7 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "stop", "tool_choice", "tools", + "verbosity", ], "provider": "openrouter", "ptbEnabled": true, @@ -4128,6 +4690,80 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, + "gpt-5.1-2025-11-13:azure": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5.1-2025-11-13", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "azure", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5.1-2025-11-13:helicone": { + "context": 128000, + "crossRegion": false, + "maxTokens": 32768, + "modelId": "pa/gpt-5.1-2025-11-13", + "parameters": [ + "max_completion_tokens", + "stop", + ], + "provider": "helicone", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5.1-2025-11-13:openai": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "gpt-5.1-2025-11-13", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + "verbosity", + ], + "provider": "openai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, + "gpt-5.1-2025-11-13:openrouter": { + "context": 400000, + "crossRegion": false, + "maxTokens": 128000, + "modelId": "openai/gpt-5.1-2025-11-13", + "parameters": [ + "max_completion_tokens", + "response_format", + "seed", + "stop", + "tool_choice", + "tools", + ], + "provider": "openrouter", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, "gpt-5.1-chat-latest:openai": { "context": 128000, "crossRegion": false, @@ -5348,6 +5984,8 @@ exports[`Registry Snapshots model coverage snapshot 1`] = ` "azure", "azure", "azure", + 
"azure", + "helicone", "helicone", "helicone", "helicone", @@ -5355,37 +5993,63 @@ exports[`Registry Snapshots model coverage snapshot 1`] = ` "openai", "openai", "openai", + "openai", + "openrouter", "openrouter", "openrouter", "openrouter", "openrouter", ], "openai/gpt-4o": [ + "azure", + "azure", "azure", "azure", "helicone", "helicone", + "helicone", + "helicone", + "openai", + "openai", "openai", "openai", "openai", "openrouter", "openrouter", "openrouter", + "openrouter", + "openrouter", ], "openai/gpt-5": [ "azure", "azure", + "azure", + "azure", + "azure", + "azure", + "helicone", + "helicone", "helicone", "helicone", "helicone", "helicone", "helicone", "helicone", + "helicone", + "helicone", + "openai", + "openai", "openai", "openai", "openai", "openai", "openai", + "openai", + "openai", + "openrouter", + "openrouter", + "openrouter", + "openrouter", "openrouter", "openrouter", "openrouter", @@ -5393,11 +6057,15 @@ exports[`Registry Snapshots model coverage snapshot 1`] = ` "openrouter", ], "openai/gpt-5.1": [ + "azure", + "helicone", "openai", "openai", "openai", "openai", "openai", + "openai", + "openrouter", "openrouter", "openrouter", "openrouter", @@ -6724,12 +7392,12 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "azure": [ { "cacheMultipliers": { - "cachedInput": 0.3, + "cachedInput": 0.25, }, "input": 1e-7, "output": 4e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], "helicone": [ @@ -6737,10 +7405,10 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "cacheMultipliers": { "cachedInput": 0.25, }, - "input": 4e-7, - "output": 0.0000016, + "input": 1e-7, + "output": 4e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], "openai": [ @@ -6751,7 +7419,7 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "input": 1e-7, "output": 4e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], "openrouter": [ @@ -6759,7 +7427,7 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "input": 
1.1e-7, "output": 4.2e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], }, @@ -6772,7 +7440,7 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "input": 1.5e-7, "output": 6e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], "helicone": [ @@ -6783,7 +7451,7 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "input": 1.5e-7, "output": 6e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], "openai": [ @@ -6791,18 +7459,18 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "cacheMultipliers": { "cachedInput": 0.5, }, - "input": 0.000005, - "output": 0.00002, + "input": 1.5e-7, + "output": 6e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], "openrouter": [ { - "input": 0.00000528, - "output": 0.00001582, + "input": 1.6e-7, + "output": 6.3e-7, "threshold": 0, - "web_search": 0.01, + "web_search": 0.02, }, ], }, @@ -6812,8 +7480,8 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "cacheMultipliers": { "cachedInput": 0.104, }, - "input": 0.00000125, - "output": 0.00001, + "input": 2.5e-7, + "output": 0.000002, "threshold": 0, "web_search": 0.01, }, @@ -6823,8 +7491,8 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "cacheMultipliers": { "cachedInput": 0.1, }, - "input": 0.00000125, - "output": 0.00001, + "input": 2.5e-7, + "output": 0.000002, "threshold": 0, "web_search": 0.01, }, @@ -6834,37 +7502,59 @@ exports[`Registry Snapshots pricing snapshot 1`] = ` "cacheMultipliers": { "cachedInput": 0.1, }, - "input": 0.00000125, - "output": 0.00001, + "input": 2.5e-7, + "output": 0.000002, "threshold": 0, "web_search": 0.01, }, ], "openrouter": [ { - "input": 0.00000132, - "output": 0.00001055, + "input": 2.6e-7, + "output": 0.00000211, "threshold": 0, "web_search": 0.01, }, ], }, "openai/gpt-5.1": { + "azure": [ + { + "cacheMultipliers": { + "cachedInput": 0.104, + }, + "input": 0.00000125, + "output": 0.00001, + "threshold": 0, + "web_search": 0.01, + }, + ], + "helicone": 
[ + { + "cacheMultipliers": { + "cachedInput": 0.1, + }, + "input": 0.00000125, + "output": 0.00001, + "threshold": 0, + "web_search": 0.01, + }, + ], "openai": [ { "cacheMultipliers": { - "cachedInput": 0.25, + "cachedInput": 0.1, }, - "input": 0.0000015, - "output": 0.000006, + "input": 0.00000125, + "output": 0.00001, "threshold": 0, "web_search": 0.01, }, ], "openrouter": [ { - "input": 0.00000158, - "output": 0.00000633, + "input": 0.00000132, + "output": 0.00001055, "threshold": 0, "web_search": 0.01, }, @@ -7341,6 +8031,15 @@ exports[`Registry Snapshots verify registry state 1`] = ` "openrouter", ], }, + { + "model": "gpt-4.1-nano-2025-04-14", + "providers": [ + "azure", + "helicone", + "openai", + "openrouter", + ], + }, { "model": "gpt-4o", "providers": [ @@ -7359,6 +8058,24 @@ exports[`Registry Snapshots verify registry state 1`] = ` "openrouter", ], }, + { + "model": "gpt-4o-mini-search-preview-2025-03-11", + "providers": [ + "azure", + "helicone", + "openai", + "openrouter", + ], + }, + { + "model": "gpt-4o-search-preview-2025-03-11", + "providers": [ + "azure", + "helicone", + "openai", + "openrouter", + ], + }, { "model": "gpt-5", "providers": [ @@ -7368,6 +8085,24 @@ exports[`Registry Snapshots verify registry state 1`] = ` "openrouter", ], }, + { + "model": "gpt-5-2025-08-07", + "providers": [ + "azure", + "helicone", + "openai", + "openrouter", + ], + }, + { + "model": "gpt-5-chat-2025-08-07", + "providers": [ + "azure", + "helicone", + "openai", + "openrouter", + ], + }, { "model": "gpt-5-chat-latest", "providers": [ @@ -7393,6 +8128,15 @@ exports[`Registry Snapshots verify registry state 1`] = ` "openrouter", ], }, + { + "model": "gpt-5-mini-2025-08-07", + "providers": [ + "azure", + "helicone", + "openai", + "openrouter", + ], + }, { "model": "gpt-5-nano", "providers": [ @@ -7401,6 +8145,15 @@ exports[`Registry Snapshots verify registry state 1`] = ` "openrouter", ], }, + { + "model": "gpt-5-nano-2025-08-07", + "providers": [ + "azure", + 
"helicone", + "openai", + "openrouter", + ], + }, { "model": "gpt-5-pro", "providers": [ @@ -7414,6 +8167,15 @@ exports[`Registry Snapshots verify registry state 1`] = ` "openrouter", ], }, + { + "model": "gpt-5.1-2025-11-13", + "providers": [ + "azure", + "helicone", + "openai", + "openrouter", + ], + }, { "model": "gpt-5.1-chat-latest", "providers": [ @@ -7746,7 +8508,7 @@ exports[`Registry Snapshots verify registry state 1`] = ` "provider": "anthropic", }, { - "modelCount": 10, + "modelCount": 18, "provider": "azure", }, { @@ -7786,7 +8548,7 @@ exports[`Registry Snapshots verify registry state 1`] = ` "provider": "groq", }, { - "modelCount": 29, + "modelCount": 37, "provider": "helicone", }, { @@ -7802,11 +8564,11 @@ exports[`Registry Snapshots verify registry state 1`] = ` "provider": "novita", }, { - "modelCount": 21, + "modelCount": 29, "provider": "openai", }, { - "modelCount": 55, + "modelCount": 63, "provider": "openrouter", }, { @@ -7856,15 +8618,23 @@ exports[`Registry Snapshots verify registry state 1`] = ` "gpt-4.1-mini", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano", + "gpt-4.1-nano-2025-04-14", "gpt-4o", "gpt-4o-mini", + "gpt-4o-mini-search-preview-2025-03-11", + "gpt-4o-search-preview-2025-03-11", "gpt-5", + "gpt-5-2025-08-07", + "gpt-5-chat-2025-08-07", "gpt-5-chat-latest", "gpt-5-codex", "gpt-5-mini", + "gpt-5-mini-2025-08-07", "gpt-5-nano", + "gpt-5-nano-2025-08-07", "gpt-5-pro", "gpt-5.1", + "gpt-5.1-2025-11-13", "gpt-5.1-chat-latest", "gpt-5.1-codex", "gpt-5.1-codex-mini", @@ -7922,9 +8692,9 @@ exports[`Registry Snapshots verify registry state 1`] = ` "claude-3.5-haiku:anthropic:*", ], "totalArchivedConfigs": 0, - "totalEndpoints": 230, - "totalModelProviderConfigs": 230, - "totalModelsWithPtb": 89, + "totalEndpoints": 262, + "totalModelProviderConfigs": 262, + "totalModelsWithPtb": 97, "totalProviders": 20, } `; diff --git a/packages/cost/models/authors/openai/gpt-4.1/endpoints.ts b/packages/cost/models/authors/openai/gpt-4.1/endpoints.ts index 
846e375d3d..cc9c159c4b 100644 --- a/packages/cost/models/authors/openai/gpt-4.1/endpoints.ts +++ b/packages/cost/models/authors/openai/gpt-4.1/endpoints.ts @@ -11,7 +11,7 @@ export const endpoints = { threshold: 0, input: 0.000002, output: 0.000008, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, }, @@ -50,7 +50,7 @@ export const endpoints = { threshold: 0, input: 0.0000004, output: 0.0000016, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, }, @@ -89,7 +89,7 @@ export const endpoints = { threshold: 0, input: 0.0000004, output: 0.0000016, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, }, @@ -128,7 +128,7 @@ export const endpoints = { threshold: 0, input: 0.0000001, output: 0.0000004, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, }, @@ -167,7 +167,7 @@ export const endpoints = { threshold: 0, input: 0.000002, output: 0.000008, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, }, @@ -205,7 +205,7 @@ export const endpoints = { threshold: 0, input: 0.0000004, output: 0.0000016, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, }, @@ -243,7 +243,7 @@ export const endpoints = { threshold: 0, input: 0.0000004, output: 0.0000016, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, }, @@ -281,9 +281,9 @@ export const endpoints = { threshold: 0, input: 0.0000001, output: 0.0000004, - web_search: 0.01, // $10 
per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { - cachedInput: 0.3, + cachedInput: 0.25, }, }, ], @@ -319,7 +319,7 @@ export const endpoints = { threshold: 0, input: 0.00000211, // $2.11/1M - worst-case: $2.00/1M (OpenAI) * 1.055 output: 0.00000844, // $8.44/1M - worst-case: $8.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 1_047_576, @@ -350,7 +350,7 @@ export const endpoints = { threshold: 0, input: 0.00000042, // $0.42/1M - worst-case: $0.40/1M (OpenAI) * 1.055 output: 0.00000169, // $1.69/1M - worst-case: $1.60/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 1_047_576, @@ -381,7 +381,7 @@ export const endpoints = { threshold: 0, input: 0.00000042, // $0.42/1M - worst-case: $0.40/1M (OpenAI) * 1.055 output: 0.00000169, // $1.69/1M - worst-case: $1.60/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 1_047_576, @@ -412,7 +412,7 @@ export const endpoints = { threshold: 0, input: 0.00000011, // $0.11/1M - worst-case: $0.10/1M (OpenAI) * 1.055 output: 0.00000042, // $0.42/1M - worst-case: $0.40/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 1_047_576, @@ -443,7 +443,7 @@ export const endpoints = { threshold: 0, input: 0.000002, // $2.00 per 1M tokens output: 0.000008, // $8.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, // $0.50 per 1M tokens }, @@ -473,7 +473,7 @@ export const endpoints = { threshold: 0, input: 0.0000001, // $0.10 per 1M tokens output: 0.0000004, // $0.40 per 1M tokens - web_search: 0.01, 
// $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, // $0.025 per 1M tokens }, @@ -496,7 +496,7 @@ export const endpoints = { threshold: 0, input: 0.0000004, // $0.40 per 1M tokens output: 0.0000016, // $1.60 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, // $0.10 per 1M tokens }, @@ -517,6 +517,137 @@ export const endpoints = { "*": {}, }, }, + "gpt-4.1-nano-2025-04-14:openai": { + providerModelId: "gpt-4.1-nano-2025-04-14", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.0000001, + output: 0.0000004, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.25, + }, + }, + ], + contextLength: 1047576, + maxCompletionTokens: 32768, + rateLimits: { + rpm: 30000, + tpm: 150000000, + tpd: 15000000000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4.1-nano-2025-04-14:azure": { + providerModelId: "gpt-4.1-nano-2025-04-14", + provider: "azure", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.0000001, + output: 0.0000004, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.25, + }, + }, + ], + contextLength: 1047576, + maxCompletionTokens: 32768, + rateLimits: { + rpm: 200, + tpm: 200000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4.1-nano-2025-04-14:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: 
"openai/gpt-4.1-nano-2025-04-14", + pricing: [ + { + threshold: 0, + input: 0.00000011, // $0.11/1M - worst-case: $0.10/1M (OpenAI) * 1.055 + output: 0.00000042, // $0.42/1M - worst-case: $0.40/1M (OpenAI) * 1.055 + web_search: 0.02, // $20 per 1000 searches + }, + ], + contextLength: 1_047_576, + maxCompletionTokens: 32_768, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4.1-nano-2025-04-14:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gt-4.1-n-2025", + pricing: [ + { + threshold: 0, + input: 0.0000001, // $0.10 per 1M tokens + output: 0.0000004, // $0.40 per 1M tokens + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.25, // $0.025 per 1M tokens + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 8192, + supportedParameters: ["max_tokens", "temperature", "top_p", "stop"], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, } satisfies Partial< Record<`${GPT41ModelName}:${ModelProviderName}`, ModelProviderConfig> >; diff --git a/packages/cost/models/authors/openai/gpt-4.1/models.ts b/packages/cost/models/authors/openai/gpt-4.1/models.ts index 0e9f66a208..9d7fafac0e 100644 --- a/packages/cost/models/authors/openai/gpt-4.1/models.ts +++ b/packages/cost/models/authors/openai/gpt-4.1/models.ts @@ -46,6 +46,18 @@ export const models = { tokenizer: "GPT", pinnedVersionOfModel: "gpt-4.1-mini", }, + "gpt-4.1-nano-2025-04-14": { + name: "OpenAI GPT-4.1 Nano", + author: "openai", + description: + "For tasks that demand low latency, GPT-4.1 nano is the fastest and cheapest model in the GPT-4.1 series. 
It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT-4o mini. It's ideal for tasks like classification or autocompletion.", + contextLength: 1047576, + maxOutputTokens: 32768, + created: "2025-04-14T17:22:49.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-4.1-nano", + }, } satisfies Record; export type GPT41ModelName = keyof typeof models; diff --git a/packages/cost/models/authors/openai/gpt-4o/endpoints.ts b/packages/cost/models/authors/openai/gpt-4o/endpoints.ts index 805eb2307a..61b420cc53 100644 --- a/packages/cost/models/authors/openai/gpt-4o/endpoints.ts +++ b/packages/cost/models/authors/openai/gpt-4o/endpoints.ts @@ -11,7 +11,7 @@ export const endpoints = { threshold: 0, input: 0.0000025, output: 0.00001, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.5, }, @@ -50,7 +50,7 @@ export const endpoints = { threshold: 0, input: 0.0000025, output: 0.00001, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.5, }, @@ -88,7 +88,7 @@ export const endpoints = { threshold: 0, input: 0.00000015, output: 0.0000006, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.5, }, @@ -127,7 +127,7 @@ export const endpoints = { threshold: 0, input: 0.00000015, output: 0.0000006, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.5, }, @@ -165,7 +165,7 @@ export const endpoints = { threshold: 0, input: 0.000005, output: 0.00002, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 
per 1000 searches cacheMultipliers: { cachedInput: 0.5, }, @@ -204,7 +204,7 @@ export const endpoints = { threshold: 0, input: 0.00000264, // $2.64/1M - worst-case: $2.50/1M (OpenAI) * 1.055 output: 0.00001055, // $10.55/1M - worst-case: $10.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 128_000, @@ -235,7 +235,7 @@ export const endpoints = { threshold: 0, input: 0.00000016, // $0.16/1M - worst-case: $0.15/1M (OpenAI) * 1.055 output: 0.00000063, // $0.63/1M - worst-case: $0.60/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 128_000, @@ -266,7 +266,7 @@ export const endpoints = { threshold: 0, input: 0.00000528, // $5.28/1M - worst-case: $5.00/1M (OpenAI) * 1.055 output: 0.00001582, // $15.82/1M - worst-case: $15.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 128_000, @@ -297,7 +297,7 @@ export const endpoints = { threshold: 0, input: 0.0000025, // $2.50 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.5, // $1.25 per 1M tokens }, @@ -327,7 +327,283 @@ export const endpoints = { threshold: 0, input: 0.00000015, // $0.15 per 1M tokens output: 0.0000006, // $0.60 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.5, // $0.075 per 1M tokens + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + supportedParameters: [ + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + 
"gpt-4o-search-preview-2025-03-11:openai": { + providerModelId: "gpt-4o-search-preview-2025-03-11", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.0000025, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.5, + }, + }, + ], + rateLimits: { + rpm: 10000, + tpm: 30000000, + tpd: 15000000000, + }, + contextLength: 128000, + maxCompletionTokens: 16384, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4o-search-preview-2025-03-11:azure": { + providerModelId: "gpt-4o-search-preview-2025-03-11", + provider: "azure", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.0000025, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.5, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + rateLimits: { + rpm: 300, + tpm: 50000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4o-search-preview-2025-03-11:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: "openai/gpt-4o-search-preview-2025-03-11", + pricing: [ + { + threshold: 0, + input: 0.00000264, // $2.64/1M - worst-case: $2.50/1M (OpenAI) * 1.055 + output: 0.00001055, // $10.55/1M - worst-case: $10.00/1M (OpenAI) * 1.055 + web_search: 0.02, // $20 per 1000 searches + }, + ], + contextLength: 128_000, + maxCompletionTokens: 16_384, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + 
], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4o-search-preview-2025-03-11:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gt-4p-sp", + pricing: [ + { + threshold: 0, + input: 0.0000025, // $2.50 per 1M tokens + output: 0.00001, // $10.00 per 1M tokens + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.5, // $1.25 per 1M tokens + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + supportedParameters: [ + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4o-mini-search-preview-2025-03-11:openai": { + providerModelId: "gpt-4o-mini-search-preview-2025-03-11", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.00000015, + output: 0.0000006, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.5, + }, + }, + ], + rateLimits: { + rpm: 30000, + tpm: 150000000, + tpd: 15000000000, + }, + contextLength: 128000, + maxCompletionTokens: 16384, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4o-mini-search-preview-2025-03-11:azure": { + providerModelId: "gpt-4o-mini-search-preview-2025-03-11", + provider: "azure", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.00000015, + output: 0.0000006, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.5, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + rateLimits: { + rpm: 2000, + tpm: 200000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + 
"presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4o-mini-search-preview-2025-03-11:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: "openai/gpt-4o-mini-search-preview-2025-03-11", + pricing: [ + { + threshold: 0, + input: 0.00000016, // $0.16/1M - worst-case: $0.15/1M (OpenAI) * 1.055 + output: 0.00000063, // $0.63/1M - worst-case: $0.60/1M (OpenAI) * 1.055 + web_search: 0.02, // $20 per 1000 searches + }, + ], + contextLength: 128_000, + maxCompletionTokens: 16_384, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_tokens", + "response_format", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-4o-mini-search-preview-2025-03-11:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gt-4p-m-sp", + pricing: [ + { + threshold: 0, + input: 0.00000015, // $0.15 per 1M tokens + output: 0.0000006, // $0.60 per 1M tokens + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.5, // $0.075 per 1M tokens }, diff --git a/packages/cost/models/authors/openai/gpt-4o/models.ts b/packages/cost/models/authors/openai/gpt-4o/models.ts index daf9ba2fb8..2288abd567 100644 --- a/packages/cost/models/authors/openai/gpt-4o/models.ts +++ b/packages/cost/models/authors/openai/gpt-4o/models.ts @@ -34,6 +34,30 @@ export const models = { modality: { inputs: ["text", "image"], outputs: ["text"] }, tokenizer: "GPT", }, + "gpt-4o-search-preview-2025-03-11": { + name: "OpenAI GPT-4o Search Preview", + author: "openai", + description: + 'GPT-4o ("o" for "omni") is OpenAI\'s latest AI model with enhanced web search capabilities. This preview version supports both text and image inputs with text outputs, maintains the intelligence level of GPT-4 Turbo while being twice as fast and 50% more cost-effective. 
It includes improved performance in processing non-English languages and enhanced visual capabilities, plus integrated web search functionality.', + contextLength: 128000, + maxOutputTokens: 16384, + created: "2025-03-11T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-4o", + }, + "gpt-4o-mini-search-preview-2025-03-11": { + name: "OpenAI GPT-4o-mini Search Preview", + author: "openai", + description: + "GPT-4o mini is OpenAI's most cost-effective model with enhanced web search capabilities. This preview version supports both text and image inputs with text outputs. As their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than GPT-3.5 Turbo. It maintains SOTA intelligence while being significantly more cost-effective, with integrated web search functionality.", + contextLength: 128000, + maxOutputTokens: 16384, + created: "2025-03-11T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-4o-mini", + }, } satisfies Record; export type GPT4oModelName = keyof typeof models; diff --git a/packages/cost/models/authors/openai/gpt-5.1/endpoints.ts b/packages/cost/models/authors/openai/gpt-5.1/endpoints.ts index 0eb583a1d4..9b0d540021 100644 --- a/packages/cost/models/authors/openai/gpt-5.1/endpoints.ts +++ b/packages/cost/models/authors/openai/gpt-5.1/endpoints.ts @@ -12,7 +12,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, // $1.25 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.125 per 1M tokens }, @@ -58,7 +58,7 @@ export const endpoints = { threshold: 0, input: 0.00000132, // $1.32/1M - worst-case: $1.25/1M (OpenAI) * 1.055 output: 0.00001055, // $10.55/1M - worst-case: 
$10.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 400_000, @@ -95,7 +95,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, // $1.25 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.125 per 1M tokens }, @@ -141,7 +141,7 @@ export const endpoints = { threshold: 0, input: 0.00000132, // $1.32/1M - worst-case: $1.25/1M (OpenAI) * 1.055 output: 0.00001055, // $10.55/1M - worst-case: $10.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 400_000, @@ -179,7 +179,7 @@ export const endpoints = { threshold: 0, input: 0.00000025, // $0.25 per 1M tokens output: 0.000002, // $2.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.025 per 1M tokens }, @@ -225,7 +225,7 @@ export const endpoints = { threshold: 0, input: 0.00000026, // $0.26/1M - worst-case: $0.25/1M (OpenAI) * 1.055 output: 0.00000211, // $2.11/1M - worst-case: $2.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 400_000, @@ -263,7 +263,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, // $1.25 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.125 per 1M tokens }, @@ -310,7 +310,7 @@ export const endpoints = { threshold: 0, input: 0.00000132, // $1.32/1M - worst-case: $1.25/1M (OpenAI) * 1.055 output: 0.00001055, // $10.55/1M - worst-case: $10.00/1M (OpenAI) * 1.055 
- web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 128_000, @@ -348,7 +348,7 @@ export const endpoints = { threshold: 0, input: 0.0000015, // $1.50 per 1M tokens output: 0.000006, // $6.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.25, // $0.375 per 1M tokens }, @@ -394,7 +394,7 @@ export const endpoints = { threshold: 0, input: 0.00000158, // $1.58/1M - worst-case: $1.50/1M (OpenAI) * 1.055 output: 0.00000633, // $6.33/1M - worst-case: $6.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 200_000, @@ -423,6 +423,170 @@ export const endpoints = { "*": {}, }, }, + "gpt-5.1-2025-11-13:openai": { + providerModelId: "gpt-5.1-2025-11-13", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 15000, + tpm: 40000000, + tpd: 15000000000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5.1-2025-11-13:azure": { + provider: "azure", + author: "openai", + providerModelId: "gpt-5.1-2025-11-13", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.104, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 50, + tpm: 100000, + }, + 
supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5.1-2025-11-13:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: "openai/gpt-5.1-2025-11-13", + pricing: [ + { + threshold: 0, + input: 0.00000132, + output: 0.00001055, + web_search: 0.02, // $20 per 1000 searches + }, + ], + contextLength: 400_000, + maxCompletionTokens: 128_000, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5.1-2025-11-13:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gpt-5.1-2025-11-13", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 32768, + supportedParameters: [ + "max_completion_tokens", + "stop", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, } satisfies Partial< Record<`${GPT51ModelName}:${ModelProviderName}`, ModelProviderConfig> >; diff --git a/packages/cost/models/authors/openai/gpt-5.1/models.ts b/packages/cost/models/authors/openai/gpt-5.1/models.ts index 0d297e2b29..3015db1a13 100644 --- a/packages/cost/models/authors/openai/gpt-5.1/models.ts +++ 
b/packages/cost/models/authors/openai/gpt-5.1/models.ts @@ -53,6 +53,18 @@ export const models = { modality: { inputs: ["text", "image"], outputs: ["text"] }, tokenizer: "GPT", }, + "gpt-5.1-2025-11-13": { + name: "OpenAI GPT-5.1", + author: "openai", + description: + "GPT-5.1 is an enhanced version of GPT-5 with improved performance and capabilities. It features the same 400K context window and advanced tool calling capabilities as GPT-5, with optimized pricing for better cost efficiency.", + contextLength: 400000, + maxOutputTokens: 128000, + created: "2025-11-13T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-5.1", + }, } satisfies Record; export type GPT51ModelName = keyof typeof models; diff --git a/packages/cost/models/authors/openai/gpt-5/endpoints.ts b/packages/cost/models/authors/openai/gpt-5/endpoints.ts index f0fcad915b..4f65ac57bf 100644 --- a/packages/cost/models/authors/openai/gpt-5/endpoints.ts +++ b/packages/cost/models/authors/openai/gpt-5/endpoints.ts @@ -12,7 +12,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, output: 0.00001, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, }, @@ -58,7 +58,7 @@ export const endpoints = { threshold: 0, input: 0.00000025, output: 0.000002, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, }, @@ -104,7 +104,7 @@ export const endpoints = { threshold: 0, input: 0.00000025, output: 0.000002, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.12, }, @@ -144,7 +144,7 @@ export const endpoints = { threshold: 0, input: 0.00000005, output: 0.0000004, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 
1000 searches cacheMultipliers: { cachedInput: 0.1, }, @@ -190,7 +190,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, output: 0.00001, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, }, @@ -237,7 +237,7 @@ export const endpoints = { threshold: 0, input: 0.00000132, // $1.32/1M - worst-case: $1.25/1M (OpenAI) * 1.055 output: 0.00001055, // $10.55/1M - worst-case: $10.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 400_000, @@ -274,7 +274,7 @@ export const endpoints = { threshold: 0, input: 0.00000026, // $0.26/1M - worst-case: $0.25/1M (OpenAI) * 1.055 output: 0.00000211, // $2.11/1M - worst-case: $2.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 400_000, @@ -311,7 +311,7 @@ export const endpoints = { threshold: 0, input: 0.00000005, // $0.05/1M - worst-case: $0.05/1M (OpenAI) * 1.055 output: 0.00000042, // $0.42/1M - worst-case: $0.40/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 400_000, @@ -348,7 +348,7 @@ export const endpoints = { threshold: 0, input: 0.00000132, // $1.32/1M - worst-case: $1.25/1M (OpenAI) * 1.055 output: 0.00001055, // $10.55/1M - worst-case: $10.00/1M (OpenAI) * 1.055 - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 128_000, @@ -386,7 +386,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, // $1.25 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.125 per 1M tokens }, @@ -422,7 
+422,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, // $1.25 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.104, // $0.13 per 1M tokens }, @@ -464,7 +464,7 @@ export const endpoints = { threshold: 0, input: 0.00000025, // $0.25 per 1M tokens output: 0.000002, // $2.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.025 per 1M tokens }, @@ -500,7 +500,7 @@ export const endpoints = { threshold: 0, input: 0.00000005, // $0.05 per 1M tokens output: 0.0000004, // $0.40 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.005 per 1M tokens }, @@ -534,7 +534,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, // $1.25 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.125 per 1M tokens }, @@ -571,7 +571,7 @@ export const endpoints = { threshold: 0, input: 0.000015, // $15.00 per 1M tokens output: 0.00012, // $120.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 128000, @@ -604,7 +604,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, output: 0.00001, - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, }, @@ -650,7 +650,7 @@ export const endpoints = { threshold: 0, input: 0.00000132, // $1.32/1M - worst-case: $1.25/1M (OpenAI) * 1.055 output: 0.00001055, // $10.55/1M - worst-case: $10.00/1M (OpenAI) * 1.055 - 
web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches }, ], contextLength: 400_000, @@ -688,7 +688,7 @@ export const endpoints = { threshold: 0, input: 0.00000125, // $1.25 per 1M tokens output: 0.00001, // $10.00 per 1M tokens - web_search: 0.01, // $10 per 1000 searches (1:1 USD; 10/1K) + web_search: 0.02, // $20 per 1000 searches cacheMultipliers: { cachedInput: 0.1, // $0.125 per 1M tokens }, @@ -716,6 +716,666 @@ export const endpoints = { "*": {}, }, }, + "gpt-5-2025-08-07:openai": { + providerModelId: "gpt-5-2025-08-07", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 15000, + tpm: 40000000, + tpd: 15000000000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-2025-08-07:azure": { + provider: "azure", + author: "openai", + providerModelId: "gpt-5-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.104, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 50, + tpm: 100000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": 
{}, + }, + }, + "gpt-5-2025-08-07:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: "openai/gpt-5-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000132, + output: 0.00001055, + web_search: 0.02, // $20 per 1000 searches + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-2025-08-07:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gpt-5-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 32768, + supportedParameters: [ + "max_completion_tokens", + "stop", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-mini-2025-08-07:openai": { + providerModelId: "gpt-5-mini-2025-08-07", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.00000025, + output: 0.000002, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 30000, + tpm: 180000000, + tpd: 15000000000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + 
"max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-mini-2025-08-07:azure": { + provider: "azure", + author: "openai", + providerModelId: "gpt-5-mini-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000025, + output: 0.000002, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.104, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 200, + tpm: 200000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-mini-2025-08-07:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: "openai/gpt-5-mini-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000026, + output: 0.00000211, + web_search: 0.02, // $20 per 1000 searches + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-mini-2025-08-07:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gpt-5-mini-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000025, + output: 0.000002, + web_search: 0.02, // $20 per 1000 searches + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 32768, + supportedParameters: [ + "max_completion_tokens", + "stop", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + 
"frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-nano-2025-08-07:openai": { + providerModelId: "gpt-5-nano-2025-08-07", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.00000005, + output: 0.0000004, + web_search: 0.01, + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 30000, + tpm: 180000000, + tpd: 15000000000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-nano-2025-08-07:azure": { + provider: "azure", + author: "openai", + providerModelId: "gpt-5-nano-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000005, + output: 0.0000004, + web_search: 0.01, + cacheMultipliers: { + cachedInput: 0.104, + }, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + rateLimits: { + rpm: 200, + tpm: 200000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-nano-2025-08-07:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: "openai/gpt-5-nano-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000005, + output: 0.00000042, + web_search: 0.01, + }, + ], + contextLength: 400000, + maxCompletionTokens: 128000, + supportedParameters: 
[ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-nano-2025-08-07:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gpt-5-nano-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000005, + output: 0.0000004, + web_search: 0.01, + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 32768, + supportedParameters: [ + "max_completion_tokens", + "stop", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-chat-2025-08-07:openai": { + providerModelId: "gpt-5-chat-2025-08-07", + provider: "openai", + author: "openai", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.01, + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + rateLimits: { + rpm: 30000, + tpm: 180000000, + tpd: 15000000000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-chat-2025-08-07:azure": { + provider: "azure", + author: "openai", + providerModelId: "gpt-5-chat-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.01, + cacheMultipliers: { + 
cachedInput: 0.104, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + rateLimits: { + rpm: 200, + tpm: 200000, + }, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-chat-2025-08-07:openrouter": { + provider: "openrouter", + author: "openai", + providerModelId: "openai/gpt-5-chat-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000132, + output: 0.00001055, + web_search: 0.01, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + supportedParameters: [ + "tools", + "tool_choice", + "seed", + "max_completion_tokens", + "response_format", + "stop", + "verbosity", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "gpt-5-chat-2025-08-07:helicone": { + provider: "helicone", + author: "openai", + providerModelId: "pa/gpt-5-chat-2025-08-07", + pricing: [ + { + threshold: 0, + input: 0.00000125, + output: 0.00001, + web_search: 0.01, + cacheMultipliers: { + cachedInput: 0.1, + }, + }, + ], + contextLength: 128000, + maxCompletionTokens: 16384, + supportedParameters: [ + "max_completion_tokens", + "stop", + ], + unsupportedParameters: [ + "temperature", + "top_p", + "presence_penalty", + "frequency_penalty", + "logprobs", + "top_logprobs", + "logit_bias", + "max_tokens", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, } satisfies Partial< Record<`${GPT5ModelName}:${ModelProviderName}`, ModelProviderConfig> >; diff --git a/packages/cost/models/authors/openai/gpt-5/models.ts 
b/packages/cost/models/authors/openai/gpt-5/models.ts index d8e0cef976..dbe5b96e4f 100644 --- a/packages/cost/models/authors/openai/gpt-5/models.ts +++ b/packages/cost/models/authors/openai/gpt-5/models.ts @@ -65,6 +65,54 @@ export const models = { modality: { inputs: ["text"], outputs: ["text"] }, tokenizer: "GPT", }, + "gpt-5-2025-08-07": { + name: "OpenAI GPT-5", + author: "openai", + description: + "GPT-5 is OpenAI's most advanced language model, featuring enhanced reasoning capabilities with 80% fewer factual errors than o3. It supports a 400K total context (272K input + 128K output), advanced tool calling with reliable chaining of dozens of calls, and a new verbosity parameter for response length control. Ideal for complex reasoning, multi-step planning, and applications requiring high accuracy.", + contextLength: 400000, + maxOutputTokens: 128000, + created: "2025-08-07T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-5", + }, + "gpt-5-mini-2025-08-07": { + name: "OpenAI GPT-5 Mini", + author: "openai", + description: + "GPT-5 Mini delivers GPT-5-level performance at a fraction of the cost and latency. With the same 400K context window and advanced capabilities including tool calling and verbosity control, it's optimized for speed and efficiency while maintaining strong reasoning and instruction-following capabilities. Perfect for high-volume applications requiring advanced AI capabilities with resource constraints.", + contextLength: 400000, + maxOutputTokens: 128000, + created: "2025-08-07T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-5-mini", + }, + "gpt-5-nano-2025-08-07": { + name: "OpenAI GPT-5 Nano", + author: "openai", + description: + "GPT-5 Nano is the smallest and fastest model in the GPT-5 family, designed for ultra-low latency applications. 
Despite its compact size, it maintains the full 400K context window and delivers impressive performance on classification, completion, and simple reasoning tasks. Ideal for real-time applications, edge deployments, and high-throughput scenarios.", + contextLength: 400000, + maxOutputTokens: 128000, + created: "2025-08-07T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-5-nano", + }, + "gpt-5-chat-2025-08-07": { + name: "OpenAI GPT-5 Chat", + author: "openai", + description: + "GPT-5 Chat is a dated snapshot of the conversational version of GPT-5 optimized for dialogue interactions. It features a 128K context window and 16K max output tokens, making it ideal for focused conversations. This version is pinned to ensure consistency across deployments.", + contextLength: 128000, + maxOutputTokens: 16384, + created: "2025-08-07T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "GPT", + pinnedVersionOfModel: "gpt-5-chat-latest", + }, } satisfies Record<string, ModelConfig>; export type GPT5ModelName = keyof typeof models;