diff --git a/.changeset/short-colts-slide.md b/.changeset/short-colts-slide.md
new file mode 100644
index 000000000..a649bf991
--- /dev/null
+++ b/.changeset/short-colts-slide.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": minor
+---
+
+Previously, Stagehand threw a generic StagehandDefaultError when no LLM API key was present, which gave no clear indication of what went wrong. With this change, a missing or invalid key raises a concise InvalidLLMKeyError stating that the key is invalid.
diff --git a/lib/llm/AnthropicClient.ts b/lib/llm/AnthropicClient.ts
index 27d8f4c7c..f2c95a250 100644
--- a/lib/llm/AnthropicClient.ts
+++ b/lib/llm/AnthropicClient.ts
@@ -14,7 +14,10 @@ import {
   LLMClient,
   LLMResponse,
 } from "./LLMClient";
-import { CreateChatCompletionResponseError } from "@/types/stagehandErrors";
+import {
+  CreateChatCompletionResponseError,
+  InvalidLLMKeyError,
+} from "@/types/stagehandErrors";
 
 export class AnthropicClient extends LLMClient {
   public type = "anthropic" as const;
@@ -53,325 +56,348 @@ export class AnthropicClient extends LLMClient {
   }: CreateChatCompletionOptions): Promise<T> {
     const optionsWithoutImage = { ...options };
     delete optionsWithoutImage.image;
-
-    logger({
-      category: "anthropic",
-      message: "creating chat completion",
-      level: 2,
-      auxiliary: {
-        options: {
-          value: JSON.stringify(optionsWithoutImage),
-          type: "object",
+    try {
+      logger({
+        category: "anthropic",
+        message: "creating chat completion",
+        level: 2,
+        auxiliary: {
+          options: {
+            value: JSON.stringify(optionsWithoutImage),
+            type: "object",
+          },
         },
-      },
-    });
+      });
 
-    // Try to get cached response
-    const cacheOptions = {
-      model: this.modelName,
-      messages: options.messages,
-      temperature: options.temperature,
-      image: options.image,
-      response_model: options.response_model,
-      tools: options.tools,
-      retries: retries,
-    };
+      // Try to get cached response
+      const cacheOptions = {
+        model: this.modelName,
+        messages: options.messages,
+        temperature: options.temperature,
+        image: options.image,
+        response_model: options.response_model,
+        tools: options.tools,
+        retries: retries,
+      };
 
-    if (this.enableCaching) {
-      const cachedResponse = await this.cache.get(
-        cacheOptions,
-        options.requestId,
-      );
-      if (cachedResponse) {
-        logger({
-          category: "llm_cache",
-          message: "LLM cache hit - returning cached response",
-          level: 1,
-          auxiliary: {
-            cachedResponse: {
-              value: JSON.stringify(cachedResponse),
-              type: "object",
-            },
-            requestId: {
-              value: options.requestId,
-              type: "string",
-            },
-            cacheOptions: {
-              value: JSON.stringify(cacheOptions),
-              type: "object",
-            },
-          },
-        });
-        return cachedResponse as T;
-      } else {
-        logger({
-          category: "llm_cache",
-          message: "LLM cache miss - no cached response found",
-          level: 1,
-          auxiliary: {
-            cacheOptions: {
-              value: JSON.stringify(cacheOptions),
-              type: "object",
+      if (this.enableCaching) {
+        const cachedResponse = await this.cache.get(
+          cacheOptions,
+          options.requestId,
+        );
+        if (cachedResponse) {
+          logger({
+            category: "llm_cache",
+            message: "LLM cache hit - returning cached response",
+            level: 1,
+            auxiliary: {
+              cachedResponse: {
+                value: JSON.stringify(cachedResponse),
+                type: "object",
+              },
+              requestId: {
+                value: options.requestId,
+                type: "string",
+              },
+              cacheOptions: {
+                value: JSON.stringify(cacheOptions),
+                type: "object",
+              },
             },
-            requestId: {
-              value: options.requestId,
-              type: "string",
+          });
+          return cachedResponse as T;
+        } else {
+          logger({
+            category: "llm_cache",
+            message: "LLM cache miss - no cached response found",
+            level: 1,
+            auxiliary: {
+              cacheOptions: {
+                value: JSON.stringify(cacheOptions),
+                type: "object",
+              },
+              requestId: {
+                value: options.requestId,
+                type: "string",
+              },
             },
-          },
-        });
+          });
+        }
       }
-    }
 
-    const systemMessage = options.messages.find((msg) => {
-      if (msg.role === "system") {
+      const systemMessage = options.messages.find((msg) => {
+        if (msg.role === "system") {
+          if (typeof msg.content === "string") {
+            return true;
+          } else if (Array.isArray(msg.content)) {
+            return msg.content.every((content) => content.type !== "image_url");
+          }
+        }
+        return false;
+      });
+
+      const userMessages = options.messages.filter(
+        (msg) => msg.role !== "system",
+      );
+
+      const formattedMessages: MessageParam[] = userMessages.map((msg) => {
         if (typeof msg.content === "string") {
-          return true;
-        } else if (Array.isArray(msg.content)) {
-          return msg.content.every((content) => content.type !== "image_url");
+          return {
+            role: msg.role as "user" | "assistant", // ensure it's not checking for system types
+            content: msg.content,
+          };
+        } else {
+          return {
+            role: msg.role as "user" | "assistant",
+            content: msg.content.map((content) => {
+              if ("image_url" in content) {
+                const formattedContent: ImageBlockParam = {
+                  type: "image",
+                  source: {
+                    type: "base64",
+                    media_type: "image/jpeg",
+                    data: content.image_url.url,
+                  },
+                };
+
+                return formattedContent;
+              } else {
+                return { type: "text", text: content.text };
+              }
+            }),
+          };
         }
-      }
-      return false;
-    });
+      });
 
-    const userMessages = options.messages.filter(
-      (msg) => msg.role !== "system",
-    );
+      if (options.image) {
+        const screenshotMessage: MessageParam = {
+          role: "user",
+          content: [
+            {
+              type: "image",
+              source: {
+                type: "base64",
+                media_type: "image/jpeg",
+                data: options.image.buffer.toString("base64"),
+              },
+            },
+          ],
+        };
+        if (
+          options.image.description &&
+          Array.isArray(screenshotMessage.content)
+        ) {
+          screenshotMessage.content.push({
+            type: "text",
+            text: options.image.description,
+          });
+        }
+
+        formattedMessages.push(screenshotMessage);
+      }
 
-    const formattedMessages: MessageParam[] = userMessages.map((msg) => {
-      if (typeof msg.content === "string") {
+      let anthropicTools: Tool[] = options.tools?.map((tool) => {
         return {
-          role: msg.role as "user" | "assistant", // ensure its not checking for system types
-          content: msg.content,
+          name: tool.name,
+          description: tool.description,
+          input_schema: {
+            type: "object",
+            properties: tool.parameters.properties,
+            required: tool.parameters.required,
+          },
         };
-      } else {
-        return {
-          role: msg.role as "user" | "assistant",
-          content: msg.content.map((content) => {
-            if ("image_url" in content) {
-              const formattedContent: ImageBlockParam = {
-                type: "image",
-                source: {
-                  type: "base64",
-                  media_type: "image/jpeg",
-                  data: content.image_url.url,
-                },
-              };
+      });
+
+      let toolDefinition: Tool | undefined;
+      if (options.response_model) {
+        const jsonSchema = zodToJsonSchema(options.response_model.schema);
+        const { properties: schemaProperties, required: schemaRequired } =
+          extractSchemaProperties(jsonSchema);
 
-              return formattedContent;
-            } else {
-              return { type: "text", text: content.text };
-            }
-          }),
+        toolDefinition = {
+          name: "print_extracted_data",
+          description:
+            "Prints the extracted data based on the provided schema.",
+          input_schema: {
+            type: "object",
+            properties: schemaProperties,
+            required: schemaRequired,
+          },
         };
       }
-    });
 
-    if (options.image) {
-      const screenshotMessage: MessageParam = {
-        role: "user",
-        content: [
-          {
-            type: "image",
-            source: {
-              type: "base64",
-              media_type: "image/jpeg",
-              data: options.image.buffer.toString("base64"),
-            },
-          },
-        ],
-      };
-      if (
-        options.image.description &&
-        Array.isArray(screenshotMessage.content)
-      ) {
-        screenshotMessage.content.push({
-          type: "text",
-          text: options.image.description,
-        });
+      if (toolDefinition) {
+        anthropicTools = anthropicTools ?? [];
+        anthropicTools.push(toolDefinition);
       }
 
-      formattedMessages.push(screenshotMessage);
-    }
+      const response = await this.client.messages.create({
+        model: this.modelName,
+        max_tokens: options.maxTokens || 8192,
+        messages: formattedMessages,
+        tools: anthropicTools,
+        system: systemMessage
+          ? (systemMessage.content as string | TextBlockParam[]) // we can cast because we already filtered out image content
+          : undefined,
+        temperature: options.temperature,
+      });
 
-    let anthropicTools: Tool[] = options.tools?.map((tool) => {
-      return {
-        name: tool.name,
-        description: tool.description,
-        input_schema: {
-          type: "object",
-          properties: tool.parameters.properties,
-          required: tool.parameters.required,
+      logger({
+        category: "anthropic",
+        message: "response",
+        level: 2,
+        auxiliary: {
+          response: {
+            value: JSON.stringify(response),
+            type: "object",
+          },
+          requestId: {
+            value: options.requestId,
+            type: "string",
+          },
         },
-      };
-    });
-
-    let toolDefinition: Tool | undefined;
-    if (options.response_model) {
-      const jsonSchema = zodToJsonSchema(options.response_model.schema);
-      const { properties: schemaProperties, required: schemaRequired } =
-        extractSchemaProperties(jsonSchema);
+      });
 
-      toolDefinition = {
-        name: "print_extracted_data",
-        description: "Prints the extracted data based on the provided schema.",
-        input_schema: {
-          type: "object",
-          properties: schemaProperties,
-          required: schemaRequired,
-        },
+      // We'll compute usage data from the response
+      const usageData = {
+        prompt_tokens: response.usage.input_tokens,
+        completion_tokens: response.usage.output_tokens,
+        total_tokens:
+          response.usage.input_tokens + response.usage.output_tokens,
       };
-    }
-
-    if (toolDefinition) {
-      anthropicTools = anthropicTools ?? [];
-      anthropicTools.push(toolDefinition);
-    }
 
-    const response = await this.client.messages.create({
-      model: this.modelName,
-      max_tokens: options.maxTokens || 8192,
-      messages: formattedMessages,
-      tools: anthropicTools,
-      system: systemMessage
-        ? (systemMessage.content as string | TextBlockParam[]) // we can cast because we already filtered out image content
-        : undefined,
-      temperature: options.temperature,
-    });
-
-    logger({
-      category: "anthropic",
-      message: "response",
-      level: 2,
-      auxiliary: {
-        response: {
-          value: JSON.stringify(response),
-          type: "object",
-        },
-        requestId: {
-          value: options.requestId,
-          type: "string",
-        },
-      },
-    });
-
-    // We'll compute usage data from the response
-    const usageData = {
-      prompt_tokens: response.usage.input_tokens,
-      completion_tokens: response.usage.output_tokens,
-      total_tokens: response.usage.input_tokens + response.usage.output_tokens,
-    };
-
-    const transformedResponse: LLMResponse = {
-      id: response.id,
-      object: "chat.completion",
-      created: Date.now(),
-      model: response.model,
-      choices: [
-        {
-          index: 0,
-          message: {
-            role: "assistant",
-            content:
-              response.content.find((c) => c.type === "text")?.text || null,
-            tool_calls: response.content
-              .filter((c) => c.type === "tool_use")
-              .map((toolUse) => ({
-                id: toolUse.id,
-                type: "function",
-                function: {
-                  name: toolUse.name,
-                  arguments: JSON.stringify(toolUse.input),
-                },
-              })),
+      const transformedResponse: LLMResponse = {
+        id: response.id,
+        object: "chat.completion",
+        created: Date.now(),
+        model: response.model,
+        choices: [
+          {
+            index: 0,
+            message: {
+              role: "assistant",
+              content:
+                response.content.find((c) => c.type === "text")?.text || null,
+              tool_calls: response.content
+                .filter((c) => c.type === "tool_use")
+                .map((toolUse) => ({
+                  id: toolUse.id,
+                  type: "function",
+                  function: {
+                    name: toolUse.name,
+                    arguments: JSON.stringify(toolUse.input),
+                  },
+                })),
+            },
+            finish_reason: response.stop_reason,
+          },
-          finish_reason: response.stop_reason,
-        },
-      ],
-      usage: usageData,
-    };
+        ],
+        usage: usageData,
+      };
 
-    logger({
-      category: "anthropic",
-      message: "transformed response",
-      level: 2,
-      auxiliary: {
-        transformedResponse: {
-          value: JSON.stringify(transformedResponse),
-          type: "object",
-        },
-        requestId: {
-          value: options.requestId,
-          type: "string",
+      logger({
+        category: "anthropic",
+        message: "transformed response",
+        level: 2,
+        auxiliary: {
+          transformedResponse: {
+            value: JSON.stringify(transformedResponse),
+            type: "object",
+          },
+          requestId: {
+            value: options.requestId,
+            type: "string",
+          },
         },
-      },
-    });
+      });
 
-    if (options.response_model) {
-      const toolUse = response.content.find((c) => c.type === "tool_use");
-      if (toolUse && "input" in toolUse) {
-        const result = toolUse.input;
+      if (options.response_model) {
+        const toolUse = response.content.find((c) => c.type === "tool_use");
+        if (toolUse && "input" in toolUse) {
+          const result = toolUse.input;
 
-        const finalParsedResponse = {
-          data: result,
-          usage: usageData,
-        } as unknown as T;
+          const finalParsedResponse = {
+            data: result,
+            usage: usageData,
+          } as unknown as T;
 
-        if (this.enableCaching) {
-          this.cache.set(cacheOptions, finalParsedResponse, options.requestId);
-        }
+          if (this.enableCaching) {
+            this.cache.set(
+              cacheOptions,
+              finalParsedResponse,
+              options.requestId,
+            );
+          }
 
-        return finalParsedResponse;
-      } else {
-        if (!retries || retries < 5) {
-          return this.createChatCompletion({
-            options,
-            logger,
-            retries: (retries ?? 0) + 1,
-          });
+          return finalParsedResponse;
+        } else {
+          if (!retries || retries < 5) {
+            return this.createChatCompletion({
+              options,
+              logger,
+              retries: (retries ?? 0) + 1,
+            });
+          }
+          logger({
+            category: "anthropic",
+            message: "error creating chat completion",
+            level: 0,
+            auxiliary: {
+              requestId: {
+                value: options.requestId,
+                type: "string",
+              },
+            },
+          });
+          throw new CreateChatCompletionResponseError(
+            "No tool use with input in response",
+          );
+        }
+      }
-        }
-        logger({
-          category: "anthropic",
-          message: "error creating chat completion",
-          level: 0,
-          auxiliary: {
-            requestId: {
-              value: options.requestId,
-              type: "string",
-            },
-          },
-        });
-        throw new CreateChatCompletionResponseError(
-          "No tool use with input in response",
-        );
-      }
-    }
 
-    if (this.enableCaching) {
-      this.cache.set(cacheOptions, transformedResponse, options.requestId);
-      logger({
-        category: "anthropic",
-        message: "cached response",
-        level: 1,
-        auxiliary: {
-          requestId: {
-            value: options.requestId,
-            type: "string",
-          },
-          transformedResponse: {
-            value: JSON.stringify(transformedResponse),
-            type: "object",
-          },
-          cacheOptions: {
-            value: JSON.stringify(cacheOptions),
-            type: "object",
-          },
-        },
-      });
-    }
+      if (this.enableCaching) {
+        this.cache.set(cacheOptions, transformedResponse, options.requestId);
+        logger({
+          category: "anthropic",
+          message: "cached response",
+          level: 1,
+          auxiliary: {
+            requestId: {
+              value: options.requestId,
+              type: "string",
+            },
+            transformedResponse: {
+              value: JSON.stringify(transformedResponse),
+              type: "object",
+            },
+            cacheOptions: {
+              value: JSON.stringify(cacheOptions),
+              type: "object",
+            },
+          },
+        });
+      }
 
-    // if the function was called with a response model, it would have returned earlier
-    // so we can safely cast here to T, which defaults to AnthropicTransformedResponse
-    return transformedResponse as T;
+      // if the function was called with a response model, it would have returned earlier
+      // so we can safely cast here to T, which defaults to AnthropicTransformedResponse
+      return transformedResponse as T;
+    } catch (error) {
+      if (error instanceof Anthropic.AuthenticationError) {
+        logger({
+          category: "anthropic",
+          message: "Invalid Anthropic API key",
+          level: 0,
+          auxiliary: {
+            error: {
+              value: error.message,
+              type: "string",
+            },
+          },
+        });
+        throw new InvalidLLMKeyError();
+      }
+      throw error;
+    }
   }
 }
diff --git a/lib/llm/CerebrasClient.ts b/lib/llm/CerebrasClient.ts
index 4d5a0daca..0068bb230 100644
--- a/lib/llm/CerebrasClient.ts
+++ b/lib/llm/CerebrasClient.ts
@@ -10,7 +10,10 @@ import {
   LLMClient,
   LLMResponse,
 } from "./LLMClient";
-import { CreateChatCompletionResponseError } from "@/types/stagehandErrors";
+import {
+  CreateChatCompletionResponseError,
+  InvalidLLMKeyError,
+} from "@/types/stagehandErrors";
 
 export class CerebrasClient extends LLMClient {
   public type = "cerebras" as const;
@@ -306,6 +309,24 @@ export class CerebrasClient extends LLMClient {
 
       return response as T;
     } catch (error) {
+      if (error instanceof OpenAI.AuthenticationError) {
+        logger({
+          category: "cerebras",
+          message: "Invalid Cerebras API key",
+          level: 0,
+          auxiliary: {
+            error: {
+              value: error.message,
+              type: "string",
+            },
+            requestId: {
+              value: options.requestId,
+              type: "string",
+            },
+          },
+        });
+        throw new InvalidLLMKeyError();
+      }
       logger({
         category: "cerebras",
         message: "error creating chat completion",
diff --git a/lib/llm/GroqClient.ts b/lib/llm/GroqClient.ts
index fe91d06ba..bcc665711 100644
--- a/lib/llm/GroqClient.ts
+++ b/lib/llm/GroqClient.ts
@@ -10,7 +10,10 @@ import {
   LLMClient,
   LLMResponse,
 } from "./LLMClient";
-import { CreateChatCompletionResponseError } from "@/types/stagehandErrors";
+import {
+  CreateChatCompletionResponseError,
+  InvalidLLMKeyError,
+} from "@/types/stagehandErrors";
 
 export class GroqClient extends LLMClient {
   public type = "groq" as const;
@@ -306,6 +309,24 @@ export class GroqClient extends LLMClient {
 
       return response as T;
     } catch (error) {
+      if (error instanceof OpenAI.AuthenticationError) {
+        logger({
+          category: "groq",
+          message: "Invalid Groq API key",
+          level: 0,
+          auxiliary: {
+            error: {
+              value: error.message,
+              type: "string",
+            },
+            requestId: {
+              value: options.requestId,
+              type: "string",
+            },
+          },
+        });
+        throw new InvalidLLMKeyError();
+      }
       logger({
         category: "groq",
         message: "error creating chat completion",
diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts
index 5086fa41d..2a546cf81 100644
--- a/lib/llm/OpenAIClient.ts
+++ b/lib/llm/OpenAIClient.ts
@@ -24,6 +24,7 @@ import {
 import {
   CreateChatCompletionResponseError,
   StagehandError,
+  InvalidLLMKeyError,
 } from "@/types/stagehandErrors";
 
 export class OpenAIClient extends LLMClient {
@@ -342,129 +343,152 @@ export class OpenAIClient extends LLMClient {
         type: "function",
       })),
     };
+    try {
+      const response = await this.client.chat.completions.create(body);
 
-    const response = await this.client.chat.completions.create(body);
-
-    // For O1 models, we need to parse the tool call response manually and add it to the response.
-    if (isToolsOverridedForO1) {
-      try {
-        const parsedContent = JSON.parse(response.choices[0].message.content);
-
-        response.choices[0].message.tool_calls = [
-          {
-            function: {
-              name: parsedContent["name"],
-              arguments: JSON.stringify(parsedContent["arguments"]),
-            },
-            type: "function",
-            id: "-1",
-          },
-        ];
-        response.choices[0].message.content = null;
-      } catch (error) {
-        logger({
-          category: "openai",
-          message: "Failed to parse tool call response",
-          level: 0,
-          auxiliary: {
-            error: {
-              value: error.message,
-              type: "string",
-            },
-            content: {
-              value: response.choices[0].message.content,
-              type: "string",
-            },
-          },
-        });
-
-        if (retries > 0) {
-          // as-casting to account for o1 models not supporting all options
-          return this.createChatCompletion({
-            options: options as ChatCompletionOptions,
-            logger,
-            retries: retries - 1,
-          });
-        }
-
-        throw error;
-      }
-    }
+      // For O1 models, we need to parse the tool call response manually and add it to the response.
+      if (isToolsOverridedForO1) {
+        try {
+          const parsedContent = JSON.parse(response.choices[0].message.content);
+
+          response.choices[0].message.tool_calls = [
+            {
+              function: {
+                name: parsedContent["name"],
+                arguments: JSON.stringify(parsedContent["arguments"]),
+              },
+              type: "function",
+              id: "-1",
+            },
+          ];
+          response.choices[0].message.content = null;
+        } catch (error) {
+          logger({
+            category: "openai",
+            message: "Failed to parse tool call response",
+            level: 0,
+            auxiliary: {
+              error: {
+                value: error.message,
+                type: "string",
+              },
+              content: {
+                value: response.choices[0].message.content,
+                type: "string",
+              },
+            },
+          });
+
+          if (retries > 0) {
+            // as-casting to account for o1 models not supporting all options
+            return this.createChatCompletion({
+              options: options as ChatCompletionOptions,
+              logger,
+              retries: retries - 1,
+            });
+          }
+
+          throw error;
+        }
+      }
 
-    logger({
-      category: "openai",
-      message: "response",
-      level: 2,
-      auxiliary: {
-        response: {
-          value: JSON.stringify(response),
-          type: "object",
-        },
-        requestId: {
-          value: requestId,
-          type: "string",
-        },
-      },
-    });
+      logger({
+        category: "openai",
+        message: "response",
+        level: 2,
+        auxiliary: {
+          response: {
+            value: JSON.stringify(response),
+            type: "object",
+          },
+          requestId: {
+            value: requestId,
+            type: "string",
+          },
        },
+      });
 
-    if (options.response_model) {
-      const extractedData = response.choices[0].message.content;
-      const parsedData = JSON.parse(extractedData);
-
-      if (!validateZodSchema(options.response_model.schema, parsedData)) {
-        if (retries > 0) {
-          // as-casting to account for o1 models not supporting all options
-          return this.createChatCompletion({
-            options: options as ChatCompletionOptions,
-            logger,
-            retries: retries - 1,
-          });
-        }
-
-        throw new CreateChatCompletionResponseError("Invalid response schema");
-      }
+      if (options.response_model) {
+        const extractedData = response.choices[0].message.content;
+        const parsedData = JSON.parse(extractedData);
+
+        if (!validateZodSchema(options.response_model.schema, parsedData)) {
+          if (retries > 0) {
+            // as-casting to account for o1 models not supporting all options
+            return this.createChatCompletion({
+              options: options as ChatCompletionOptions,
+              logger,
+              retries: retries - 1,
+            });
+          }
+
+          throw new CreateChatCompletionResponseError(
+            "Invalid response schema",
+          );
+        }
 
-      if (this.enableCaching) {
-        this.cache.set(
-          cacheOptions,
-          {
-            ...parsedData,
-          },
-          options.requestId,
-        );
-      }
+        if (this.enableCaching) {
+          this.cache.set(
+            cacheOptions,
+            {
+              ...parsedData,
+            },
+            options.requestId,
+          );
+        }
 
-      return {
-        data: parsedData,
-        usage: response.usage,
-      } as T;
-    }
+        return {
+          data: parsedData,
+          usage: response.usage,
+        } as T;
+      }
 
-    if (this.enableCaching) {
-      logger({
-        category: "llm_cache",
-        message: "caching response",
-        level: 1,
-        auxiliary: {
-          requestId: {
-            value: options.requestId,
-            type: "string",
-          },
-          cacheOptions: {
-            value: JSON.stringify(cacheOptions),
-            type: "object",
-          },
-          response: {
-            value: JSON.stringify(response),
-            type: "object",
-          },
-        },
-      });
-      this.cache.set(cacheOptions, response, options.requestId);
-    }
+      if (this.enableCaching) {
+        logger({
+          category: "llm_cache",
+          message: "caching response",
+          level: 1,
+          auxiliary: {
+            requestId: {
+              value: options.requestId,
+              type: "string",
+            },
+            cacheOptions: {
+              value: JSON.stringify(cacheOptions),
+              type: "object",
+            },
+            response: {
+              value: JSON.stringify(response),
+              type: "object",
+            },
+          },
+        });
+        this.cache.set(cacheOptions, response, options.requestId);
+      }
 
-    // if the function was called with a response model, it would have returned earlier
-    // so we can safely cast here to T, which defaults to ChatCompletion
-    return response as T;
+      // if the function was called with a response model, it would have returned earlier
+      // so we can safely cast here to T, which defaults to ChatCompletion
+      return response as T;
+    } catch (error) {
+      if (error instanceof OpenAI.AuthenticationError) {
+        logger({
+          category: "openai",
+          message: "Invalid OpenAI API key",
+          level: 0,
+          auxiliary: {
+            error: {
+              value: error.message,
+              type: "string",
+            },
+            requestId: {
+              value: options.requestId,
+              type: "string",
+            },
+          },
+        });
+        throw new InvalidLLMKeyError();
+      }
      throw error;
+    }
   }
 }
diff --git a/types/stagehandErrors.ts b/types/stagehandErrors.ts
index e5efeb2ed..b7a8d731c 100644
--- a/types/stagehandErrors.ts
+++ b/types/stagehandErrors.ts
@@ -86,6 +86,14 @@ export class MissingLLMConfigurationError extends StagehandError {
   }
 }
 
+export class InvalidLLMKeyError extends StagehandError {
+  constructor() {
+    super(
+      "Invalid LLM API key or misconfigured LLM client. Please check your configuration.",
+    );
+  }
+}
+
 export class HandlerNotInitializedError extends StagehandError {
   constructor(handlerType: string) {
     super(`${handlerType} handler not initialized`);
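
A usage sketch (not part of the patch) of how calling code might branch on the new error. It assumes `InvalidLLMKeyError` is re-exported from the package entry point, and the `Stagehand` constructor options and `page.act()` call are illustrative; only the error class itself comes from this diff.

```ts
import { Stagehand, InvalidLLMKeyError } from "@browserbasehq/stagehand";

// Hypothetical consumer: any LLM-backed call can now surface
// InvalidLLMKeyError instead of a generic StagehandDefaultError.
async function run() {
  const stagehand = new Stagehand({ env: "LOCAL" }); // illustrative config
  try {
    await stagehand.init();
    await stagehand.page.act("click the sign-in button");
  } catch (error) {
    if (error instanceof InvalidLLMKeyError) {
      // Specific, actionable failure: fix the API key or LLM client config.
      console.error(error.message);
      process.exitCode = 1;
    } else {
      throw error; // unrelated failures still propagate
    }
  } finally {
    await stagehand.close();
  }
}

run();
```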