Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ dev-notes/
.turbo
.env
.claude
*.snapshot-cache.json
163 changes: 91 additions & 72 deletions payloads/cases/advanced.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
OPENAI_RESPONSES_MODEL,
ANTHROPIC_MODEL,
} from "./models";
import { readFileAsBase64 } from "./utils";

// Advanced test cases - complex functionality testing
export const advancedCases: TestCaseCollection = {
Expand Down Expand Up @@ -114,124 +115,142 @@ export const advancedCases: TestCaseCollection = {
},
},

reasoningWithOutput: {
base64ImageRequest: {
responses: {
model: OPENAI_RESPONSES_MODEL,
reasoning: { effort: "low" },
reasoning: { effort: "minimal" },
input: [
{
role: "user",
content: "What color is the sky?",
content: [
{
type: "input_image",
detail: "auto",
image_url: `data:image/png;base64,${readFileAsBase64("test-image.png")}`,
},
{
type: "input_text",
text: "What color is this image?",
},
],
},
],
max_output_tokens: 300,
},
"chat-completions": {
model: OPENAI_CHAT_COMPLETIONS_MODEL,
reasoning_effort: "low",
messages: [
{
role: "user",
content: "What color is the sky?",
content: [
{
type: "image_url",
image_url: {
url: `data:image/png;base64,${readFileAsBase64("test-image.png")}`,
},
},
{
type: "text",
text: "What color is this image?",
},
],
},
],
max_completion_tokens: 300,
},
anthropic: {
model: ANTHROPIC_MODEL,
max_tokens: 20000,
max_tokens: 300,
messages: [
{
role: "user",
content: "What color is the sky?",
content: [
{
type: "image",
source: {
type: "base64",
media_type: "image/png",
data: readFileAsBase64("test-image.png"),
},
},
{
type: "text",
text: "What color is this image?",
},
],
},
],
},
},

toolCallRequest: {
"chat-completions": {
model: OPENAI_CHAT_COMPLETIONS_MODEL,
messages: [
documentInputBase64Request: {
responses: {
model: OPENAI_RESPONSES_MODEL,
reasoning: { effort: "minimal" },
input: [
{
role: "user",
content: "What's the weather like in San Francisco?",
},
],
tools: [
{
type: "function",
function: {
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
},
required: ["location"],
content: [
{
type: "input_file",
file_data: `data:application/pdf;base64,${readFileAsBase64("test-document.pdf")}`,
filename: "test-document.pdf",
},
},
{
type: "input_text",
text: "What is in this document?",
},
],
},
],
tool_choice: "auto",
max_output_tokens: 300,
},
anthropic: {
model: ANTHROPIC_MODEL,
max_tokens: 20000,
"chat-completions": {
model: OPENAI_CHAT_COMPLETIONS_MODEL,
reasoning_effort: "low",
messages: [
{
role: "user",
content: "What's the weather like in San Francisco?",
},
],
tools: [
{
name: "get_weather",
description: "Get the current weather for a location",
input_schema: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
content: [
{
type: "file",
file: {
file_data: `data:application/pdf;base64,${readFileAsBase64("test-document.pdf")}`,
filename: "test-document.pdf",
},
},
required: ["location"],
},
{
type: "text",
text: "What is in this document?",
},
],
},
],
tool_choice: {
type: "auto",
},
max_completion_tokens: 300,
},
responses: {
model: OPENAI_RESPONSES_MODEL,
input: [
anthropic: {
model: ANTHROPIC_MODEL,
max_tokens: 300,
messages: [
{
role: "user",
content: "What's the weather like in San Francisco?",
},
],
tools: [
{
type: "function",
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
content: [
{
type: "document",
source: {
type: "base64",
media_type: "application/pdf",
data: readFileAsBase64("test-document.pdf"),
},
},
required: ["location"],
},
strict: false,
{
type: "text",
text: "What is in this document?",
},
],
},
],
tool_choice: "auto",
},
},
};
Loading