openai · isakshamgoyal · Jun 22, 2025 · Jun 22, 2025 · Jun 22, 2025 · Jun 22, 2025
diff --git a/.gitignore b/.gitignore
@@ -39,3 +39,6 @@ yarn-error.log*
 # typescript
 *.tsbuildinfo
 next-env.d.ts
+
+# logs
+logs/
diff --git a/README.md b/README.md
@@ -1,13 +1,13 @@
-# Testing Agent Demo
+# Testing Agent Demo - Azure OpenAI Edition
 
 [![MIT License](https://img.shields.io/badge/License-MIT-green.svg)](frontend/LICENSE)
 
-This monorepo demonstrates how you can use OpenAI's CUA model and [computer use tool](https://platform.openai.com/docs/guides/tools-computer-use) to automate frontend testing. It uses [Playwright](https://playwright.dev) to spin up a browser instance and navigate to the web app to be tested. The CUA model then follows the provided test case and executes actions on the interface until the test case is done.  
+This monorepo demonstrates how you can use Azure OpenAI's computer-use-preview model and [computer use tool](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/computer-use) to automate frontend testing. It uses [Playwright](https://playwright.dev) to spin up a browser instance and navigate to the web app to be tested. The Azure OpenAI computer-use-preview model then follows the provided test case and executes actions on the interface until the test case is done.  
 
 The repo contains three applications that work together:
 
 - **frontend** – Next.js web interface used to configure tests and watch them run.
-- **cua-server** – Node service that communicates with the OpenAI CUA model and drives Playwright to interact in a browser with the sample app.
+- **cua-server** – Node service that communicates with the Azure OpenAI computer-use-preview model and drives Playwright to interact in a browser with the sample app.
 - **sample-test-app** – Example e‑commerce site used as an example app to test by the agent.
 
 ![screenshot](./screenshot.jpg)
@@ -24,9 +24,16 @@ The repo contains three applications that work together:
    cd openai-testing-agent-demo
    ```
 
-2. **Prepare environment files**
+2. **Set up Azure OpenAI**
 
-   If you haven't set your `OPENAI_API_KEY` environment variable on your terminal or globally on your machine (set up instructions [here](https://platform.openai.com/docs/libraries#create-and-export-an-api-key)), edit each `.env.development` file and set `OPENAI_API_KEY`.
+   - Create an Azure OpenAI resource in the Azure portal
+   - Deploy the `computer-use-preview` model for computer use capabilities
+   - Deploy chat models such as `o3-mini` (for test case generation) and `gpt-4o` (for test script review)
+   - Get your API key and endpoint from the Azure portal
+
+3. **Prepare environment files**
+
+   Copy the example environment files and configure with your Azure OpenAI settings:
 
    ```bash
    cp frontend/.env.example frontend/.env.development
@@ -43,14 +50,14 @@ The repo contains three applications that work together:
 
    Make sure you add a `sample-test-app/.env.development` file with the example credentials to run the demo.
 
-3. **Install dependencies**
+4. **Install dependencies**
 
    ```bash
    npm install
    npx playwright install
    ```
 
-4. **Run all apps**
+5. **Run all apps**
 
    ```bash
    npm run dev
@@ -84,6 +91,7 @@ You are welcome to open issues or submit PRs to improve this app, however, pleas
 
 - This project is meant to be used on test environments only.
 - Do not use real user data in production.
+- Ensure your Azure OpenAI API keys are kept secure and not committed to version control.
 
 ## License
 

diff --git a/cua-server/.env.example b/cua-server/.env.example
@@ -1 +1,32 @@
-OPENAI_API_KEY=your-openai-key
+# PROVIDER CONFIGURATION
+USE_OPENAI=false
+
+# OpenAI Configuration
+OPENAI_API_KEY=your_openai_api_key_here
+
+# OpenAI Model Configuration
+OPENAI_COMPUTER_USE_MODEL=computer-use-preview
+OPENAI_TEST_CASE_AGENT=o3-mini
+OPENAI_TEST_SCRIPT_REVIEW_AGENT=gpt-4o
+
+# Azure-OpenAI Configuration
+AZURE_API_KEY=your_azure_openai_api_key_here
+AZURE_ENDPOINT=your_azure_openai_api_endpoint
+AZURE_API_VERSION=2025-03-01-preview
+
+# Azure-OpenAI Model Deployment Names
+AZURE_COMPUTER_USE_MODEL_DEPLOYMENT_NAME=computer-use-preview
+AZURE_TEST_CASE_AGENT_DEPLOYMENT_NAME=o3-mini
+AZURE_TEST_SCRIPT_REVIEW_AGENT_DEPLOYMENT_NAME=gpt-4o-2
+
+# Display Configuration
+DISPLAY_WIDTH=1024
+DISPLAY_HEIGHT=768
+
+# Server Configuration
+SOCKET_PORT=8000
+CORS_ORIGIN=*
+
+# Logging
+LOG_LEVEL=debug
+NODE_ENV=development
diff --git a/cua-server/README.md b/cua-server/README.md
@@ -1,26 +1,141 @@
-# CUA Server
+# CUA Server - Azure OpenAI Edition
 
-![OpenAI API](https://img.shields.io/badge/Powered_by-OpenAI_API-orange)
+This is the core testing agent server that communicates with Azure OpenAI's computer-use-preview model to drive Playwright automation.
 
-A Node.js service that interfaces with the OpenAI CUA model and exposes a Socket.IO WebSocket API used by the frontend.
+## Prerequisites
 
-## Setup
+- Azure OpenAI resource with access to the computer-use-preview model
+- Node.js and npm
+- Valid Azure OpenAI API key and endpoint
 
-1. Copy the example environment file and add your OpenAI key:
+## Azure OpenAI Setup
+
+1. **Create Azure OpenAI Resource**: Set up an Azure OpenAI resource in your Azure portal
+2. **Deploy Models**: Deploy the following models in your Azure OpenAI resource:
+   - `computer-use-preview` (for computer use capabilities)
+   - `o3-mini` and `gpt-4o` (for test case generation and test script review, respectively)
+3. **Get API Key and Endpoint**: Obtain your API key and endpoint from the Azure portal
+
+## Configuration
+
+1. **Copy environment files**:
    ```bash
    cp .env.example .env.development
    # edit .env.development
    ```
-2. Install dependencies and launch the server:
+
+2. **Configure Azure OpenAI settings** in `.env.development`:
+   ```bash
+   # Your Azure OpenAI API key
+   AZURE_API_KEY=your_azure_openai_api_key_here
+
+   # Your Azure OpenAI endpoint
+   AZURE_ENDPOINT=https://your-resource-name.openai.azure.com
+
+   # API version (use the latest preview version)
+   AZURE_API_VERSION=2025-04-01-preview
+
+   # Deployment names from your Azure OpenAI resource
+   AZURE_COMPUTER_USE_MODEL_DEPLOYMENT_NAME=computer-use-preview
+   AZURE_TEST_CASE_AGENT_DEPLOYMENT_NAME=o3-mini
+   AZURE_TEST_SCRIPT_REVIEW_AGENT_DEPLOYMENT_NAME=gpt-4o
+   ```
+
+3. **Optional configurations**:
+   ```bash
+   # Display dimensions for the browser automation
+   DISPLAY_WIDTH=1024
+   DISPLAY_HEIGHT=768
+
+   # Server configuration
+   SOCKET_PORT=8000
+   CORS_ORIGIN=*
+
+   # Logging level
+   LOG_LEVEL=info
+
+   # Environment-specific instructions (e.g., for macOS)
+   ENV_SPECIFIC_INSTRUCTIONS=Use CMD key instead of CTRL key for macOS
+   ```
+
+## Usage
+
+1. **Install dependencies**:
    ```bash
    npm install
-   npx playwright install
-   npm run dev   # or npm start
    ```
-   The server listens on port `8000` by default. Set `SOCKET_PORT` to change it.
 
-### Environment Variables
+2. **Run in development mode**:
+   ```bash
+   npm run dev
+   ```
+
+3. **Build and run**:
+   ```bash
+   npm run build
+   npm start
+   ```
+
+## Azure OpenAI API Compatibility
+
+This server uses the Azure OpenAI API by default (`USE_OPENAI=false`); set `USE_OPENAI=true` to use the standard OpenAI API instead. The key differences when running against Azure:
+
+- Uses `AzureOpenAI` client instead of `OpenAI`
+- Requires endpoint and API version configuration
+- Uses deployment names instead of model names
+- Supports the same computer-use-preview functionality via Azure
+
+## API Example
+
+The server makes requests similar to this curl example:
+```bash
+curl -X POST "https://your-resource-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview" \
+  -H "Content-Type: application/json" \
+  -H "api-key: $AZURE_API_KEY" \
+  -d '{
+     "model": "computer-use-preview",
+     "input": [...]
+    }'
+```
+
+## Environment Variables Reference
+
+| Variable | Description | Required | Default |
+|----------|-------------|----------|---------|
+| `AZURE_API_KEY` | Your Azure OpenAI API key | Yes | - |
+| `AZURE_ENDPOINT` | Your Azure OpenAI endpoint URL | Yes | - |
+| `AZURE_API_VERSION` | Azure OpenAI API version | No | `2025-04-01-preview` |
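+| `AZURE_COMPUTER_USE_MODEL_DEPLOYMENT_NAME` | Computer-use model deployment name | No | `computer-use-preview` |
+| `AZURE_TEST_CASE_AGENT_DEPLOYMENT_NAME` | Test case agent deployment name | No | `o3-mini` |
+| `AZURE_TEST_SCRIPT_REVIEW_AGENT_DEPLOYMENT_NAME` | Test script review agent deployment name | No | `gpt-4o` |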
+| `DISPLAY_WIDTH` | Browser viewport width | No | `1024` |
+| `DISPLAY_HEIGHT` | Browser viewport height | No | `768` |
+| `SOCKET_PORT` | WebSocket server port | No | `8000` |
+| `CORS_ORIGIN` | CORS origin setting | No | `*` |
+| `LOG_LEVEL` | Logging level | No | `info` |
+| `ENV_SPECIFIC_INSTRUCTIONS` | OS-specific instructions | No | - |
+
+## Architecture
+
+The server includes several key components:
+
+- **OpenAI CUA Client** (`services/openai-cua-client.ts`): Handles Azure OpenAI API communication
+- **Test Case Agent** (`agents/test-case-agent.ts`): Generates test case steps
+- **Test Script Review Agent** (`agents/test-script-review-agent.ts`): Reviews and updates test progress
+- **Computer Use Loop** (`lib/computer-use-loop.ts`): Main automation loop
+- **Handlers**: WebSocket and browser automation handlers
+
+## Troubleshooting
+
+1. **Authentication Error**: Verify your `AZURE_API_KEY` and `AZURE_ENDPOINT` are correct
+2. **Model Not Found**: Ensure your deployment names match what's configured in Azure
+3. **API Version Error**: Try using a different `AZURE_API_VERSION` if the preview version is not available
+4. **Rate Limiting**: Azure OpenAI has rate limits that may be different from standard OpenAI
+
+## Migration from OpenAI
+
+This project has been updated from standard OpenAI to Azure OpenAI. Key changes:
 
-- `OPENAI_API_KEY` – required for calls to the CUA model.
-- `SOCKET_PORT` (optional) – WebSocket port (default `8000`).
-- `CORS_ORIGIN` (optional) – allowed CORS origin for incoming connections.
+1. `OpenAI` client replaced with `AzureOpenAI`
+2. `OPENAI_API_KEY` replaced with `AZURE_API_KEY`
+3. Added `AZURE_ENDPOINT` and `AZURE_API_VERSION` requirements
+4. Model names replaced with deployment names
+5. All API calls now go through Azure OpenAI endpoints
diff --git a/cua-server/src/agents/test-case-agent.ts b/cua-server/src/agents/test-case-agent.ts
@@ -1,53 +1,51 @@
 import { PROMPT_WITHOUT_LOGIN, PROMPT_WITH_LOGIN } from "../lib/constants";
 import logger from "../utils/logger";
-import OpenAI from "openai";
-import { z } from "zod";
-import { zodTextFormat } from "openai/helpers/zod";
-
-export const TestCaseStepSchema = z.object({
-  step_number: z.number(),
-  step_instructions: z.string(),
-  status: z.string().nullable(),
-});
-
-export const TestCaseSchema = z.object({
-  steps: z.array(TestCaseStepSchema),
-});
-
-export type TestCase = z.infer<typeof TestCaseSchema>;
-
-const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+import { openai_service } from "../services/openai-service";
+import { TestCase, TEST_CASE_JSON_SCHEMA } from "../utils/test-case-utils";
 
 class TestCaseAgent {
-  private readonly model = "o3-mini";
-  private readonly developer_prompt: string;
+  private readonly model: string;
+  private readonly system_prompt: string;
   private readonly login_required: boolean;
 
   constructor(login_required = false) {
     this.login_required = login_required;
-    this.developer_prompt = login_required
-      ? PROMPT_WITH_LOGIN
-      : PROMPT_WITHOUT_LOGIN;
-    logger.trace(`Developer prompt: ${this.developer_prompt}`);
+    this.system_prompt = login_required ? PROMPT_WITH_LOGIN : PROMPT_WITHOUT_LOGIN;
+
+    // Use different model names based on provider
+    if (process.env.USE_OPENAI === 'true') {
+      this.model = process.env.OPENAI_TEST_CASE_AGENT || "o3-mini";
+    } else {
+      this.model = process.env.AZURE_TEST_CASE_AGENT_DEPLOYMENT_NAME || "o3-mini";
+    }
   }
 
   /**
-   * Generate structured test steps via the Responses API.
+   * Generate test steps via the unified Response API.
    */
-  async invokeResponseAPI(userInstruction: string): Promise<TestCase> {
-    logger.debug("Invoking Response API", { userInstruction });
-    const response = await openai.responses.parse({
+  async generateTestCases(userInstruction: string): Promise<TestCase> {
+    logger.info("Generating Test Cases");
+
+    const response = await openai_service.responseAPI({
+      systemPrompt: this.system_prompt,
+      userMessage: userInstruction,
       model: this.model,
-      input: [
-        { role: "system", content: this.developer_prompt },
-        { role: "user", content: userInstruction },
-      ],
-      text: {
-        format: zodTextFormat(TestCaseSchema, "test_case"),
-      },
+      schema: TEST_CASE_JSON_SCHEMA,
+      schemaName: "test_case"
     });
-    logger.debug("Response API output", { output: response.output_parsed });
-    return response.output_parsed!;
+
+    if (!response.output_text) {
+      throw new Error("No output text received from OpenAI service");
+    }
+
+    const result: TestCase = JSON.parse(response.output_text);
+
+    logger.info(`Test Cases Generated Successfully:\n${JSON.stringify({ 
+      loginRequired: this.login_required,
+      steps: result.steps
+    }, null, 2)}`);
+
+    return result;
   }
 }