diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5e3fedd --- /dev/null +++ b/.dockerignore @@ -0,0 +1,64 @@ +# Git files +.git +.gitignore +.gitattributes + +# Python cache +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +*.egg + +# Virtual environments +.venv/ +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS files +.DS_Store +Thumbs.db + +# Workspace and cache (will be mounted as volumes) +workspace/ +cache/ + +# Documentation and examples +README.md +LICENSE +asset/ +examples/ +evaluation/ + +# Docker files themselves +Dockerfile +docker-compose.yaml +.dockerignore + +# Logs +*.log +logs/ + +# Test files +tests/ +*.test +.pytest_cache/ +.coverage +htmlcov/ + +# Temporary files +tmp/ +temp/ +*.tmp diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d97248c --- /dev/null +++ b/.env.example @@ -0,0 +1,61 @@ +# InfantAgent Environment Configuration +# Copy this file to .env and fill in your values + +# Required: Anthropic API Key +# Get your key from: https://console.anthropic.com/ +ANTHROPIC_API_KEY=your_anthropic_api_key_here + +# Optional: Hugging Face Token (for downloading UI-TARS model) +# Get your token from: https://huggingface.co/settings/tokens +# Required if the model is gated or for faster downloads +HF_API_KEY=your_huggingface_token_here + +# Optional: Hugging Face cache directory +# HF_HOME=~/.cache/huggingface + +# Optional: GPU Configuration +# Specify which CUDA devices to use (comma-separated list) +# Default GPU allocation: +# - GPU 0: Computer container (display rendering) +# - GPU 2,3: vLLM server (UI-TARS model with tensor parallelism) +# - InfantAgent server: Runs on CPU (no GPU needed) +# Note: Adjust based on your available GPUs +# CUDA_VISIBLE_DEVICES=0,2,3 + +# Optional: Computer Container Configuration +# NVIDIA driver type: Tesla, GeForce, or other +# NVIDIA_DRIVER=Tesla +# GPU device(s) visible to the computer container +# 
NVIDIA_VISIBLE_DEVICES=0 +# Specific GPU device ID(s) for docker deploy (e.g., '0' or '0,1') +# NVIDIA_DEVICE_ID=0 +# Render type: gpu or software +# RENDER_TYPE=gpu +# User account to create in the container: infant or root +# CREATE_USER_ACCOUNT=infant +# Workspace mount path on host (default: ./workspace) +# WORKSPACE_MOUNT_PATH=./workspace +# Cache directory on host (default: ./cache) +# CACHE_DIR=./cache + +# Optional: Other API Keys (if using different models) +# OPENAI_API_KEY=your_openai_key_here +# GOOGLE_API_KEY=your_google_key_here + +# Optional: Custom Model Configuration +# Uncomment and modify if you want to use different models +# MODEL=claude-sonnet-4-6 +# MODEL_OSS=ByteDance-Seed/UI-TARS-1.5-7B + +# Optional: Agent Configuration +# MAX_ITERATIONS=100 +# MAX_BUDGET_PER_TASK=20 +# DEBUG=false + +# Optional: Network Configuration +# AGENT_PORT=8000 +# GUI_PORT=4443 # Guacamole web interface port (maps to container's 8080) +# SSH_PORT=58673 # SSH port for computer container (maps to container's 22) + +# Optional: Timezone +# TZ=America/New_York diff --git a/DOCKER_SETUP.md b/DOCKER_SETUP.md new file mode 100644 index 0000000..b16b457 --- /dev/null +++ b/DOCKER_SETUP.md @@ -0,0 +1,444 @@ +# Docker Setup Guide for InfantAgent + +This guide explains how to run InfantAgent using Docker and Docker Compose. + +## Prerequisites + +- Docker Engine 20.10+ with Docker Compose V2 +- NVIDIA Docker runtime (nvidia-docker2) - only required for GPU-accelerated components +- NVIDIA GPU with CUDA support (minimum 3 GPUs recommended for full setup) + - 2 GPUs for vLLM server (UI-TARS model with tensor parallelism) + - 1 GPU for computer container (display rendering) + - InfantAgent server runs on CPU (no GPU needed) + - Can work with fewer GPUs by adjusting configuration +- At least 32GB RAM (64GB recommended) +- 100GB free disk space (for model downloads and workspace) + +## Quick Start + +### 1. 
Clone the Repository + +```bash +git clone https://github.com/bin123apple/InfantAgent.git +cd InfantAgent +``` + +### 2. Set Environment Variables + +Create a `.env` file in the project root: + +```bash +# Copy the example file +cp .env.example .env + +# Edit with your values +nano .env +``` + +Required variables: +```bash +# Required: Your Anthropic API key +ANTHROPIC_API_KEY=your_api_key_here + +# Optional but recommended: Hugging Face token for UI-TARS model +HUGGING_FACE_HUB_TOKEN=your_huggingface_token_here + +# Optional: Specify which GPUs to use (default: 0,1,2,3) +CUDA_VISIBLE_DEVICES=0,1,2,3 +``` + +### 3. Build and Start Services + +```bash +# Build both containers +docker-compose build + +# Start all services +docker-compose up -d + +# View logs +docker-compose logs -f +``` + +### 4. Access the Services + +- **InfantAgent API**: http://localhost:8000 +- **vLLM Server API**: http://localhost:8001/v1 (OpenAI-compatible API) +- **Guacamole Desktop**: http://localhost:4443/guacamole/#/client/GNOME + - Username: `web` + - Password: `web` + - Desktop credentials: `infant` / `123` + +## Architecture + +The Docker Compose setup includes three main services: + +### 1. vllm-server (OSS Model Server) +- Hosts the UI-TARS-1.5-7B model using vLLM +- OpenAI-compatible API endpoint +- Tensor parallelism across 2 GPUs for faster inference +- Exposed on port 8001 +- Automatic model download from Hugging Face + +### 2. infant-agent (Main Server) +- Runs the FastAPI backend server +- Handles agent logic and orchestration +- Connects to vLLM server for OSS model inference +- Runs on CPU (no GPU required) +- Exposed on port 8000 + +### 3. 
computer-container (Desktop Environment) +- Ubuntu 22.04 with GNOME desktop +- Accessible via Guacamole web interface +- SSH access on port 22222 +- Shared workspace with agent server + +## Configuration + +### GPU Configuration + +By default, the setup uses 3 GPUs: +- **GPU 2,3**: vLLM server (UI-TARS-1.5-7B model with tensor parallelism) +- **GPU 0**: Computer container (display rendering) +- **InfantAgent server**: Runs on CPU (no GPU needed) + +**For systems with fewer GPUs:** + +If you only have 2 GPUs, modify `docker-compose.yaml`: + +```yaml +# vLLM server - use single GPU (--tensor-parallel-size changed from 2 to 1) +vllm-server: + environment: + - CUDA_VISIBLE_DEVICES=1 + command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 1 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['1'] # Single GPU + +# Computer - use first GPU +computer-container: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + +# InfantAgent - no GPU needed (runs on CPU) +``` + +If you only have 1 GPU (vLLM only, no desktop GUI): +```yaml +# vLLM server - use the only GPU +vllm-server: + environment: + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + +# Computer container - disable or run without GPU +computer-container: + # Comment out the deploy.resources section to run without GPU + # Or don't start it: docker-compose up -d vllm-server infant-agent +``` + +### Workspace Volumes + +The `./workspace` directory is shared between: +- Host machine: `./workspace` +- Agent server: `/app/workspace` +- Computer container: `/workspace` + +Files created in any location are accessible from all others. + +### Config Updates + +To update the configuration without rebuilding: + +1. Edit `config.toml` +2. 
Restart the agent service: + ```bash + docker-compose restart infant-agent + ``` + +## Common Commands + +```bash +# Start services +docker-compose up -d + +# Stop services +docker-compose down + +# View logs +docker-compose logs -f infant-agent +docker-compose logs -f vllm-server +docker-compose logs -f computer-container + +# Restart a specific service +docker-compose restart infant-agent + +# Check vLLM server status and model loading progress +docker-compose logs -f vllm-server | grep -i "model\|loading\|ready" + +# Rebuild after code changes +docker-compose build --no-cache infant-agent +docker-compose up -d + +# Access agent container shell +docker exec -it infant-agent-server bash + +# Access computer container shell +docker exec -it infant-computer bash + +# Monitor resource usage +docker stats +``` + +## Troubleshooting + +### GPU Not Detected + +Verify NVIDIA Docker runtime is installed: + +```bash +docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi +``` + +If this fails, install nvidia-docker2: + +```bash +# Ubuntu/Debian +distribution=$(. /etc/os-release;echo $ID$VERSION_ID) +curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - +curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list +sudo apt-get update && sudo apt-get install -y nvidia-docker2 +sudo systemctl restart docker +``` + +### Container Won't Start + +Check logs for errors: + +```bash +docker-compose logs computer-container +``` + +Common issues: +- Port conflicts: Change ports in `docker-compose.yaml` +- Insufficient memory: Increase Docker memory limit +- Missing GPU: Check `nvidia-smi` output + +### Connection Refused to Guacamole + +The computer container takes 1-2 minutes to fully initialize: + +1. Check container status: `docker-compose ps` +2. Wait for health check to pass +3. 
Try accessing: http://localhost:4443/guacamole/ + +### vLLM Server Issues + +**Model fails to download:** +- Check Hugging Face token is set: `echo $HUGGING_FACE_HUB_TOKEN` +- Verify internet connection +- Check disk space: `df -h` +- View download progress: `docker-compose logs -f vllm-server` + +**Out of memory errors:** +- Reduce `--gpu-memory-utilization` from 0.9 to 0.7 +- Use single GPU instead of tensor parallelism +- Reduce `--max-model-len` from 8192 to 4096 + +**vLLM server not responding:** +```bash +# Check if model is loaded +curl http://localhost:8001/v1/models + +# Test inference +curl http://localhost:8001/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "ByteDance-Seed/UI-TARS-1.5-7B", + "prompt": "Hello, how are you?", + "max_tokens": 50 + }' +``` + +The vLLM server takes 2-5 minutes to download and load the model on first startup. + +### Build Failures + +Clear Docker cache and rebuild: + +```bash +docker-compose down -v +docker system prune -a +docker-compose build --no-cache +docker-compose up -d +``` + +## Production Deployment + +For production use, consider: + +1. **Use docker-compose.prod.yaml** with: + - Resource limits + - Logging configuration + - Secrets management + - Network security + +2. **Enable HTTPS** for Guacamole: + - Use a reverse proxy (nginx/traefik) + - Configure SSL certificates + +3. **Persistent Storage**: + - Use named volumes instead of bind mounts + - Regular backup of workspace data + +4. 
**Monitoring**: + - Add Prometheus exporters + - Configure health check endpoints + - Set up alerting + +## Advanced Configuration + +### Custom Build Args + +Build with a custom Python version (the Dockerfile currently hard-codes `python:3.11-slim-bookworm` and declares no `PYTHON_VERSION` build argument, so add `ARG PYTHON_VERSION` to it first): + +```bash +docker-compose build --build-arg PYTHON_VERSION=3.11 infant-agent +``` + +### Multi-GPU Setup + +For systems with 4+ GPUs, distribute workload (note: `infant-agent` runs on CPU by default, so reserve GPUs for it only if you run GPU workloads inside it): + +```yaml +infant-agent: + environment: + - CUDA_VISIBLE_DEVICES=0,1,2,3 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 4 + capabilities: [gpu] +``` + +### Using Different OSS Models + +To use a different model with vLLM, modify the `docker-compose.yaml` (change `--model`, and adjust `--tensor-parallel-size` to the model size): + +```yaml +vllm-server: + command: > + --model meta-llama/Llama-3-8B + --host 0.0.0.0 + --port 8000 + --tensor-parallel-size 1 + --max-model-len 4096 + --gpu-memory-utilization 0.9 + --trust-remote-code +``` + +Popular models: +- `meta-llama/Llama-3-8B` +- `mistralai/Mistral-7B-v0.1` +- `Qwen/Qwen2-7B` +- `ByteDance-Seed/UI-TARS-1.5-7B` (default, optimized for UI tasks) + +### vLLM Performance Tuning + +**For faster inference** (prefix caching reuses common prefixes, chunked prefill processes long prompts efficiently, and a higher `--max-num-seqs` increases the batch size): +```yaml +command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --enable-prefix-caching + --enable-chunked-prefill + --max-num-seqs 16 +``` + +**For lower memory usage** (utilization reduced from 0.9, context length reduced from 8192, and AWQ quantization if the model supports it): +```yaml +command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --gpu-memory-utilization 0.7 + --max-model-len 4096 + --quantization awq +``` + +### Development Mode + +Mount source code for live reloading: + +```yaml +infant-agent: + volumes: + - ./infant:/app/infant + - ./backend.py:/app/backend.py + command: uvicorn backend:app --reload --host 0.0.0.0 --port 8000 +``` + +### Running Without vLLM (API-only mode) + +If you want to use only commercial APIs (no OSS models): + +```bash +# Start without vLLM server +docker-compose up -d 
computer-container infant-agent + +# Or comment out vllm-server in docker-compose.yaml +``` + +Update `config.toml`: +```toml +use_oss_llm = false +``` + +## Maintenance + +### Update to Latest Version + +```bash +cd InfantAgent +git pull origin main +docker-compose build --no-cache +docker-compose up -d +``` + +### Clean Up Resources + +```bash +# Remove stopped containers +docker-compose down + +# Remove all data (including volumes) +docker-compose down -v + +# Clean up unused Docker resources +docker system prune -a --volumes +``` + +## Support + +For issues and questions: +- GitHub Issues: https://github.com/bin123apple/InfantAgent/issues +- Discord: https://discord.gg/urxApEGcwV +- Documentation: https://github.com/bin123apple/InfantAgent + +## License + +This project is licensed under the MIT License. See LICENSE file for details. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e32a82d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,58 @@ +# Dockerfile for InfantAgent Server +# Build from project root: docker build -t infant-agent:latest -f Dockerfile . +FROM python:3.11-slim-bookworm + +# Prevent interactive prompts during installation +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 + +# Install system dependencies including OpenCV requirements +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + ca-certificates \ + build-essential \ + git \ + libgl1 \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender-dev \ + openssh-client \ + libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +# Download and install uv +ADD https://astral.sh/uv/install.sh /uv-installer.sh +RUN sh /uv-installer.sh && rm /uv-installer.sh +ENV PATH="/root/.local/bin/:$PATH" + +# Create working directory +WORKDIR /app + +# Copy dependency files first for better layer caching +COPY pyproject.toml ./ + +# Create virtual environment and install dependencies +RUN uv venv && \ + uv pip install -e . 
+ +# Copy application code +COPY infant ./infant +COPY config.toml ./ +COPY backend.py ./ +COPY frontend ./frontend + +# Activate virtual environment +ENV PATH="/app/.venv/bin:$PATH" +ENV VIRTUAL_ENV="/app/.venv" + +# Create necessary directories +RUN mkdir -p /tmp/cache /tmp/file_store /app/workspace && \ + chmod 777 /tmp/cache /tmp/file_store /app/workspace + +# Expose the backend port +EXPOSE 8008 + +# Run the web server +CMD ["python", "backend.py"] + \ No newline at end of file diff --git a/Dockerfile_vllm b/Dockerfile_vllm new file mode 100644 index 0000000..fa74679 --- /dev/null +++ b/Dockerfile_vllm @@ -0,0 +1,11 @@ +FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 + +RUN apt-get update && apt-get install -y \ + curl \ + python3 python3-pip \ + && rm -rf /var/lib/apt/lists/* + +RUN pip3 install vllm + + +CMD ["vllm", "serve", "ByteDance-Seed/UI-TARS-1.5-7B", "--host", "0.0.0.0", "--port", "8005", "--max-model-len", "8192", "--gpu-memory-utilization", "0.8", "--dtype", "float16", "--api-key", "infant"] diff --git a/README.md b/README.md index 1afc00e..5fedd24 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,134 @@ Weโve switched from NoMachine to the open-source Guacamole for desktop sharing Now, it is only tested on `linux` server with `Nvidia Tesla GPU (A100, H200 ...)`. The GPU is for open-spurce model inference. There may be some bugs for Mac/Windows. -## Setup +## Setup + +### Option 1: Docker Compose Setup (Recommended) + +This is the recommended approach for running InfantAgent as a complete containerized system with all services. + +1. **Prerequisites** + - Docker and Docker Compose installed + - NVIDIA GPU with drivers installed (for vLLM inference) + - NVIDIA Container Toolkit installed + +2. 
**Configure Environment Variables** + + Create a `.env` file in the project root: + ```bash + cd InfantAgent + cp .env.example .env + ``` + + Edit `.env` and add your API keys: + ```bash + # Required: Claude API key for the agent + ANTHROPIC_API_KEY=your_anthropic_api_key_here + + # Optional: Hugging Face token for downloading models + HF_API_KEY=your_huggingface_token_here + + # Optional: Custom port configuration + SSH_PORT=63710 + GUI_PORT=4443 + ``` + +3. **Build and Start All Services** + ```bash + # Build all containers (first time only) + docker compose build + + # Start all services in background + docker compose up -d + + # View logs + docker compose logs -f + + # View agent logs specifically + docker compose logs -f infant-agent-cli + ``` + +4. **Access the Services** + - **Guacamole Web Desktop**: http://localhost:4443/guacamole/ + - Login: `web` / `web` + - Connection: Click "GNOME Desktop (RDP)" + - **SSH to Computer Container**: `ssh infant@localhost -p 63710` (password: `123`) + - **RDP Direct Access**: `localhost:3389` (username: `infant`, password: `123`) + - **vLLM Server**: http://localhost:8005 (for OSS model inference) + +5. **Managing Containers** + ```bash + # Stop all services + docker compose down + + # Restart specific service + docker compose restart infant-agent-cli + + # Rebuild after code changes + docker compose down + docker compose build infant-agent + docker compose up -d + + # View running containers + docker compose ps + + # Execute commands in agent container + docker exec -it infant-agent-cli bash + ``` + +6. 
**Interacting with the Agent** + + The agent is ready to receive prompts once you see the log: + ``` + INFO: Current working directory: /workspace + Input your request or use type exit to refresh the agent: + ``` + + **Recommended Method - Direct Attach:** + ```bash + # Attach to the running container + docker attach infant-agent-cli + + # Type your prompt and press Enter + # Example: "Create a Python script to analyze data.csv" + + # To detach without stopping, press: Ctrl+P then Ctrl+Q + ``` + + **Alternative Methods:** + ```bash + # Send prompt via pipe (unreliable: `--no-stdin` stops docker attach from forwarding STDIN; prefer direct attach above) + echo "Your task here" | docker attach --no-stdin --sig-proxy=false infant-agent-cli + + # View logs in real-time + docker logs -f infant-agent-cli + + # Using provided scripts (may have limitations) + python3 send_prompt.py "Create a Python script" + ./agent_cli.sh logs -f + ``` + + **For detailed usage instructions, see [USAGE.md](USAGE.md)** + +7. **Troubleshooting** + ```bash + # Check service health + docker compose ps + + # View detailed logs + docker compose logs infant-computer + docker compose logs vllm-server + + # Restart unhealthy services + docker compose restart + + # Clean up and rebuild + docker compose down -v # Remove volumes + docker compose build --no-cache + docker compose up -d + ``` + +### Option 2: Manual Setup (Advanced) 1. Setup environment ``` diff --git a/VLLM_SETUP.md b/VLLM_SETUP.md new file mode 100644 index 0000000..4d98865 --- /dev/null +++ b/VLLM_SETUP.md @@ -0,0 +1,411 @@ +# vLLM Server Setup Guide + +This guide focuses on the vLLM server component for hosting the UI-TARS-1.5-7B model. + +## Quick Start + +The vLLM server is automatically started when you run `docker-compose up -d`. It serves the UI-TARS-1.5-7B model via an OpenAI-compatible API. 
+ +## Configuration + +### Default Settings + +The vLLM server is configured in [docker-compose.yaml](docker-compose.yaml): + +```yaml +vllm-server: + image: vllm/vllm-openai:latest + ports: + - "8001:8000" + environment: + - CUDA_VISIBLE_DEVICES=2,3 + command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 2 + --max-model-len 8192 + --gpu-memory-utilization 0.9 +``` + +### GPU Requirements + +- **Minimum**: 1x GPU with 16GB VRAM +- **Recommended**: 2x GPUs with 24GB VRAM each (for tensor parallelism) +- **Model Size**: ~14GB (7B parameters) + +### Environment Variables + +Set in your `.env` file: + +```bash +# Optional but recommended for faster downloads +HUGGING_FACE_HUB_TOKEN=your_token_here + +# GPU allocation (adjust based on your system) +CUDA_VISIBLE_DEVICES=0,1,2,3 +``` + +## Accessing the API + +### Health Check + +```bash +curl http://localhost:8001/health +``` + +### List Models + +```bash +curl http://localhost:8001/v1/models +``` + +### Test Inference + +```bash +curl http://localhost:8001/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "ByteDance-Seed/UI-TARS-1.5-7B", + "prompt": "Click on the login button", + "max_tokens": 100, + "temperature": 0.7 + }' +``` + +### Chat Completions + +```bash +curl http://localhost:8001/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "ByteDance-Seed/UI-TARS-1.5-7B", + "messages": [ + {"role": "user", "content": "What should I click to login?"} + ], + "max_tokens": 100 + }' +``` + +## Integration with InfantAgent + +The InfantAgent server automatically connects to vLLM via the environment variable: + +```yaml +infant-agent: + environment: + - VLLM_BASE_URL=http://vllm-server:8000/v1 +``` + +In your `config.toml`, set: + +```toml +use_oss_llm = true +base_url_oss = "http://vllm-server:8000/v1" +model_oss = "ByteDance-Seed/UI-TARS-1.5-7B" +``` + +## Performance Tuning + +### For 2 GPUs (Recommended) + +```yaml +vllm-server: + environment: 
+ - CUDA_VISIBLE_DEVICES=2,3 + command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 2 + --max-model-len 8192 + --gpu-memory-utilization 0.9 + --disable-custom-all-reduce +``` + +### For 1 GPU (Minimum) + +```yaml +vllm-server: + environment: + - CUDA_VISIBLE_DEVICES=1 + command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 1 + --max-model-len 4096 + --gpu-memory-utilization 0.85 +``` + +### High Throughput Mode + +```yaml +command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 2 + --max-model-len 8192 + --max-num-seqs 32 + --enable-prefix-caching + --enable-chunked-prefill +``` + +### Low Memory Mode + +```yaml +command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 1 + --max-model-len 2048 + --gpu-memory-utilization 0.7 + --max-num-seqs 8 +``` + +## Using Alternative Models + +### Llama 3 8B + +```yaml +vllm-server: + command: > + --model meta-llama/Llama-3-8B + --tensor-parallel-size 1 + --max-model-len 8192 +``` + +### Mistral 7B + +```yaml +vllm-server: + command: > + --model mistralai/Mistral-7B-v0.1 + --tensor-parallel-size 1 + --max-model-len 8192 +``` + +### Qwen 2 7B + +```yaml +vllm-server: + command: > + --model Qwen/Qwen2-7B + --tensor-parallel-size 1 + --max-model-len 8192 + --trust-remote-code +``` + +## Monitoring + +### View Logs + +```bash +# All logs +docker-compose logs -f vllm-server + +# Model loading progress +docker-compose logs -f vllm-server | grep -i "loading\|model\|ready" + +# Errors only +docker-compose logs vllm-server | grep -i "error\|fail" +``` + +### Check GPU Usage + +```bash +# From host +nvidia-smi + +# From container +docker exec vllm-server nvidia-smi +``` + +### Monitor Performance + +```bash +# Container stats +docker stats vllm-server + +# Request latency +time curl http://localhost:8001/v1/completions \ + -H "Content-Type: application/json" \ + -d '{"model": "ByteDance-Seed/UI-TARS-1.5-7B", "prompt": "test", "max_tokens": 10}' +``` + +## 
Troubleshooting + +### Model Download Fails + +**Issue**: Cannot download model from Hugging Face + +**Solutions**: +1. Set Hugging Face token: + ```bash + export HUGGING_FACE_HUB_TOKEN=your_token_here + docker-compose up -d vllm-server + ``` + +2. Pre-download model: + ```bash + # On host machine + huggingface-cli download ByteDance-Seed/UI-TARS-1.5-7B + + # Mount in docker-compose.yaml + volumes: + - ~/.cache/huggingface:/root/.cache/huggingface + ``` + +### Out of Memory (OOM) + +**Issue**: CUDA out of memory errors + +**Solutions**: +1. Reduce memory utilization: + ```yaml + --gpu-memory-utilization 0.7 # Down from 0.9 + ``` + +2. Reduce context length: + ```yaml + --max-model-len 4096 # Down from 8192 + ``` + +3. Use single GPU: + ```yaml + --tensor-parallel-size 1 # Down from 2 + ``` + +4. Reduce batch size: + ```yaml + --max-num-seqs 4 # Smaller batch + ``` + +### Slow Inference + +**Issue**: Requests taking too long + +**Solutions**: +1. Enable caching: + ```yaml + --enable-prefix-caching + ``` + +2. Increase batch size: + ```yaml + --max-num-seqs 16 + ``` + +3. Use tensor parallelism (if you have 2+ GPUs): + ```yaml + --tensor-parallel-size 2 + ``` + +### Server Not Responding + +**Issue**: Cannot connect to http://localhost:8001 + +**Check**: +```bash +# Container status +docker-compose ps vllm-server + +# Port binding +docker port vllm-server + +# Logs +docker-compose logs vllm-server + +# Health +curl http://localhost:8001/health +``` + +**Wait Time**: First startup takes 2-5 minutes to download and load the model. 
+ +### Model Not Found + +**Issue**: "Model not found" errors + +**Solution**: +Ensure the model name matches exactly: +```yaml +# Correct +--model ByteDance-Seed/UI-TARS-1.5-7B + +# Wrong +--model UI-TARS-1.5-7B +--model ByteDance/UI-TARS-1.5-7B +``` + +## Advanced Configuration + +### Custom Sampling Parameters + +```yaml +command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 2 + --max-model-len 8192 + --temperature 0.8 + --top-p 0.95 + --top-k 50 +``` + +### Quantization (Lower Memory) + +```yaml +command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --quantization awq + --tensor-parallel-size 1 +``` + +Note: Model must support quantization format. + +### Distributed Inference (4+ GPUs) + +```yaml +vllm-server: + environment: + - CUDA_VISIBLE_DEVICES=0,1,2,3 + command: > + --model ByteDance-Seed/UI-TARS-1.5-7B + --tensor-parallel-size 4 + --pipeline-parallel-size 1 +``` + +## API Documentation + +The vLLM server implements the OpenAI API specification: + +- **Completions**: `POST /v1/completions` +- **Chat Completions**: `POST /v1/chat/completions` +- **Models**: `GET /v1/models` +- **Health**: `GET /health` + +Full API docs: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html + +## Performance Benchmarks + +On 2x A100 40GB GPUs: +- **Cold Start**: 2-3 minutes (model download + loading) +- **Warm Start**: 30-60 seconds (model loading only) +- **Inference**: 20-50 tokens/second (depends on batch size) +- **Latency**: 100-300ms for first token + +## Best Practices + +1. **Pre-download models** before production deployment +2. **Use tensor parallelism** for models > 13B parameters +3. **Enable prefix caching** for repeated prompts +4. **Monitor GPU memory** and adjust utilization +5. **Set appropriate max-model-len** based on use case +6. 
**Use health checks** to ensure server readiness + +## Resources + +- vLLM Documentation: https://docs.vllm.ai/ +- UI-TARS Model: https://huggingface.co/ByteDance-Seed/UI-TARS-1.5-7B +- OpenAI API Spec: https://platform.openai.com/docs/api-reference +- InfantAgent Issues: https://github.com/bin123apple/InfantAgent/issues + +## Support + +For vLLM-specific issues: +- vLLM GitHub: https://github.com/vllm-project/vllm +- vLLM Discord: https://discord.gg/vllm + +For InfantAgent integration: +- GitHub Issues: https://github.com/bin123apple/InfantAgent/issues diff --git a/agent_cli.sh b/agent_cli.sh new file mode 100755 index 0000000..ddaad7b --- /dev/null +++ b/agent_cli.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# Simple CLI wrapper to interact with InfantAgent via Docker + +CONTAINER_NAME="infant-agent-cli" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Function to check if container is running +check_container() { + if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + echo -e "${RED}โ Error: Container '${CONTAINER_NAME}' is not running.${NC}" + echo -e "${YELLOW}Please start the containers with: docker compose up -d${NC}" + exit 1 + fi +} + +# Function to send prompt via stdin +send_prompt() { + local prompt="$1" + echo -e "${BLUE}๐ค Sending prompt to agent:${NC} $prompt" + echo -e "${YELLOW}โณ Agent is processing...${NC}" + echo "" + + # Send the prompt to the container's stdin + echo "$prompt" | docker attach --no-stdin "$CONTAINER_NAME" 2>/dev/null || \ + echo "$prompt" | docker exec -i "$CONTAINER_NAME" /bin/bash -c "cat" || \ + echo -e "${RED}Failed to send prompt. 
Try: docker exec -it $CONTAINER_NAME python -m infant${NC}" +} + +# Function to view logs +view_logs() { + echo -e "${BLUE}๐ Viewing agent logs...${NC}" + if [ "$1" = "-f" ]; then + docker logs -f "$CONTAINER_NAME" + else + docker logs --tail 100 "$CONTAINER_NAME" + fi +} + +# Function to show status +show_status() { + echo -e "${BLUE}๐ Container Status:${NC}" + docker ps --filter "name=$CONTAINER_NAME" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" +} + +# Function to enter interactive mode +interactive_mode() { + echo -e "${GREEN}๐ค Entering interactive mode with agent...${NC}" + docker exec -it "$CONTAINER_NAME" /bin/bash +} + +# Function to show help +show_help() { + cat << EOF +${GREEN}InfantAgent CLI - Docker Container Interface${NC} + +Usage: $0 [COMMAND] [OPTIONS] + +${YELLOW}Commands:${NC} + send "prompt" Send a prompt to the agent + logs View agent logs (last 100 lines) + logs -f Follow agent logs in real-time + status Show container status + shell Open interactive shell in container + restart Restart the agent container + help Show this help message + +${YELLOW}Examples:${NC} + $0 send "Create a Python script to analyze data.csv" + $0 logs -f + $0 status + $0 shell + +${YELLOW}Direct interaction:${NC} + For direct interaction with the agent, use: + docker exec -it ${CONTAINER_NAME} /bin/bash + # Then inside container: + # The agent is already running with input prompt + +${YELLOW}View logs while agent is running:${NC} + docker logs -f ${CONTAINER_NAME} +EOF +} + +# Main script logic +case "$1" in + send) + check_container + if [ -z "$2" ]; then + echo -e "${RED}โ Error: Please provide a prompt${NC}" + echo "Usage: $0 send \"your prompt here\"" + exit 1 + fi + send_prompt "$2" + ;; + logs) + check_container + view_logs "$2" + ;; + status) + check_container + show_status + ;; + shell) + check_container + interactive_mode + ;; + restart) + echo -e "${YELLOW}โณ Restarting agent container...${NC}" + docker restart "$CONTAINER_NAME" + echo -e "${GREEN}โ 
Container restarted${NC}" + ;; + help|--help|-h|"") + show_help + ;; + *) + echo -e "${RED}โ Unknown command: $1${NC}" + echo "" + show_help + exit 1 + ;; +esac diff --git a/backend.py b/backend.py index b4e5d37..d1a0fa3 100644 --- a/backend.py +++ b/backend.py @@ -28,7 +28,7 @@ app = FastAPI() upstream_http = httpx.AsyncClient(verify=False, follow_redirects=True) -upstream_aiohttp = ClientSession(connector=TCPConnector(ssl=False)) +upstream_aiohttp = None # Enable CORS app.add_middleware( @@ -41,6 +41,8 @@ @app.on_event("startup") async def startup_event(): + global upstream_aiohttp + upstream_aiohttp = ClientSession(connector=TCPConnector(ssl=False)) logger.info("[WS] connecting to {url}") @@ -73,14 +75,30 @@ async def shutdown_event(): logger.error(f"Error during cleanup: {str(e)}") # Redirect to frontend -@app.get("/") -async def root(): +@app.get("/", name="root_frontend") +async def root_frontend(): return RedirectResponse(url="/frontend/index.html") # Static files app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend") # Status API +@app.get("/api/guacamole-token") +async def guacamole_token(): + """Get a fresh Guacamole auth token for the iframe.""" + try: + resp = await upstream_http.post( + "http://infant-computer:8080/guacamole/api/tokens", + data={"username": "web", "password": "web"}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + if resp.status_code == 200: + data = resp.json() + return {"success": True, "token": data["authToken"]} + return {"success": False, "error": f"Guacamole returned {resp.status_code}"} + except Exception as e: + return {"success": False, "error": str(e)} + @app.get("/api/status") async def status(): if agent: @@ -88,7 +106,7 @@ async def status(): "success": True, "status": "ready", "currentTask": "none", - "model": agent._planning_llm.model, + "model": agent.planning_llm.model_name, "sessionActive": True, } return {"success": True, "status": "ready", "currentTask": "none", "model": 
"demo", "sessionActive": False} @@ -100,8 +118,12 @@ async def chat(data: dict): if not user_message: return {"success": False, "error": "No message provided"} if agent and run_single_step: - response = await run_single_step(agent, user_message) - return {"success": True, "response": response, "status": "completed"} + try: + response = await run_single_step(agent, user_message) + return {"success": True, "response": response, "status": "completed"} + except Exception as e: + logger.error(f"Error in chat: {e}") + return {"success": False, "error": str(e)} await asyncio.sleep(1) return {"success": True, "response": f"Demo mode: Received '{user_message}'", "status": "completed"} @@ -121,16 +143,26 @@ async def reset(): @app.post("/api/settings") async def settings(data: dict): global config, agent, computer - config.model = data.get('model') - config.api_key = data.get('apiKey') - config.temperature = float(data.get('temperature')) - config.max_tokens = int(data.get('maxTokens')) - print(agent) + # Load base config from config.toml first + try: + user_config = config._load() + config.__dict__.update(user_config) + except FileNotFoundError: + logger.warning("config.toml not found, using defaults") + # Apply frontend settings + if data.get('model'): + config.model = data.get('model') + if data.get('apiKey'): + config.api_key = data.get('apiKey') + if data.get('temperature') is not None: + config.temperature = float(data.get('temperature')) + if data.get('maxTokens') is not None: + config.max_output_tokens = int(data.get('maxTokens')) + # Finalize config (resolves paths, SSH settings, etc.) 
+ config.finalize_config() if agent: - # ๅฎ้ ๅฎ็ฐไธญๅบ่ฏฅๆดๆฐ agent ้ ็ฝฎ await agent.update_agent_config(config) return {"success": True, "message": "Agent updated", "appliedSettings": data} - await asyncio.sleep(0.5) return {"success": True, "message": "Agent initialized", "appliedSettings": data} @app.get("/api/initialize") @@ -433,26 +465,8 @@ def get_forward_params(request: Request): return params # โโโโโโโโโโโโโโ -# root: capture sid & redirect +# root: handled by root_frontend above # โโโโโโโโโโโโโโ -@app.get("/") -async def root(request: Request): - logger.debug(f"[ROOT] incoming path={request.url.path} query={request.url.query!r}") - qs = request.url.query - target = "/gui/" + (f"?{qs}" if qs else "") - logger.debug(f"[ROOT] redirecting to {target}") - resp = RedirectResponse(target) - if "sid" in request.query_params: - sid_val = request.query_params["sid"] - logger.debug(f"[ROOT] setting cookie sid={sid_val!r}") - resp.set_cookie( - key="sid", - value=sid_val, - httponly=True, - secure=False, # allow HTTP - ) - logger.debug("[ROOT] response prepared") - return resp # โโโโโโโโโโโโโโ # SSE proxy @@ -462,7 +476,7 @@ async def sse(request: Request): params = get_forward_params(request) logger.debug(f"[SSE] path={request.url.path} params={params} cookies={dict(request.cookies)}") upstream = await upstream_aiohttp.get( - f"https://localhost:4443{request.url.path}", + f"https://infant-computer:8080{request.url.path}", params=params, ssl=False ) @@ -481,7 +495,7 @@ async def gen(): async def ws_proxy(ws: WebSocket): await ws.accept() sid = ws.query_params.get("sid") or ws.cookies.get("sid") - url = f"wss://localhost:4443{ws.url.path}" + (f"?sid={sid}" if sid else "") + url = f"wss://infant-computer:8080{ws.url.path}" + (f"?sid={sid}" if sid else "") logger.debug(f"[WS] connecting to {url}") upstream = await upstream_aiohttp.ws_connect(url, ssl=False) logger.debug("[WS upstream] connected") @@ -515,7 +529,7 @@ async def to_client(): 
@app.api_route("/gui/{full_path:path}", methods=["GET","POST","HEAD","OPTIONS"]) async def gui_proxy(request: Request, full_path: str): params = get_forward_params(request) - upstream_url = f"https://localhost:4443/gui/{full_path}" + upstream_url = f"https://infant-computer:8080/gui/{full_path}" logger.debug(f"[HTTP] {request.method} {upstream_url} params={params} headers={dict(request.headers)}") resp_up = await upstream_http.request( @@ -555,12 +569,160 @@ async def gui_proxy(request: Request, full_path: str): media_type=resp_up.headers.get("content-type"), ) +# โโโโโโโโโโโโโโ +# Guacamole HTTP proxy +# โโโโโโโโโโโโโโ +@app.api_route("/guacamole/{full_path:path}", methods=["GET","POST","PUT","DELETE","HEAD","OPTIONS"]) +async def guacamole_proxy(request: Request, full_path: str): + params = get_forward_params(request) + upstream_url = f"http://infant-computer:8080/guacamole/{full_path}" + logger.debug(f"[guacamole] {request.method} {upstream_url} params={params}") + + body = await request.body() + fwd_headers = { + k: v for k, v in request.headers.items() + if k.lower() not in ("host", "content-length", "transfer-encoding") + } + resp_up = await upstream_http.request( + method=request.method, + url=upstream_url, + params=params, + content=body, + headers=fwd_headers, + ) + logger.debug(f"[guacamole upstream] status={resp_up.status_code}") + # httpx has already decoded resp_up.content, so drop the upstream framing/encoding headers and let Starlette recompute Content-Length. + headers = { + k: v for k, v in resp_up.headers.multi_items() + if k.lower() not in ("x-frame-options", "content-security-policy", "set-cookie", "content-encoding", "content-length", "transfer-encoding", "connection") + } + + # Rewrite Set-Cookie: drop the upstream Domain (so the cookie attaches to our + # proxy host), but preserve SameSite/Secure/HttpOnly/Path as set by Guacamole. 
+ cookies = resp_up.headers.get_list("set-cookie") + rewritten = [] + for raw in cookies: + c = SimpleCookie() + try: + c.load(raw) + except Exception: + rewritten.append(raw) + continue + for morsel in c.values(): + parts = [f"{morsel.key}={morsel.value}"] + parts.append(f"Path={morsel['path'] or '/'}") + if morsel["expires"]: + parts.append(f"Expires={morsel['expires']}") + if morsel["max-age"]: + parts.append(f"Max-Age={morsel['max-age']}") + if morsel["samesite"]: + parts.append(f"SameSite={morsel['samesite']}") + if morsel["secure"]: + parts.append("Secure") + if morsel["httponly"]: + parts.append("HttpOnly") + rewritten.append("; ".join(parts)) + if rewritten: + # Starlette's Response needs a MutableHeaders to send multiple Set-Cookie lines. + resp = Response( + content=resp_up.content, + status_code=resp_up.status_code, + headers=headers, + media_type=resp_up.headers.get("content-type"), + ) + for ck in rewritten: + resp.raw_headers.append((b"set-cookie", ck.encode("latin-1"))) + return resp + + return Response( + content=resp_up.content, + status_code=resp_up.status_code, + headers=headers, + media_type=resp_up.headers.get("content-type"), + ) + +# โโโโโโโโโโโโโโ +# Guacamole WebSocket proxy +# โโโโโโโโโโโโโโ +@app.websocket("/guacamole/websocket-tunnel") +async def guacamole_ws_proxy(ws: WebSocket): + # Guacamole's guacamole-common-js WebSocketTunnel connects with subprotocol + # "guacamole". The server MUST echo it back or the browser fails the handshake. 
+ requested_protocols = ws.headers.get("sec-websocket-protocol", "") + client_protocols = [p.strip() for p in requested_protocols.split(",") if p.strip()] + selected = "guacamole" if "guacamole" in client_protocols else (client_protocols[0] if client_protocols else None) + + # Forward the raw query string verbatim: rebuilding it from dict(ws.query_params) drops repeated keys and loses percent-encoding. + qs = ws.url.query + url = f"ws://infant-computer:8080/guacamole/websocket-tunnel" + (f"?{qs}" if qs else "") + logger.debug(f"[guacamole WS] connecting to {url} protocols={client_protocols}") + + # Forward the browser's Cookie header so upstream session state stays consistent. + upstream_headers = {} + cookie = ws.headers.get("cookie") + if cookie: + upstream_headers["Cookie"] = cookie + + upstream = await upstream_aiohttp.ws_connect( + url, + ssl=False, + protocols=client_protocols or (), + headers=upstream_headers or None, + ) + logger.debug(f"[guacamole WS] connected upstream protocol={upstream.protocol}") + + # Prefer the subprotocol the upstream actually selected; fall back to our guess. 
+ accept_protocol = upstream.protocol or selected + await ws.accept(subprotocol=accept_protocol) + + closed = asyncio.Event() + + async def up_to_client(): + try: + async for msg in upstream: + if closed.is_set(): + break + if msg.type == WSMsgType.TEXT: + await ws.send_text(msg.data) + elif msg.type == WSMsgType.BINARY: + await ws.send_bytes(msg.data) + elif msg.type in (WSMsgType.CLOSE, WSMsgType.CLOSING, WSMsgType.CLOSED): + break + except Exception: + pass + finally: + closed.set() + + async def client_to_up(): + try: + while not closed.is_set(): + m = await ws.receive() + if m["type"] == "websocket.receive": + if "text" in m: + await upstream.send_str(m["text"]) + else: + await upstream.send_bytes(m["bytes"]) + else: + break + except (WebSocketDisconnect, Exception): + pass + finally: + closed.set() + + try: + await asyncio.gather(up_to_client(), client_to_up()) + except Exception: + pass + finally: + if not upstream.closed: + await upstream.close() + # โโโโโโโโโโโโโโ # proxy for /nxplayer/* (Web Player assets) # โโโโโโโโโโโโโโ @app.api_route("/nxplayer/{full_path:path}", methods=["GET","HEAD","OPTIONS"]) async def nxplayer_proxy(request: Request, full_path: str): - upstream_url = f"https://localhost:4443/nxplayer/{full_path}" + upstream_url = f"https://infant-computer:8080/nxplayer/{full_path}" logger.debug(f"[nxplayer] {request.method} {upstream_url}") resp_up = await upstream_http.request( method=request.method, @@ -582,5 +744,6 @@ async def nxplayer_proxy(request: Request, full_path: str): ) if __name__ == "__main__": import uvicorn - print("Starting server on http://localhost:8001") - uvicorn.run("backend:app", host="0.0.0.0", port=8001, reload=True) + port = int(os.getenv("BACKEND_PORT", "8008")) + print(f"Starting server on http://localhost:{port}") + uvicorn.run("backend:app", host="0.0.0.0", port=port, reload=True) diff --git a/config.toml b/config.toml index dc62505..48801f4 100644 --- a/config.toml +++ b/config.toml @@ -1,31 +1,31 @@ # Optional, if 
not provided, the Global LLM will be used +# api_key is inherited from ANTHROPIC_API_KEY env var — do NOT set empty api_key here [planning_llm] -model = "claude-sonnet-4-20250514" -api_key = "" # YOUR API KEY +model = "claude-sonnet-4-6" # Optional, if not provided, the Global LLM will be used [classification_llm] -model = "claude-sonnet-4-20250514" -api_key = "" # YOUR API KEY +model = "claude-sonnet-4-6" # Optional, if not provided, the Global LLM will be used [execution_llm] -model = "claude-sonnet-4-20250514" -api_key = "" # YOUR API KEY +model = "claude-sonnet-4-6" -# Optional, Visual Grounding model, if not provided, the default VG model will be used +# Visual Grounding model served by the vLLM container (OpenAI-compatible API). +# The "openai/" prefix tells litellm to use the OpenAI provider with our custom base_url. [vg_llm] -model_oss = "ByteDance-Seed/UI-TARS-1.5-7B" +model_oss = "openai/ByteDance-Seed/UI-TARS-1.5-7B" +base_url_oss = "http://vllm-server:8005/v1" +api_key_oss = "infant" # Optional, File Editing model, if not provided, the default model will be used [fe_llm] -model = "claude-sonnet-4-20250514" +model = "claude-sonnet-4-6" # Optional, Toolmaker model, if not provided, the default model will be used [tm_llm] -model = "claude-sonnet-4-20250514" +model = "claude-sonnet-4-6" # Audio parsing model, if not provided, the default model will be used [ap_llm] model = "gpt-4o-audio-preview" -api_key = "" # YOUR API KEY \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..76938b7 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,163 @@ +version: '3.8' + +services: + # vLLM server for OSS model inference + + vllm-server: + build: + context: . 
+ dockerfile: Dockerfile_vllm + container_name: vllm-server + restart: unless-stopped + ports: + - "8005:8005" + environment: + - HUGGING_FACE_HUB_TOKEN=${HF_API_KEY:-} + volumes: + # Cache Hugging Face models + - ${HF_HOME:-huggingface-cache}:/root/.cache/huggingface + networks: + - infant-network + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['4'] + capabilities: [gpu, utility, compute, graphics] + + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:8005/health || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 90s + + # InfantAgent main CLI application + infant-agent: + build: + context: . + dockerfile: Dockerfile + image: infant-agent:latest + container_name: infant-agent-cli + restart: unless-stopped + stdin_open: true # Enable interactive mode (docker run -i) + tty: true # Allocate a pseudo-TTY (docker run -t) + ports: + - "${BACKEND_PORT:-8008}:8008" + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - PYTHONUNBUFFERED=1 + - PYTHONIOENCODING=utf-8 + # vLLM server configuration + - base_url_oss=http://vllm-server:8005/v1 + - api_key_oss=infant + # Default must match the infant-computer service name so it resolves on infant-network + - SSH_HOSTNAME=${SSH_HOSTNAME:-infant-computer} + - SSH_PORT=${SSH_PORT:-63710} + - SSH_USERNAME=${SSH_USERNAME:-infant} + - SSH_PASSWORD=${SSH_PASSWORD:-123} + volumes: + # Mount workspace for persistent data + - ./workspace:/app/workspace + # Mount cache directory + - ./cache:/tmp/cache + # Mount config for easy updates + - ./config.toml:/app/config.toml:ro + # Mount Docker socket for container management + - /var/run/docker.sock:/var/run/docker.sock + networks: + - infant-network + depends_on: + - infant-computer + - vllm-server + + # Computer container with desktop environment + infant-computer: + build: + context: . 
+ dockerfile: infant/computer/Dockerfile + image: ubuntu-gnome-guacamole:22.04 + container_name: infant-computer + restart: unless-stopped + ports: + # SSH port - maps container port 22 to host (computer.py line 1126) + - "${SSH_PORT:-63710}:22" + # Guacamole web interface - maps container port 8080 to host (computer.py line 1125) + - "${GUI_PORT:-4443}:8080" + # RDP port for remote desktop - maps container port 3389 (computer.py line 1127) + - "3389:3389" + environment: + # Display configuration (computer.py line 1135) + - DISPLAY=:0 + # User account creation - "infant" if run_as_infant, "root" otherwise (computer.py line 1131) + - CreateUserAccount=${CREATE_USER_ACCOUNT:-infant} + # Render type for graphics (computer.py line 1132) + - RenderType=${RENDER_TYPE:-gpu} + # NVIDIA driver type: Tesla, GeForce, etc. (computer.py line 1133) + - NvidiaDriver=${NVIDIA_DRIVER:-Tesla} + # GPU device visibility (computer.py line 1134) + - NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-0} + # Timezone (override with TZ in .env; defaults to America/New_York) + - TZ=${TZ:-America/New_York} + volumes: + # Shared workspace between agent and computer (computer.py line 1169) + - ${WORKSPACE_MOUNT_PATH:-./workspace}:/workspace + # Cache directory for infant user (computer.py line 1170) + - ${CACHE_DIR:-./cache}:/home/infant/.cache + networks: + - infant-network + # Shared memory size (computer.py line 1114) + shm_size: '2gb' + stdin_open: true + tty: true + # Privileged mode required for systemd and desktop (computer.py line 1111) + privileged: true + # User namespace mode (computer.py line 1112) + userns_mode: host + # IPC mode (computer.py line 1113) + ipc: host + # Capabilities required for systemd (computer.py line 1115) + cap_add: + - SYS_ADMIN + - SYS_BOOT + # TTY device access (computer.py line 1116) + devices: + - /dev/tty0 + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # device_ids: ['${NVIDIA_DEVICE_ID:-0}'] + # capabilities: [gpu, utility, compute, graphics] + # systemd init command (computer.py line 
1137) + command: ["/sbin/init", "-D", "-o", "PermitRootLogin=yes"] + healthcheck: + # Check Guacamole is responding on port 8080 (internal port) + test: ["CMD", "curl", "-f", "http://localhost:8080/guacamole/"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 120s + +networks: + infant-network: + driver: bridge + +volumes: + workspace: + driver: local + cache: + driver: local + huggingface-cache: + driver: local + + + +# docker run -it --gpus all \ +# -v HF_HOME:/root/.cache/huggingface \ +# -p 8005:8005 \ +# vllm/vllm-openai:latest \ +# --model ByteDance-Seed/UI-TARS-1.5-7B \ +# --dtype bfloat16 \ +# --gpu-memory-utilization 0.9 diff --git a/evaluation/gaia/run_inference.py b/evaluation/gaia/run_inference.py index aef3677..e2421f0 100644 --- a/evaluation/gaia/run_inference.py +++ b/evaluation/gaia/run_inference.py @@ -8,7 +8,7 @@ import concurrent.futures from infant.config import config, ComputerParams from infant.agent.agent import Agent -from infant.computer.computer import Computer +from infant.computer.computer import create_computer_from_params from infant.llm.llm_api_base import LLM_API_BASED from infant.llm.llm_oss_base import LLM_OSS_BASED from infant.agent.memory.memory import Userrequest, Finish, IPythonRun @@ -63,7 +63,7 @@ async def initialize_docker_agent(instance: dict, config=config)-> Agent: sid = str(uuid.uuid4()) try: - computer = Computer(computer_parameter, sid = sid) + computer = create_computer_from_params(computer_parameter, sid = sid) except: logger.error({traceback.format_exc()}) diff --git a/evaluation/swe_bench/run_inference.py b/evaluation/swe_bench/run_inference.py index 608b961..3aed5a1 100644 --- a/evaluation/swe_bench/run_inference.py +++ b/evaluation/swe_bench/run_inference.py @@ -14,7 +14,7 @@ from infant.config import config, ComputerParams from datasets import load_dataset from infant.agent.agent import Agent -from infant.computer.computer import Computer +from infant.computer.computer import 
create_computer_from_params from infant.llm.llm_api_base import LLM_API_BASED from infant.llm.llm_oss_base import LLM_OSS_BASED from infant.agent.memory.restore_memory import truncate_output @@ -153,7 +153,7 @@ async def initialize_docker_agent(instance: dict, config=config)-> Agent: sid = str(uuid.uuid4()) try: - computer = Computer(computer_parameter, sid = sid) + computer = create_computer_from_params(computer_parameter, sid = sid) except: logger.error({traceback.format_exc()}) diff --git a/frontend/.gitignore b/frontend/.gitignore new file mode 100644 index 0000000..a547bf3 --- /dev/null +++ b/frontend/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/frontend/README.md b/frontend/README.md new file mode 100644 index 0000000..7059a96 --- /dev/null +++ b/frontend/README.md @@ -0,0 +1,12 @@ +# React + Vite + +This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. + +Currently, two official plugins are available: + +- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) for Fast Refresh +- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh + +## Expanding the ESLint configuration + +If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project. 
diff --git a/frontend/css/styles.css b/frontend/css/styles.css index 6d568bf..9b6dc2b 100644 --- a/frontend/css/styles.css +++ b/frontend/css/styles.css @@ -1,564 +1,920 @@ -/* Global Styles */ -:root { - --primary-color: #4a6fa5; - --secondary-color: #6c757d; - --accent-color: #28a745; - --background-color: #f8f9fa; - --text-color: #333; - --border-color: #dee2e6; - --message-bg-user: #e9f5ff; - --message-bg-system: #f0f0f0; - --shadow: 0 4px 6px rgba(0, 0, 0, 0.1); -} - -* { - margin: 0; - padding: 0; - box-sizing: border-box; -} - -body { - font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; - line-height: 1.6; - color: var(--text-color); - background-color: var(--background-color); -} - -.container { - max-width: 1200px; - margin: 0 auto; - padding: 20px; - display: flex; - flex-direction: column; - min-height: 100vh; -} - -/* Header Styles */ -header { - background-color: var(--primary-color); - color: white; - padding: 15px 20px; - border-radius: 8px 8px 0 0; - box-shadow: var(--shadow); -} - -header h1 { - font-size: 1.8rem; - display: flex; - align-items: center; - gap: 10px; -} - -/* Main Content Styles */ -main { - display: grid; - grid-template-columns: 1fr 300px; - gap: 20px; - flex: 1; - margin: 20px 0; -} - -/* Chat Container Styles */ -.chat-container { - background-color: white; - border-radius: 8px; - box-shadow: var(--shadow); - display: flex; - flex-direction: column; - height: 70vh; -} +/* ========================================================================== + InfantAgent Dashboard โ Modern Design System + Inspired by shadcn/ui (neutral palette, clean borders, subtle shadows) + ========================================================================== */ +/* --- Design Tokens --- */ +:root { + --background: #fafafa; + --foreground: #09090b; + --card: #ffffff; + --card-foreground: #09090b; + --muted: #f4f4f5; + --muted-foreground: #71717a; + --border: #e4e4e7; + --input: #e4e4e7; + --primary: #18181b; + --primary-foreground: 
#fafafa; + --secondary: #f4f4f5; + --secondary-foreground: #18181b; + --accent: #f4f4f5; + --accent-foreground: #18181b; + --destructive: #ef4444; + --success: #22c55e; + --warning: #eab308; + --ring: #a1a1aa; + --radius: 0.5rem; + --font-sans: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", + Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji"; + --font-mono: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, + "Liberation Mono", monospace; +} + +/* --- Reset --- */ +*, +*::before, +*::after { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body, +html { + height: 100%; + font-family: var(--font-sans); + font-size: 14px; + line-height: 1.5; + color: var(--foreground); + background-color: var(--background); + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +/* ========================================================================== + Layout + ========================================================================== */ +.dashboard { + display: flex; + height: 100vh; + overflow: hidden; +} + +/* ========================================================================== + Chat Sidebar (left panel) + ========================================================================== */ +.chat-area { + flex: 0 0 25%; + display: flex; + flex-direction: column; + min-width: 300px; + max-width: 520px; + background: var(--card); + border-right: 1px solid var(--border); +} + +/* --- Header --- */ +.chat-area header { + display: flex; + justify-content: space-between; + align-items: center; + height: 56px; + padding: 0 16px; + border-bottom: 1px solid var(--border); + flex-shrink: 0; +} + +.chat-header-left { + display: flex; + align-items: center; + gap: 10px; +} + +.chat-header-left img { + width: 32px; + height: 32px; + border-radius: 8px; + object-fit: cover; +} + +.chat-area header h1 { + margin: 0; + font-size: 0.875rem; + font-weight: 600; + color: var(--foreground); + letter-spacing: -0.01em; +} + 
+.setting-btn { + display: inline-flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + border-radius: 6px; + background: transparent; + border: none; + color: var(--muted-foreground); + cursor: pointer; + font-size: 0.875rem; + transition: background-color 0.15s, color 0.15s; +} + +.setting-btn:hover { + background: var(--accent); + color: var(--accent-foreground); +} + +/* --- Chat Messages --- */ .chat-messages { - display: flex; - flex-direction: column; - padding: 0; /* ๅปๆๅคไฝๅ ่พน่ท */ - margin: 5px 0; - } + flex: 1; + overflow-y: auto; + padding: 16px; + display: flex; + flex-direction: column; + gap: 10px; + border: none; + border-radius: 0; + margin: 0; +} .chat-messages .message { - display: flex; /* ๅๆ flex ๅฎนๅจ */ - width: 100%; - box-sizing: border-box; - margin-bottom: 8px; - } -/* ็ถๅ่ฎฉๅ ๅฎนๅบๅ่ทๆปกๅฉไฝ็ฉบ้ด */ -.chat-messages .message .message-content { - flex: 1 1 auto; /* ๆไผธๅกซๆปก็ถๅฎนๅจ */ - max-width: 100%; - box-sizing: border-box; - } - -.message.user { - align-items: flex-end; + display: flex; + width: 100%; + margin-bottom: 0; } -/* ็ฐ่ฒๅ ๅฎนๅ้บๆปก parent */ -.chat-messages .message-content { - display: block; - width: 100%; - box-sizing: border-box; /* padding ไธไผๆ็ ดๅฎฝๅบฆ */ - background-color: #f0f0f0; /* ็ฐ่ฒ่ๆฏ */ - padding: 8px 12px; /* ๆ นๆฎๅๅฅฝ่ฐๆดๅ ่พน่ท */ - border-radius: 4px; - word-break: break-word; /* ๅ ๅฎนๅคช้ฟๅฏไปฅๆข่ก */ - } +.chat-messages .message .message-content { + flex: 1; + max-width: 100%; + border-radius: 12px; + padding: 10px 14px; + font-size: 0.8125rem; + line-height: 1.65; + word-break: break-word; + overflow-wrap: break-word; +} .message.system .message-content { - background-color: var(--message-bg-system); - border-radius: 12px 12px 12px 0; + background: var(--muted); + color: var(--foreground); + border-radius: 12px 12px 12px 4px; } .message.user .message-content { - background-color: var(--message-bg-user); - border-radius: 12px 12px 0 12px; - color: #333; + 
background: var(--primary); + color: var(--primary-foreground); + border-radius: 12px 12px 4px 12px; } .message-content p { - margin-bottom: 8px; + margin-bottom: 6px; } .message-content p:last-child { - margin-bottom: 0; + margin-bottom: 0; } -.user-input { - display: flex; - padding: 15px; - border-top: 1px solid var(--border-color); - background-color: #f8f9fa; - border-radius: 0 0 8px 8px; +.message-content pre { + background: #1e1e2e; + color: #cdd6f4; + padding: 10px 12px; + border-radius: 6px; + font-family: var(--font-mono); + font-size: 0.75rem; + overflow-x: auto; + margin: 6px 0; + line-height: 1.5; } -.user-input textarea { - flex: 1; - padding: 12px 15px; - border: 1px solid var(--border-color); - border-radius: 20px; - resize: none; - height: 50px; - font-family: inherit; - font-size: 0.95rem; -} - -.user-input button { - background-color: var(--primary-color); - color: white; - border: none; - border-radius: 50%; - width: 50px; - height: 50px; - margin-left: 10px; - cursor: pointer; - transition: background-color 0.2s; - display: flex; - align-items: center; - justify-content: center; +.message-content code { + font-family: var(--font-mono); + font-size: 0.8em; + background: rgba(0, 0, 0, 0.06); + padding: 1px 5px; + border-radius: 4px; } -.user-input button:hover { - background-color: #3a5a8a; +.message-content pre code { + background: none; + padding: 0; } -#resetButton { - background-color: var(--secondary-color); +.message-content ol, +.message-content ul { + padding-left: 1.5em; + margin: 4px 0; } -#resetButton:hover { - background-color: #5a6268; +.message-content li { + margin-bottom: 2px; + list-style-position: outside; } -/* Status Panel Styles */ +/* --- Status Panel --- */ .status-panel { - background-color: white; - border-radius: 8px; - box-shadow: var(--shadow); - overflow: hidden; -} - -.status-header { - background-color: var(--primary-color); - color: white; - padding: 15px; -} - -.status-header h3 { - font-size: 1.2rem; - 
font-weight: 500; -} - -.status-content { - padding: 15px; + padding: 8px 16px; + border-top: 1px solid var(--border); + background: var(--muted); + flex-shrink: 0; } .status-item { - margin-bottom: 15px; - padding-bottom: 15px; - border-bottom: 1px solid var(--border-color); + display: flex; + gap: 6px; + align-items: center; + margin-bottom: 2px; + font-size: 0.6875rem; + line-height: 1.6; } .status-item:last-child { - margin-bottom: 0; - padding-bottom: 0; - border-bottom: none; + margin-bottom: 0; + padding-bottom: 0; + border-bottom: none; } .status-label { - font-weight: 600; - display: block; - margin-bottom: 5px; - color: var(--secondary-color); + font-weight: 500; + color: var(--muted-foreground); + display: inline; } .status-value { - font-size: 0.95rem; + font-size: 0.6875rem; + font-weight: 500; } -/* Footer Styles */ -footer { - text-align: center; - padding: 15px 0; - color: var(--secondary-color); - border-top: 1px solid var(--border-color); - margin-top: auto; +/* --- User Input --- */ +.user-input { + display: flex; + align-items: flex-end; + gap: 6px; + padding: 12px 16px; + border-top: 1px solid var(--border); + background: var(--card); + flex-shrink: 0; } -footer a { - color: var(--primary-color); - text-decoration: none; +.user-input textarea { + flex: 1; + resize: none; + padding: 8px 12px; + border: 1px solid var(--input); + border-radius: 12px; + font-family: var(--font-sans); + font-size: 0.8125rem; + line-height: 1.5; + color: var(--foreground); + background: var(--muted); + min-height: 38px; + max-height: 120px; + outline: none; + transition: border-color 0.15s, box-shadow 0.15s; } -footer a:hover { - text-decoration: underline; +.user-input textarea:focus { + border-color: var(--ring); + box-shadow: 0 0 0 2px rgba(161, 161, 170, 0.15); } -/* Modal Styles */ -.modal { - display: none; - position: fixed; - z-index: 1000; - left: 0; - top: 0; - width: 100%; - height: 100%; - background-color: rgba(0, 0, 0, 0.5); - align-items: center; - 
justify-content: center; +.user-input textarea::placeholder { + color: var(--muted-foreground); } -.modal-content { - background-color: white; - padding: 25px; - border-radius: 8px; - width: 90%; - max-width: 500px; - box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3); - position: relative; +.user-input button { + display: inline-flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + border-radius: 6px; + border: 1px solid var(--border); + background: var(--card); + color: var(--muted-foreground); + cursor: pointer; + transition: all 0.15s; + flex-shrink: 0; + font-size: 0.8125rem; } -.close-button { - position: absolute; - top: 15px; - right: 20px; - font-size: 24px; - cursor: pointer; - color: var(--secondary-color); +.user-input button:hover { + background: var(--accent); + color: var(--accent-foreground); } -.modal h2 { - margin-bottom: 20px; - color: var(--primary-color); +#sendButton { + background: var(--primary); + color: var(--primary-foreground); + border-color: var(--primary); } -.form-group { - margin-bottom: 20px; +#sendButton:hover { + opacity: 0.9; } -.form-group label { - display: block; - margin-bottom: 8px; - font-weight: 500; +#resetButton { + background: var(--card); } -.form-group input, -.form-group select { - width: 100%; - padding: 10px; - border: 1px solid var(--border-color); - border-radius: 4px; - font-size: 0.95rem; +.hidden-input { + display: none; } -.form-group input[type="range"] { - width: calc(100% - 40px); - vertical-align: middle; +/* ========================================================================== + Resizer + ========================================================================== */ +.resizer { + width: 1px; + background-color: var(--border); + cursor: col-resize; + position: relative; + transition: background-color 0.15s; + flex-shrink: 0; } -#temperatureValue { - display: inline-block; - width: 30px; - text-align: right; - margin-left: 5px; +/* Wider invisible grab target */ 
+.resizer::before { + content: ""; + position: absolute; + top: 0; + bottom: 0; + left: -4px; + right: -4px; + z-index: 10; } -.submit-button { - background-color: var(--accent-color); - color: white; - border: none; - padding: 10px 20px; - border-radius: 4px; - cursor: pointer; - font-size: 1rem; - margin-top: 10px; +.resizer:hover { + background-color: var(--ring); } -.submit-button:hover { - background-color: #218838; +.resizer:active { + background-color: var(--primary); } -/* Responsive Styles */ -@media (max-width: 768px) { - main { - grid-template-columns: 1fr; - } +/* Small drag indicator in the center */ +.resizer::after { + content: ""; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + width: 3px; + height: 28px; + background: var(--border); + border-radius: 2px; + opacity: 0; + transition: opacity 0.15s; +} - .chat-container { - height: 60vh; - } +.resizer:hover::after { + opacity: 1; } -/* ๅจไฝ ็style.cssไธญๆทปๅ ไปฅไธๆ ทๅผ */ +/* ========================================================================== + Right Dashboard + ========================================================================== */ +.dashboard-right { + flex: 1; + display: grid; + grid-template-columns: 1fr 1fr; + grid-template-rows: 1fr 1fr; + gap: 8px; + padding: 8px; + position: relative; + min-width: 400px; + height: 100vh; + box-sizing: border-box; + background: var(--background); + pointer-events: auto; +} + +/* ========================================================================== + Panels + ========================================================================== */ +.panel { + border: 1px solid var(--border); + border-radius: var(--radius); + display: flex; + flex-direction: column; + overflow: hidden; + position: relative; + z-index: 1; + min-height: 0; + min-width: 0; + width: 100%; + height: 100%; + box-sizing: border-box; + background: var(--card); +} -/* Task items styling */ +.panel.fullspan { + grid-column: 1 / -1; + grid-row: 1 / 
-1; + z-index: 10; +} + +.panel.hidden { + display: none; +} + +/* --- Panel Header --- */ +.panel-header { + display: flex; + align-items: center; + gap: 6px; + padding: 0 12px; + height: 36px; + background: var(--card); + border-bottom: 1px solid var(--border); + font-size: 0.75rem; + font-weight: 500; + color: var(--foreground); + position: relative; + z-index: 100; + flex-shrink: 0; +} + +.panel-header i:first-child { + color: var(--muted-foreground); + font-size: 0.6875rem; +} + +.panel.fullspan .panel-header { + height: 44px; + font-size: 0.875rem; + padding: 0 16px; +} + +/* --- Panel Content --- */ +.panel-content { + flex: 1; + overflow: hidden; + padding: 0; + min-height: 0; + display: flex; + flex-direction: column; + word-wrap: break-word; + word-break: break-all; + overflow-wrap: break-word; + hyphens: auto; + max-width: 100%; +} + +/* --- Fullscreen Button --- */ +.fs-btn { + position: absolute; + top: 50%; + right: 8px; + transform: translateY(-50%); + width: 26px; + height: 26px; + display: inline-flex; + align-items: center; + justify-content: center; + background: transparent; + border: none; + border-radius: 6px; + font-size: 0.6875rem; + color: var(--muted-foreground); + cursor: pointer; + z-index: 101; + pointer-events: auto; + transition: background-color 0.15s, color 0.15s; + line-height: 1; +} + +.fs-btn:hover { + background: var(--accent); + color: var(--accent-foreground); +} + +.fs-btn:focus-visible { + outline: 2px solid var(--ring); + outline-offset: 2px; +} + +.panel.fullspan .fs-btn { + right: 12px; +} + +/* ========================================================================== + Terminal & Notebook + ========================================================================== */ +.terminal-output { + background: #0c0c0c; + color: #4ade80; + font-family: var(--font-mono); + font-size: 0.75rem; + padding: 12px; + flex: 1; + overflow-x: auto; + overflow-y: auto; + white-space: pre-wrap; + word-wrap: break-word; + word-break: normal; + 
overflow-wrap: break-word; + margin: 0; + min-height: 0; + max-width: 100%; + line-height: 1.6; +} + +.notebook-output { + background: var(--muted); + font-family: var(--font-mono); + font-size: 0.75rem; + padding: 12px; + flex: 1; + overflow-x: auto; + overflow-y: auto; + white-space: pre-wrap; + word-wrap: break-word; + word-break: normal; + overflow-wrap: break-word; + margin: 0; + min-height: 0; + max-width: 100%; + line-height: 1.6; +} + +/* --- Desktop iframe --- */ +.iframe-container { + flex: 1; + min-height: 0; + overflow: hidden; +} + +#desktopFrame { + width: 100%; + height: 100%; + border: none; +} + +/* ========================================================================== + Subtask List + ========================================================================== */ +#subtaskList { + flex: 1; + min-height: 0; + overflow-y: auto; + overflow-x: hidden; + margin: 0; + padding: 8px; + list-style: none; + max-width: 100%; +} + +#subtaskList li { + word-wrap: break-word; + overflow-wrap: break-word; + hyphens: auto; + white-space: normal; + max-width: 100%; +} + +/* --- Task Items --- */ .task-item { - display: flex; - justify-content: space-between; - align-items: center; - padding: 8px 12px; - margin: 4px 0; - background: #f8f9fa; - border-radius: 6px; - border-left: 3px solid #007bff; - transition: all 0.2s ease; + display: flex; + justify-content: space-between; + align-items: center; + padding: 8px 12px; + margin-bottom: 4px; + background: var(--muted); + border-radius: 6px; + border-left: 3px solid var(--ring); + transition: background-color 0.15s; } .task-item:hover { - background: #e9ecef; - transform: translateX(2px); + background: var(--accent); } .task-item.completed { - border-left-color: #28a745; - background: #d4edda; - opacity: 0.8; - color: #6c757d; + border-left-color: var(--success); + background: #f0fdf4; + opacity: 0.75; } .task-item.running { - border-left-color: #ffc107; - background: #fff3cd; + border-left-color: var(--warning); + 
background: #fefce8; } .task-content { - flex: 1; + flex: 1; } .task-name { - display: block; - font-weight: 500; - color: #333; - margin-bottom: 2px; + display: block; + font-weight: 500; + font-size: 0.8125rem; + color: var(--foreground); + margin-bottom: 2px; } .task-description { - font-size: 0.85em; - color: #666; - margin-top: 4px; + font-size: 0.75rem; + color: var(--muted-foreground); + margin-top: 2px; } .task-status { - display: inline-block; - padding: 2px 6px; - border-radius: 12px; - font-size: 0.75em; - font-weight: 500; - text-transform: uppercase; - margin-left: 8px; + display: inline-flex; + align-items: center; + padding: 1px 8px; + border-radius: 9999px; + font-size: 0.625rem; + font-weight: 500; + text-transform: uppercase; + letter-spacing: 0.025em; + margin-left: 8px; } .status-pending { - background: #6c757d; - color: white; + background: var(--secondary); + color: var(--secondary-foreground); } .status-running { - background: #ffc107; - color: #212529; + background: #fef9c3; + color: #854d0e; } .status-completed { - background: #28a745; - color: white; + background: #dcfce7; + color: #166534; } .task-actions { - display: flex; - gap: 4px; - opacity: 0; - transition: opacity 0.2s ease; + display: flex; + gap: 4px; + opacity: 0; + transition: opacity 0.15s; } .task-item:hover .task-actions { - opacity: 1; + opacity: 1; } .task-btn { - width: 24px; - height: 24px; - border: none; - border-radius: 50%; - cursor: pointer; - font-size: 12px; - font-weight: bold; - transition: all 0.2s ease; + width: 22px; + height: 22px; + border: none; + border-radius: 6px; + cursor: pointer; + font-size: 0.6875rem; + font-weight: 600; + display: inline-flex; + align-items: center; + justify-content: center; + transition: opacity 0.15s; } .complete-btn { - background: #28a745; - color: white; + background: var(--success); + color: white; } .complete-btn:hover { - background: #218838; - transform: scale(1.1); + opacity: 0.85; } .delete-btn { - background: #dc3545; 
- color: white; + background: var(--destructive); + color: white; } .delete-btn:hover { - background: #c82333; - transform: scale(1.1); + opacity: 0.85; +} + +/* --- Empty state for subtasks --- */ +#subtaskList li[style*="italic"] { + text-align: center; + padding: 24px 16px; + border: 1px dashed var(--border); + border-radius: var(--radius); + margin: 8px; + font-size: 0.8125rem; + color: var(--muted-foreground); +} + +/* ========================================================================== + Settings Modal + ========================================================================== */ +.modal { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background: rgba(0, 0, 0, 0.5); + backdrop-filter: blur(4px); + -webkit-backdrop-filter: blur(4px); + display: none; + justify-content: center; + align-items: center; + z-index: 1000; } -/* Notification styling */ +.modal-content { + background: var(--card); + padding: 24px; + border-radius: 12px; + width: 90%; + max-width: 420px; + border: 1px solid var(--border); + box-shadow: 0 16px 48px rgba(0, 0, 0, 0.12); + position: relative; +} + +.modal-content h2 { + font-size: 1.125rem; + font-weight: 600; + color: var(--foreground); + margin-bottom: 20px; + letter-spacing: -0.01em; +} + +.close-btn { + position: absolute; + top: 16px; + right: 16px; + width: 28px; + height: 28px; + display: inline-flex; + align-items: center; + justify-content: center; + background: transparent; + border: none; + border-radius: 6px; + color: var(--muted-foreground); + cursor: pointer; + transition: background-color 0.15s, color 0.15s; + font-size: 0.875rem; +} + +.close-btn:hover { + background: var(--accent); + color: var(--accent-foreground); +} + +.form-group { + margin-bottom: 16px; +} + +.form-group label { + display: block; + margin-bottom: 6px; + font-weight: 500; + font-size: 0.8125rem; + color: var(--foreground); +} + +.form-group input, +.form-group select { + width: 100%; + padding: 8px 12px; + border: 1px 
solid var(--input); + border-radius: 6px; + font-size: 0.8125rem; + font-family: var(--font-sans); + color: var(--foreground); + background: var(--card); + outline: none; + transition: border-color 0.15s, box-shadow 0.15s; +} + +.form-group input:focus, +.form-group select:focus { + border-color: var(--ring); + box-shadow: 0 0 0 2px rgba(161, 161, 170, 0.15); +} + +.form-group input[type="range"] { + width: calc(100% - 44px); + vertical-align: middle; + padding: 0; + accent-color: var(--primary); +} + +#temperatureValue { + display: inline-block; + width: 30px; + text-align: right; + margin-left: 8px; + font-size: 0.8125rem; + font-weight: 500; + font-family: var(--font-mono); + color: var(--muted-foreground); +} + +.submit-button { + width: 100%; + padding: 8px 16px; + background: var(--primary); + color: var(--primary-foreground); + border: none; + border-radius: 6px; + font-size: 0.8125rem; + font-weight: 500; + font-family: var(--font-sans); + cursor: pointer; + transition: opacity 0.15s; + margin-top: 8px; +} + +.submit-button:hover { + opacity: 0.9; +} + +/* ========================================================================== + Notifications + ========================================================================== */ .notification { - position: fixed; - top: 20px; - right: 20px; - padding: 12px 20px; - border-radius: 6px; - color: white; - font-weight: 500; - z-index: 1000; - transform: translateX(100%); - animation: slideIn 0.3s ease forwards; + position: fixed; + top: 16px; + right: 16px; + padding: 10px 16px; + border-radius: var(--radius); + font-size: 0.8125rem; + font-weight: 500; + z-index: 1001; + border: 1px solid; + transform: translateX(calc(100% + 20px)); + animation: slideIn 0.2s ease forwards; } .notification-success { - background: #28a745; + background: #f0fdf4; + border-color: #bbf7d0; + color: #166534; } .notification-error { - background: #dc3545; + background: #fef2f2; + border-color: #fecaca; + color: #991b1b; } 
.notification-info { - background: #17a2b8; + background: #eff6ff; + border-color: #bfdbfe; + color: #1e40af; } .notification.fade-out { - animation: slideOut 0.3s ease forwards; + animation: slideOut 0.2s ease forwards; } @keyframes slideIn { - to { - transform: translateX(0); - } + to { + transform: translateX(0); + } } @keyframes slideOut { - to { - transform: translateX(100%); - } + to { + transform: translateX(calc(100% + 20px)); + } } -/* Empty state */ -#subtaskList li[style*="italic"] { - text-align: center; - padding: 20px; - border: 2px dashed #ddd; - border-radius: 6px; - margin: 10px 0; +/* ========================================================================== + Scrollbars + ========================================================================== */ +::-webkit-scrollbar { + width: 6px; + height: 6px; } -#terminalOutput { - white-space: pre-wrap; - } +::-webkit-scrollbar-track { + background: transparent; +} -/* ่ฎฉ fullspan ๆถๆจช่ทจๆๆๆ ผๅญ๏ผๅนถ็ฝฎ้กถ */ -.panel.fullspan { - grid-column: 1 / -1; - grid-row: 1 / -1; - z-index: 10; - } - - /* ้่้็ฎๆ panel */ - .panel.hidden { - display: none; - } +::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 3px; +} -/* ไฟ่ฏ dashboard-right ๅบๅๆฌ่บซๅฏไปฅๆฅๆถไบไปถ */ -.dashboard-right { - position: relative; - pointer-events: auto; - } - -/* 1. Panel ไธ่ฃๅชๅ ถๅญๅ ็ด ๏ผๅ ๆฌ header ๆบขๅบ๏ผ */ -.panel { - position: relative; - overflow: visible !important; - z-index: 1; - } - - /* 2. 
Header ไฟ่ฏๅจๆไธๅฑๅนถไฝฟ็จ Flex ๅธๅฑ */ - .panel-header { - position: relative; /* ๅ็ ง็ณป๏ผไพ็ปๅฏนๅฎไฝ็ๆ้ฎไฝฟ็จ */ - display: flex; - align-items: center; - justify-content: space-between; - padding: 4px 8px; - background: #fafafa; - z-index: 100; /* ๅ่ฟ iframe ็ญๅ ๅฎน */ +::-webkit-scrollbar-thumb:hover { + background: var(--ring); +} + +/* ========================================================================== + Responsive + ========================================================================== */ +@media (max-width: 900px) { + .chat-area { + min-width: 240px; + flex-basis: 30%; } - - /* ---------------------------- - ๅ จๅฑๆ้ฎๆ ทๅผ - ---------------------------- */ - - /* 3. ๆ้ฎ็ปๅฏนๅฎไฝๅฐ header ๅณไธ่ง */ - .fs-btn { - position: absolute; - top: 4px; - right: 4px; - width: 24px; - height: 24px; - line-height: 24px; - text-align: center; - - background: transparent; - border: none; - font-size: 1rem; - cursor: pointer; - - z-index: 101; /* ้ซไบ header ๆฌ่บซ */ - pointer-events: auto; + + .dashboard-right { + min-width: 300px; } - - /* 4. 
Hover ๅ้ฆ */ - .fs-btn:hover { - background: rgba(0, 0, 0, 0.05); - border-radius: 4px; - } \ No newline at end of file +} + +#terminalOutput { + white-space: pre-wrap; +} diff --git a/frontend/eslint.config.js b/frontend/eslint.config.js new file mode 100644 index 0000000..ec2b712 --- /dev/null +++ b/frontend/eslint.config.js @@ -0,0 +1,33 @@ +import js from '@eslint/js' +import globals from 'globals' +import reactHooks from 'eslint-plugin-react-hooks' +import reactRefresh from 'eslint-plugin-react-refresh' + +export default [ + { ignores: ['dist'] }, + { + files: ['**/*.{js,jsx}'], + languageOptions: { + ecmaVersion: 2020, + globals: globals.browser, + parserOptions: { + ecmaVersion: 'latest', + ecmaFeatures: { jsx: true }, + sourceType: 'module', + }, + }, + plugins: { + 'react-hooks': reactHooks, + 'react-refresh': reactRefresh, + }, + rules: { + ...js.configs.recommended.rules, + ...reactHooks.configs.recommended.rules, + 'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }], + 'react-refresh/only-export-components': [ + 'warn', + { allowConstantExport: true }, + ], + }, + }, +] diff --git a/frontend/index.html b/frontend/index.html index d04d1c3..fb06c09 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -3,328 +3,38 @@
-
-
+