-
-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathdocker-compose-rpi.yml
More file actions
51 lines (51 loc) · 1.64 KB
/
docker-compose-rpi.yml
File metadata and controls
51 lines (51 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Raspberry Pi 5 / ARM64 Docker Compose
#
# Prerequisites:
# - Raspberry Pi 5 with 8GB RAM recommended
# - 64-bit Raspberry Pi OS (Bookworm or later)
# - Docker installed: curl -fsSL https://get.docker.com | sh
#
# Usage:
# docker compose -f docker-compose-rpi.yml up -d
#
# Any GGUF model from HuggingFace works - just change DEFAULT_MODEL.
# Recommended models for Pi 5 (8GB):
# - unsloth/Qwen3.5-0.6B-GGUF (fastest, ~400MB RAM)
# - unsloth/Qwen3.5-1.7B-GGUF (good balance, ~1.2GB RAM)
# - unsloth/Qwen3.5-4B-GGUF (best quality that fits, ~3GB RAM)
#
services:
  ezlocalai:
    # Prebuilt ARM64 image (see prerequisites/usage notes in the header above).
    image: joshxt/ezlocalai:rpi
    # NOTE: ${VAR-default} substitutes the default only when VAR is *unset*;
    # ${VAR:-default} would also substitute when VAR is set but empty.
    environment:
      - EZLOCALAI_URL=${EZLOCALAI_URL-http://localhost:8091}
      # Empty default — NOTE(review): presumably the server runs without
      # API-key auth when this is empty; confirm against ezlocalai docs.
      - EZLOCALAI_API_KEY=${EZLOCALAI_API_KEY-}
      # Any GGUF model repo from HuggingFace works (recommendations in header).
      - DEFAULT_MODEL=${DEFAULT_MODEL-unsloth/Qwen3.5-0.6B-GGUF}
      - WHISPER_MODEL=${WHISPER_MODEL-small}
      # Image generation left empty — NOTE(review): presumably disables the
      # image model (too heavy for a Pi); confirm empty means "off".
      - IMG_MODEL=
      - LLM_BATCH_SIZE=${LLM_BATCH_SIZE-512}
      - LLM_MAX_TOKENS=${LLM_MAX_TOKENS-8192}
      # Single worker / single concurrent request keeps peak memory
      # predictable on an 8GB board — TODO confirm before raising.
      - UVICORN_WORKERS=${UVICORN_WORKERS-1}
      - MAX_CONCURRENT_REQUESTS=${MAX_CONCURRENT_REQUESTS-1}
      - MAX_QUEUE_SIZE=${MAX_QUEUE_SIZE-10}
      - REQUEST_TIMEOUT=${REQUEST_TIMEOUT-600}
    restart: unless-stopped
    healthcheck:
      # Probes the API's /health endpoint from inside the container.
      # NOTE(review): requires curl in the image — confirm it is installed.
      test: ["CMD", "curl", "-sf", "http://localhost:8091/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Generous grace period before health failures count — NOTE(review):
      # presumably to cover first-start model download; confirm.
      start_period: 180s
    logging:
      # Cap json-file logs at 3 rotated files of 100MB each so long-running
      # deployments don't fill the SD card.
      driver: json-file
      options:
        max-size: "100m"
        max-file: "3"
    ports:
      # Host 8091 -> container 8091. Kept quoted: unquoted colon-separated
      # digits can hit YAML 1.1 sexagesimal parsing.
      - "8091:8091"
    volumes:
      # Persist models, caches, and outputs on the host across restarts.
      - ./models:/app/models
      # HuggingFace hub cache — NOTE(review): /home/root is unusual (root's
      # home is normally /root); verify this matches the image's HOME.
      - ./hf:/home/root/.cache/huggingface/hub
      - ./outputs:/app/outputs
      - ./voices:/app/voices
      - ./whispercpp:/app/whispercpp