-
-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathdocker-compose-rpi.yml
More file actions
51 lines (51 loc) · 1.64 KB
/
docker-compose-rpi.yml
File metadata and controls
51 lines (51 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Raspberry Pi 5 / ARM64 Docker Compose
#
# Prerequisites:
# - Raspberry Pi 5 with 8GB RAM recommended
# - 64-bit Raspberry Pi OS (Bookworm or later)
# - Docker installed: curl -fsSL https://get.docker.com | sh
#
# Usage:
# docker compose -f docker-compose-rpi.yml up -d
#
# Any GGUF model from HuggingFace works - just change DEFAULT_MODEL.
# Recommended models for Pi 5 (8GB):
# - unsloth/Qwen3.5-0.6B-GGUF (fastest, ~400MB RAM)
# - unsloth/Qwen3.5-1.7B-GGUF (good balance, ~1.2GB RAM)
# - unsloth/Qwen3.5-4B-GGUF (best quality that fits, ~3GB RAM)
#
services:
  ezlocalai:
    # Prebuilt ARM64 image (see prerequisites/usage notes in the header above).
    image: joshxt/ezlocalai:rpi
    # NOTE: ${VAR-default} substitutes the default only when VAR is *unset*;
    # ${VAR:-default} would also substitute when VAR is set but empty.
    environment:
      - EZLOCALAI_URL=${EZLOCALAI_URL-http://localhost:8091}
      # Empty default — NOTE(review): presumably the server runs without
      # API-key auth when this is empty; confirm against ezlocalai docs.
      - EZLOCALAI_API_KEY=${EZLOCALAI_API_KEY-}
      # Any GGUF model repo from HuggingFace works (recommendations in header).
      - DEFAULT_MODEL=${DEFAULT_MODEL-unsloth/Qwen3.5-0.6B-GGUF}
      - WHISPER_MODEL=${WHISPER_MODEL-small}
      # Image generation left empty — NOTE(review): presumably disables the
      # image model (too heavy for a Pi); confirm empty means "off".
      - IMG_MODEL=
      - LLM_BATCH_SIZE=${LLM_BATCH_SIZE-512}
      - LLM_MAX_TOKENS=${LLM_MAX_TOKENS-8192}
      # Single worker / single concurrent request keeps peak memory
      # predictable on an 8GB board — TODO confirm before raising.
      - UVICORN_WORKERS=${UVICORN_WORKERS-1}
      - MAX_CONCURRENT_REQUESTS=${MAX_CONCURRENT_REQUESTS-1}
      - MAX_QUEUE_SIZE=${MAX_QUEUE_SIZE-10}
      - REQUEST_TIMEOUT=${REQUEST_TIMEOUT-600}
    restart: unless-stopped
    healthcheck:
      # Probes the API's /health endpoint from inside the container.
      # NOTE(review): requires curl in the image — confirm it is installed.
      test: ["CMD", "curl", "-sf", "http://localhost:8091/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Generous grace period before health failures count — NOTE(review):
      # presumably to cover first-start model download; confirm.
      start_period: 180s
    logging:
      # Cap json-file logs at 3 rotated files of 100MB each so long-running
      # deployments don't fill the SD card.
      driver: json-file
      options:
        max-size: "100m"
        max-file: "3"
    ports:
      # Host 8091 -> container 8091. Kept quoted: unquoted colon-separated
      # digits can hit YAML 1.1 sexagesimal parsing.
      - "8091:8091"
    volumes:
      # Persist models, caches, and outputs on the host across restarts.
      - ./models:/app/models
      # HuggingFace hub cache — NOTE(review): /home/root is unusual (root's
      # home is normally /root); verify this matches the image's HOME.
      - ./hf:/home/root/.cache/huggingface/hub
      - ./outputs:/app/outputs
      - ./voices:/app/voices
      - ./whispercpp:/app/whispercpp