rhasspy · edurenye · Aug 24, 2023 · Oct 6, 2023 · Oct 6, 2023 · Oct 27, 2023
diff --git a/README.md b/README.md
@@ -24,32 +24,86 @@ docker run -it -p 10300:10300 -v /path/to/local/data:/data rhasspy/wyoming-whisp
 docker run -it -p 10200:10200 -v /path/to/local/data:/data rhasspy/wyoming-piper --voice en_US-lessac-medium
 ```
 
+
 ## Run openWakeWord
 
 ``` sh
 docker run -it -p 10400:10400 rhasspy/wyoming-openwakeword --preload-model 'ok_nabu'
 ```
 
+
 ## Run snowboy
 
 ``` sh
 docker run -it -p 10400:10400 rhasspy/wyoming-snowboy
 ```
 
+
 ## Run microWakeWord
 
 ``` sh
 docker run -it -p 10400:10400 rhasspy/wyoming-microwakeword
 ```
 
+
 ## Run rhasspy-speech
 
 ``` sh
 docker run -it -p 10300:10300 -v /path/to/download/models:/models -v /path/to/train:/train rhasspy/wyoming-rhasspy-speech
 ```
 
+
 ## Run speech-to-phrase
 
 ``` sh
 docker run -it -p 10300:10300 -v /path/to/download/models:/models -v /path/to/train:/train rhasspy/wyoming-speech-to-phrase --hass-websocket-uri 'ws://homeassistant.local:8123/api/websocket' --hass-token '<LONG_LIVED_ACCESS_TOKEN>' --retrain-on-start
 ```
+
+
+## Run as a standalone server with Docker Compose
+
+### Run without GPU
+
+Build the openwakeword, piper and whisper images without GPU support:
+
+``` sh
+docker compose -f docker-compose.base.yml build --no-cache
+```
+
+Run it with:
+
+``` sh
+docker compose -f docker-compose.base.yml up -d
+```
+
+Take it down with:
+
+``` sh
+docker compose -f docker-compose.base.yml down
+```
+
+### Run with GPU
+
+Build the openwakeword, piper and whisper images with GPU support:
+
+``` sh
+docker compose -f docker-compose.gpu.yml build --no-cache
+```
+
+Run it with:
+
+``` sh
+docker compose -f docker-compose.gpu.yml up -d
+```
+
+Take it down with:
+
+``` sh
+docker compose -f docker-compose.gpu.yml down
+```
+
+### Extend it
+
+You can extend these files to add your own languages.
+See the [official documentation](https://docs.docker.com/compose/multiple-compose-files/extends/) for more on Docker Compose `extends`.
+
diff --git a/docker-compose.base.yml b/docker-compose.base.yml
@@ -0,0 +1,72 @@
+### Shared settings (YAML anchors) ###
+x-common: &common
+  restart: unless-stopped
+
+### Services (each one inherits the shared settings) ###
+services:
+  wyoming-piper:
+    <<: *common
+    command: ["--voice", "en_US-lessac-medium"]
+    ports:
+      - "10200:10200"
+    build:
+      context: ./piper/
+
+  wyoming-whisper:
+    <<: *common
+    command: ["--model", "tiny-int8", "--language", "en"]
+    ports:
+      - "10300:10300"
+    build:
+      context: ./whisper/
+
+#  wyoming-whispercpp:
+#    build:
+#      context: ./whisper-cpp/
+#    ports:
+#      - "10300:10300"
+#    command: [ "--model", "tiny-int8", "--language", "en" ]
+#    <<: [ *common ]
+
+  wyoming-openwakeword:
+    <<: *common
+    command: ["--preload-model", "ok_nabu"]
+    ports:
+      - "10400:10400"
+    build:
+      context: ./openwakeword/
+
+#  wyoming-porcupine:
+#    build:
+#      context: ./porcupine1/
+#    ports:
+#      - "10400:10400"
+#    <<: [ *common ]
+
+#  wyoming-snowboy:
+#    build:
+#      context: ./snowboy/
+#    ports:
+#      - "10400:10400"
+#    <<: [ *common ]
+
+#  wyoming-vosk:
+#    build:
+#      context: ./vosk/
+#    ports:
+#      - "10400:10400"
+#    <<: [ *common ]
+
+#  wyoming-microwakeword:
+#    build:
+#      context: ./microwakeword/
+#    ports:
+#      - "10400:10400"
+#    <<: [ *common ]
+
+#  wyoming-rhasspy-speech:
+#    build:
+#      context: ./rhasspy-speech/
+#    ports:
+#      - "10300:10300"
+#    <<: [ *common ]
diff --git a/docker-compose.gpu.yml b/docker-compose.gpu.yml
@@ -0,0 +1,81 @@
+### YAML Anchors ###
+x-gpu: &gpu  # GPU settings pulled into the services below with `<<`
+  build:
+    args:
+      - BASE=nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04  # overrides the Dockerfiles' default `ARG BASE`
+  runtime: nvidia
+  deploy:
+    resources:
+      reservations:
+        devices:
+          - driver: nvidia
+            count: all  # reserve every available NVIDIA device
+            capabilities: ["compute", "utility", "graphics"]
+
+####
+services:
+  wyoming-piper:
+    extends:
+      file: docker-compose.base.yml
+      service: wyoming-piper
+    <<: [ *gpu ]
+    build:  # NOTE(review): merge keys are shallow, so this explicit `build` replaces the one from *gpu and its BASE arg is dropped -- confirm this is intended
+      dockerfile: GPU.Dockerfile
+      args:
+        - EXTRA_DEPENDENCIES=onnxruntime-gpu  # extra pip package installed by GPU.Dockerfile
+        - RUN_SCRIPT=run-gpu.sh  # script copied into the image and used as its entrypoint
+    volumes:  # mount this repo's modified modules over the installed wyoming_piper package
+      - ./piper/__main__.py:/opt/venv/lib/python3.11/site-packages/wyoming_piper/__main__.py
+      - ./piper/process.py:/opt/venv/lib/python3.11/site-packages/wyoming_piper/process.py
+
+  wyoming-whisper:
+    extends:
+      file: docker-compose.base.yml
+      service: wyoming-whisper
+    <<: [ *gpu ]  # no explicit `build` key here, so the build args from *gpu are kept
+    command: [ "--model", "tiny-int8", "--language", "en", "--device", "cuda" ]  # base command plus `--device cuda`
+
+#  wyoming-whispercpp:
+#    extends:
+#      file: docker-compose.base.yml
+#      service: wyoming-whispercpp
+#    <<: [ *gpu ]
+#    command: [ "--model", "tiny-int8", "--language", "en", "--device", "cuda" ]
+
+  wyoming-openwakeword:
+    extends:
+      file: docker-compose.base.yml
+      service: wyoming-openwakeword
+    build:  # NOTE(review): this explicit key wins over the `build` merged from *gpu (shallow merge), so BASE is not passed -- confirm
+      dockerfile: GPU.Dockerfile
+    <<: [ *gpu ]
+
+#  wyoming-porcupine:
+#    extends:
+#      file: docker-compose.base.yml
+#      service: wyoming-porcupine
+#    <<: [ *gpu ]
+
+#  wyoming-snowboy:
+#    extends:
+#      file: docker-compose.base.yml
+#      service: wyoming-snowboy
+#    <<: [ *gpu ]
+
+#  wyoming-vosk:
+#    extends:
+#      file: docker-compose.base.yml
+#      service: wyoming-vosk
+#    <<: [ *gpu ]
+
+#  wyoming-microwakeword:
+#    extends:
+#      file: docker-compose.base.yml
+#      service: wyoming-microwakeword
+#    <<: [ *gpu ]
+
+#  wyoming-rhasspy-speech:
+#    extends:
+#      file: docker-compose.base.yml
+#      service: wyoming-rhasspy-speech
+#    <<: [ *gpu ]
diff --git a/microwakeword/Dockerfile b/microwakeword/Dockerfile
@@ -1,4 +1,6 @@
-FROM debian:bookworm-slim
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
 ARG TARGETARCH
 ARG TARGETVARIANT
 

diff --git a/openwakeword/Dockerfile b/openwakeword/Dockerfile
@@ -1,4 +1,6 @@
-FROM debian:bookworm-slim
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
 ARG TARGETARCH
 ARG TARGETVARIANT
 

diff --git a/openwakeword/GPU.Dockerfile b/openwakeword/GPU.Dockerfile
@@ -0,0 +1,35 @@
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
+ARG TARGETARCH
+ARG TARGETVARIANT
+
+# Install openWakeWord into /usr/src/.venv. NOTE(review): nothing GPU-specific is installed here -- confirm the CUDA base image alone is enough
+WORKDIR /usr/src
+ARG WYOMING_OPENWAKEWORD_VERSION='1.8.2'
+
+RUN \
+    apt-get update \
+    && apt-get install -y --no-install-recommends \
+        python3 \
+        python3-pip \
+        python3-venv \
+        libopenblas0 \
+    \
+    && python3 -m venv .venv \
+    && .venv/bin/pip3 install --no-cache-dir uv \
+    && .venv/bin/uv pip install --no-cache-dir -U \
+        setuptools \
+        wheel \
+    && .venv/bin/uv pip install --no-cache-dir \
+        --exclude-newer 2023-12-12 \
+        "wyoming-openwakeword==${WYOMING_OPENWAKEWORD_VERSION}" \
+    \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /
+COPY run.sh ./
+
+EXPOSE 10400
+
+ENTRYPOINT ["bash", "/run.sh"]
diff --git a/piper/GPU.Dockerfile b/piper/GPU.Dockerfile
@@ -0,0 +1,59 @@
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
+ARG EXTRA_DEPENDENCIES
+ARG RUN_SCRIPT='run.sh'
+ARG TARGETARCH
+ARG TARGETVARIANT
+
+# Install Piper (EXTRA_DEPENDENCIES and RUN_SCRIPT are supplied as build args)
+WORKDIR /usr/src
+ARG WYOMING_PIPER_VERSION='1.5.0'
+ARG BINARY_PIPER_VERSION='1.2.0'
+
+# Create and activate virtual environment
+ENV VIRTUAL_ENV=/opt/venv
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+RUN \
+    apt-get update \
+    && apt-get install -y --no-install-recommends \
+        wget \
+        curl \
+        python3 \
+        python3-pip \
+        python3-venv \
+    \
+    && rm -rf /var/lib/apt/lists/* \
+    \
+    # Create virtual environment
+    && python3 -m venv $VIRTUAL_ENV
+
+RUN \
+    pip3 install --no-cache-dir -U \
+        setuptools \
+        wheel \
+        $EXTRA_DEPENDENCIES \
+    # The cp311/x86_64 wheel is renamed to a generic tag before install; NOTE(review): confirm BASE provides Python 3.11 on x86_64
+    && wget https://github.com/rhasspy/piper-phonemize/releases/download/v1.1.0/piper_phonemize-1.1.0-cp311-cp311-manylinux_2_28_x86_64.whl \
+    \
+    && mv piper_phonemize-1.1.0-cp311-cp311-manylinux_2_28_x86_64.whl piper_phonemize-1.1.0-py3-none-any.whl \
+    \
+    && pip3 install --no-cache-dir --force-reinstall --no-deps \
+        "piper-tts==${BINARY_PIPER_VERSION}" \
+    \
+    && pip3 install --no-cache-dir --force-reinstall --no-deps \
+        piper_phonemize-1.1.0-py3-none-any.whl \
+    \
+    && pip3 install --no-cache-dir \
+        "wyoming-piper @ https://github.com/rhasspy/wyoming-piper/archive/refs/tags/v${WYOMING_PIPER_VERSION}.tar.gz" \
+    \
+    && rm -r piper_phonemize-1.1.0-py3-none-any.whl
+
+WORKDIR /
+COPY $RUN_SCRIPT ./
+ENV RUN_SCRIPT_ENV="/${RUN_SCRIPT}"
+
+EXPOSE 10200
+# Run the script through bash (path quoted) so it starts even if the copied file lacks the executable bit
+ENTRYPOINT ["bash", "-c", "exec bash \"$RUN_SCRIPT_ENV\" \"$@\"", "--"]
diff --git a/piper/__main__.py b/piper/__main__.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python3
+import argparse
+import asyncio
+import json
+import logging
+from functools import partial
+from pathlib import Path
+from typing import Any, Dict, Set
+
+from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
+from wyoming.server import AsyncServer
+
+from . import __version__
+from .download import find_voice, get_voices
+from .handler import PiperEventHandler
+from .process import PiperProcessManager
+
+_LOGGER = logging.getLogger(__name__)
+
+
+async def main() -> None:
+    """Parse the CLI arguments, build the Wyoming TTS info, start the default Piper process and run the server."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--piper",
+        required=True,
+        help="Path to piper executable",
+    )
+    parser.add_argument(
+        "--voice",
+        required=True,
+        help="Default Piper voice to use (e.g., en_US-lessac-medium)",
+    )
+    parser.add_argument("--uri", default="stdio://", help="unix:// or tcp://")
+    parser.add_argument(
+        "--data-dir",
+        required=True,
+        action="append",
+        help="Data directory to check for downloaded models",
+    )
+    parser.add_argument(
+        "--download-dir",
+        help="Directory to download voices into (default: first data dir)",
+    )
+    # Voice and synthesis tuning
+    parser.add_argument(
+        "--speaker", type=str, help="Name or id of speaker for default voice"
+    )
+    parser.add_argument("--noise-scale", type=float, help="Generator noise")
+    parser.add_argument("--length-scale", type=float, help="Phoneme length")
+    parser.add_argument("--noise-w", type=float, help="Phoneme width noise")
+    # Text handling and process limits
+    parser.add_argument(
+        "--auto-punctuation", default=".?!", help="Automatically add punctuation"
+    )
+    parser.add_argument("--samples-per-chunk", type=int, default=1024)
+    parser.add_argument(
+        "--max-piper-procs",
+        type=int,
+        default=1,
+        help="Maximum number of piper process to run simultaneously (default: 1)",
+    )
+    # Startup behaviour
+    parser.add_argument(
+        "--update-voices",
+        action="store_true",
+        help="Download latest voices.json during startup",
+    )
+    parser.add_argument(
+        "--use-cuda",
+        action="store_true",
+        help="Use GPU"
+    )
+    # Diagnostics
+    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=__version__,
+        help="Print version and exit",
+    )
+    args = parser.parse_args()
+
+    if not args.download_dir:
+        # Default to first data directory
+        args.download_dir = args.data_dir[0]
+
+    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+    _LOGGER.debug(args)
+
+    # Load voice info
+    voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
+
+    # Resolve aliases for backwards compatibility with old voice names
+    aliases_info: Dict[str, Any] = {}
+    for voice_info in voices_info.values():
+        for voice_alias in voice_info.get("aliases", []):
+            aliases_info[voice_alias] = {"_is_alias": True, **voice_info}  # copy of the voice entry, flagged as an alias
+
+    voices_info.update(aliases_info)  # aliases become lookup keys next to the real voice names
+    voices = [
+        TtsVoice(
+            name=voice_name,
+            description=get_description(voice_info),
+            attribution=Attribution(
+                name="rhasspy", url="https://github.com/rhasspy/piper"
+            ),
+            installed=True,
+            version=None,
+            languages=[
+                voice_info.get("language", {}).get(  # language code, else espeak voice, else voice-name prefix
+                    "code",
+                    voice_info.get("espeak", {}).get("voice", voice_name.split("_")[0]),
+                )
+            ],
+            speakers=[
+                TtsVoiceSpeaker(name=speaker_name)
+                for speaker_name in voice_info["speaker_id_map"]
+            ]
+            if voice_info.get("speaker_id_map")
+            else None,
+        )
+        for voice_name, voice_info in voices_info.items()
+        if not voice_info.get("_is_alias", False)  # advertise each voice once, not once per alias
+    ]
+
+    custom_voice_names: Set[str] = set()  # voices that are not listed in voices_info
+    if args.voice not in voices_info:
+        custom_voice_names.add(args.voice)
+
+    for data_dir in args.data_dir:
+        data_dir = Path(data_dir)
+        if not data_dir.is_dir():
+            continue
+
+        for onnx_path in data_dir.glob("*.onnx"):  # any unlisted .onnx model counts as a custom voice
+            custom_voice_name = onnx_path.stem
+            if custom_voice_name not in voices_info:
+                custom_voice_names.add(custom_voice_name)
+
+    for custom_voice_name in custom_voice_names:
+        # Add custom voice info
+        custom_voice_path, custom_config_path = find_voice(
+            custom_voice_name, args.data_dir
+        )
+        with open(custom_config_path, "r", encoding="utf-8") as custom_config_file:
+            custom_config = json.load(custom_config_file)
+            custom_name = custom_config.get("dataset", custom_voice_path.stem)  # config "dataset", else the model file stem
+            custom_quality = custom_config.get("audio", {}).get("quality")
+            if custom_quality:
+                description = f"{custom_name} ({custom_quality})"
+            else:
+                description = custom_name
+
+            lang_code = custom_config.get("language", {}).get("code")  # same fallback chain as for listed voices
+            if not lang_code:
+                lang_code = custom_config.get("espeak", {}).get("voice")
+                if not lang_code:
+                    lang_code = custom_voice_path.stem.split("_")[0]
+
+            voices.append(
+                TtsVoice(
+                    name=custom_name,
+                    description=description,
+                    version=None,
+                    attribution=Attribution(name="", url=""),
+                    installed=True,
+                    languages=[lang_code],
+                )
+            )
+
+    wyoming_info = Info(
+        tts=[
+            TtsProgram(
+                name="piper",
+                description="A fast, local, neural text to speech engine",
+                attribution=Attribution(
+                    name="rhasspy", url="https://github.com/rhasspy/piper"
+                ),
+                installed=True,
+                voices=sorted(voices, key=lambda v: v.name),  # stable, name-ordered voice list
+                version=__version__,
+            )
+        ],
+    )
+
+    process_manager = PiperProcessManager(args, voices_info)
+
+    # Make sure default voice is loaded.
+    # Other voices will be loaded on-demand.
+    await process_manager.get_process()
+
+    # Start server
+    server = AsyncServer.from_uri(args.uri)
+
+    _LOGGER.info("Ready")
+    await server.run(
+        partial(  # handler factory with info, args and process manager pre-bound
+            PiperEventHandler,
+            wyoming_info,
+            args,
+            process_manager,
+        )
+    )
+
+
+# -----------------------------------------------------------------------------
+
+
+def get_description(voice_info: Dict[str, Any]):
+    """Build the display label "<name with spaces> (<quality>)" for a voice."""
+    raw_name = voice_info["name"]
+    pretty_name = raw_name.replace("_", " ")
+
+    voice_quality = voice_info["quality"]
+    return f"{pretty_name} ({voice_quality})"
+
+
+# -----------------------------------------------------------------------------
+
+
+def run():  # synchronous entry point: runs main() to completion on a new event loop
+    asyncio.run(main())
+
+
+if __name__ == "__main__":
+    try:
+        run()
+    except KeyboardInterrupt:
+        pass  # exit without a traceback when interrupted from the keyboard
+
diff --git a/piper/process.py b/piper/process.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+import argparse
+import asyncio
+import json
+import logging
+import tempfile
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+from .download import ensure_voice_exists, find_voice
+
+_LOGGER = logging.getLogger(__name__)
+
+
+@dataclass
+class PiperProcess:
+    """Info for a running Piper process (one voice)."""
+
+    name: str  # voice name the process was started for
+    proc: "asyncio.subprocess.Process"  # handle to the piper subprocess
+    config: Dict[str, Any]  # parsed JSON config of the voice
+    wav_dir: tempfile.TemporaryDirectory  # directory handed to piper as --output_dir
+    last_used: int = 0  # time.monotonic_ns() of the latest request; used to pick the eviction victim
+
+    def get_speaker_id(self, speaker: str) -> Optional[int]:
+        """Resolve a speaker name (or a numeric id given as text) using this voice's config."""
+        return _get_speaker_id(self.config, speaker)
+
+    @property
+    def is_multispeaker(self) -> bool:
+        """True if this voice's config reports more than one speaker."""
+        return _is_multispeaker(self.config)
+
+
+def _get_speaker_id(config: Dict[str, Any], speaker: str) -> Optional[int]:
+    """Resolve a speaker name, or a numeric id given as text, to a speaker id."""
+    mapped_id = config.get("speaker_id_map", {}).get(speaker)
+    if mapped_id is not None:
+        # Known speaker name
+        return mapped_id
+
+    try:
+        # Unknown name: accept it when it parses as an integer id
+        return int(speaker)
+    except ValueError:
+        return None
+
+
+def _is_multispeaker(config: Dict[str, Any]) -> bool:
+    """True if the config's "num_speakers" is greater than 1 (a missing key counts as 1)."""
+    return config.get("num_speakers", 1) > 1
+
+
+# -----------------------------------------------------------------------------
+
+
+class PiperProcessManager:
+    """Starts, caches and evicts Piper subprocesses, one per voice."""
+
+    def __init__(self, args: argparse.Namespace, voices_info: Dict[str, Any]):
+        self.voices_info = voices_info
+        self.args = args
+        self.processes: Dict[str, PiperProcess] = {}  # running processes keyed by resolved voice name
+        self.processes_lock = asyncio.Lock()  # not acquired in this class; available to callers
+
+    async def get_process(self, voice_name: Optional[str] = None) -> PiperProcess:
+        """Return the running Piper process for a voice (default voice if None), starting one if needed."""
+        voice_speaker: Optional[str] = None
+        if voice_name is None:
+            # Default voice
+            voice_name = self.args.voice
+
+        if voice_name == self.args.voice:
+            # Default speaker
+            voice_speaker = self.args.speaker
+
+        assert voice_name is not None
+
+        # Resolve alias
+        voice_info = self.voices_info.get(voice_name, {})
+        voice_name = voice_info.get("key", voice_name)
+        assert voice_name is not None
+
+        piper_proc = self.processes.get(voice_name)
+        if (piper_proc is None) or (piper_proc.proc.returncode is not None):
+            # Remove if stopped
+            self.processes.pop(voice_name, None)
+
+            # Start new Piper process
+            if self.args.max_piper_procs > 0:
+                # Restrict number of running processes
+                while len(self.processes) >= self.args.max_piper_procs:
+                    # Stop least recently used process
+                    lru_proc_name, lru_proc = sorted(
+                        self.processes.items(), key=lambda kv: kv[1].last_used
+                    )[0]
+                    _LOGGER.debug("Stopping process for: %s", lru_proc_name)
+                    self.processes.pop(lru_proc_name, None)
+                    if lru_proc.proc.returncode is None:
+                        try:
+                            lru_proc.proc.terminate()
+                            await lru_proc.proc.wait()
+                        except Exception:
+                            _LOGGER.exception("Unexpected error stopping piper process")
+
+            _LOGGER.debug(
+                "Starting process for: %s (%s/%s)",
+                voice_name,
+                len(self.processes) + 1,
+                self.args.max_piper_procs,
+            )
+
+            ensure_voice_exists(
+                voice_name,
+                self.args.data_dir,
+                self.args.download_dir,
+                self.voices_info,
+            )
+
+            onnx_path, config_path = find_voice(voice_name, self.args.data_dir)
+            with open(config_path, "r", encoding="utf-8") as config_file:
+                config = json.load(config_file)
+
+            wav_dir = tempfile.TemporaryDirectory()
+            piper_args = [
+                "--model",
+                str(onnx_path),
+                "--config",
+                str(config_path),
+                "--output_dir",
+                str(wav_dir.name),
+                "--json-input",  # piper 1.1+
+            ]
+
+            if voice_speaker is not None:
+                if _is_multispeaker(config):
+                    speaker_id = _get_speaker_id(config, voice_speaker)
+                    if speaker_id is not None:
+                        piper_args.extend(["--speaker", str(speaker_id)])
+
+            if self.args.noise_scale is not None:  # `is not None` so an explicit 0 is still forwarded
+                piper_args.extend(["--noise-scale", str(self.args.noise_scale)])
+
+            if self.args.length_scale is not None:  # same rule: only skip when the option was not given
+                piper_args.extend(["--length-scale", str(self.args.length_scale)])
+
+            if self.args.noise_w is not None:  # same rule: only skip when the option was not given
+                piper_args.extend(["--noise-w", str(self.args.noise_w)])
+
+            if self.args.use_cuda:
+                piper_args.extend(["--use-cuda"])
+
+            _LOGGER.debug(
+                "Starting piper process: %s args=%s", self.args.piper, piper_args
+            )
+            piper_proc = PiperProcess(
+                name=voice_name,
+                proc=await asyncio.create_subprocess_exec(
+                    self.args.piper,
+                    *piper_args,
+                    stdin=asyncio.subprocess.PIPE,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.DEVNULL,  # piper's stderr output is discarded
+                ),
+                config=config,
+                wav_dir=wav_dir,
+            )
+            self.processes[voice_name] = piper_proc
+
+        # Update used
+        piper_proc.last_used = time.monotonic_ns()
+
+        return piper_proc
+
diff --git a/piper/run-gpu.sh b/piper/run-gpu.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+python3 -m wyoming_piper \
+    --uri 'tcp://0.0.0.0:10200' \
+    --piper 'piper' \
+    --data-dir /data \
+    --download-dir /data \
+    --use-cuda "$@"
diff --git a/porcupine1/Dockerfile b/porcupine1/Dockerfile
@@ -1,4 +1,6 @@
-FROM debian:bookworm-slim
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
 ARG TARGETARCH
 ARG TARGETVARIANT
 

diff --git a/rhasspy-speech/Dockerfile b/rhasspy-speech/Dockerfile
@@ -1,4 +1,6 @@
-FROM debian:bookworm-slim
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
 ARG TARGETARCH
 ARG TARGETVARIANT
 

diff --git a/snowboy/Dockerfile b/snowboy/Dockerfile
@@ -1,4 +1,6 @@
-FROM debian:bookworm-slim
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
 ARG TARGETARCH
 ARG TARGETVARIANT
 
@@ -40,3 +42,4 @@ COPY run.sh ./
 EXPOSE 10400
 
 ENTRYPOINT ["bash", "/run.sh"]
+
diff --git a/vosk/Dockerfile b/vosk/Dockerfile
@@ -1,4 +1,6 @@
-FROM debian:bookworm-slim
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
 ARG TARGETARCH
 ARG TARGETVARIANT
 

diff --git a/whisper-cpp/Dockerfile b/whisper-cpp/Dockerfile
@@ -1,4 +1,6 @@
-FROM debian:bookworm-slim
+ARG BASE=debian:bookworm-slim
+FROM $BASE
+
 ARG TARGETARCH
 ARG TARGETVARIANT
 

diff --git a/whisper/Dockerfile b/whisper/Dockerfile
@@ -1,8 +1,9 @@
-FROM debian:bullseye-slim
+ARG BASE=debian:bullseye-slim
+FROM $BASE
 
 # Install Whisper
 WORKDIR /usr/src
-ARG WYOMING_WHISPER_VERSION='2.2.0'
+ARG WYOMING_WHISPER_VERSION='2.4.0'
 
 RUN \
     apt-get update \