feat: ww verifier

JarbasAl · JarbasAl · commit be0f092d9e36 · 2025-11-04T15:00:22.000Z
OpenVoiceOS/ovos-plugin-manager#341
diff --git a/README.md b/README.md
@@ -32,6 +32,16 @@ non exhaustive list of config options
     "microphone": {
       "module": "ovos-microphone-plugin-alsa"
     },
+    // wake word verifier plugins double-check every wake word detection
+    // and may reject it, in which case the activation is discarded
+    "ww_verifiers": {
+        "ovos-ww-verifier-silero": {
+            "threshold": 0.1,
+            // enabling this is redundant when "vad_pre_wake_enabled" is set to true
+            "enabled": false
+        }
+    },
+
     // If enabled will only check for wakeword if VAD also detected speech
     // this should reduce false activations
     "vad_pre_wake_enabled": true,
diff --git a/ovos_dinkum_listener/service.py b/ovos_dinkum_listener/service.py
@@ -13,6 +13,8 @@
 import json
 import random
 import subprocess
+import time
+import warnings
 import wave
 from enum import Enum
 from hashlib import md5
@@ -24,7 +26,6 @@
 from typing import List, Tuple, Optional, Union
 
 import speech_recognition as sr
-import time
 from ovos_bus_client import MessageBusClient
 from ovos_bus_client.message import Message
 from ovos_bus_client.session import SessionManager
@@ -37,26 +38,19 @@
 from ovos_plugin_manager.templates.vad import VADEngine
 from ovos_plugin_manager.utils.tts_cache import hash_sentence
 from ovos_plugin_manager.vad import OVOSVADFactory, get_vad_configs
+from ovos_plugin_manager.wakewords import find_wake_word_verifier_plugins
 from ovos_plugin_manager.wakewords import get_ww_lang_configs, get_ww_supported_langs, get_ww_module_configs
 from ovos_utils.fakebus import FakeBus
 from ovos_utils.log import LOG, log_deprecation
 from ovos_utils.process_utils import ProcessStatus, StatusCallbackMap, ProcessState
+from ovos_utils.sound import get_sound_duration
 
-import warnings
 from ovos_dinkum_listener._util import _TemplateFilenameFormatter
 from ovos_dinkum_listener.plugins import load_stt_module, load_fallback_stt, FakeStreamingSTT
 from ovos_dinkum_listener.transformers import AudioTransformersService
 from ovos_dinkum_listener.voice_loop import DinkumVoiceLoop, ListeningMode, ListeningState
 from ovos_dinkum_listener.voice_loop.hotwords import HotwordContainer
 
-
-try:
-    from ovos_utils.sound import get_sound_duration
-except ImportError:
-
-    def get_sound_duration(*args, **kwargs):
-        raise ImportError("please install ovos-utils>=0.1.0a25")
-
 # Seconds between systemd watchdog updates
 WATCHDOG_DELAY = 0.5
 
@@ -194,7 +188,25 @@ def __init__(self, on_ready=on_ready, on_error=on_error,
 
         self.mic = mic or OVOSMicrophoneFactory.create(microphone_config)
 
-        self.hotwords = hotwords or HotwordContainer(self.bus)
+        # Wake word verifiers may veto a detection; every installed plugin is
+        # loaded unless its config section sets "enabled": false.
+        verifiers_cfg = self.config.get("listener", {}).get("ww_verifiers", {})
+        verifier_plugs = {}
+        for plug_type, plug in find_wake_word_verifier_plugins().items():
+            cfg = verifiers_cfg.get(plug_type, {})
+            if not cfg.get("enabled", True):
+                LOG.debug(f"wakeword verifier plugin disabled: {plug_type}")
+                continue
+            try:
+                verifier_plugs[plug_type] = plug(config=cfg)
+            except Exception:  # log and skip a plugin that fails to construct
+                LOG.exception(f"Failed to load wakeword verifier plugin: {plug_type}")
+        # Use the same "enabled" key as above so disabled plugins are not reported as missing.
+        missing_modules = [k for k, v in verifiers_cfg.items() if v.get("enabled", True) and k not in verifier_plugs]
+        if missing_modules:
+            LOG.warning(f"wake word verifier plugins enabled in config but not loaded: {missing_modules}")
+        LOG.debug(f"Loaded wake word verifier plugins: {list(verifier_plugs)}")
+        self.hotwords = hotwords or HotwordContainer(bus=self.bus, verifiers=list(verifier_plugs.values()))
         self.vad = vad or OVOSVADFactory.create()
         if stt and not isinstance(stt, StreamingSTT):
             stt = FakeStreamingSTT(stt)
diff --git a/ovos_dinkum_listener/voice_loop/hotwords.py b/ovos_dinkum_listener/voice_loop/hotwords.py
@@ -1,18 +1,14 @@
 from enum import Enum
 from os.path import dirname
 from threading import Event
-from typing import Optional
+from typing import Optional, List
 
 from ovos_config import Configuration
 from ovos_plugin_manager.wakewords import OVOSWakeWordFactory, HotWordEngine
+from ovos_plugin_manager.templates.hotwords import HotWordVerifier
 from ovos_utils.fakebus import FakeBus
 from ovos_utils.log import LOG
-try:
-    from ovos_utils.sound import get_sound_duration
-except ImportError:
-
-    def get_sound_duration(*args, **kwargs):
-        raise ImportError("please install ovos-utils>=0.1.0a25")
+from ovos_utils.sound import get_sound_duration
 
 
 class HotWordException(RuntimeWarning):
@@ -103,15 +99,15 @@ class HotwordContainer:
     _plugins = {}
     _loaded = Event()
 
-    def __init__(self, bus=FakeBus(), expected_duration=3, sample_rate=16000,
-                 sample_width=2, reload_allowed=True, autoload=False):
+    def __init__(self, bus=FakeBus(), verifiers: Optional[List[HotWordVerifier]] = None, reload_allowed=True, autoload=False):
         self.bus = bus
         self.reload_allowed = reload_allowed
         self.state = HotwordState.HOTWORD
         self.reload_on_failure = False
         self.applied_hotwords_config = None
         if autoload:  # load the hotword engines right away instead of waiting for an explicit call
             self.load_hotword_engines()
+        self.verifiers: List[HotWordVerifier] = verifiers or []  # None -> empty list, so verify() accepts every detection
 
     def load_hotword_engines(self):
         """
@@ -305,6 +301,13 @@ def get_ww(self, ww: str) -> dict:
         meta["engine"] = plug.__class__.__name__
         return meta
 
+    def verify(self, ww_audio: bytes) -> bool:
+        """Return True only when every configured verifier accepts ww_audio."""
+        rejecting = next((v for v in self.verifiers if not v.verify(ww_audio)), None)
+        if rejecting is not None:
+            LOG.debug(f"{rejecting.__class__.__name__}: verification failed - discarding wake word detection")
+        return rejecting is None
+
     def update(self, chunk: bytes):
         """
         Update appropriate engines based on self.state
diff --git a/ovos_dinkum_listener/voice_loop/voice_loop.py b/ovos_dinkum_listener/voice_loop/voice_loop.py
@@ -10,23 +10,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
+import audioop
 import time
 from collections import deque
 from dataclasses import dataclass, field
 from enum import Enum
+from threading import Event
 from typing import Callable, Deque, Optional
 
-import audioop
 from ovos_config import Configuration
 from ovos_plugin_manager.stt import StreamingSTT
-from ovos_plugin_manager.templates.microphone import Microphone
 from ovos_plugin_manager.vad import VADEngine
 from ovos_utils.log import LOG
-
-from ovos_dinkum_listener.plugins import FakeStreamingSTT
+from ovos_bus_client.session import SessionManager
 from ovos_dinkum_listener.transformers import AudioTransformersService
 from ovos_dinkum_listener.voice_loop.hotwords import HotwordContainer, HotwordState, HotWordException
+from ovos_plugin_manager.templates.microphone import Microphone
+
+from ovos_dinkum_listener.plugins import FakeStreamingSTT
 
 
 class ListeningState(str, Enum):
@@ -554,16 +555,20 @@ def _detect_ww(self, chunk: bytes) -> bool:
             LOG.debug(f"Wake word detected={ww}")
             ww_data = self.hotwords.get_ww(ww)
 
+            # Drain the buffered chunks up front: the verifiers need the audio
+            # even when no listenword_audio_callback is registered.
+            hotword_audio_bytes = b"".join(self.hotword_chunks)
+            self.hotword_chunks.clear()
+
+            # Verifier plugins may veto the detection before any callback fires.
+            if not self.hotwords.verify(hotword_audio_bytes):
+                LOG.debug("wake word verifier plugins discarded detection")
+                return False
+
             # Callback to handle recorded hotword audio
             if self.listenword_audio_callback is not None:
-                hotword_audio_bytes = bytes()
-                while self.hotword_chunks:
-                    hotword_audio_bytes += self.hotword_chunks.popleft()
-
                 self.listenword_audio_callback(hotword_audio_bytes, ww_data)
 
-            self.hotword_chunks.clear()
-
             # Callback to handle wake up
             if self.wake_callback is not None:
                 # emit record_begin
diff --git a/requirements/extras.txt b/requirements/extras.txt
@@ -2,7 +2,7 @@
 ovos-stt-plugin-server>=0.1.2,<1.0.0
 
 # VAD plugins
-ovos-vad-plugin-silero>=0.0.5,<1.0.0
+ovos-vad-plugin-silero>=0.1.0a1,<1.0.0
 
 # Microphone plugins
 ovos-microphone-plugin-sounddevice>=0.0.1,<1.0.0
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -1,4 +1,4 @@
-ovos-plugin-manager>=1.0.2,<3.0.0
+ovos-plugin-manager>=2.1.0a1,<3.0.0
 ovos-utils>=0.8.1,<1.0.0
 ovos-config>=1.2.2,<3.0.0
 ovos_bus_client>=1.3.4,<2.0.0

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-ovos-plugin-manager>=1.0.2,<3.0.0`
	`1`	`+ovos-plugin-manager>=2.1.0a1,<3.0.0`
`2`	`2`	`ovos-utils>=0.8.1,<1.0.0`
`3`	`3`	`ovos-config>=1.2.2,<3.0.0`
`4`	`4`	`ovos_bus_client>=1.3.4,<2.0.0`