Initial commit

rhasspy · Dec 5, 2023 · 7de251d · 7de251d
commit 7de251d
Show file tree

Hide file tree

Showing 24 changed files with 365 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,19 @@
+.DS_Store
+.idea
+*.log
+tmp/
+
+*.py[cod]
+*.egg
+*.egg-info/
+build
+htmlcov
+
+/.venv/
+.mypy_cache/
+__pycache__/
+
+/output/
+/token.txt
+
+/dist/
diff --git a/.isort.cfg b/.isort.cfg
@@ -0,0 +1,6 @@
+[settings]
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
diff --git a/.projectile b/.projectile
@@ -0,0 +1,2 @@
+- /.venv/
+- /.mypy_cache/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Changelog
+
+## 1.0.0
+
+- Initial version
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Michael Hansen
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,2 @@
+include requirements.txt
+include pysilero_vad/models/*.onnx
diff --git a/README.md b/README.md
@@ -0,0 +1,20 @@
+# pySilero VAD
+
+A pre-packaged voice activity detector using [silero-vad](https://github.com/snakers4/silero-vad).
+
+``` sh
+pip install pysilero-vad
+```
+
+``` python
+from pysilero_vad import SileroVoiceActivityDetector
+
+vad = SileroVoiceActivityDetector()
+
+# Audio must be 16 kHz, 16-bit mono PCM
+if vad(audio_bytes) >= 0.5:
+    print("Speech")
+else:
+    print("Silence")
+```
+
diff --git a/mypy.ini b/mypy.ini
@@ -0,0 +1,5 @@
+[mypy]
+ignore_missing_imports = true
+
+[mypy-setuptools.*]
+ignore_missing_imports = True
diff --git a/pylintrc b/pylintrc
@@ -0,0 +1,37 @@
+[MESSAGES CONTROL]
+disable=
+  format,
+  abstract-method,
+  cyclic-import,
+  duplicate-code,
+  global-statement,
+  import-outside-toplevel,
+  inconsistent-return-statements,
+  locally-disabled,
+  not-context-manager,
+  too-few-public-methods,
+  too-many-arguments,
+  too-many-branches,
+  too-many-instance-attributes,
+  too-many-lines,
+  too-many-locals,
+  too-many-public-methods,
+  too-many-return-statements,
+  too-many-statements,
+  too-many-boolean-expressions,
+  unnecessary-pass,
+  unused-argument,
+  broad-except,
+  too-many-nested-blocks,
+  invalid-name,
+  unused-import,
+  fixme,
+  useless-super-delegation,
+  missing-module-docstring,
+  missing-class-docstring,
+  missing-function-docstring,
+  import-error,
+  consider-using-with
+
+[FORMAT]
+expected-line-ending-format=LF
diff --git a/pysilero_vad/__init__.py b/pysilero_vad/__init__.py
@@ -0,0 +1,60 @@
+import logging
+from pathlib import Path
+from typing import Final, Union
+
+import numpy as np
+import onnxruntime
+
+_RATE: Final = 16000
+_MAX_WAV: Final = 32767
+_DIR = Path(__file__).parent
+_DEFAULT_ONNX_PATH = _DIR / "models" / "silero_vad.onnx"
+
+_LOGGER = logging.getLogger()
+
+
+class SileroVoiceActivityDetector:
+    """Detects speech/silence using Silero VAD.
+
+    https://github.com/snakers4/silero-vad
+    """
+
+    def __init__(self, onnx_path: Union[str, Path] = _DEFAULT_ONNX_PATH) -> None:
+        onnx_path = str(onnx_path)
+
+        opts = onnxruntime.SessionOptions()
+        opts.inter_op_num_threads = 1
+        opts.intra_op_num_threads = 1
+
+        self.session = onnxruntime.InferenceSession(
+            onnx_path, providers=["CPUExecutionProvider"], sess_options=opts
+        )
+
+        self._h = np.zeros((2, 1, 64)).astype("float32")
+        self._c = np.zeros((2, 1, 64)).astype("float32")
+
+    def reset(self) -> None:
+        """Reset state."""
+        self._h = np.zeros((2, 1, 64)).astype("float32")
+        self._c = np.zeros((2, 1, 64)).astype("float32")
+
+    def __call__(self, audio: bytes) -> float:
+        """Return probability of speech in audio [0-1].
+
+        Audio must be 16Khz 16-bit mono PCM.
+        """
+        audio_array = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / _MAX_WAV
+
+        # Add batch dimension
+        audio_array = np.expand_dims(audio_array, 0)
+
+        ort_inputs = {
+            "input": audio_array,
+            "h": self._h,
+            "c": self._c,
+            "sr": np.array(_RATE, dtype=np.int64),
+        }
+        ort_outs = self.session.run(None, ort_inputs)
+        out, self._h, self._c = ort_outs
+
+        return out.squeeze()
diff --git a/pysilero_vad/models/silero_vad.onnx b/pysilero_vad/models/silero_vad.onnx
diff --git a/pysilero_vad/py.typed b/pysilero_vad/py.typed
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+onnxruntime>=1.10.0,<2
+numpy<1.26
diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -0,0 +1,6 @@
+black==22.12.0
+flake8==6.0.0
+isort==5.11.3
+mypy==0.991
+pylint==2.15.9
+pytest==7.4.3
diff --git a/script/format b/script/format
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+_MODULE = _PROGRAM_DIR.name.replace("-", "_")
+_MODULE_DIR = _PROGRAM_DIR / _MODULE
+
+context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
+subprocess.check_call([context.env_exe, "-m", "black", str(_MODULE_DIR)])
+subprocess.check_call([context.env_exe, "-m", "isort", str(_MODULE_DIR)])
diff --git a/script/lint b/script/lint
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+_MODULE = _PROGRAM_DIR.name.replace("-", "_")
+_MODULE_DIR = _PROGRAM_DIR / _MODULE
+
+context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
+subprocess.check_call([context.env_exe, "-m", "black", str(_MODULE_DIR), "--check"])
+subprocess.check_call([context.env_exe, "-m", "isort", str(_MODULE_DIR), "--check"])
+subprocess.check_call([context.env_exe, "-m", "flake8", str(_MODULE_DIR)])
+subprocess.check_call([context.env_exe, "-m", "pylint", str(_MODULE_DIR)])
+subprocess.check_call([context.env_exe, "-m", "mypy", str(_MODULE_DIR)])
diff --git a/script/package b/script/package
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+
+context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
+subprocess.check_call([context.env_exe, _PROGRAM_DIR / "setup.py", "bdist_wheel"])
diff --git a/script/setup b/script/setup
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+
+
+# Create virtual environment
+builder = venv.EnvBuilder(with_pip=True)
+context = builder.ensure_directories(_VENV_DIR)
+builder.create(_VENV_DIR)
+
+# Upgrade dependencies
+pip = [context.env_exe, "-m", "pip"]
+subprocess.check_call(pip + ["install", "--upgrade", "pip"])
+subprocess.check_call(pip + ["install", "--upgrade", "setuptools", "wheel"])
+
+# Install requirements
+subprocess.check_call(pip + ["install", "-r", str(_PROGRAM_DIR / "requirements.txt")])
diff --git a/script/test b/script/test
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+import subprocess
+import venv
+from pathlib import Path
+
+_DIR = Path(__file__).parent
+_PROGRAM_DIR = _DIR.parent
+_VENV_DIR = _PROGRAM_DIR / ".venv"
+_TESTS_DIR = _PROGRAM_DIR / "tests"
+
+context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
+subprocess.check_call([context.env_exe, "-m", "pytest", str(_TESTS_DIR)])
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,22 @@
+[flake8]
+# To work with Black
+max-line-length = 88
+# E501: line too long
+# W503: Line break occurred before a binary operator
+# E203: Whitespace before ':'
+# D202 No blank lines allowed after function docstring
+# W504 line break after binary operator
+ignore =
+    E501,
+    W503,
+    E203,
+    D202,
+    W504
+
+[isort]
+multi_line_output = 3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
+indent = "    "
diff --git a/setup.py b/setup.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+from pathlib import Path
+
+import setuptools
+from setuptools import setup
+
+this_dir = Path(__file__).parent
+module_dir = this_dir / "pysilero_vad"
+
+# -----------------------------------------------------------------------------
+
+# Load README in as long description
+long_description: str = ""
+readme_path = this_dir / "README.md"
+if readme_path.is_file():
+    long_description = readme_path.read_text(encoding="utf-8")
+
+requirements = []
+requirements_path = this_dir / "requirements.txt"
+if requirements_path.is_file():
+    with open(requirements_path, "r", encoding="utf-8") as requirements_file:
+        requirements = requirements_file.read().splitlines()
+
+# -----------------------------------------------------------------------------
+
+setup(
+    name="pysilero_vad",
+    version="1.0.0",
+    description="Pre-packaged voice activity detector using silero-vad",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="http://github.com/rhasspy/silero-vad",
+    author="Michael Hansen",
+    author_email="[email protected]",
+    license="MIT",
+    packages=setuptools.find_packages(),
+    package_data={
+        "pysilero_vad": ["py.typed", "models/silero_vad.onnx"],
+    },
+    install_requires=requirements,
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Topic :: Multimedia :: Sound/Audio :: Speech",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+    ],
+    keywords="voice activity vad",
+)
diff --git a/tests/silence.wav b/tests/silence.wav
diff --git a/tests/speech.wav b/tests/speech.wav
diff --git a/tests/test_vad.py b/tests/test_vad.py
@@ -0,0 +1,29 @@
+import wave
+from pathlib import Path
+from typing import Union
+
+from pysilero_vad import SileroVoiceActivityDetector
+
+_DIR = Path(__file__).parent
+
+
+def _load_wav(wav_path: Union[str, Path]) -> bytes:
+    """Return audio bytes from a WAV file."""
+    with wave.open(str(wav_path), "rb") as wav_file:
+        assert wav_file.getframerate() == 16000
+        assert wav_file.getsampwidth() == 2
+        assert wav_file.getnchannels() == 1
+
+        return wav_file.readframes(wav_file.getnframes())
+
+
+def test_silence() -> None:
+    """Test VAD on recorded silence."""
+    vad = SileroVoiceActivityDetector()
+    assert vad(_load_wav(_DIR / "silence.wav")) < 0.5
+
+
+def test_speech() -> None:
+    """Test VAD on recorded speech."""
+    vad = SileroVoiceActivityDetector()
+    assert vad(_load_wav(_DIR / "speech.wav")) >= 0.5
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		include requirements.txt
		include pysilero_vad/models/*.onnx