Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
synesthesiam committed Dec 5, 2023
0 parents commit 7de251d
Show file tree
Hide file tree
Showing 24 changed files with 365 additions and 0 deletions.
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
.DS_Store
.idea
*.log
tmp/

*.py[cod]
*.egg
*.egg-info/
build
htmlcov

/.venv/
.mypy_cache/
__pycache__/

/output/
/token.txt

/dist/
6 changes: 6 additions & 0 deletions .isort.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[settings]
multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
2 changes: 2 additions & 0 deletions .projectile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- /.venv/
- /.mypy_cache/
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Changelog

## 1.0.0

- Initial version
21 changes: 21 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Michael Hansen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include requirements.txt
include pysilero_vad/models/*.onnx
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# pySilero VAD

A pre-packaged voice activity detector using [silero-vad](https://github.com/snakers4/silero-vad).

``` sh
pip install pysilero-vad
```

``` python
from pysilero_vad import SileroVoiceActivityDetector

vad = SileroVoiceActivityDetector()

# Audio must be 16 kHz, 16-bit mono PCM
if vad(audio_bytes) >= 0.5:
    print("Speech")
else:
    print("Silence")
```

5 changes: 5 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[mypy]
ignore_missing_imports = true

[mypy-setuptools.*]
ignore_missing_imports = True
37 changes: 37 additions & 0 deletions pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[MESSAGES CONTROL]
disable=
format,
abstract-method,
cyclic-import,
duplicate-code,
global-statement,
import-outside-toplevel,
inconsistent-return-statements,
locally-disabled,
not-context-manager,
too-few-public-methods,
too-many-arguments,
too-many-branches,
too-many-instance-attributes,
too-many-lines,
too-many-locals,
too-many-public-methods,
too-many-return-statements,
too-many-statements,
too-many-boolean-expressions,
unnecessary-pass,
unused-argument,
broad-except,
too-many-nested-blocks,
invalid-name,
unused-import,
fixme,
useless-super-delegation,
missing-module-docstring,
missing-class-docstring,
missing-function-docstring,
import-error,
consider-using-with

[FORMAT]
expected-line-ending-format=LF
60 changes: 60 additions & 0 deletions pysilero_vad/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import logging
from pathlib import Path
from typing import Final, Union

import numpy as np
import onnxruntime

_RATE: Final = 16000
_MAX_WAV: Final = 32767
_DIR = Path(__file__).parent
_DEFAULT_ONNX_PATH = _DIR / "models" / "silero_vad.onnx"

# Module-scoped logger (not the root logger) so records carry this package's name
_LOGGER = logging.getLogger(__name__)


class SileroVoiceActivityDetector:
    """Detects speech/silence using Silero VAD.

    The detector is stateful: the "h" and "c" tensors returned by the model
    on each call are stored and passed back in on the next call. Use
    :meth:`reset` to return to the initial all-zero state.

    https://github.com/snakers4/silero-vad
    """

    # Shape of the "h" and "c" state tensors exchanged with the model
    _STATE_SHAPE = (2, 1, 64)

    def __init__(self, onnx_path: Union[str, Path] = _DEFAULT_ONNX_PATH) -> None:
        """Load the ONNX model and start from a zeroed state.

        Args:
            onnx_path: Path to the Silero VAD ONNX model. Defaults to the
                model file shipped in this package's ``models`` directory.
        """
        onnx_path = str(onnx_path)

        # Restrict ONNX Runtime to a single thread for both op-level settings
        opts = onnxruntime.SessionOptions()
        opts.inter_op_num_threads = 1
        opts.intra_op_num_threads = 1

        self.session = onnxruntime.InferenceSession(
            onnx_path, providers=["CPUExecutionProvider"], sess_options=opts
        )

        # Single source of truth for the initial state
        self.reset()

    def reset(self) -> None:
        """Reset state."""
        # Allocate directly as float32 instead of creating float64 and casting
        self._h = np.zeros(self._STATE_SHAPE, dtype=np.float32)
        self._c = np.zeros(self._STATE_SHAPE, dtype=np.float32)

    def __call__(self, audio: bytes) -> float:
        """Return probability of speech in audio [0-1].

        Audio must be 16 kHz 16-bit mono PCM.

        Args:
            audio: Raw PCM samples, interpreted as 16-bit integers.

        Returns:
            The model's output for this audio as a Python ``float``.
        """
        # Scale 16-bit samples by the maximum positive sample value
        audio_array = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / _MAX_WAV

        # Add batch dimension
        audio_array = np.expand_dims(audio_array, 0)

        ort_inputs = {
            "input": audio_array,
            "h": self._h,
            "c": self._c,
            "sr": np.array(_RATE, dtype=np.int64),
        }
        ort_outs = self.session.run(None, ort_inputs)

        # Keep the returned state so the next call continues from it
        out, self._h, self._c = ort_outs

        # Convert to a built-in float to honour the declared return type
        # (squeeze() alone yields a NumPy array)
        return float(out.squeeze())
Binary file added pysilero_vad/models/silero_vad.onnx
Binary file not shown.
Empty file added pysilero_vad/py.typed
Empty file.
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
onnxruntime>=1.10.0,<2
numpy<1.26
6 changes: 6 additions & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
black==22.12.0
flake8==6.0.0
isort==5.11.3
mypy==0.991
pylint==2.15.9
pytest==7.4.3
14 changes: 14 additions & 0 deletions script/format
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path

# Format the package's source with black and isort from the project's .venv.
#
# resolve() matters: on Python < 3.9 __file__ can be a relative path such as
# "script/format", which makes _PROGRAM_DIR equal "." and its .name empty, so
# the module directory could not be derived from the project directory name.
_DIR = Path(__file__).resolve().parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

# The package directory is named after the project directory ("-" -> "_")
_MODULE = _PROGRAM_DIR.name.replace("-", "_")
_MODULE_DIR = _PROGRAM_DIR / _MODULE

# Look up the virtual environment's Python interpreter
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

for _tool in ("black", "isort"):
    subprocess.check_call([context.env_exe, "-m", _tool, str(_MODULE_DIR)])
17 changes: 17 additions & 0 deletions script/lint
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path

# Run the formatting checks and linters on the package from the project's .venv.
#
# resolve() matters: on Python < 3.9 __file__ can be a relative path such as
# "script/lint", which makes _PROGRAM_DIR equal "." and its .name empty, so
# the module directory could not be derived from the project directory name.
_DIR = Path(__file__).resolve().parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

# The package directory is named after the project directory ("-" -> "_")
_MODULE = _PROGRAM_DIR.name.replace("-", "_")
_MODULE_DIR = _PROGRAM_DIR / _MODULE

# Look up the virtual environment's Python interpreter
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# (tool, extra arguments) in the order they are run; check_call stops at the
# first tool that exits with a non-zero status.
_CHECKS = (
    ("black", ["--check"]),
    ("isort", ["--check"]),
    ("flake8", []),
    ("pylint", []),
    ("mypy", []),
)

for _tool, _extra_args in _CHECKS:
    subprocess.check_call(
        [context.env_exe, "-m", _tool, str(_MODULE_DIR)] + _extra_args
    )
11 changes: 11 additions & 0 deletions script/package
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path

# Build a wheel for the project using the interpreter from the project's .venv.
_DIR = Path(__file__).resolve().parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

# Look up the virtual environment's Python interpreter
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# Run setup.py from the project root: it calls find_packages() without a
# search directory, so the working directory decides which packages are found.
# Arguments are passed as strings rather than Path objects.
subprocess.check_call(
    [context.env_exe, str(_PROGRAM_DIR / "setup.py"), "bdist_wheel"],
    cwd=str(_PROGRAM_DIR),
)
22 changes: 22 additions & 0 deletions script/setup
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path

# Create the project's virtual environment and install its requirements.
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_REQUIREMENTS = _PROGRAM_DIR / "requirements.txt"


# Create virtual environment (with pip available inside it)
builder = venv.EnvBuilder(with_pip=True)
context = builder.ensure_directories(_VENV_DIR)
builder.create(_VENV_DIR)

# Every install below goes through the environment's own pip
pip = [context.env_exe, "-m", "pip"]

# Upgrade the packaging tools first, then install the requirements
for _install_args in (
    ["--upgrade", "pip"],
    ["--upgrade", "setuptools", "wheel"],
    ["-r", str(_REQUIREMENTS)],
):
    subprocess.check_call([*pip, "install", *_install_args])
12 changes: 12 additions & 0 deletions script/test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env python3
import subprocess
import venv
from pathlib import Path

# Run the test suite with pytest from the project's .venv.
_DIR = Path(__file__).resolve().parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_TESTS_DIR = _PROGRAM_DIR / "tests"

# Look up the virtual environment's Python interpreter
context = venv.EnvBuilder().ensure_directories(_VENV_DIR)

# Run from the project root: "python -m pytest" puts the working directory on
# sys.path, which is how the tests import the pysilero_vad package from the
# source tree regardless of where this script was started.
subprocess.check_call(
    [context.env_exe, "-m", "pytest", str(_TESTS_DIR)],
    cwd=str(_PROGRAM_DIR),
)
22 changes: 22 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[flake8]
# To work with Black
max-line-length = 88
# E501: line too long
# W503: Line break occurred before a binary operator
# E203: Whitespace before ':'
# D202 No blank lines allowed after function docstring
# W504 line break after binary operator
ignore =
E501,
W503,
E203,
D202,
W504

[isort]
multi_line_output = 3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
indent = " "
53 changes: 53 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python3
from pathlib import Path

import setuptools
from setuptools import setup

this_dir = Path(__file__).parent
module_dir = this_dir / "pysilero_vad"

# -----------------------------------------------------------------------------

# Load README in as long description (left empty when the file is missing)
long_description: str = ""
readme_path = this_dir / "README.md"
if readme_path.is_file():
    long_description = readme_path.read_text(encoding="utf-8")

# Install requirements are read from requirements.txt, one per line.
# Blank lines and comment lines are skipped so only specifiers are passed on.
requirements = []
requirements_path = this_dir / "requirements.txt"
if requirements_path.is_file():
    with open(requirements_path, "r", encoding="utf-8") as requirements_file:
        requirements = [
            line.strip()
            for line in requirements_file
            if line.strip() and not line.strip().startswith("#")
        ]

# -----------------------------------------------------------------------------

setup(
    name="pysilero_vad",
    version="1.0.0",
    description="Pre-packaged voice activity detector using silero-vad",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="http://github.com/rhasspy/silero-vad",
    author="Michael Hansen",
    author_email="[email protected]",
    license="MIT",
    packages=setuptools.find_packages(),
    package_data={
        "pysilero_vad": ["py.typed", "models/silero_vad.onnx"],
    },
    install_requires=requirements,
    # The package imports typing.Final, which was added in Python 3.8, so
    # 3.7 is neither advertised nor allowed to install.
    python_requires=">=3.8",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Topic :: Multimedia :: Sound/Audio :: Speech",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
    ],
    keywords="voice activity vad",
)
Binary file added tests/silence.wav
Binary file not shown.
Binary file added tests/speech.wav
Binary file not shown.
29 changes: 29 additions & 0 deletions tests/test_vad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import wave
from pathlib import Path
from typing import Union

from pysilero_vad import SileroVoiceActivityDetector

# Directory containing this test module; the WAV files used by the tests are
# loaded from it.
_DIR = Path(__file__).parent


def _load_wav(wav_path: Union[str, Path]) -> bytes:
    """Read every frame of a WAV file and return them as raw bytes.

    The file is expected to be 16000 Hz, 2 bytes per sample, single channel;
    any other format fails an assertion.
    """
    with wave.open(str(wav_path), "rb") as reader:
        sample_rate = reader.getframerate()
        assert sample_rate == 16000

        sample_width = reader.getsampwidth()
        assert sample_width == 2

        channel_count = reader.getnchannels()
        assert channel_count == 1

        frame_count = reader.getnframes()
        frames = reader.readframes(frame_count)

    return frames


def test_silence() -> None:
    """The silence recording must score below 0.5."""
    detector = SileroVoiceActivityDetector()
    silence_audio = _load_wav(_DIR / "silence.wav")
    speech_probability = detector(silence_audio)
    assert speech_probability < 0.5


def test_speech() -> None:
    """The speech recording must score at least 0.5."""
    detector = SileroVoiceActivityDetector()
    speech_audio = _load_wav(_DIR / "speech.wav")
    speech_probability = detector(speech_audio)
    assert speech_probability >= 0.5

0 comments on commit 7de251d

Please sign in to comment.