diff --git a/README.md b/README.md index 2973521..9ddcd56 100644 --- a/README.md +++ b/README.md @@ -283,6 +283,14 @@ conda env config vars set OPENAI_API_KEY="your-api-key-here" conda deactivate ``` +- To use [MiniMax](https://www.minimaxi.com/) for translation (MiniMax-M2.5, MiniMax-M2.7) or TTS (speech-2.8-hd with 12 voices), set up your MiniMax API key: + +``` +conda activate sonitr +conda env config vars set MINIMAX_API_KEY="your-minimax-api-key-here" +conda deactivate +``` + ## Command line arguments The app_rvc.py script supports command-line arguments to customize its behavior. Here's a brief guide on how to use them: @@ -363,6 +371,7 @@ This project leverages a number of open-source projects. We would like to acknow - [Coqui TTS](https://github.com/coqui-ai/TTS) - [pypdf](https://github.com/py-pdf/pypdf) - [OpenVoice](https://github.com/myshell-ai/OpenVoice) +- [MiniMax](https://www.minimaxi.com/) - LLM translation and TTS provider ## License Although the code is licensed under Apache 2, the models or weights may have commercial restrictions, as seen with pyannote diarization. 
diff --git a/app_rvc.py b/app_rvc.py index 47718cd..16fe031 100644 --- a/app_rvc.py +++ b/app_rvc.py @@ -40,6 +40,7 @@ BARK_VOICES_LIST, VITS_VOICES_LIST, OPENAI_TTS_MODELS, + MINIMAX_TTS_MODELS, ) from soni_translate.utils import ( remove_files, @@ -120,6 +121,7 @@ def __init__(self, piper_enabled, xtts_enabled): self.list_bark = list(BARK_VOICES_LIST.keys()) self.list_vits = list(VITS_VOICES_LIST.keys()) self.list_openai_tts = OPENAI_TTS_MODELS + self.list_minimax_tts = MINIMAX_TTS_MODELS self.piper_enabled = piper_enabled self.list_vits_onnx = ( piper_tts_voices_list() if self.piper_enabled else [] @@ -135,6 +137,7 @@ def tts_list(self): + self.list_bark + self.list_vits + self.list_openai_tts + + self.list_minimax_tts + self.list_vits_onnx ) return list_tts @@ -268,6 +271,16 @@ def check_openai_api_key(): ) +def check_minimax_api_key(): + if not os.environ.get("MINIMAX_API_KEY"): + raise ValueError( + "To use MiniMax for translation or TTS, please set up your " + "MiniMax API key as an environment variable: " + "export MINIMAX_API_KEY='your-api-key-here'. Or change the " + "translation process / TTS voice in settings." 
+ ) + + class SoniTranslate(SoniTrCache): def __init__(self, cpu_mode=False): super().__init__() @@ -453,6 +466,9 @@ def multilingual_media_conversion( ): check_openai_api_key() + if "MiniMax" in translate_process or "MiniMax-TTS" in tts_voice00: + check_minimax_api_key() + if media_file is None: media_file = ( directory_input @@ -1276,6 +1292,8 @@ def multilingual_docs_conversion( ): if "gpt" in translate_process: check_openai_api_key() + if "MiniMax" in translate_process or "MiniMax-TTS" in tts_voice00: + check_minimax_api_key() SOURCE_LANGUAGE = LANGUAGES[origin_language] if translate_process != "disable_translation": diff --git a/soni_translate/language_configuration.py b/soni_translate/language_configuration.py index d0e4b06..c24d675 100644 --- a/soni_translate/language_configuration.py +++ b/soni_translate/language_configuration.py @@ -505,6 +505,21 @@ def fix_code_language(translate_to, syntax="google"): ">shimmer HD OpenAI-TTS" ] +MINIMAX_TTS_MODELS = [ + ">Wise_Woman MiniMax-TTS", + ">Deep_Voice_Man MiniMax-TTS", + ">Friendly_Person MiniMax-TTS", + ">Inspirational_girl MiniMax-TTS", + ">sweet_girl MiniMax-TTS", + ">cute_boy MiniMax-TTS", + ">lovely_girl MiniMax-TTS", + ">English_Graceful_Lady MiniMax-TTS", + ">English_Insightful_Speaker MiniMax-TTS", + ">English_radiant_girl MiniMax-TTS", + ">English_Persuasive_Man MiniMax-TTS", + ">English_Lucky_Robot MiniMax-TTS", +] + LANGUAGE_CODE_IN_THREE_LETTERS = { "Automatic detection": "aut", "ar": "ara", diff --git a/soni_translate/text_to_speech.py b/soni_translate/text_to_speech.py index 2626912..332a118 100644 --- a/soni_translate/text_to_speech.py +++ b/soni_translate/text_to_speech.py @@ -2,6 +2,7 @@ import edge_tts, asyncio, json, glob # noqa from tqdm import tqdm import librosa, os, re, torch, gc, subprocess # noqa +import requests from .language_configuration import ( fix_code_language, BARK_VOICES_LIST, @@ -939,6 +940,86 @@ def segments_openai_tts( error_handling_in_tts(error, segment, 
TRANSLATE_AUDIO_TO, filename) +# ===================================== +# MINIMAX TTS +# ===================================== + + +def segments_minimax_tts(filtered_minimax_tts_segments, TRANSLATE_AUDIO_TO): + api_key = os.environ.get("MINIMAX_API_KEY") + if not api_key: + raise TTS_OperationError( + "MINIMAX_API_KEY environment variable is not set. " + "Please set it to use MiniMax TTS." + ) + + sampling_rate = 24000 + + for segment in tqdm(filtered_minimax_tts_segments["segments"]): + speaker = segment["speaker"] # noqa + text = segment["text"].strip() + start = segment["start"] + tts_name = segment["tts_name"] + + # Extract voice_id from tts_name (e.g. ">Wise_Woman MiniMax-TTS") + voice_id = tts_name.split()[0][1:] + + # make the tts audio + filename = f"audio/{start}.ogg" + logger.info(f"{text} >> {filename}") + + try: + response = requests.post( + "https://api.minimax.io/v1/t2a_v2", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": "speech-2.8-hd", + "text": text, + "voice_setting": { + "voice_id": voice_id, + "speed": 1.0, + }, + "audio_setting": { + "format": "mp3", + }, + }, + timeout=60, + ) + response.raise_for_status() + result = response.json() + + if "data" not in result or "audio" not in result["data"]: + raise TTS_OperationError( + f"MiniMax TTS returned unexpected response: {result}" + ) + + audio_hex = result["data"]["audio"] + audio_bytes = bytes.fromhex(audio_hex) + + # Write mp3 to temp file, then read and convert + temp_file = filename[:-3] + "mp3" + with open(temp_file, "wb") as f: + f.write(audio_bytes) + + data, sample_rate = sf.read(temp_file) + data = pad_array(data, sample_rate) + + write_chunked( + file=filename, + samplerate=sample_rate, + data=data, + format="ogg", + subtype="vorbis", + ) + verify_saved_file_and_size(filename) + + except Exception as error: + error_handling_in_tts(error, segment, TRANSLATE_AUDIO_TO, filename) + + # ===================================== # 
Select task TTS # ===================================== @@ -1022,6 +1103,7 @@ def audio_segmentation_to_voice( pattern_coqui = re.compile(r".+\.(wav|mp3|ogg|m4a)$") pattern_vits_onnx = re.compile(r".* VITS-onnx$") pattern_openai_tts = re.compile(r".* OpenAI-TTS$") + pattern_minimax_tts = re.compile(r".* MiniMax-TTS$") all_segments = result_diarize["segments"] @@ -1035,6 +1117,9 @@ def audio_segmentation_to_voice( speakers_openai_tts = find_spkr( pattern_openai_tts, speaker_to_voice, all_segments ) + speakers_minimax_tts = find_spkr( + pattern_minimax_tts, speaker_to_voice, all_segments + ) # Filter method in segments filtered_edge = filter_by_speaker(speakers_edge, all_segments) @@ -1043,6 +1128,7 @@ def audio_segmentation_to_voice( filtered_coqui = filter_by_speaker(speakers_coqui, all_segments) filtered_vits_onnx = filter_by_speaker(speakers_vits_onnx, all_segments) filtered_openai_tts = filter_by_speaker(speakers_openai_tts, all_segments) + filtered_minimax_tts = filter_by_speaker(speakers_minimax_tts, all_segments) # Infer if filtered_edge["segments"]: @@ -1072,6 +1158,9 @@ def audio_segmentation_to_voice( if filtered_openai_tts["segments"]: logger.info(f"OpenAI TTS: {speakers_openai_tts}") segments_openai_tts(filtered_openai_tts, TRANSLATE_AUDIO_TO) # wav + if filtered_minimax_tts["segments"]: + logger.info(f"MiniMax TTS: {speakers_minimax_tts}") + segments_minimax_tts(filtered_minimax_tts, TRANSLATE_AUDIO_TO) [result.pop("tts_name", None) for result in result_diarize["segments"]] return [ @@ -1080,7 +1169,8 @@ def audio_segmentation_to_voice( speakers_vits, speakers_coqui, speakers_vits_onnx, - speakers_openai_tts + speakers_openai_tts, + speakers_minimax_tts, ] @@ -1099,7 +1189,8 @@ def accelerate_segments( speakers_vits, speakers_coqui, speakers_vits_onnx, - speakers_openai_tts + speakers_openai_tts, + speakers_minimax_tts, ) = valid_speakers create_directories(f"{folder_output}/audio/") diff --git a/soni_translate/translate_segments.py 
b/soni_translate/translate_segments.py index 0ee87db..14f5283 100644 --- a/soni_translate/translate_segments.py +++ b/soni_translate/translate_segments.py @@ -2,6 +2,7 @@ from deep_translator import GoogleTranslator from itertools import chain import copy +import os from .language_configuration import fix_code_language, INVERTED_LANGUAGES from .logging_setup import logger import re @@ -15,12 +16,18 @@ "gpt-3.5-turbo-0125", "gpt-4-turbo-preview_batch", "gpt-4-turbo-preview", + "MiniMax-M2.5_batch", + "MiniMax-M2.5", + "MiniMax-M2.7_batch", + "MiniMax-M2.7", "disable_translation", ] DOCS_TRANSLATION_PROCESS_OPTIONS = [ "google_translator", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview", + "MiniMax-M2.5", + "MiniMax-M2.7", "disable_translation", ] @@ -213,6 +220,11 @@ def call_gpt_translate( ] ) result = response.choices[0].message.content + # Strip thinking tags (e.g. from MiniMax models) before parsing + result = re.sub(r"<think>.*?</think>\s*", "", result, flags=re.DOTALL) + # Strip markdown code fences if present + result = re.sub(r"^```(?:json)?\s*\n?", "", result.strip(), flags=re.MULTILINE) + result = re.sub(r"\n?```\s*$", "", result.strip(), flags=re.MULTILINE) logger.debug(f"Result: {str(result)}") try: @@ -267,12 +279,28 @@ def call_gpt_translate( return translation -def gpt_sequential(segments, model, target, source=None): +def _create_minimax_client(): + """Create an OpenAI-compatible client for MiniMax API.""" + from openai import OpenAI + + api_key = os.environ.get("MINIMAX_API_KEY") + if not api_key: + raise ValueError( + "To use MiniMax for translation, please set up your MiniMax API " + "key as an environment variable: " + "export MINIMAX_API_KEY='your-api-key-here'. Or change the " + "translation process in Advanced settings." 
+ ) + return OpenAI(api_key=api_key, base_url="https://api.minimax.io/v1") + + +def gpt_sequential(segments, model, target, source=None, client=None): from openai import OpenAI translated_segments = copy.deepcopy(segments) - client = OpenAI() + if client is None: + client = OpenAI() progress_bar = tqdm(total=len(segments), desc="Translating") lang_tg = re.sub(r'\([^)]*\)', '', INVERTED_LANGUAGES[target]).strip() @@ -318,7 +346,7 @@ def gpt_sequential(segments, model, target, source=None): return translated_segments -def gpt_batch(segments, model, target, token_batch_limit=900, source=None): +def gpt_batch(segments, model, target, token_batch_limit=900, source=None, client=None): from openai import OpenAI import tiktoken @@ -326,7 +354,8 @@ def gpt_batch(segments, model, target, token_batch_limit=900, source=None): progress_bar = tqdm(total=len(segments), desc="Translating") segments_copy = copy.deepcopy(segments) encoding = tiktoken.get_encoding("cl100k_base") - client = OpenAI() + if client is None: + client = OpenAI() lang_tg = re.sub(r'\([^)]*\)', '', INVERTED_LANGUAGES[target]).strip() lang_sc = "" @@ -451,6 +480,20 @@ def translate_text( token_batch_limit, source ) + case model if model in ["MiniMax-M2.5", "MiniMax-M2.7"]: + return gpt_sequential( + segments, model, target, source, + client=_create_minimax_client() + ) + case model if model in ["MiniMax-M2.5_batch", "MiniMax-M2.7_batch"]: + return gpt_batch( + segments, + translation_process.replace("_batch", ""), + target, + token_batch_limit, + source, + client=_create_minimax_client() + ) case "disable_translation": return segments case _: diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..ee6c16a --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,57 @@ +"""Pytest conftest - mock heavy dependencies before importing project modules.""" +import sys +import types +from unittest.mock 
import MagicMock + +# Mock heavy dependencies that are not needed for testing MiniMax integration +MOCK_MODULES = [ + "edge_tts", + "gtts", + "whisperx", + "torch", + "librosa", + "soundfile", + "pydub", + "transformers", + "optimum", + "optimum.bettertransformer", + "TTS", + "TTS.api", + "piper", + "piper.download", + "openvoice", + "openvoice.api", + "openvoice.se_extractor", + "gradio", + "tiktoken", + "deep_translator", + "numpy", + "openai", + "rarfile", + "IPython", + "IPython.utils", + "IPython.utils.capture", + "requests", +] + +for mod_name in MOCK_MODULES: + if mod_name not in sys.modules: + sys.modules[mod_name] = MagicMock() + +# Make tqdm transparent (pass-through) so for loops work in tests +class _FakeTqdm: + """Minimal tqdm mock that passes through iterables and handles progress bars.""" + def __init__(self, iterable=None, **kwargs): + self._iterable = iterable + def __iter__(self): + if self._iterable is not None: + return iter(self._iterable) + return iter([]) + def update(self, n=1): + pass + def close(self): + pass + +tqdm_module = types.ModuleType("tqdm") +tqdm_module.tqdm = _FakeTqdm +sys.modules["tqdm"] = tqdm_module diff --git a/tests/test_minimax_integration.py b/tests/test_minimax_integration.py new file mode 100644 index 0000000..6ea7297 --- /dev/null +++ b/tests/test_minimax_integration.py @@ -0,0 +1,217 @@ +"""Integration tests for MiniMax provider (require MINIMAX_API_KEY). 
+ +Run with: MINIMAX_API_KEY=<your-api-key> python -m pytest tests/test_minimax_integration.py -v +""" +import importlib +import json +import os +import re +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +MINIMAX_API_KEY = os.environ.get("MINIMAX_API_KEY") +SKIP_MSG = "MINIMAX_API_KEY not set; skipping integration tests" + + +def _real_import(name): + """Import a real module, bypassing any conftest mocks.""" + if name in sys.modules: + del sys.modules[name] + return importlib.import_module(name) + + +@unittest.skipUnless(MINIMAX_API_KEY, SKIP_MSG) +class TestMiniMaxLLMIntegration(unittest.TestCase): + """Integration tests for MiniMax LLM translation via OpenAI-compatible API.""" + + def _create_client(self): + openai = _real_import("openai") + return openai.OpenAI( + api_key=MINIMAX_API_KEY, + base_url="https://api.minimax.io/v1", + ) + + @staticmethod + def _clean_response(text): + """Strip thinking tags and markdown code fences from LLM response.""" + text = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL) + text = re.sub(r"^```(?:json)?\s*\n?", "", text.strip(), flags=re.MULTILINE) + text = re.sub(r"\n?```\s*$", "", text.strip(), flags=re.MULTILINE) + return text.strip() + + def test_minimax_chat_completion(self): + """Test basic chat completion with MiniMax M2.5.""" + client = self._create_client() + response = client.chat.completions.create( + model="MiniMax-M2.5", + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": "Machine translation. 
Output JSON with key translated_text.", + }, + { + "role": "user", + "content": "Translate to Spanish: Hello", + }, + ], + ) + result = response.choices[0].message.content + result = self._clean_response(result) + parsed = json.loads(result) + self.assertIn("translated_text", parsed) + self.assertTrue(len(parsed["translated_text"]) > 0) + + def test_minimax_m27_chat_completion(self): + """Test chat completion with MiniMax M2.7.""" + client = self._create_client() + response = client.chat.completions.create( + model="MiniMax-M2.7", + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": "Translate. Output JSON with key translated_text.", + }, + { + "role": "user", + "content": "Translate to French: Good morning", + }, + ], + ) + result = response.choices[0].message.content + result = self._clean_response(result) + parsed = json.loads(result) + self.assertIn("translated_text", parsed) + + def test_minimax_batch_translation_format(self): + """Test batch translation format with MiniMax (conversation JSON).""" + client = self._create_client() + batch = {"conversation": [{"A1": "Hello"}, {"B1": "Goodbye"}]} + response = client.chat.completions.create( + model="MiniMax-M2.5", + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": "Machine translation. 
Output translated_conversation JSON with list of 2 items.", + }, + { + "role": "user", + "content": f"Translate each text value to Spanish:\n{batch}", + }, + ], + ) + result = response.choices[0].message.content + result = self._clean_response(result) + parsed = json.loads(result) + values = list(parsed.values()) + found_list = False + for v in values: + if isinstance(v, list) and len(v) >= 2: + found_list = True + break + self.assertTrue(found_list, f"Expected list in response: {parsed}") + + +@unittest.skipUnless(MINIMAX_API_KEY, SKIP_MSG) +class TestMiniMaxTTSIntegration(unittest.TestCase): + """Integration tests for MiniMax TTS API.""" + + def _get_requests(self): + return _real_import("requests") + + def test_minimax_tts_api_call(self): + """Test MiniMax TTS with Wise_Woman voice.""" + requests = self._get_requests() + response = requests.post( + "https://api.minimax.io/v1/t2a_v2", + headers={ + "Authorization": f"Bearer {MINIMAX_API_KEY}", + "Content-Type": "application/json", + }, + json={ + "model": "speech-2.8-hd", + "text": "Hello, this is a test.", + "voice_setting": {"voice_id": "Wise_Woman", "speed": 1.0}, + "audio_setting": {"format": "mp3"}, + }, + timeout=30, + ) + self.assertEqual(response.status_code, 200) + data = response.json() + self.assertIn("data", data) + self.assertIn("audio", data["data"]) + audio_bytes = bytes.fromhex(data["data"]["audio"]) + self.assertGreater(len(audio_bytes), 100) + + def test_minimax_tts_different_voice(self): + """Test TTS with English_Graceful_Lady voice.""" + requests = self._get_requests() + response = requests.post( + "https://api.minimax.io/v1/t2a_v2", + headers={ + "Authorization": f"Bearer {MINIMAX_API_KEY}", + "Content-Type": "application/json", + }, + json={ + "model": "speech-2.8-hd", + "text": "Testing voice selection.", + "voice_setting": {"voice_id": "English_Graceful_Lady", "speed": 1.0}, + "audio_setting": {"format": "mp3"}, + }, + timeout=30, + ) + self.assertEqual(response.status_code, 200) + + def 
test_minimax_tts_audio_is_valid_mp3(self): + """Verify the returned audio bytes are valid MP3.""" + requests = self._get_requests() + response = requests.post( + "https://api.minimax.io/v1/t2a_v2", + headers={ + "Authorization": f"Bearer {MINIMAX_API_KEY}", + "Content-Type": "application/json", + }, + json={ + "model": "speech-2.8-hd", + "text": "Valid audio check.", + "voice_setting": {"voice_id": "Deep_Voice_Man", "speed": 1.0}, + "audio_setting": {"format": "mp3"}, + }, + timeout=30, + ) + audio_bytes = bytes.fromhex(response.json()["data"]["audio"]) + self.assertTrue( + audio_bytes[:3] == b"ID3" or audio_bytes[0] == 0xFF, + "Audio should be valid MP3", + ) + + +@unittest.skipUnless(MINIMAX_API_KEY, SKIP_MSG) +class TestThinkTagStripping(unittest.TestCase): + """Test that thinking tags are properly stripped from responses.""" + + def test_strip_think_tags(self): + raw = '<think>Some reasoning</think>\n{"translated_text": "Hola"}' + cleaned = re.sub(r"<think>.*?</think>\s*", "", raw, flags=re.DOTALL) + parsed = json.loads(cleaned) + self.assertEqual(parsed["translated_text"], "Hola") + + def test_no_think_tags_passthrough(self): + raw = '{"translated_text": "Hola"}' + cleaned = re.sub(r"<think>.*?</think>\s*", "", raw, flags=re.DOTALL) + parsed = json.loads(cleaned) + self.assertEqual(parsed["translated_text"], "Hola") + + def test_multiline_think_tags(self): + raw = '<think>\nLine 1\nLine 2\n</think>\n\n{"text": "result"}' + cleaned = re.sub(r"<think>.*?</think>\s*", "", raw, flags=re.DOTALL) + parsed = json.loads(cleaned) + self.assertEqual(parsed["text"], "result") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_minimax_translation.py b/tests/test_minimax_translation.py new file mode 100644 index 0000000..43991c3 --- /dev/null +++ b/tests/test_minimax_translation.py @@ -0,0 +1,303 @@ +"""Unit tests for MiniMax translation integration.""" +import json +import os +import sys +import unittest +from unittest.mock import MagicMock, patch + +# Add parent directory to path +sys.path.insert(0, 
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# Import conftest to mock heavy deps +import tests.conftest # noqa: F401 + + +class TestMiniMaxClientCreation(unittest.TestCase): + """Test _create_minimax_client helper.""" + + @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key-123"}) + def test_create_client_with_valid_key(self): + """Client is created with correct base_url and api_key.""" + from soni_translate.translate_segments import _create_minimax_client + + with patch("openai.OpenAI") as mock_cls: + mock_cls.return_value = MagicMock() + client = _create_minimax_client() + mock_cls.assert_called_once_with( + api_key="test-key-123", + base_url="https://api.minimax.io/v1", + ) + self.assertIsNotNone(client) + + def test_create_client_missing_key_raises(self): + """ValueError raised when MINIMAX_API_KEY is not set.""" + from soni_translate.translate_segments import _create_minimax_client + + env = os.environ.copy() + env.pop("MINIMAX_API_KEY", None) + with patch.dict(os.environ, env, clear=True): + with self.assertRaises(ValueError) as ctx: + _create_minimax_client() + self.assertIn("MINIMAX_API_KEY", str(ctx.exception)) + + +class TestTranslationProcessOptions(unittest.TestCase): + """Test that MiniMax models are in the translation options lists.""" + + def test_minimax_in_translation_options(self): + from soni_translate.translate_segments import TRANSLATION_PROCESS_OPTIONS + + self.assertIn("MiniMax-M2.5", TRANSLATION_PROCESS_OPTIONS) + self.assertIn("MiniMax-M2.5_batch", TRANSLATION_PROCESS_OPTIONS) + self.assertIn("MiniMax-M2.7", TRANSLATION_PROCESS_OPTIONS) + self.assertIn("MiniMax-M2.7_batch", TRANSLATION_PROCESS_OPTIONS) + + def test_minimax_in_docs_translation_options(self): + from soni_translate.translate_segments import DOCS_TRANSLATION_PROCESS_OPTIONS + + self.assertIn("MiniMax-M2.5", DOCS_TRANSLATION_PROCESS_OPTIONS) + self.assertIn("MiniMax-M2.7", DOCS_TRANSLATION_PROCESS_OPTIONS) + + def test_disable_still_last(self): + from 
soni_translate.translate_segments import TRANSLATION_PROCESS_OPTIONS + + self.assertEqual( + TRANSLATION_PROCESS_OPTIONS[-1], "disable_translation" + ) + + def test_google_translator_still_first(self): + from soni_translate.translate_segments import TRANSLATION_PROCESS_OPTIONS + + self.assertEqual( + TRANSLATION_PROCESS_OPTIONS[0], "google_translator_batch" + ) + + def test_all_options_present(self): + """Verify all expected options exist.""" + from soni_translate.translate_segments import TRANSLATION_PROCESS_OPTIONS + + expected = [ + "google_translator_batch", + "google_translator", + "gpt-3.5-turbo-0125_batch", + "gpt-3.5-turbo-0125", + "gpt-4-turbo-preview_batch", + "gpt-4-turbo-preview", + "MiniMax-M2.5_batch", + "MiniMax-M2.5", + "MiniMax-M2.7_batch", + "MiniMax-M2.7", + "disable_translation", + ] + self.assertEqual(TRANSLATION_PROCESS_OPTIONS, expected) + + +class TestTranslateTextRouting(unittest.TestCase): + """Test that translate_text correctly routes to MiniMax.""" + + @patch("soni_translate.translate_segments._create_minimax_client") + @patch("soni_translate.translate_segments.gpt_sequential") + def test_minimax_m25_routes_to_sequential(self, mock_seq, mock_client): + from soni_translate.translate_segments import translate_text + + mock_client.return_value = MagicMock() + segments = [{"text": "Hello", "start": 0, "end": 1, "speaker": "SPEAKER_00"}] + mock_seq.return_value = segments + + translate_text(segments, "es", translation_process="MiniMax-M2.5") + + mock_seq.assert_called_once() + args, kwargs = mock_seq.call_args + self.assertEqual(args[1], "MiniMax-M2.5") + self.assertIn("client", kwargs) + + @patch("soni_translate.translate_segments._create_minimax_client") + @patch("soni_translate.translate_segments.gpt_sequential") + def test_minimax_m27_routes_to_sequential(self, mock_seq, mock_client): + from soni_translate.translate_segments import translate_text + + mock_client.return_value = MagicMock() + segments = [{"text": "Hello", "start": 0, "end": 
1, "speaker": "SPEAKER_00"}] + mock_seq.return_value = segments + + translate_text(segments, "es", translation_process="MiniMax-M2.7") + + mock_seq.assert_called_once() + args, kwargs = mock_seq.call_args + self.assertEqual(args[1], "MiniMax-M2.7") + + @patch("soni_translate.translate_segments._create_minimax_client") + @patch("soni_translate.translate_segments.gpt_batch") + def test_minimax_m25_batch_routes_to_batch(self, mock_batch, mock_client): + from soni_translate.translate_segments import translate_text + + mock_client.return_value = MagicMock() + segments = [{"text": "Hello", "start": 0, "end": 1, "speaker": "SPEAKER_00"}] + mock_batch.return_value = segments + + translate_text( + segments, "es", translation_process="MiniMax-M2.5_batch" + ) + + mock_batch.assert_called_once() + args, kwargs = mock_batch.call_args + self.assertEqual(args[1], "MiniMax-M2.5") # _batch stripped + self.assertIn("client", kwargs) + + @patch("soni_translate.translate_segments._create_minimax_client") + @patch("soni_translate.translate_segments.gpt_batch") + def test_minimax_m27_batch_routes_to_batch(self, mock_batch, mock_client): + from soni_translate.translate_segments import translate_text + + mock_client.return_value = MagicMock() + segments = [{"text": "Hello", "start": 0, "end": 1, "speaker": "SPEAKER_00"}] + mock_batch.return_value = segments + + translate_text( + segments, "es", translation_process="MiniMax-M2.7_batch" + ) + + mock_batch.assert_called_once() + args, kwargs = mock_batch.call_args + self.assertEqual(args[1], "MiniMax-M2.7") + + @patch("soni_translate.translate_segments.translate_batch") + def test_google_translator_still_works(self, mock_tb): + from soni_translate.translate_segments import translate_text + + segments = [{"text": "Hello", "start": 0, "end": 1, "speaker": "SPEAKER_00"}] + mock_tb.return_value = segments + + translate_text(segments, "es", translation_process="google_translator_batch") + mock_tb.assert_called_once() + + +class 
TestGptSequentialWithClient(unittest.TestCase): + """Test that gpt_sequential accepts an external client.""" + + @patch("soni_translate.translate_segments.call_gpt_translate") + def test_custom_client_passed_through(self, mock_call): + from soni_translate.translate_segments import gpt_sequential + + mock_client = MagicMock() + mock_call.return_value = "Hola" + segments = [{"text": "Hello", "start": 0, "end": 1, "speaker": "SPEAKER_00"}] + + result = gpt_sequential(segments, "MiniMax-M2.5", "es", client=mock_client) + + call_args = mock_call.call_args + self.assertEqual(call_args[0][0], mock_client) + self.assertEqual(call_args[0][1], "MiniMax-M2.5") + self.assertEqual(result[0]["text"], "Hola") + + @patch("soni_translate.translate_segments.call_gpt_translate") + def test_default_client_when_none(self, mock_call): + from soni_translate.translate_segments import gpt_sequential + + mock_call.return_value = "Hola" + segments = [{"text": "Hello", "start": 0, "end": 1, "speaker": "SPEAKER_00"}] + + with patch("openai.OpenAI") as mock_cls: + mock_cls.return_value = MagicMock() + gpt_sequential(segments, "gpt-3.5-turbo-0125", "es") + mock_cls.assert_called_once() + + +class TestGptBatchWithClient(unittest.TestCase): + """Test that gpt_batch accepts an external client.""" + + @patch("soni_translate.translate_segments.call_gpt_translate") + def test_custom_client_passed_through(self, mock_call): + from soni_translate.translate_segments import gpt_batch + + # Mock tiktoken + import tiktoken + mock_encoding = MagicMock() + mock_encoding.encode.return_value = [1, 2, 3] + tiktoken.get_encoding = MagicMock(return_value=mock_encoding) + + mock_client = MagicMock() + mock_call.return_value = [{"A1": "Hola"}] + segments = [ + {"text": "Hello", "start": 0, "end": 1, "speaker": "SPEAKER_00"}, + ] + + gpt_batch(segments, "MiniMax-M2.5", "es", client=mock_client) + + call_args = mock_call.call_args + self.assertEqual(call_args[0][0], mock_client) + self.assertEqual(call_args[0][1], 
"MiniMax-M2.5") + + +class TestCallGptTranslate(unittest.TestCase): + """Test call_gpt_translate with MiniMax-like responses.""" + + def test_sequential_json_response(self): + from soni_translate.translate_segments import call_gpt_translate + + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"translated_text": "Hola mundo"}')) + ] + mock_client.chat.completions.create.return_value = mock_response + + result = call_gpt_translate( + mock_client, + "MiniMax-M2.5", + "Translate JSON output", + "Translate: Hello world", + ) + self.assertEqual(result, "Hola mundo") + + def test_batch_json_response(self): + from soni_translate.translate_segments import call_gpt_translate + + mock_client = MagicMock() + original_text = { + "conversation": [{"A1": "Hello"}, {"B1": "How are you?"}] + } + response_json = json.dumps({ + "translated_conversation": [ + {"A1": "Hola"}, + {"B1": "Como estas?"}, + ] + }) + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content=response_json)) + ] + mock_client.chat.completions.create.return_value = mock_response + + result = call_gpt_translate( + mock_client, + "MiniMax-M2.7", + "Translate conversation", + "Translate this", + original_text=original_text, + batch_lines=2, + ) + self.assertEqual(len(result), 2) + self.assertEqual(result[0]["A1"], "Hola") + + def test_json_mode_used(self): + """Verify response_format=json_object is sent.""" + from soni_translate.translate_segments import call_gpt_translate + + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"text": "test"}')) + ] + mock_client.chat.completions.create.return_value = mock_response + + call_gpt_translate(mock_client, "MiniMax-M2.5", "sys", "user") + + call_kwargs = mock_client.chat.completions.create.call_args[1] + self.assertEqual( + call_kwargs["response_format"], {"type": "json_object"} + ) + + 
# Script entry point that closes the preceding file of this patch.
if __name__ == "__main__":
    unittest.main()


# ---------------------------------------------------------------------------
# Patch header of the file that follows, preserved as it appeared in the
# flattened diff:
#   diff --git a/tests/test_minimax_tts.py b/tests/test_minimax_tts.py
#   new file mode 100644
#   index 0000000..96e7c2d
#   --- /dev/null
#   +++ b/tests/test_minimax_tts.py
#   @@ -0,0 +1,350 @@
# ---------------------------------------------------------------------------
"""Unit tests for MiniMax TTS integration."""
import json
import os
import re
import sys
import unittest
from unittest.mock import MagicMock, patch, mock_open

# Make the repository root importable before pulling in the test fixtures.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import tests.conftest  # noqa: F401

# Pattern the tests use to recognise a MiniMax TTS voice name; compiled once
# instead of once per test method.
_MINIMAX_TTS_PATTERN = re.compile(r".* MiniMax-TTS$")


def _voice_id(tts_name):
    """Return the first whitespace-separated token of *tts_name* minus its
    leading character (the '>' marker in the names used by these tests)."""
    return tts_name.split()[0][1:]


def _single_segment(text, tts_name=None):
    """Build a one-segment payload for SPEAKER_00 spanning 0.0-1.0 seconds.

    The "tts_name" key is only added when *tts_name* is given, so the same
    helper serves both the TTS-level and the diarization-level fixtures.
    """
    segment = {
        "speaker": "SPEAKER_00",
        "text": text,
        "start": 0.0,
        "end": 1.0,
    }
    if tts_name is not None:
        segment["tts_name"] = tts_name
    return {"segments": [segment]}


class TestMiniMaxTTSModels(unittest.TestCase):
    """Test MiniMax TTS model list in language_configuration."""

    def test_minimax_tts_models_exist(self):
        from soni_translate.language_configuration import MINIMAX_TTS_MODELS

        self.assertIsInstance(MINIMAX_TTS_MODELS, list)
        self.assertGreater(len(MINIMAX_TTS_MODELS), 0)

    def test_minimax_tts_models_format(self):
        """All MiniMax TTS models follow '>voice_id MiniMax-TTS' format."""
        from soni_translate.language_configuration import MINIMAX_TTS_MODELS

        for model in MINIMAX_TTS_MODELS:
            self.assertTrue(
                model.startswith(">"), f"'{model}' should start with '>'"
            )
            self.assertTrue(
                model.endswith("MiniMax-TTS"),
                f"'{model}' should end with 'MiniMax-TTS'",
            )
            # A non-empty voice id must remain once the '>' marker is removed.
            self.assertTrue(_voice_id(model))

    def test_verified_voices_present(self):
        from soni_translate.language_configuration import MINIMAX_TTS_MODELS

        verified_voices = [
            "Wise_Woman",
            "Deep_Voice_Man",
            "Friendly_Person",
            "English_Graceful_Lady",
            "English_Insightful_Speaker",
            "English_radiant_girl",
            "English_Persuasive_Man",
            "English_Lucky_Robot",
            "cute_boy",
            "lovely_girl",
            "Inspirational_girl",
            "sweet_girl",
        ]
        model_names = [_voice_id(m) for m in MINIMAX_TTS_MODELS]
        for voice in verified_voices:
            self.assertIn(voice, model_names, f"'{voice}' should be in list")

    def test_model_count(self):
        from soni_translate.language_configuration import MINIMAX_TTS_MODELS

        self.assertEqual(len(MINIMAX_TTS_MODELS), 12)


class TestMiniMaxTTSPatternMatch(unittest.TestCase):
    """Test the regex pattern for MiniMax TTS voice matching."""

    def test_pattern_matches_minimax_tts(self):
        for name in (
            ">Wise_Woman MiniMax-TTS",
            ">English_Graceful_Lady MiniMax-TTS",
            ">cute_boy MiniMax-TTS",
        ):
            self.assertIsNotNone(_MINIMAX_TTS_PATTERN.match(name), name)

    def test_pattern_does_not_match_openai_tts(self):
        for name in (">alloy OpenAI-TTS", ">echo HD OpenAI-TTS"):
            self.assertIsNone(_MINIMAX_TTS_PATTERN.match(name), name)

    def test_pattern_does_not_match_edge_tts(self):
        self.assertIsNone(
            _MINIMAX_TTS_PATTERN.match("en-US-EmmaMultilingualNeural-Female")
        )


class TestVoiceIdExtraction(unittest.TestCase):
    """Test voice_id extraction from tts_name."""

    def test_extract_simple_voice_id(self):
        self.assertEqual(_voice_id(">Wise_Woman MiniMax-TTS"), "Wise_Woman")

    def test_extract_english_voice_id(self):
        self.assertEqual(
            _voice_id(">English_Graceful_Lady MiniMax-TTS"),
            "English_Graceful_Lady",
        )

    def test_extract_cute_boy_voice_id(self):
        self.assertEqual(_voice_id(">cute_boy MiniMax-TTS"), "cute_boy")


class TestSegmentsMiniMaxTTS(unittest.TestCase):
    """Test segments_minimax_tts function."""

    def test_raises_without_api_key(self):
        from soni_translate.text_to_speech import (
            segments_minimax_tts,
            TTS_OperationError,
        )

        # Run with a copy of the environment that lacks the API key;
        # patch.dict restores the real environment when the block exits.
        env = os.environ.copy()
        env.pop("MINIMAX_API_KEY", None)
        with patch.dict(os.environ, env, clear=True):
            segments = _single_segment("Hello", ">Wise_Woman MiniMax-TTS")
            with self.assertRaises(TTS_OperationError):
                segments_minimax_tts(segments, "en")

    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"})
    @patch("soni_translate.text_to_speech.verify_saved_file_and_size")
    @patch("soni_translate.text_to_speech.write_chunked")
    @patch("soni_translate.text_to_speech.pad_array")
    @patch("soni_translate.text_to_speech.sf")
    @patch("builtins.open", mock_open())
    @patch("soni_translate.text_to_speech.requests")
    def test_successful_tts_call(
        self, mock_requests, mock_sf, mock_pad, mock_write, mock_verify
    ):
        from soni_translate.text_to_speech import segments_minimax_tts

        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"data": {"audio": "ff" * 100}}
        mock_requests.post.return_value = mock_response

        mock_sf.read.return_value = (MagicMock(), 24000)
        mock_pad.return_value = MagicMock()

        segments = _single_segment("Hello world", ">Wise_Woman MiniMax-TTS")

        segments_minimax_tts(segments, "en")

        mock_requests.post.assert_called_once()
        call_args = mock_requests.post.call_args
        self.assertEqual(call_args[0][0], "https://api.minimax.io/v1/t2a_v2")
        request_body = call_args[1]["json"]
        self.assertEqual(request_body["model"], "speech-2.8-hd")
        self.assertEqual(request_body["text"], "Hello world")
        self.assertEqual(
            request_body["voice_setting"]["voice_id"], "Wise_Woman"
        )
        self.assertEqual(request_body["audio_setting"]["format"], "mp3")
        headers = call_args[1]["headers"]
        self.assertEqual(headers["Authorization"], "Bearer test-key")

    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"})
    @patch("soni_translate.text_to_speech.error_handling_in_tts")
    @patch("soni_translate.text_to_speech.requests")
    def test_api_error_triggers_fallback(
        self, mock_requests, mock_error_handler
    ):
        from soni_translate.text_to_speech import segments_minimax_tts

        mock_response = MagicMock()
        mock_response.raise_for_status.side_effect = Exception("API error")
        mock_requests.post.return_value = mock_response

        segments = _single_segment("Hello", ">Wise_Woman MiniMax-TTS")

        segments_minimax_tts(segments, "en")
        mock_error_handler.assert_called_once()

    @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"})
    @patch("soni_translate.text_to_speech.verify_saved_file_and_size")
    @patch("soni_translate.text_to_speech.write_chunked")
    @patch("soni_translate.text_to_speech.pad_array")
    @patch("soni_translate.text_to_speech.sf")
    @patch("builtins.open", mock_open())
    @patch("soni_translate.text_to_speech.requests")
    def test_different_voice_id_sent(
        self, mock_requests, mock_sf, mock_pad, mock_write, mock_verify
    ):
        from soni_translate.text_to_speech import segments_minimax_tts

        mock_response = MagicMock()
        mock_response.json.return_value = {"data": {"audio": "ff" * 100}}
        mock_requests.post.return_value = mock_response
        mock_sf.read.return_value = (MagicMock(), 24000)
        mock_pad.return_value = MagicMock()

        segments = _single_segment(
            "Test", ">English_Graceful_Lady MiniMax-TTS"
        )

        segments_minimax_tts(segments, "en")

        request_body = mock_requests.post.call_args[1]["json"]
        self.assertEqual(
            request_body["voice_setting"]["voice_id"], "English_Graceful_Lady"
        )


class TestAudioSegmentationToVoice(unittest.TestCase):
    """Test that audio_segmentation_to_voice handles MiniMax TTS."""

    def test_minimax_pattern_detection(self):
        from soni_translate.text_to_speech import find_spkr

        speaker_to_voice = {
            "SPEAKER_00": ">Wise_Woman MiniMax-TTS",
            "SPEAKER_01": "en-US-EmmaMultilingualNeural-Female",
        }
        segments = [
            {"speaker": "SPEAKER_00", "text": "Hello"},
            {"speaker": "SPEAKER_01", "text": "World"},
        ]

        speakers = find_spkr(_MINIMAX_TTS_PATTERN, speaker_to_voice, segments)
        self.assertEqual(speakers, ["SPEAKER_00"])

    def test_filter_by_speaker(self):
        from soni_translate.text_to_speech import filter_by_speaker

        segments = [
            {"speaker": "SPEAKER_00", "text": "Hello"},
            {"speaker": "SPEAKER_01", "text": "World"},
        ]
        filtered = filter_by_speaker(["SPEAKER_00"], segments)
        self.assertEqual(len(filtered["segments"]), 1)
        self.assertEqual(filtered["segments"][0]["text"], "Hello")


class TestReturnValueUpdate(unittest.TestCase):
    """Test that audio_segmentation_to_voice returns 7-element list."""

    @patch("soni_translate.text_to_speech.remove_directory_contents")
    @patch("soni_translate.text_to_speech.segments_egde_tts")
    def test_returns_seven_elements(self, mock_edge, mock_rm):
        from soni_translate.text_to_speech import audio_segmentation_to_voice

        result = audio_segmentation_to_voice(
            _single_segment("Hello"),
            TRANSLATE_AUDIO_TO="en",
            is_gui=False,
            tts_voice00="en-US-EmmaMultilingualNeural-Female",
        )

        self.assertEqual(len(result), 7)

    @patch("soni_translate.text_to_speech.remove_directory_contents")
    @patch("soni_translate.text_to_speech.segments_minimax_tts")
    def test_minimax_speakers_populated(self, mock_mm_tts, mock_rm):
        from soni_translate.text_to_speech import audio_segmentation_to_voice

        result = audio_segmentation_to_voice(
            _single_segment("Hello"),
            TRANSLATE_AUDIO_TO="en",
            is_gui=False,
            tts_voice00=">Wise_Woman MiniMax-TTS",
        )

        # The test expects the MiniMax speaker list in the seventh slot.
        minimax_speakers = result[6]
        self.assertIn("SPEAKER_00", minimax_speakers)
        mock_mm_tts.assert_called_once()


class TestAccelerateSegmentsUnpacking(unittest.TestCase):
    """Test that accelerate_segments can unpack 7-element valid_speakers."""

    def test_seven_element_unpacking(self):
        valid_speakers = [
            ["SPEAKER_00"],  # edge
            [],  # bark
            [],  # vits
            [],  # coqui
            [],  # vits_onnx
            [],  # openai_tts
            [],  # minimax_tts
        ]

        (
            speakers_edge,
            speakers_bark,
            speakers_vits,
            speakers_coqui,
            speakers_vits_onnx,
            speakers_openai_tts,
            speakers_minimax_tts,
        ) = valid_speakers

        self.assertEqual(speakers_edge, ["SPEAKER_00"])
        self.assertEqual(speakers_minimax_tts, [])


if __name__ == "__main__":
    unittest.main()