From 145cbc21527d68613c6f4d0babe52763a8c7191f Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Fri, 5 Sep 2025 16:44:31 +0800 Subject: [PATCH 1/5] add tts param volume and speed --- src/zai/api_resource/audio/audio.py | 4 ++++ tests/integration_tests/test_audio.py | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index 9523f47..92fe520 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -55,6 +55,8 @@ def speech( extra_headers: Headers | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + speed: float | None = 1.0, + volume: float | None = 1.0, ) -> HttpxBinaryResponseContent: """ Generate speech audio from text input @@ -80,6 +82,8 @@ def speech( 'sensitive_word_check': sensitive_word_check, 'request_id': request_id, 'user_id': user_id, + 'speed': speed, + 'volume': volume, } ) return self._post( diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index e143571..420f19b 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -10,12 +10,15 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore client = ZaiClient() # Fill in your own API Key try: - speech_file_path = Path(__file__).parent / 'asr1.wav' + speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( model='cogtts', input='Hello, welcome to Z.ai Open Platform', voice='female', - response_format='wav', + response_format='pcm', + encode_format='hex', + speed=1.0, + volume=1.0, ) response.stream_to_file(speech_file_path) From 175fda532116efe4e3d17c33fe71c8b5dd768fdb Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 11:52:42 +0800 Subject: [PATCH 2/5] feat: add encode_format param to audio.speech --- src/zai/api_resource/audio/audio.py | 2 ++ 
tests/integration_tests/test_audio.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index 92fe520..bed126c 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -55,6 +55,7 @@ def speech( extra_headers: Headers | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + encode_format: str = None, speed: float | None = 1.0, volume: float | None = 1.0, ) -> HttpxBinaryResponseContent: @@ -79,6 +80,7 @@ def speech( 'input': input, 'voice': voice, 'response_format': response_format, + 'encode_format': encode_format, 'sensitive_word_check': sensitive_word_check, 'request_id': request_id, 'user_id': user_id, diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 420f19b..8545eda 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -8,7 +8,7 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZaiClient() # Fill in your own API Key + client = ZaiClient(base_url='https://open.bigmodel.cn/api/paas/v4', api_key='') # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( @@ -16,13 +16,13 @@ def test_audio_speech(logging_conf): input='Hello, welcome to Z.ai Open Platform', voice='female', response_format='pcm', - encode_format='hex', + encode_format='base64', speed=1.0, volume=1.0, ) response.stream_to_file(speech_file_path) - except zai.core._errors.APIRequestFailedError as err: + except Exception as err: print(err) except zai.core._errors.APIInternalError as err: print(err) From e713eaaf305c16056f88a4468349767d7c3431c8 Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 11:56:51 +0800 Subject: [PATCH 3/5] delete default host --- tests/integration_tests/test_audio.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index ec51248..7c97038 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -8,7 +8,7 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZaiClient(base_url='https://open.bigmodel.cn/api/paas/v4', api_key='') # Fill in your own API Key + client = ZaiClient(base_url='', api_key='') # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( From 415d551c3279160fe427faf49a2acf716917b0eb Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 12:01:21 +0800 Subject: [PATCH 4/5] delete update --- tests/integration_tests/test_audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 7c97038..88dbb01 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -8,7 +8,7 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZaiClient(base_url='', api_key='') # Fill in your own API Key + client = ZaiClient() # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( From df3829b2b30c5a510e6a6672a1577fc1e7a42bfa Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 12:10:25 +0800 Subject: [PATCH 5/5] add stream response to audio.speech --- src/zai/api_resource/audio/audio.py | 3 +++ tests/integration_tests/test_audio.py | 1 + 2 files changed, 4 insertions(+) diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index bed126c..2de7b47 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Mapping, Optional, 
cast import httpx +from httpx import stream from zai.core import ( NOT_GIVEN, @@ -58,6 +59,7 @@ def speech( encode_format: str = None, speed: float | None = 1.0, volume: float | None = 1.0, + stream: bool | None = False ) -> HttpxBinaryResponseContent: """ Generate speech audio from text input @@ -86,6 +88,7 @@ def speech( 'user_id': user_id, 'speed': speed, 'volume': volume, + 'stream': stream } ) return self._post( diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 88dbb01..e9ef33c 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -17,6 +17,7 @@ def test_audio_speech(logging_conf): voice='female', response_format='pcm', encode_format='base64', + stream=False, speed=1.0, volume=1.0, )