Skip to content

Commit 7d30990

Browse files
feat: voiceclone implement comprehensive voice cloning service with full API support (#29)
Co-authored-by: tomsun28 <[email protected]>
1 parent 67639ae commit 7d30990

File tree

12 files changed

+479
-1
lines changed

12 files changed

+479
-1
lines changed

examples/voice_clone.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from zai import ZaiClient, ZhipuAiClient
2+
import time
3+
import os
4+
5+
def voice_clone():
    """Upload a sample recording and request a cloned voice built from it.

    The sample file is uploaded with the ``voice-clone-input`` purpose and the
    ID of the uploaded file is passed on to ``client.voice.clone``. If the
    upload fails, a message is printed and the function returns without
    calling the clone endpoint.
    """
    client = ZhipuAiClient()

    # Step 1: upload the voice sample; the clone request refers to it by file ID.
    voice_input_file_path = "tests/integration_tests/voice_clone_input.mp3"

    try:
        with open(voice_input_file_path, 'rb') as audio_file:
            upload_response = client.files.create(
                file=audio_file,
                purpose='voice-clone-input',
            )

        print(f"Voice input file uploaded successfully with ID: {upload_response.id}")
        file_id = upload_response.id

    except FileNotFoundError:
        print(f"File not found: {voice_input_file_path}")
        return
    except Exception as e:
        # Example script: report any other upload failure and stop here.
        print(f"File upload failed: {e}")
        return

    # Step 2: clone the voice, pointing the request at the uploaded sample.
    clone_request = {
        "voice_name": "My Test Voice!",
        "text": "This is sample text for voice cloning training",
        "input": "This is target text for voice preview generation",
        "file_id": file_id,
        # The request ID embeds the current time in milliseconds.
        "request_id": f"voice_clone_request_{int(time.time() * 1000)}",
        "model": "cogtts-clone",
    }
    response = client.voice.clone(**clone_request)
    print(f"Voice clone response: {response}")
40+
41+
def voice_delete():
    """Call ``client.voice.delete`` for a placeholder voice and print the response."""
    client = ZhipuAiClient()

    # The request ID embeds the current time in milliseconds.
    request_id = f"voice_delete_request_{int(time.time() * 1000)}"

    print(client.voice.delete(voice="Your voice", request_id=request_id))
51+
52+
def voice_list():
    """Call ``client.voice.list`` with a type and name filter and print the response."""
    client = ZhipuAiClient()

    # Filters passed to the list call, plus a time-based request ID (milliseconds).
    list_filters = {
        "voice_type": "PRIVATE",
        "voice_name": "Test",
        "request_id": f"voice_list_request_{int(time.time() * 1000)}",
    }

    print(client.voice.list(**list_filters))
63+
64+
if __name__ == "__main__":
    # Only the listing example runs by default; the clone and delete examples
    # below are left disabled and can be enabled by uncommenting them.
    # voice_clone()

    # voice_delete()

    voice_list()

src/zai/_client.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from zai.api_resource.moderations import Moderations
2121
from zai.api_resource.tools import Tools
2222
from zai.api_resource.videos import Videos
23+
from zai.api_resource.voice import Voice
2324
from zai.api_resource.web_search import WebSearchApi
2425

2526
from .core import (
@@ -180,6 +181,12 @@ def moderations(self) -> Moderations:
180181

181182
return Moderations(self)
182183

184+
@cached_property
def voice(self) -> Voice:
    """Return the ``Voice`` API resource bound to this client.

    Wrapped in ``cached_property``, so the resource object is created on
    first access and reused afterwards.
    """
    # Imported inside the method rather than relying on module scope.
    from zai.api_resource.voice import Voice

    resource = Voice(self)
    return resource
189+
183190
@property
184191
@override
185192
def auth_headers(self) -> dict[str, str]:

src/zai/api_resource/files/files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def create(
4040
*,
4141
file: FileTypes = None,
4242
upload_detail: List[UploadDetail] = None,
43-
purpose: Literal['fine-tune', 'retrieval', 'batch'],
43+
purpose: Literal['fine-tune', 'retrieval', 'batch', 'voice-clone-input'],
4444
knowledge_id: str = None,
4545
sentence_size: int = None,
4646
extra_headers: Headers | None = None,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .voice import Voice
2+
3+
__all__ = ['Voice']
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Optional
4+
5+
import httpx
6+
from httpx import stream
7+
8+
from zai.core import (
9+
NOT_GIVEN,
10+
BaseAPI,
11+
Body,
12+
Headers,
13+
NotGiven,
14+
make_request_options,
15+
maybe_transform,
16+
)
17+
from zai.types.voiceclone import (
18+
VoiceCloneParams,
19+
VoiceCloneResult,
20+
VoiceDeleteParams,
21+
VoiceDeleteResult,
22+
VoiceListParams,
23+
VoiceListResult,
24+
)
25+
26+
if TYPE_CHECKING:
27+
from zai._client import ZaiClient
28+
29+
30+
class Voice(BaseAPI):
    """API resource exposing the voice endpoints: clone, delete and list."""

    def __init__(self, client: ZaiClient) -> None:
        super().__init__(client)

    def clone(
        self,
        *,
        voice_name: str,
        text: str,
        input: str,
        file_id: str,
        request_id: Optional[str] = None,
        model: str,
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> VoiceCloneResult:
        """
        Send a POST request to ``/voice/clone`` to create a cloned voice.

        Args:
            voice_name: Name for the cloned voice
            text: Text content corresponding to the sample audio
            input: Target text for the preview audio
            file_id: File ID of the uploaded audio file
            request_id: Optional request ID for tracking
            model: Model identifier sent with the request
            extra_headers: Additional headers to include in the request
            extra_body: Additional body parameters
            timeout: Request timeout

        Returns:
            The response cast to ``VoiceCloneResult``
        """
        payload = maybe_transform(
            {
                "voice_name": voice_name,
                "text": text,
                "input": input,
                "file_id": file_id,
                "request_id": request_id,
                "model": model,
            },
            VoiceCloneParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/voice/clone",
            body=payload,
            options=request_options,
            cast_type=VoiceCloneResult,
            stream=False,
        )

    def delete(
        self,
        *,
        voice: str,
        request_id: Optional[str] = None,
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> VoiceDeleteResult:
        """
        Send a POST request to ``/voice/delete`` to remove a voice.

        Args:
            voice: The voice to delete
            request_id: Optional request ID for tracking
            extra_headers: Additional headers to include in the request
            extra_body: Additional body parameters
            timeout: Request timeout

        Returns:
            The response cast to ``VoiceDeleteResult``
        """
        payload = maybe_transform(
            {
                "voice": voice,
                "request_id": request_id,
            },
            VoiceDeleteParams,
        )
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/voice/delete",
            body=payload,
            options=request_options,
            cast_type=VoiceDeleteResult,
            stream=False,
        )

    def list(
        self,
        *,
        voice_type: Optional[str] = None,
        voice_name: Optional[str] = None,
        request_id: Optional[str] = None,
        extra_headers: Headers | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> VoiceListResult:
        """
        Send a GET request to ``/voice/list``, optionally filtered.

        When ``request_id`` is given it is sent both as a ``Request-Id``
        header and as a query parameter.

        Args:
            voice_type: Type of voice to filter by
            voice_name: Name of voice to filter by
            request_id: Optional request ID for tracking
            extra_headers: Additional headers to include in the request
            timeout: Request timeout

        Returns:
            The response cast to ``VoiceListResult``
        """
        # Caller-supplied headers are merged last, so they win over Request-Id.
        headers = {} if request_id is None else {"Request-Id": request_id}
        if extra_headers:
            headers.update(extra_headers)

        query = maybe_transform(
            {
                "voiceType": voice_type,
                "voiceName": voice_name,
                "request_id": request_id,
            },
            VoiceListParams,
        )
        request_options = make_request_options(
            extra_headers=headers,
            extra_query=query,
            timeout=timeout,
        )
        return self._get(
            "/voice/list",
            options=request_options,
            cast_type=VoiceListResult,
        )
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from .voice_clone_params import VoiceCloneParams
2+
from .voice_delete_params import VoiceDeleteParams
3+
from .voice_list_params import VoiceListParams
4+
from .voice_object import (
5+
VoiceCloneResult,
6+
VoiceDeleteResult,
7+
VoiceData,
8+
VoiceListResult,
9+
)
10+
11+
__all__ = [
12+
'VoiceCloneParams',
13+
'VoiceDeleteParams',
14+
'VoiceListParams',
15+
'VoiceCloneResult',
16+
'VoiceDeleteResult',
17+
'VoiceData',
18+
'VoiceListResult',
19+
]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from __future__ import annotations
2+
3+
from typing import Optional
4+
5+
from typing_extensions import Required, TypedDict
6+
7+
8+
class VoiceCloneParams(TypedDict, total=False):
    """
    Request body for the voice clone endpoint.

    The keys match the payload built by ``Voice.clone``, which sends
    ``text``, ``input`` and ``model`` alongside the voice name and file ID.

    Attributes:
        voice_name (str): Name for the cloned voice
        text (str): Text content corresponding to the sample audio
        input (str): Target text for the preview audio
        file_id (str): File ID of the uploaded audio file
        request_id (Optional[str]): Optional request ID for tracking
        model (str): Model identifier sent with the request
    """

    voice_name: Required[str]
    text: Required[str]
    input: Required[str]
    file_id: Required[str]
    request_id: Optional[str]
    model: Required[str]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from __future__ import annotations
2+
3+
from typing import Optional
4+
5+
from typing_extensions import Required, TypedDict
6+
7+
8+
class VoiceDeleteParams(TypedDict, total=False):
    """
    Request body for the voice delete endpoint.

    Attributes:
        voice (str): The voice to delete; always required
        request_id (Optional[str]): Optional request ID for tracking
    """

    # Marked Required because the class itself is declared with total=False.
    voice: Required[str]
    # May be omitted or None.
    request_id: Optional[str]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from __future__ import annotations
2+
3+
from typing import Optional
4+
5+
from typing_extensions import TypedDict
6+
7+
8+
class VoiceListParams(TypedDict, total=False):
    """
    Query parameters for listing voices; every key is optional.

    NOTE(review): ``Voice.list`` builds its query with the keys
    ``voiceType`` and ``voiceName``, which differ from the snake_case keys
    declared here. Confirm which spelling is intended.

    Attributes:
        voice_type (Optional[str]): Type of voice to filter by
        voice_name (Optional[str]): Name of voice to filter by
        request_id (Optional[str]): Optional request ID for tracking
    """

    # Filters
    voice_type: Optional[str]
    voice_name: Optional[str]
    # Tracking
    request_id: Optional[str]

0 commit comments

Comments
 (0)