feat: voiceclone implement comprehensive voice cloning service with full API support (#29)

Lighthousexx · tomsun28 · web-flow · commit 7d30990e5381 · 2025-09-09T10:13:21.000+08:00
Co-authored-by: tomsun28 <tomsun28@outlook.com>
diff --git a/examples/voice_clone.py b/examples/voice_clone.py
@@ -0,0 +1,69 @@
+from zai import ZaiClient, ZhipuAiClient
+import time
+import os
+
+def voice_clone():
+	"""Upload a voice sample, then request a cloned voice built from it.
+
+	Prints the outcome of each step. If the upload step raises
+	FileNotFoundError or any other exception, the reason is printed and the
+	function returns without calling the clone endpoint.
+	"""
+	sdk = ZhipuAiClient()
+
+	# Step 1: the clone call takes a file ID, so the sample audio is uploaded first.
+	sample_path = "tests/integration_tests/voice_clone_input.mp3"
+	try:
+		with open(sample_path, 'rb') as sample:
+			uploaded = sdk.files.create(file=sample, purpose='voice-clone-input')
+		print(f"Voice input file uploaded successfully with ID: {uploaded.id}")
+		file_id = uploaded.id
+	except FileNotFoundError:
+		print(f"File not found: {sample_path}")
+		return
+	except Exception as upload_error:
+		print(f"File upload failed: {upload_error}")
+		return
+
+	# Step 2: clone the voice from the uploaded sample. The request ID embeds
+	# the current time in milliseconds.
+	clone_arguments = {
+		"voice_name": "My Test Voice!",
+		"text": "This is sample text for voice cloning training",
+		"input": "This is target text for voice preview generation",
+		"file_id": file_id,
+		"request_id": f"voice_clone_request_{int(time.time() * 1000)}",
+		"model": "cogtts-clone",
+	}
+	response = sdk.voice.clone(**clone_arguments)
+	print(f"Voice clone response: {response}")
+
+def voice_delete():
+	"""Call the voice delete endpoint for the placeholder voice and print the response."""
+	sdk = ZhipuAiClient()
+
+	# The request ID embeds the current time in milliseconds.
+	tracking_id = f"voice_delete_request_{int(time.time() * 1000)}"
+	outcome = sdk.voice.delete(voice="Your voice", request_id=tracking_id)
+	print(outcome)
+
+def voice_list():
+	"""List voices filtered by type "PRIVATE" and name "Test", then print the response."""
+	sdk = ZhipuAiClient()
+
+	# The request ID embeds the current time in milliseconds.
+	list_filters = {
+		"voice_type": "PRIVATE",
+		"voice_name": "Test",
+		"request_id": f"voice_list_request_{int(time.time() * 1000)}",
+	}
+	listing = sdk.voice.list(**list_filters)
+	print(listing)
+
+if __name__ == "__main__":
+	# Only the listing example runs by default; uncomment one of the calls
+	# below to run the clone or delete example as well.
+	# voice_clone()
+
+	# voice_delete()
+
+	voice_list()
diff --git a/src/zai/_client.py b/src/zai/_client.py
@@ -20,6 +20,7 @@
 	from zai.api_resource.moderations import Moderations
 	from zai.api_resource.tools import Tools
 	from zai.api_resource.videos import Videos
+	from zai.api_resource.voice import Voice
 	from zai.api_resource.web_search import WebSearchApi
 
 from .core import (
@@ -180,6 +181,12 @@ def moderations(self) -> Moderations:
 
 		return Moderations(self)
 
+	@cached_property
+	def voice(self) -> Voice:
+		"""Return the ``Voice`` API resource bound to this client.
+
+		Declared as a ``cached_property``; the ``Voice`` class is imported
+		inside the method body at call time.
+		"""
+		from zai.api_resource.voice import Voice
+
+		return Voice(self)
+
 	@property
 	@override
 	def auth_headers(self) -> dict[str, str]:
diff --git a/src/zai/api_resource/files/files.py b/src/zai/api_resource/files/files.py
@@ -40,7 +40,7 @@ def create(
 		*,
 		file: FileTypes = None,
 		upload_detail: List[UploadDetail] = None,
-		purpose: Literal['fine-tune', 'retrieval', 'batch'],
+		purpose: Literal['fine-tune', 'retrieval', 'batch', 'voice-clone-input'],
 		knowledge_id: str = None,
 		sentence_size: int = None,
 		extra_headers: Headers | None = None,
diff --git a/src/zai/api_resource/voice/__init__.py b/src/zai/api_resource/voice/__init__.py
@@ -0,0 +1,3 @@
+from .voice import Voice
+
+__all__ = ['Voice']
diff --git a/src/zai/api_resource/voice/voice.py b/src/zai/api_resource/voice/voice.py
@@ -0,0 +1,171 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import httpx
+from httpx import stream
+
+from zai.core import (
+	NOT_GIVEN,
+	BaseAPI,
+	Body,
+	Headers,
+	NotGiven,
+	make_request_options,
+	maybe_transform,
+)
+from zai.types.voiceclone import (
+	VoiceCloneParams,
+	VoiceCloneResult,
+	VoiceDeleteParams,
+	VoiceDeleteResult,
+	VoiceListParams,
+	VoiceListResult,
+)
+
+if TYPE_CHECKING:
+	from zai._client import ZaiClient
+
+
+class Voice(BaseAPI):
+	"""
+	API resource exposing the voice endpoints: clone, delete and list
+	"""
+
+	def __init__(self, client: ZaiClient) -> None:
+		super().__init__(client)
+
+	def clone(
+		self,
+		*,
+		voice_name: str,
+		text: str,
+		input: str,
+		file_id: str,
+		request_id: Optional[str] = None,
+		model: str,
+		extra_headers: Headers | None = None,
+		extra_body: Body | None = None,
+		timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+	) -> VoiceCloneResult:
+		"""
+		Create a cloned voice by POSTing to ``/voice/clone``
+
+		Args:
+			voice_name: Name to give the cloned voice
+			text: Text content that corresponds to the sample audio
+			input: Target text for the preview audio
+			file_id: ID of the uploaded sample audio file
+			request_id: Optional caller-supplied ID for tracking the request
+			model: Model to use
+			extra_headers: Extra headers to send with the request
+			extra_body: Extra fields to add to the request body
+			timeout: Timeout override for this request
+
+		Returns:
+			The response cast to VoiceCloneResult
+		"""
+		payload = maybe_transform(
+			{
+				"voice_name": voice_name,
+				"text": text,
+				"input": input,
+				"file_id": file_id,
+				"request_id": request_id,
+				"model": model,
+			},
+			VoiceCloneParams,
+		)
+		request_options = make_request_options(
+			extra_headers=extra_headers,
+			extra_body=extra_body,
+			timeout=timeout,
+		)
+		return self._post(
+			"/voice/clone",
+			body=payload,
+			options=request_options,
+			cast_type=VoiceCloneResult,
+			stream=False,
+		)
+
+	def delete(
+		self,
+		*,
+		voice: str,
+		request_id: Optional[str] = None,
+		extra_headers: Headers | None = None,
+		extra_body: Body | None = None,
+		timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+	) -> VoiceDeleteResult:
+		"""
+		Delete a cloned voice by POSTing to ``/voice/delete``
+
+		Args:
+			voice: The voice to delete
+			request_id: Optional caller-supplied ID for tracking the request
+			extra_headers: Extra headers to send with the request
+			extra_body: Extra fields to add to the request body
+			timeout: Timeout override for this request
+
+		Returns:
+			The response cast to VoiceDeleteResult
+		"""
+		payload = maybe_transform(
+			{
+				"voice": voice,
+				"request_id": request_id,
+			},
+			VoiceDeleteParams,
+		)
+		request_options = make_request_options(
+			extra_headers=extra_headers,
+			extra_body=extra_body,
+			timeout=timeout,
+		)
+		return self._post(
+			"/voice/delete",
+			body=payload,
+			options=request_options,
+			cast_type=VoiceDeleteResult,
+			stream=False,
+		)
+
+	def list(
+		self,
+		*,
+		voice_type: Optional[str] = None,
+		voice_name: Optional[str] = None,
+		request_id: Optional[str] = None,
+		extra_headers: Headers | None = None,
+		timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+	) -> VoiceListResult:
+		"""
+		List voices via GET ``/voice/list``, optionally filtered
+
+		Args:
+			voice_type: Voice type to filter by
+			voice_name: Voice name to filter by
+			request_id: Optional caller-supplied ID for tracking the request;
+				sent both as a ``Request-Id`` header and as a query parameter
+			extra_headers: Extra headers to send with the request
+			timeout: Timeout override for this request
+
+		Returns:
+			The response cast to VoiceListResult
+		"""
+		# Caller-supplied headers are merged last so they take precedence
+		# over the generated Request-Id header.
+		merged_headers = {}
+		if request_id is not None:
+			merged_headers["Request-Id"] = request_id
+		merged_headers.update(extra_headers or {})
+
+		query = maybe_transform(
+			{
+				"voiceType": voice_type,
+				"voiceName": voice_name,
+				"request_id": request_id,
+			},
+			VoiceListParams,
+		)
+		request_options = make_request_options(
+			extra_headers=merged_headers,
+			extra_query=query,
+			timeout=timeout,
+		)
+		return self._get(
+			"/voice/list",
+			options=request_options,
+			cast_type=VoiceListResult,
+		)
diff --git a/src/zai/types/voiceclone/__init__.py b/src/zai/types/voiceclone/__init__.py
@@ -0,0 +1,19 @@
+from .voice_clone_params import VoiceCloneParams
+from .voice_delete_params import VoiceDeleteParams
+from .voice_list_params import VoiceListParams
+from .voice_object import (
+	VoiceCloneResult,
+	VoiceDeleteResult,
+	VoiceData,
+	VoiceListResult,
+)
+
+__all__ = [
+	'VoiceCloneParams',
+	'VoiceDeleteParams', 
+	'VoiceListParams',
+	'VoiceCloneResult',
+	'VoiceDeleteResult',
+	'VoiceData',
+	'VoiceListResult',
+]
diff --git a/src/zai/types/voiceclone/voice_clone_params.py b/src/zai/types/voiceclone/voice_clone_params.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from typing_extensions import Required, TypedDict
+
+
+class VoiceCloneParams(TypedDict, total=False):
+	"""
+	Request body for voice cloning
+
+	The keys mirror the payload that ``Voice.clone`` passes to
+	``maybe_transform``: ``voice_name``, ``text``, ``input``, ``file_id``,
+	``request_id`` and ``model``.
+
+	Attributes:
+		voice_name (str): Name for the cloned voice
+		text (str): Text content corresponding to the sample audio
+		input (str): Target text for preview audio
+		file_id (str): File ID of the uploaded audio file
+		request_id (Optional[str]): Optional request ID for tracking
+		model (str): Model
+	"""
+	
+	voice_name: Required[str]
+	text: Required[str]
+	input: Required[str]
+	file_id: Required[str]
+	request_id: Optional[str]
+	model: Required[str]
diff --git a/src/zai/types/voiceclone/voice_delete_params.py b/src/zai/types/voiceclone/voice_delete_params.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from typing_extensions import Required, TypedDict
+
+
+class VoiceDeleteParams(TypedDict, total=False):
+	"""
+	Request body for voice deletion
+
+	The keys match the payload that ``Voice.delete`` passes to
+	``maybe_transform``.
+
+	Attributes:
+		voice (str): The voice to delete (required)
+		request_id (Optional[str]): Optional request ID for tracking
+	"""
+	
+	voice: Required[str]
+	request_id: Optional[str]
diff --git a/src/zai/types/voiceclone/voice_list_params.py b/src/zai/types/voiceclone/voice_list_params.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from typing_extensions import TypedDict
+
+
+class VoiceListParams(TypedDict, total=False):
+	"""
+	Query parameters for listing voices
+
+	All keys are optional (``total=False`` and no ``Required`` fields).
+
+	NOTE(review): ``Voice.list`` builds its query with the keys
+	``voiceType`` / ``voiceName``, which differ from the ``voice_type`` /
+	``voice_name`` keys declared here -- confirm which spelling the
+	endpoint expects and align the two.
+
+	Attributes:
+		voice_type (Optional[str]): Type of voice to filter by
+		voice_name (Optional[str]): Name of voice to filter by
+		request_id (Optional[str]): Optional request ID for tracking
+	"""
+	
+	voice_type: Optional[str]
+	voice_name: Optional[str]
+	request_id: Optional[str]
diff --git a/src/zai/types/voiceclone/voice_object.py b/src/zai/types/voiceclone/voice_object.py
diff --git a/tests/integration_tests/test_voice_clone.py b/tests/integration_tests/test_voice_clone.py
diff --git a/tests/integration_tests/voice_clone_input.mp3 b/tests/integration_tests/voice_clone_input.mp3

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .voice import Voice`
	`2`	`+`
	`3`	`+__all__ = ['Voice']`