@@ -807,7 +807,7 @@ def __call__(
807807 height : int = 512 ,
808808 width : int = 768 ,
809809 num_frames : int = 121 ,
810- frame_rate : float = 25 .0 ,
810+ frame_rate : float = 24 .0 ,
811811 num_inference_steps : int = 40 ,
812812 timesteps : List [int ] = None ,
813813 guidance_scale : float = 3.0 ,
@@ -844,7 +844,7 @@ def __call__(
844844 The width in pixels of the generated image. This is set to 768 by default for the best results.
845845 num_frames (`int`, *optional*, defaults to `121`):
846846 The number of video frames to generate
847- frame_rate (`float`, *optional*, defaults to `25.0`):
847+ frame_rate (`float`, *optional*, defaults to `24.0`):
848848 The frames per second (FPS) of the generated video.
849849 num_inference_steps (`int`, *optional*, defaults to 40):
850850 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
@@ -1067,7 +1067,7 @@ def __call__(
10671067 latents .shape [0 ], latent_num_frames , latent_height , latent_width , latents .device , fps = frame_rate
10681068 )
10691069 audio_coords = self .transformer .audio_rope .prepare_audio_coords (
1070- audio_latents .shape [0 ], audio_num_frames , audio_latents .device , fps = frame_rate
1070+ audio_latents .shape [0 ], audio_num_frames , audio_latents .device
10711071 )
10721072
10731073 # 7. Denoising loop
0 commit comments