diff --git a/setup.py b/setup.py index 70d46cc..5f26f88 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = 'x-clip', packages = find_packages(exclude=[]), - version = '0.0.3', + version = '0.0.4', license='MIT', description = 'X-CLIP', author = 'Phil Wang', diff --git a/x_clip/x_clip.py b/x_clip/x_clip.py index 7cc56bb..9cd3be0 100644 --- a/x_clip/x_clip.py +++ b/x_clip/x_clip.py @@ -324,7 +324,7 @@ def forward( image_to_text = text_to_image.t() if self.extra_latent_projection: - image_to_text = einsum('t d, i d -> t i', text_latents_extra, image_latents_extra) * temp + image_to_text = einsum('t d, i d -> i t', text_latents_extra, image_latents_extra) * temp # calculate loss