diff --git a/setup.py b/setup.py index ee05def..07cfa93 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ name = 'x-clip', packages = find_packages(exclude=[]), include_package_data = True, - version = '0.4.2', + version = '0.4.3', license='MIT', description = 'X-CLIP', author = 'Phil Wang', diff --git a/x_clip/x_clip.py b/x_clip/x_clip.py index 2597bcb..fffee38 100644 --- a/x_clip/x_clip.py +++ b/x_clip/x_clip.py @@ -360,6 +360,12 @@ def __init__( super().__init__() assert use_all_token_embeds or (visual_has_cls_token or text_has_cls_token), 'CLS token must be included on both vision and text transformers if you are not using fine-grained contrastive learning loss' + # store some parameters for access + + self.dim_text = dim_text + self.dim_image = dim_image + self.dim_latent = dim_latent + # instantiate text transformer self.text_pad_id = text_pad_id