add forward trick to CustomCLIP
mehdidc committed Jan 31, 2023
commit ad44bf6 · 1 parent dada50a
Showing 2 changed files with 13 additions and 1 deletion.
src/open_clip/model.py (9 additions, 0 deletions)

@@ -279,6 +279,7 @@ def encode_text(self, text, normalize: bool = False):
         features = self.text(text)
         return F.normalize(features, dim=-1) if normalize else features
 
+    """
     def forward(self, image, text):
         image_features = self.encode_image(image, normalize=True)
         text_features = self.encode_text(text, normalize=True)
@@ -289,6 +290,14 @@ def forward(self, image, text):
                 "logit_scale": self.logit_scale.exp()
             }
         return image_features, text_features, self.logit_scale.exp()
+    """
+    def forward(self, image, text, clamp_logit_scale_to=None):
+        image_features = self.encode_image(image, normalize=True) if image is not None else None
+        text_features = self.encode_text(text, normalize=True) if text is not None else None
+        if clamp_logit_scale_to is not None:
+            with torch.no_grad():
+                self.logit_scale.data.clamp_(0, clamp_logit_scale_to)
+        return image_features, text_features, self.logit_scale.exp()
 
 
 def convert_weights_to_lp(model: nn.Module, dtype=torch.float16):
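
The new forward comments out the old one (via a docstring), makes both towers optional (image or text may be None), and folds the usual logit-scale clamp into the forward pass through the clamp_logit_scale_to argument, running it under torch.no_grad(). This is presumably the "trick" of the commit title: once the model is FSDP-wrapped, mutating logit_scale outside the wrapped forward is awkward. Below is a minimal sketch of how a training step might drive it; the clamp bound math.log(100) (the value used in the original CLIP paper) and all surrounding names are assumptions, not part of this commit:

import math
import torch
import torch.nn.functional as F

def training_step(model, images, texts):
    # One call computes both embeddings and clamps logit_scale in place,
    # so no separate parameter mutation is needed outside forward.
    # clamp_logit_scale_to=math.log(100) is an assumed choice of bound.
    image_features, text_features, logit_scale = model(
        images, texts, clamp_logit_scale_to=math.log(100)
    )
    # Standard symmetric CLIP contrastive loss over the in-batch pairs.
    logits = logit_scale * image_features @ text_features.T
    labels = torch.arange(logits.shape[0], device=logits.device)
    return (F.cross_entropy(logits, labels) + F.cross_entropy(logits.T, labels)) / 2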
src/training/main.py (4 additions, 1 deletion)

@@ -274,6 +274,9 @@ def main(args):
             wrap,
         )
         print(f"Before FSTP parameter num: {sum(p.numel() for p in model.parameters())}")
+        print(f"Before FSTP VISUAL parameter num: {sum(p.numel() for p in model.visual.parameters())}")
+        #print(f"Before FSTP TEXT parameter num: {sum(p.numel() for p in model.transformer.parameters())}")
+
         print(f"Before FSDP {torch.cuda.memory_allocated()/1024**3:.3} GB")
         mp = MixedPrecision(
             #param_dtype=torch.bfloat16,
@@ -292,7 +295,7 @@ def main(args):
                     ResidualAttentionBlock,
                 },
             ),
-            device_id=None if args.fsdp_init_on_cpu else device,
+            device_id=device,
        )
 
        # avoid "RuntimeError: The tensor has a non-zero number of elements, but its data is not allocated yet. Caffe2 uses a lazy allocation, so you will need to call mutable_data() or raw_mutable_data() to actually allocate memory."
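
For context, here is a minimal, self-contained sketch of the FSDP wrapping pattern this hunk edits. ResidualAttentionBlock stands in for open_clip's transformer block; the MixedPrecision dtype and the wrap_with_fsdp helper are assumptions for illustration, not code from this commit:

import functools
import torch
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, MixedPrecision
from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy

def wrap_with_fsdp(model, block_cls, device):
    # Wrap every transformer block of type block_cls in its own FSDP unit.
    mp = MixedPrecision(param_dtype=torch.float32)  # dtype choice is an assumption
    return FSDP(
        model,
        mixed_precision=mp,
        auto_wrap_policy=functools.partial(
            transformer_auto_wrap_policy,
            transformer_layer_cls={block_cls},
        ),
        # The commit hard-codes device_id=device, so shards are materialized
        # directly on the local GPU rather than optionally initialized on CPU.
        device_id=device,
    )

Dropping the args.fsdp_init_on_cpu branch means initialization always happens on the device given by device_id; the added print lines report the visual tower's parameter count before wrapping, which makes sense because after sharding each rank's sum(p.numel()) reflects only its local shard.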
