diff --git a/setup.py b/setup.py index c0fcd32..16a8ca0 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ name = 'x-clip', packages = find_packages(exclude=[]), include_package_data = True, - version = '0.8.0', + version = '0.8.2', license='MIT', description = 'X-CLIP', author = 'Phil Wang', diff --git a/x_clip/x_clip.py b/x_clip/x_clip.py index ca1d01a..ec0e06c 100644 --- a/x_clip/x_clip.py +++ b/x_clip/x_clip.py @@ -58,15 +58,7 @@ def matrix_diag(t): # checkpointing helper function -def make_checkpointable(fn, **kwargs): - if isinstance(fn, nn.ModuleList): - return [maybe(make_checkpointable)(el, **kwargs) for el in fn] - - condition = kwargs.pop('condition', None) - - if exists(condition) and not condition(fn): - return fn - +def make_checkpointable(fn): @wraps(fn) def inner(*args): input_needs_grad = any([isinstance(el, torch.Tensor) and el.requires_grad for el in args]) @@ -246,12 +238,12 @@ def forward( mask = None ): can_checkpoint = self.training and self.checkpoint_during_training + checkpoint_fn = make_checkpointable if can_checkpoint else identity x = self.norm_in(x) for attn, ff in self.layers: - if can_checkpoint: - attn, ff = map(make_checkpointable, (attn, ff)) + attn, ff = map(checkpoint_fn, (attn, ff)) x = attn(x, mask, rotary_pos_emb) + x x = ff(x) + x @@ -404,7 +396,7 @@ def __init__( simclr_temperature = 0.1, image_ssl_loss_weight = 0.05, multiview_loss_weight = 0.1, - checkpoint_during_training = True, + checkpoint_during_training = False, **kwargs ): super().__init__()