NVIDIA · faradawn · Apr 1, 2026
diff --git a/examples/multimodal/README.md b/examples/multimodal/README.md
@@ -39,6 +39,8 @@ Update the paths to point to the mcore converted CLIP and Mistral models and run
 examples/multimodal/combine_lm_vision_checkpoints.sh /path/to/mistral/model /path/to/clip/model /output/dir
 ```
 
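+> **Note:** If loading the checkpoints fails (for example, with a `weights_only` unpickling error from `torch.load`), set the environment variable `TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1` before running the script. Only do this for checkpoint files you trust, because it allows arbitrary code execution during loading.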
+
 ## Training
 
 ### Pretraining

diff --git a/examples/multimodal/combine_state_dicts.py b/examples/multimodal/combine_state_dicts.py
@@ -27,7 +27,9 @@ def combine(input_files, module_prefixes, output_files):
             zip(current_input_files, current_module_prefixes)
         ):
             # initialize the combined state dict using the first provided input file
-            current_state_dict = torch.load(input_file, weights_only=False)
+            # NOTE: To load legacy checkpoints, set the env var TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1
+            # (only use with trusted files — it allows arbitrary code execution).
+            current_state_dict = torch.load(input_file)
             if i == 0:
                 combined_state_dict = current_state_dict.copy()
                 combined_state_dict["model"] = dict()