@@ -2588,6 +2588,8 @@ def __init__(
25882588 )
25892589 self ._behavior = behavior
25902590 self ._orig_config = config
2591+ model_mapping = {2.6 : "llama" , 4.0 : "qwen2" , 4.5 : "qwen3" }
2592+ self .model_type = model_mapping [self ._orig_config .version ]
25912593 if self ._behavior == MiniCPMVConfigBehavior .VISION_EMBEDDINGS and hasattr (config , "vision_config" ):
25922594 self ._config = config .vision_config
25932595 self .DUMMY_INPUT_GENERATOR_CLASSES = (DummyMiniCPMVImageInputGenerator ,)
@@ -2604,12 +2606,19 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
26042606 "position_ids" : {0 : "batch_size" , 1 : "patch_size" },
26052607 }
26062608 if self ._behavior == MiniCPMVConfigBehavior .RESAMPLER :
2607- return {
2608- "image_feature" : {0 : "batch_size" , 1 : "patch_height" , 2 : "patch_width" },
2609- "pos_embed" : {0 : "patch_size" , 1 : "batch_size" , 2 : "num_patches" },
2610- "key_padding_mask" : {0 : "batch_size" , 1 : "patch_size" },
2611- "temporal_embed" : {0 : "patch_size" , 1 : "batch_size" },
2612- }
2609+ if self ._orig_config .version == 4.5 :
2610+ return {
2611+ "image_feature" : {0 : "batch_size" , 1 : "patch_height" , 2 : "patch_width" },
2612+ "pos_embed" : {0 : "patch_size" , 1 : "batch_size" , 2 : "num_patches" },
2613+ "key_padding_mask" : {0 : "batch_size" , 1 : "patch_size" },
2614+ "temporal_embed" : {0 : "patch_size" , 1 : "batch_size" },
2615+ }
2616+ else :
2617+ return {
2618+ "image_feature" : {0 : "batch_size" , 1 : "patch_height" , 2 : "patch_width" },
2619+ "pos_embed" : {0 : "patch_size" , 1 : "batch_size" , 2 : "num_patches" },
2620+ "key_padding_mask" : {0 : "batch_size" , 1 : "patch_size" },
2621+ }
26132622 return {}
26142623
26152624 @property
@@ -2633,18 +2642,18 @@ def with_behavior(
26332642 """
26342643 if isinstance (behavior , str ) and not isinstance (behavior , MiniCPMVConfigBehavior ):
26352644 behavior = MiniCPMVConfigBehavior (behavior )
2636- model_mapping = { 2.6 : "llama" , 4.0 : "qwen2" , 4.5 : "qwen3" }
2645+
26372646 if behavior == MiniCPMVConfigBehavior .TEXT_EMBEDDINGS :
26382647 return get_vlm_text_embeddings_config (
2639- model_mapping [ self ._orig_config . version ] ,
2648+ self .model_type ,
26402649 self ._orig_config ,
26412650 self .int_dtype ,
26422651 self .float_dtype ,
26432652 )
26442653
26452654 if behavior == MiniCPMVConfigBehavior .LANGUAGE :
26462655 return get_vlm_text_generation_config (
2647- model_mapping [ self ._orig_config . version ] ,
2656+ self .model_type ,
26482657 self ._orig_config ,
26492658 self .int_dtype ,
26502659 self .float_dtype ,
0 commit comments