```diff
@@ -41,6 +41,7 @@
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
+from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.model_executor.models.qwen2_5_vl import (
     Qwen2_5_VisionTransformer, Qwen2_5_VLImageEmbeddingInputs,
     Qwen2_5_VLImageInputs, Qwen2_5_VLImagePixelInputs,
@@ -66,7 +67,8 @@
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.tokenizer import decode_tokens, encode_tokens
 
-from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
+from .interfaces import (MultiModalEmbeddings, SupportsLoRA,
+                         SupportsMultiModal, SupportsPP)
 from .utils import (AutoWeightsLoader, WeightsMapper,
                     init_vllm_registered_model, maybe_prefix,
                     merge_multimodal_embeddings)
@@ -705,7 +707,7 @@ def _process_video_input(
     dummy_inputs=Qwen2_5OmniThinkerDummyInputsBuilder,
 )
 class Qwen2_5OmniThinkerForConditionalGeneration(
-        nn.Module, SupportsMultiModal, SupportsPP,
+        nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA,
         Qwen2_5OmniConditionalGenerationMixin):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_prefix={
@@ -798,6 +800,15 @@ def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict:
     def get_language_model(self) -> torch.nn.Module:
         return self.language_model
 
+    def get_mm_mapping(self) -> MultiModelKeys:
+        """Get module prefix for multimodal models to filter LoRA modules."""
+        return MultiModelKeys.from_string_field(
+            language_model="language_model",
+            connector=[],  # No explicit connector in this model
+            tower_model=["visual",
+                         "audio_tower"],  # Exclude vision and audio towers
+        )
+
     def get_multimodal_embeddings(self,
                                   **kwargs: object) -> MultiModalEmbeddings:
 
```
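For context on what the new `get_mm_mapping` hook buys us: vLLM's LoRA machinery uses the returned `MultiModelKeys` prefixes to decide which modules an adapter may target, so LoRA weights are applied to the language model while the frozen `visual` and `audio_tower` weights are skipped. Below is a minimal, self-contained sketch of that prefix-filtering idea; it is plain illustrative Python, not vLLM's actual LoRA-manager code, with prefix lists that mirror the mapping added in this diff.

```python
# Sketch of prefix-based LoRA module filtering (illustrative only).
# The prefixes mirror the MultiModelKeys mapping added above.
LANGUAGE_MODEL_PREFIXES = ["language_model"]
TOWER_PREFIXES = ["visual", "audio_tower"]  # frozen towers, excluded from LoRA


def is_lora_target(module_name: str) -> bool:
    """Return True for language-model modules, False for tower weights."""
    if any(module_name.startswith(p) for p in TOWER_PREFIXES):
        return False
    return any(module_name.startswith(p) for p in LANGUAGE_MODEL_PREFIXES)


assert is_lora_target("language_model.model.layers.0.self_attn.q_proj")
assert not is_lora_target("visual.blocks.0.attn.qkv")
assert not is_lora_target("audio_tower.layers.0.self_attn.k_proj")
```

With `SupportsLoRA` declared on the class, an adapter should then be attachable at request time through vLLM's existing LoRA API (`LLM(..., enable_lora=True)` plus a `LoRARequest` passed to `generate`), the same way it works for text-only Qwen2.5 models.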