diff --git a/tests/conftest.py b/tests/conftest.py
index 19c2c624712..26674483f7a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -311,6 +311,7 @@ def __init__(
         dtype: str = "auto",
         *,
         model_kwargs: Optional[dict[str, Any]] = None,
+        trust_remote_code: bool = True,
         is_sentence_transformer: bool = False,
         is_cross_encoder: bool = False,
         skip_tokenizer_init: bool = False,
@@ -320,7 +321,7 @@ def __init__(
 
         self.config = AutoConfig.from_pretrained(
             model_name,
-            trust_remote_code=True,
+            trust_remote_code=trust_remote_code,
         )
         self.device = self.get_default_device()
         self.dtype = torch_dtype = _get_and_verify_dtype(self.config, dtype)
@@ -336,7 +337,7 @@ def __init__(
                 model_name,
                 device=self.device,
                 model_kwargs=model_kwargs,
-                trust_remote_code=True,
+                trust_remote_code=trust_remote_code,
             )
         elif is_cross_encoder:
             # Lazy init required for AMD CI
@@ -346,12 +347,12 @@ def __init__(
                 model_name,
                 device=self.device,
                 automodel_args=model_kwargs,
-                trust_remote_code=True,
+                trust_remote_code=trust_remote_code,
             )
         else:
             model = auto_cls.from_pretrained(
                 model_name,
-                trust_remote_code=True,
+                trust_remote_code=trust_remote_code,
                 **model_kwargs,
             )
 
@@ -372,7 +373,7 @@ def __init__(
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_name,
             torch_dtype=torch_dtype,
-            trust_remote_code=True,
+            trust_remote_code=trust_remote_code,
         )
 
         # don't put this import at the top level
@@ -381,7 +382,7 @@ def __init__(
             self.processor = AutoProcessor.from_pretrained(
                 model_name,
                 torch_dtype=torch_dtype,
-                trust_remote_code=True,
+                trust_remote_code=trust_remote_code,
             )
         if skip_tokenizer_init:
             self.tokenizer = self.processor.tokenizer
diff --git a/tests/models/language/pooling/test_embedding.py b/tests/models/language/pooling/test_embedding.py
index a44b2154b13..306cfdf3770 100644
--- a/tests/models/language/pooling/test_embedding.py
+++ b/tests/models/language/pooling/test_embedding.py
@@ -10,18 +10,22 @@
 @pytest.mark.parametrize(
     "model",
     [
-        # [Encoder-only]
-        pytest.param("BAAI/bge-base-en-v1.5",
-                     marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
-        pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
-        pytest.param("intfloat/multilingual-e5-small"),
-        pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
+        # Be careful of the order of the models: decoder-only models should
+        # be placed before encoder-only models; otherwise the
+        # `Qwen2.5-0.5B-Instruct` case won't pass, because
+        # gte-Qwen2-1.5B-instruct will cache custom model code with
+        # bidirectional attention.
         # [Decoder-only]
         pytest.param("BAAI/bge-multilingual-gemma2",
                      marks=[pytest.mark.core_model]),
         pytest.param("intfloat/e5-mistral-7b-instruct",
                      marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
         pytest.param("ssmits/Qwen2-7B-Instruct-embed-base"),
+        # [Encoder-only]
+        pytest.param("BAAI/bge-base-en-v1.5",
+                     marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+        pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
+        pytest.param("intfloat/multilingual-e5-small"),
+        pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
         # [Cross-Encoder]
         pytest.param("sentence-transformers/stsb-roberta-base-v2"),
     ],
@@ -44,7 +48,7 @@ def test_models(
     vllm_extra_kwargs = {}
     if model == "ssmits/Qwen2-7B-Instruct-embed-base":
         vllm_extra_kwargs["override_pooler_config"] = \
-            PoolerConfig(pooling_type="MEAN")
+            PoolerConfig(pooling_type="MEAN", normalize=False)
 
     # The example_prompts has ending "\n", for example:
     # "Write a short story about a robot that dreams for the first time.\n"
diff --git a/tests/models/language/pooling/test_gte.py b/tests/models/language/pooling/test_gte.py
index 18b27a68814..725e3d16840 100644
--- a/tests/models/language/pooling/test_gte.py
+++ b/tests/models/language/pooling/test_gte.py
@@ -45,6 +45,7 @@
     ########### Qwen2ForCausalLM
     EmbedModelInfo("Alibaba-NLP/gte-Qwen2-1.5B-instruct",
                    architecture="Qwen2ForCausalLM",
+                   dtype="float32",
                    enable_test=True),
     ########## ModernBertModel
     EmbedModelInfo("Alibaba-NLP/gte-modernbert-base",
diff --git a/tests/models/utils.py b/tests/models/utils.py
index ac1fc6c8f0e..ffc904bd10f 100644
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -314,6 +314,7 @@ def check_embeddings_close(
                                        dim=0)
 
         fail_msg = (f"Test{prompt_idx}:"
+                    f"\nCosine similarity: \t{sim:.4f}"
                     f"\n{name_0}:\t{embeddings_0[:16]!r}"
                     f"\n{name_1}:\t{embeddings_1[:16]!r}")
 
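A note on the `trust_remote_code` plumbing in conftest.py: the new parameter defaults to `True`, so existing call sites keep their behavior, while tests that need the stock transformers implementation can now opt out instead of relying on parametrization order alone (the reordering comment in test_embedding.py describes how custom code from gte-Qwen2-1.5B-instruct, once imported, sticks around for the rest of the session). A minimal sketch of the opt-out, assuming the usual `hf_runner`/`example_prompts` fixture names, which are not visible in the hunks above:

```python
# Hypothetical test snippet; the fixture names are assumptions.
def test_stock_qwen_embeddings(hf_runner, example_prompts):
    # trust_remote_code=False keeps HF from importing custom modeling
    # code from the Hub, so the stock Qwen2 implementation is used.
    with hf_runner("Qwen/Qwen2.5-0.5B-Instruct",
                   is_sentence_transformer=True,
                   trust_remote_code=False) as hf_model:
        hf_outputs = hf_model.encode(example_prompts)
```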
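For context on the `normalize=False` change: `override_pooler_config` flows from `vllm_extra_kwargs` through `VllmRunner` into the engine as a keyword argument. A standalone sketch of the equivalent configuration, assuming a vLLM version where `task="embed"` selects the pooling runner:

```python
from vllm import LLM
from vllm.config import PoolerConfig

# Illustrative only; mirrors the updated test parametrization above.
llm = LLM(
    model="ssmits/Qwen2-7B-Instruct-embed-base",
    task="embed",
    # Mean-pool the hidden states and skip L2 normalization, matching
    # PoolerConfig(pooling_type="MEAN", normalize=False) in the test.
    override_pooler_config=PoolerConfig(pooling_type="MEAN",
                                        normalize=False),
)
```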
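Finally, the utils.py hunk surfaces the cosine similarity that `check_embeddings_close` already computes, so a tolerance failure now reports how far apart the embeddings were instead of only dumping their first 16 components. A self-contained sketch of the comparison (the tensors are stand-ins, and the 0.99 threshold is illustrative rather than the helper's actual tolerance):

```python
import torch
import torch.nn.functional as F

# Stand-in embeddings; in the real helper these come from the HF
# reference run and the vLLM run for the same prompt.
embeddings_0 = torch.randn(1024)
embeddings_1 = embeddings_0 + 1e-3 * torch.randn(1024)

# dim=0 matches the context line in the hunk: each pair of embeddings
# is compared as a flat 1-D vector, one prompt at a time.
sim = F.cosine_similarity(embeddings_0, embeddings_1, dim=0)

prompt_idx, name_0, name_1 = 0, "hf", "vllm"
fail_msg = (f"Test{prompt_idx}:"
            f"\nCosine similarity: \t{sim:.4f}"
            f"\n{name_0}:\t{embeddings_0[:16]!r}"
            f"\n{name_1}:\t{embeddings_1[:16]!r}")
assert sim >= 0.99, fail_msg
```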