diff --git a/tensorrt_llm/_torch/models/modeling_bert.py b/tensorrt_llm/_torch/models/modeling_bert.py index 0b3a6813b..320d14128 100644 --- a/tensorrt_llm/_torch/models/modeling_bert.py +++ b/tensorrt_llm/_torch/models/modeling_bert.py @@ -192,7 +192,6 @@ def __init__(self, self.model_config = model_config config = self.model_config.pretrained_config - self.padding_idx = config.pad_token_id self.add_pooling_layer = add_pooling_layer self.embedding = BertEmbeddings(config=config) diff --git a/tensorrt_llm/_torch/models/modeling_deepseekv3.py b/tensorrt_llm/_torch/models/modeling_deepseekv3.py index f66663e92..3faff96a5 100644 --- a/tensorrt_llm/_torch/models/modeling_deepseekv3.py +++ b/tensorrt_llm/_torch/models/modeling_deepseekv3.py @@ -998,7 +998,6 @@ class DeepseekV3Model(DecoderModel): def __init__(self, model_config: ModelConfig[PretrainedConfig]): super().__init__(model_config) config = model_config.pretrained_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.num_hidden_layers = config.num_hidden_layers aux_stream_list = [torch.cuda.Stream() for _ in range(2)] diff --git a/tensorrt_llm/_torch/models/modeling_gemma3.py b/tensorrt_llm/_torch/models/modeling_gemma3.py index 4a55d4271..5e1fca9d6 100644 --- a/tensorrt_llm/_torch/models/modeling_gemma3.py +++ b/tensorrt_llm/_torch/models/modeling_gemma3.py @@ -241,7 +241,6 @@ def __init__(self, model_config: ModelConfig[Gemma3TextConfig]): super().__init__(model_config) config = self.model_config self.hidden_size = config.pretrained_config.hidden_size - self.padding_idx = config.pretrained_config.pad_token_id self.embed_tokens = Gemma3TextScaledWordEmbedding( config.pretrained_config.vocab_size, diff --git a/tensorrt_llm/_torch/models/modeling_llama.py b/tensorrt_llm/_torch/models/modeling_llama.py index cd160ba48..6207d943a 100644 --- a/tensorrt_llm/_torch/models/modeling_llama.py +++ b/tensorrt_llm/_torch/models/modeling_llama.py @@ -609,7 +609,6 @@ class Llama4Model(DecoderModel): def __init__(self, model_config: ModelConfig[LlamaConfig]): super().__init__(model_config) config = self.model_config.pretrained_config - self.padding_idx = config.pad_token_id self.num_hidden_layers = config.num_hidden_layers self.aux_stream = torch.cuda.Stream() self.mapping = model_config.mapping @@ -687,7 +686,6 @@ class LlamaModel(DecoderModel): def __init__(self, model_config: ModelConfig[LlamaConfig]): super().__init__(model_config) config = self.model_config.pretrained_config - self.padding_idx = config.pad_token_id self.num_hidden_layers = config.num_hidden_layers vocab_size = config.vocab_size diff --git a/tensorrt_llm/_torch/models/modeling_mistral.py b/tensorrt_llm/_torch/models/modeling_mistral.py index 7f7dd4caf..639c4c447 100644 --- a/tensorrt_llm/_torch/models/modeling_mistral.py +++ b/tensorrt_llm/_torch/models/modeling_mistral.py @@ -119,7 +119,6 @@ class MistralModel(DecoderModel): def __init__(self, model_config: ModelConfig[MistralConfig]): super().__init__(model_config) config = self.model_config.pretrained_config - self.padding_idx = config.pad_token_id self.embed_tokens = Embedding( config.vocab_size, diff --git a/tensorrt_llm/_torch/models/modeling_mixtral.py b/tensorrt_llm/_torch/models/modeling_mixtral.py index 13df392fa..0bbc537a7 100644 --- a/tensorrt_llm/_torch/models/modeling_mixtral.py +++ b/tensorrt_llm/_torch/models/modeling_mixtral.py @@ -159,7 +159,6 @@ class MixtralModel(DecoderModel): def __init__(self, model_config: ModelConfig[PretrainedConfig]): super().__init__(model_config) config = model_config.pretrained_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.aux_stream = torch.cuda.Stream() diff --git a/tensorrt_llm/_torch/models/modeling_nemotron.py b/tensorrt_llm/_torch/models/modeling_nemotron.py index c18f728bb..7d47898c9 100644 --- a/tensorrt_llm/_torch/models/modeling_nemotron.py +++ b/tensorrt_llm/_torch/models/modeling_nemotron.py @@ -132,7 +132,6 @@ class NemotronModel(DecoderModel): def __init__(self, model_config: ModelConfig[NemotronConfig]): super().__init__(model_config) config = self.model_config.pretrained_config - self.padding_idx = config.pad_token_id self.embed_tokens = Embedding( config.vocab_size, diff --git a/tensorrt_llm/_torch/models/modeling_qwen.py b/tensorrt_llm/_torch/models/modeling_qwen.py index e13694b9c..a81ab6ac7 100644 --- a/tensorrt_llm/_torch/models/modeling_qwen.py +++ b/tensorrt_llm/_torch/models/modeling_qwen.py @@ -118,7 +118,6 @@ class QwenModel(DecoderModel): def __init__(self, model_config: ModelConfig[Qwen2Config]): super().__init__(model_config) config = self.model_config - self.padding_idx = config.pretrained_config.pad_token_id self.embed_tokens = Embedding( config.pretrained_config.vocab_size, diff --git a/tensorrt_llm/_torch/models/modeling_qwen3.py b/tensorrt_llm/_torch/models/modeling_qwen3.py index c91c314e9..26353acdb 100644 --- a/tensorrt_llm/_torch/models/modeling_qwen3.py +++ b/tensorrt_llm/_torch/models/modeling_qwen3.py @@ -179,7 +179,6 @@ class Qwen3Model(DecoderModel): def __init__(self, model_config: ModelConfig[Qwen3Config]): super().__init__(model_config) config = self.model_config - self.padding_idx = config.pretrained_config.pad_token_id self.embed_tokens = Embedding( config.pretrained_config.vocab_size, diff --git a/tensorrt_llm/_torch/models/modeling_qwen3_moe.py b/tensorrt_llm/_torch/models/modeling_qwen3_moe.py index cd94ec494..72d6fe18b 100644 --- a/tensorrt_llm/_torch/models/modeling_qwen3_moe.py +++ b/tensorrt_llm/_torch/models/modeling_qwen3_moe.py @@ -308,7 +308,6 @@ class Qwen3MoEModel(DecoderModel): def __init__(self, model_config: ModelConfig[Qwen3MoeConfig]): super().__init__(model_config) config = self.model_config - self.padding_idx = config.pretrained_config.pad_token_id self.aux_stream = torch.cuda.Stream() if model_config.mapping.enable_attention_dp: diff --git a/tensorrt_llm/_torch/models/modeling_qwen_moe.py b/tensorrt_llm/_torch/models/modeling_qwen_moe.py index 0d18a5b90..7eff89527 100644 --- a/tensorrt_llm/_torch/models/modeling_qwen_moe.py +++ b/tensorrt_llm/_torch/models/modeling_qwen_moe.py @@ -191,7 +191,6 @@ class QwenMoeModel(DecoderModel): def __init__(self, model_config: ModelConfig[Qwen2MoeConfig]): super().__init__(model_config) config = self.model_config - self.padding_idx = config.pretrained_config.pad_token_id self.aux_stream = torch.cuda.Stream() self.embed_tokens = Embedding(