@@ -4,7 +4,6 @@
 """Native OpenPangu Embedded model implementation."""

 from collections.abc import Iterable
-from typing import Any

 import torch
 from torch import nn
@@ -22,14 +21,13 @@
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
-from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
+from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.model_executor.models.utils import (
     AutoWeightsLoader,
     PPMissingLayer,
     make_empty_intermediate_tensors_factory,
     make_layers,
-    maybe_prefix,
 )
 from vllm.sequence import IntermediateTensors

@@ -124,9 +122,7 @@ def __init__( |
         rope_scaling.setdefault(
             "original_max_position_embeddings", original_max_position
         )
-        max_position_embeddings = getattr(
-            config, "max_position_embeddings", 2048
-        )
+        max_position_embeddings = getattr(config, "max_position_embeddings", 2048)

         bias = getattr(config, "bias", False)
         self.q_proj = ColumnParallelLinear(
@@ -244,9 +240,7 @@ def forward( |
             positions=positions,
             hidden_states=hidden_states,
         )
-        hidden_states, residual = self.post_attention_layernorm(
-            hidden_states, residual
-        )
+        hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
         hidden_states = self.mlp(hidden_states)
         return hidden_states, residual

@@ -280,8 +274,7 @@ def __init__( |
         self.vocab_size = config.vocab_size + lora_vocab
         self.org_vocab_size = config.vocab_size
         if get_pp_group().is_first_rank or (
-            getattr(config, "tie_word_embeddings", True)
-            and get_pp_group().is_last_rank
+            getattr(config, "tie_word_embeddings", True) and get_pp_group().is_last_rank
         ):
             self.embed_tokens = VocabParallelEmbedding(
                 self.vocab_size,
@@ -333,7 +326,7 @@ def forward( |

         aux_hidden_states: list[torch.Tensor] = []
         for idx, layer in enumerate(self.layers[self.start_layer : self.end_layer]):
-            if idx in self.aux_hidden_state_layers:
+            if self.start_layer + idx in self.aux_hidden_state_layers:
                 aux_hidden_states.append(hidden_states + residual)
             hidden_states, residual = layer(positions, hidden_states, residual)

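A minimal standalone sketch (not part of the diff or of vLLM) of why the `self.start_layer + idx` offset in the last hunk matters: under pipeline parallelism each rank only holds `layers[start_layer:end_layer]`, so `enumerate` yields slice-relative indices, while `aux_hidden_state_layers` stores absolute layer indices. The layer count, slice bounds, and index tuple below are made-up illustration values.

# Standalone illustration (hypothetical values, not vLLM code): why the loop
# index must be offset by start_layer when checking aux_hidden_state_layers.

layers = list(range(12))            # pretend the full model has 12 decoder layers
start_layer, end_layer = 6, 12      # slice owned by this pipeline-parallel rank
aux_hidden_state_layers = (1, 9)    # absolute layer indices to tap

collected = []
for idx, layer in enumerate(layers[start_layer:end_layer]):
    # enumerate() restarts at 0 on every rank, so the bare `idx` would wrongly
    # match 1 (absolute layer 7) and never match 9; offsetting by start_layer
    # recovers the absolute index 6..11 that aux_hidden_state_layers refers to.
    if start_layer + idx in aux_hidden_state_layers:
        collected.append(layer)

assert collected == [9]  # only absolute layer 9 lives on this rank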