Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/mcore_bridge/config/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ def __post_init__(self):
self.rope_scaling = json_parse_to_dict(self.rope_scaling)
if 'type' in self.rope_scaling and 'rope_type' not in self.rope_scaling:
self.rope_scaling['rope_type'] = self.rope_scaling['type']
if self.multi_latent_attention and self.partial_rotary_factor is None:
self.partial_rotary_factor = self.qk_pos_emb_head_dim / self.kv_channels
Comment thread
Jintao-Huang marked this conversation as resolved.
Outdated

if self.add_bias_linear:
self.add_qkv_bias = True
Expand Down
5 changes: 2 additions & 3 deletions src/mcore_bridge/model/mm_gpts/gemma4.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ def _set_inv_freq(self):
rope_scaling = self.config.rope_scaling
self.config.rope_scaling = rope_scaling['sliding_attention']
new_inv_freq, attention_scaling = get_rope_inv_freq(self.config)
assert attention_scaling == 1, 'not support'
self.config.attention_scaling = attention_scaling
self.rotary_pos_emb.inv_freq = new_inv_freq.to(self.rotary_pos_emb.inv_freq.device)
# full
self.full_rotary_pos_emb = copy.copy(self.rotary_pos_emb)
Expand All @@ -561,9 +561,8 @@ def _set_inv_freq(self):
kwargs['head_dim_key'] = 'global_head_dim'
new_inv_freq, attention_scaling = get_rope_inv_freq(
self.config, text_config=self.config.hf_config.text_config, **kwargs)
assert attention_scaling == 1, 'not support'
self.full_rotary_pos_emb.inv_freq = new_inv_freq
self.config.attention_scaling = attention_scaling
self.config.full_attention_scaling = attention_scaling
Comment thread
Jintao-Huang marked this conversation as resolved.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The attribute `full_attention_scaling` is added to the config object dynamically at runtime. It should instead be declared explicitly in the `ModelConfig` class in `src/mcore_bridge/config/model_config.py`, which keeps the config consistent and documented and avoids a potential `AttributeError` if the attribute is accessed before this initialization step runs.


self.config.rope_scaling = rope_scaling

Expand Down
2 changes: 1 addition & 1 deletion src/mcore_bridge/model/rope.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_dummy_config(config):
rope_scaling=config.rope_scaling,
rope_theta=config.rotary_base,
max_position_embeddings=config.max_position_embeddings,
head_dim=config.qk_pos_emb_head_dim if config.multi_latent_attention else config.kv_channels,
head_dim=config.kv_channels,
hidden_size=config.hidden_size,
num_attention_heads=config.num_attention_heads,
)
Expand Down
Loading