
[Bug] Custom model evaluation configuration #1746

Open · 2 tasks done
ucassqs opened this issue Dec 9, 2024 · 0 comments

ucassqs commented Dec 9, 2024

Prerequisite

Type

I'm evaluating with the officially supported tasks/models/datasets.

Environment

aaa

Reproduces the problem - code/configuration sample

aaa

Reproduces the problem - command or script

aaa

Reproduces the problem - error message

/mnt/workspace/aaa/opencompass/opencompass/__init__.py:17: UserWarning: Starting from v0.4.0, all AMOTIC configuration files currently located in `./configs/datasets`, `./configs/models`, and `./configs/summarizers` will be migrated to the `opencompass/configs/` package. Please update your configuration file paths accordingly.
  _warn_about_config_migration()
12/09 17:12:04 - OpenCompass - INFO - Task [PhiLanguageModel/demo_gsm8k]
Traceback (most recent call last):
  File "/mnt/workspace/aaa/opencompass/opencompass/tasks/openicl_infer.py", line 161, in <module>
    inferencer.run()
  File "/mnt/workspace/aaa/opencompass/opencompass/tasks/openicl_infer.py", line 73, in run
    self.model = build_model_from_cfg(model_cfg)
  File "/mnt/workspace/aaa/opencompass/opencompass/utils/build.py", line 25, in build_model_from_cfg
    return MODELS.build(model_cfg)
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
    return self.build_func(cfg, *args, **kwargs, registry=self)
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 98, in build_from_cfg
    obj_cls = registry.get(obj_type)
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/registry.py", line 470, in get
    import_module(f'{scope}.registry')
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/importlib/__init__.py", line 121, in import_module
    raise TypeError(msg.format(name))
TypeError: the 'package' argument is required to perform a relative import for '.registry'

Other information

I am evaluating a custom model, configured by following the official tutorial and what I could find online.
The custom model is defined in this file: /mnt/workspace/gaojunqi/opencompass/opencompass/models/mymodel.py

import json
from typing import Dict, List, Optional

import torch
import torch.nn as nn
from torch.nn import functional as F
from transformers import PreTrainedTokenizerFast

# from your_model_file import CustomLanguageModel, load_custom_model  # replace with the correct path
from .base import BaseModel

# Custom building block
class PhiBlock(nn.Module):
    def __init__(self, config):
        super().__init__()
        d_model = config["core_input"]["d_state"]  # model width, taken from the config

        # Core computation modules
        self.z_bias = nn.Parameter(torch.zeros(d_model))  # make sure z_bias is initialized
        self.D = nn.Parameter(torch.zeros(d_model))
        self.in_proj = nn.Linear(d_model, d_model, bias=False)
        self.conv1d = nn.Conv1d(d_model, d_model, kernel_size=1, bias=True)
        self.out_proj = nn.Linear(d_model, d_model, bias=False)

        # MLP and LayerNorm
        self.mlp = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.ReLU(),
            nn.Linear(d_model, d_model)
        )
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x):
        if x.size(-1) != self.z_bias.size(0):
            raise ValueError(
                f"Mismatch in tensor dimensions: x has {x.size(-1)}, "
                f"but z_bias has {self.z_bias.size(0)}."
            )
        # Add the bias
        x = x + self.z_bias

        # Reshape to fit Conv1d
        x = self.in_proj(x)
        x = x.transpose(1, 2)  # [batch_size, seq_length, d_model] -> [batch_size, d_model, seq_length]
        x = self.conv1d(x)  # [batch_size, d_model, seq_length]
        x = x.transpose(1, 2)  # back to [batch_size, seq_length, d_model]

        x = self.out_proj(x)
        x = self.mlp(self.norm(x))
        return x


# Main language model class
class CustomLanguageModel(nn.Module):
    def __init__(self, config_path):
        super().__init__()
        # Load the config
        with open(config_path, "r") as f:
            config = json.load(f)

        # Read hyperparameters
        vocab_size = config["LanguageModel"]["input"]["vocab_size"]
        d_model = config["MixerModel"]["input"]["d_model"]
        n_layers = config["Block1"]["n_layers"]

        # Token embedding layer
        self.embedding = nn.Embedding(vocab_size, d_model)

        # Transformer-style blocks
        self.blocks = nn.ModuleList(
            [PhiBlock(config["Block1"]) for _ in range(n_layers)]
        )

        # Final LayerNorm and output layer
        self.final_layernorm = nn.LayerNorm(d_model)
        self.lm_head = nn.Linear(d_model, vocab_size)

    def forward(self, input_ids):
        x = self.embedding(input_ids)
        for block in self.blocks:
            x = block(x)
        x = self.final_layernorm(x)
        return self.lm_head(x)


# Remap state_dict key names
def remap_state_dict_keys(state_dict):
    new_state_dict = {}
    for key, value in state_dict.items():
        new_state_dict[key] = value
    return new_state_dict


# Model loading helper
def load_custom_model(model_path):
    config_path = f"{model_path}/config.json"
    model_weights_path = f"{model_path}/pytorch_model.bin"
    tokenizer_path = model_path

    # Build the model from its config
    model = CustomLanguageModel(config_path)

    # Load weights and remap key names
    state_dict = torch.load(model_weights_path, map_location="cpu")
    remapped_state_dict = remap_state_dict_keys(state_dict)
    model.load_state_dict(remapped_state_dict, strict=False)  # strict=False ignores mismatched layers
    model.eval()  # switch to inference mode

    # Load the tokenizer
    tokenizer = PreTrainedTokenizerFast.from_pretrained(tokenizer_path)

    # Ensure a pad_token is set
    if tokenizer.pad_token is None:
        if tokenizer.eos_token:  # reuse eos_token as pad_token
            tokenizer.pad_token = tokenizer.eos_token
        else:  # no eos_token, so add [PAD] manually
            tokenizer.add_special_tokens({'pad_token': '[PAD]'})

    return model, tokenizer

# from mmengine.registry import MODELS

# @MODELS.register_module()

from mmengine.registry import OPTIMIZERS

@OPTIMIZERS.register_module()
class PhiLanguageModel(BaseModel):
    def __init__(self,
                 pkg_root: str,
                 ckpt_path: str,
                 tokenizer_only: bool = False,
                 meta_template: Optional[Dict] = None,
                 **kwargs):
        """
        Initialize the model and tokenizer.

        Args:
            pkg_root (str): Root path of the package.
            ckpt_path (str): Path to the model weights.
            tokenizer_only (bool): Whether to load only the tokenizer.
            meta_template (Optional[Dict]): Meta prompt template.
        """
        self.pkg_root = pkg_root
        self.ckpt_path = ckpt_path
        self.tokenizer_only = tokenizer_only
        self.meta_template = meta_template

        # Load model and tokenizer
        self.model, self.tokenizer = load_custom_model(ckpt_path)

        # Pick the device
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def get_token_len(self, prompt: str) -> int:
        """
        Get the token length after tokenization.

        Args:
            prompt (str): The input string.

        Returns:
            int: Number of tokens.
        """
        tokens = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
        return len(tokens["input_ids"][0])

    def generate(self, inputs: List[str], max_out_len: int) -> List[str]:
        """
        Generate outputs for the given inputs.

        Args:
            inputs (List[str]): List of input texts.
            max_out_len (int): Maximum output length.

        Returns:
            List[str]: List of generated texts.
        """
        results = []
        for input_text in inputs:
            # Tokenize and move to device
            tokens = self.tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(self.device)

            # Inference
            with torch.no_grad():
                outputs = self.model(tokens["input_ids"])
                logits = outputs
                predicted_ids = torch.argmax(logits, dim=-1)

            # Decode the generated text
            generated_text = self.tokenizer.decode(predicted_ids[0], skip_special_tokens=True)
            results.append(generated_text[:max_out_len])

        return results

    def get_ppl(self, inputs: List[str], mask_length: Optional[List[int]] = None) -> List[float]:
        """
        Compute perplexity.

        Args:
            inputs (List[str]): List of input texts.
            mask_length (Optional[List[int]]): Mask lengths, used to compute perplexity over a specific span.

        Returns:
            List[float]: Perplexity of each input.
        """
        ppl_scores = []
        for input_text in inputs:
            tokens = self.tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(self.device)

            with torch.no_grad():
                outputs = self.model(tokens["input_ids"])
                logits = outputs

            # Cross-entropy loss
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = tokens["input_ids"][..., 1:].contiguous()

            loss_fct = F.cross_entropy(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
                ignore_index=self.tokenizer.pad_token_id,
                reduction="none"
            )

            loss = loss_fct.view(tokens["input_ids"].size(0), -1).mean(dim=1)
            perplexity = torch.exp(loss)
            ppl_scores.append(perplexity.item())

        return ppl_scores
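
For reference, here is the config.json structure that CustomLanguageModel and PhiBlock appear to expect, reconstructed from the keys the code above reads. This is only a minimal sketch with placeholder values (none of them are from the original report); note that Block1.core_input.d_state must equal MixerModel.input.d_model, otherwise the shape check in PhiBlock.forward raises a ValueError:

import json

# Hypothetical minimal config; the structure is inferred from the keys
# accessed in CustomLanguageModel/PhiBlock, and the values are placeholders.
config = {
    "LanguageModel": {"input": {"vocab_size": 50257}},  # embedding vocabulary size
    "MixerModel": {"input": {"d_model": 768}},          # embedding width
    "Block1": {
        "n_layers": 4,                                  # number of PhiBlock layers
        "core_input": {"d_state": 768},                 # must match d_model above
    },
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)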

/mnt/workspace/gaojunqi/opencompass/configs/models/qwen2_5/mymodels.py

from ....opencompass.models.mymodel import PhiLanguageModel

models = [
    dict(
        type=PhiLanguageModel,
        abbr='PhiLanguageModel',
        path='***',
        max_out_len=4096,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]
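
One detail worth flagging (an observation from the code above, not a confirmed cause of the error): this dict passes path=..., but PhiLanguageModel.__init__ declares pkg_root and ckpt_path as required arguments, so path would only be absorbed by **kwargs and ckpt_path would be missing at build time. A sketch of an entry matching the constructor signature (the pkg_root value is a hypothetical example; the checkpoint path stays elided as in the report):

models = [
    dict(
        type=PhiLanguageModel,
        abbr='PhiLanguageModel',
        pkg_root='/mnt/workspace/gaojunqi/opencompass',  # hypothetical package root
        ckpt_path='***',  # checkpoint directory, elided as in the original report
        max_out_len=4096,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]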

/mnt/workspace/gaojunqi/opencompass/configs/eval_phi_mamba.py

from mmengine.config import read_base

with read_base():
    from .datasets.demo.demo_gsm8k_chat_gen import gsm8k_datasets
    from .datasets.demo.demo_math_chat_gen import math_datasets
    from .models.qwen.hf_qwen2_1_5b_instruct import models as hf_qwen2_1_5b_instruct_models
    from .models.hf_internlm.hf_internlm2_chat_1_8b import models as hf_internlm2_chat_1_8b_models
    from .models.qwen2_5.mymodels import models as PhiLanguageModel
    

datasets = gsm8k_datasets

models = PhiLanguageModel

Command submitted:

python run.py configs/eval_phi_mamba.py

The error is as follows:

/mnt/workspace/gaojunqi/opencompass/opencompass/__init__.py:17: UserWarning: Starting from v0.4.0, all AMOTIC configuration files currently located in `./configs/datasets`, `./configs/models`, and `./configs/summarizers` will be migrated to the `opencompass/configs/` package. Please update your configuration file paths accordingly.
  _warn_about_config_migration()
12/09 17:12:04 - OpenCompass - INFO - Task [PhiLanguageModel/demo_gsm8k]
Traceback (most recent call last):
  File "/mnt/workspace/gaojunqi/opencompass/opencompass/tasks/openicl_infer.py", line 161, in <module>
    inferencer.run()
  File "/mnt/workspace/gaojunqi/opencompass/opencompass/tasks/openicl_infer.py", line 73, in run
    self.model = build_model_from_cfg(model_cfg)
  File "/mnt/workspace/gaojunqi/opencompass/opencompass/utils/build.py", line 25, in build_model_from_cfg
    return MODELS.build(model_cfg)
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
    return self.build_func(cfg, *args, **kwargs, registry=self)
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 98, in build_from_cfg
    obj_cls = registry.get(obj_type)
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/site-packages/mmengine/registry/registry.py", line 470, in get
    import_module(f'{scope}.registry')
  File "/mnt/workspace/anaconda3/envs/opencompass/lib/python3.10/importlib/__init__.py", line 121, in import_module
    raise TypeError(msg.format(name))
TypeError: the 'package' argument is required to perform a relative import for '.registry'

How should the above problem be resolved?
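
For context, the traceback suggests a registry mismatch: the class is registered via mmengine's OPTIMIZERS registry, while build_model_from_cfg resolves it through OpenCompass's MODELS registry, and the failed scope lookup ends in the relative import of '.registry'. A minimal sketch of the registration pattern used by OpenCompass's built-in model classes, assuming opencompass/registry.py exposes MODELS (this differs from the OPTIMIZERS registration in mymodel.py above):

from opencompass.registry import MODELS

from .base import BaseModel


@MODELS.register_module()
class PhiLanguageModel(BaseModel):
    ...  # body unchanged from the definition above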
