Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions spectrumlab/config/base_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,38 @@ class Config:
qwen_vl_api_key: str = BOYUE_API_KEY
qwen_vl_base_url: str = BOYUE_BASE_URL
qwen_vl_model_name: str = os.getenv("QWEN_VL")

# DeepSeek-VL-2
deepseek_vl_2_api_key: str = BOYUE_API_KEY
deepseek_vl_2_base_url: str = BOYUE_BASE_URL
deepseek_vl_2_model_name: str = os.getenv("DEEPSEEK_VL_2")

# Qwen-2.5-VL-32B
qwen_2_5_vl_32b_api_key: str = BOYUE_API_KEY
qwen_2_5_vl_32b_base_url: str = BOYUE_BASE_URL
qwen_2_5_vl_32b_model_name: str = os.getenv("QWEN_2_5_VL_32B")

# Qwen-2.5-VL-72B
qwen_2_5_vl_72b_api_key: str = BOYUE_API_KEY
qwen_2_5_vl_72b_base_url: str = BOYUE_BASE_URL
qwen_2_5_vl_72b_model_name: str = os.getenv("QWEN_2_5_VL_72B")

# Llama-Vision-11B
llama_vision_11b_api_key: str = BOYUE_API_KEY
llama_vision_11b_base_url: str = BOYUE_BASE_URL
llama_vision_11b_model_name: str = os.getenv("LLAMA_VISION_11B")

# Llama-Vision-90B
llama_vision_90b_api_key: str = BOYUE_API_KEY
llama_vision_90b_base_url: str = BOYUE_BASE_URL
llama_vision_90b_model_name: str = os.getenv("LLAMA_VISION_90B")

# Doubao-1.5-Vision-Pro
doubao_1_5_vision_pro_api_key: str = BOYUE_API_KEY
doubao_1_5_vision_pro_base_url: str = BOYUE_BASE_URL
doubao_1_5_vision_pro_model_name: str = os.getenv("DOUBAO_1_5_VISION_PRO")

# Doubao-1.5-Vision-Pro-Thinking
doubao_1_5_vision_pro_thinking_api_key: str = BOYUE_API_KEY
doubao_1_5_vision_pro_thinking_base_url: str = BOYUE_BASE_URL
doubao_1_5_vision_pro_thinking_model_name: str = os.getenv("DOUBAO_1_5_VISION_PRO_THINKING")
154 changes: 151 additions & 3 deletions spectrumlab/evaluator/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,157 @@ def evaluate(
"total_items": len(data_items),
}

def evaluate_many():
# TODO
pass
def evaluate_many(
self,
data_items: List[Dict],
model,
max_out_len: int = 512,
batch_size: Optional[int] = None,
save_path: str = "./eval_results",
n_jobs: int = -1,
) -> Dict:
"""
Evaluate a single model on data_items with parallel processing.

Args:
data_items: List of data items to evaluate
model: Model instance to evaluate
max_out_len: Maximum output length for model generation
batch_size: Batch size for processing (if None, will be auto-calculated)
save_path: Base path to save results
n_jobs: Number of parallel jobs (-1 for all available cores)

Returns:
Dictionary containing evaluation results
"""
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, as_completed
import math

if not data_items:
print("❌ No data items provided")
return {"error": "No data items provided"}

# Set number of jobs
if n_jobs == -1:
n_jobs = mp.cpu_count()

# Calculate batch size if not provided
if batch_size is None:
batch_size = max(1, math.ceil(len(data_items) / n_jobs))

print(f"🔄 Starting parallel evaluation on {len(data_items)} items...")
print(f"📝 Model: {type(model).__name__}")
print(f"⚡ Using {n_jobs} parallel workers with batch size {batch_size}")

# Split data into batches
batches = [
data_items[i:i + batch_size]
for i in range(0, len(data_items), batch_size)
]

print(f"📦 Split into {len(batches)} batches")

# Build prompts for all items
print("📝 Building prompts...")
all_prompts = [self._build_prompt(item) for item in data_items]

# Split prompts into batches
prompt_batches = [
all_prompts[i:i + batch_size]
for i in range(0, len(all_prompts), batch_size)
]

def process_batch(batch_data):
"""Process a batch of prompts and return responses."""
batch_prompts, batch_indices = batch_data
batch_responses = []

for i, prompt in enumerate(batch_prompts):
try:
response = model.generate(prompt, max_out_len)
batch_responses.append(response)
except Exception as e:
# 保持与evaluate方法一致的错误处理
original_index = batch_indices[i]
print(f"\n⚠️ Error on item {original_index + 1}: {e}")
batch_responses.append(f"Error: {str(e)}")

return batch_indices, batch_responses

# Prepare batch data with indices
batch_data_list = []
for i, prompt_batch in enumerate(prompt_batches):
start_idx = i * batch_size
end_idx = min(start_idx + batch_size, len(data_items))
batch_indices = list(range(start_idx, end_idx))
batch_data_list.append((prompt_batch, batch_indices))

# Execute parallel processing
all_responses = [None] * len(data_items)

with ThreadPoolExecutor(max_workers=n_jobs) as executor:
# Submit all batch tasks
future_to_batch = {
executor.submit(process_batch, batch_data): batch_data[1][0]
for batch_data in batch_data_list
}

# Collect results as they complete
for future in tqdm(
as_completed(future_to_batch),
total=len(future_to_batch),
desc="Processing batches",
unit="batch"
):
try:
batch_indices, batch_responses = future.result()
for idx, response in zip(batch_indices, batch_responses):
all_responses[idx] = response
except Exception as e:
print(f"❌ Error processing batch: {e}")

# Process responses and calculate results
print("🔍 Processing responses...")
processed_items = []
for item, response in tqdm(
zip(data_items, all_responses),
desc="Processing responses",
total=len(data_items),
unit="item",
):
item_copy = item.copy()
prediction = self._extract_prediction(response, item)
item_copy[self.prediction_key] = prediction
item_copy["model_response"] = response

answer = item.get("answer", "")
is_correct = self._calculate_accuracy(answer, prediction, item)
item_copy["pass"] = is_correct

processed_items.append(item_copy)

# Save results
saved_files = self._save_results(processed_items, save_path)
print(f"💾 Results saved to: {saved_files}")

# Calculate metrics
print("📊 Calculating metrics...")
metrics = self._calculate_metrics(processed_items)

# Print results
self._print_results(metrics)

return {
"metrics": metrics,
"saved_files": saved_files,
"total_items": len(data_items),
"parallel_info": {
"n_jobs": n_jobs,
"batch_size": batch_size,
"n_batches": len(batches)
}
}

def _save_results(self, results_data: List[Dict], save_path: str) -> List[str]:
"""Save results grouped by subcategory. If save_path is None, do not save."""
Expand Down
8 changes: 6 additions & 2 deletions spectrumlab/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
from .claude_api import Claude_Sonnet_3_5, Claude_Opus_4, Claude_Haiku_3_5, Claude_Sonnet_4
from .gpt4_v_api import GPT4_1, GPT4_Vision
from .grok_api import Grok_2_Vision
from .qwen_vl_api import Qwen_VL_Max
from .deepseek_vl import DeepSeek_VL2
from .qwen_vl_api import Qwen_VL_Max, Qwen_2_5_VL_32B, Qwen_2_5_VL_72B
from .llama_api import Llama_Vision_11B, Llama_Vision_90B
from .doubao_api import Doubao_1_5_Vision_Pro, Doubao_1_5_Vision_Pro_Thinking

__all__ = ["DeepSeek", "GPT4o", "InternVL", "Claude_Sonnet_3_5", "Claude_Opus_4",
"Claude_Haiku_3_5", "Claude_Sonnet_4", "GPT4_1", "GPT4_Vision", "Grok_2_Vision", "Qwen_VL_Max"]
"Claude_Haiku_3_5", "Claude_Sonnet_4", "GPT4_1", "GPT4_Vision", "Grok_2_Vision", "Qwen_VL_Max",
"DeepSeek_VL2", "Qwen_2_5_VL_32B", "Qwen_2_5_VL_72B", "Llama_Vision_11B", "Llama_Vision_90B", "Doubao_1_5_Vision_Pro", "Doubao_1_5_Vision_Pro_Thinking" ]
75 changes: 75 additions & 0 deletions spectrumlab/models/deepseek_vl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from typing import Optional, Union, Dict, Any
from .base_api import BaseAPIModel
from spectrumlab.config import Config
from openai import OpenAI


class DeepSeek_VL2(BaseAPIModel):
def __init__(
self,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
model_name: Optional[str] = None,
**kwargs,
):
config = Config()

# Use provided parameters or fall back to config
self.api_key = api_key or config.deepseek_vl_2_api_key
self.base_url = base_url or config.deepseek_vl_2_base_url
self.model_name = model_name or config.deepseek_vl_2_model_name

# Validate that we have required configuration
if not self.api_key:
raise ValueError(
"InternVL API key not found. Please set INTERNVL_API_KEY in your .env file "
"or provide api_key parameter."
)

self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url,
)

# Initialize parent class
super().__init__(model_name=self.model_name, **kwargs)

def generate(
self, prompt: Union[str, Dict[str, Any]], max_tokens: int = 512
) -> str:
"""
Generate response supporting both text and multimodal input.

Args:
prompt: Either text string or multimodal dict
max_tokens: Maximum tokens to generate

Returns:
Generated response string
"""

# Link: https://internlm.intern-ai.org.cn/api/document
messages = []

if isinstance(prompt, dict) and "images" in prompt:
content = []

content.append({"type": "text", "text": prompt["text"]})

for image_data in prompt["images"]:
content.append(image_data)

messages.append({"role": "user", "content": content})
else:
text_content = prompt if isinstance(prompt, str) else prompt.get("text", "")
messages.append({"role": "user", "content": text_content})

try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=messages,
max_tokens=max_tokens,
)
return response.choices[0].message.content
except Exception as e:
raise RuntimeError(f"InternVL API call failed: {e}")
Loading