Skip to content

Commit

Permalink
fix: Fix up regex str for better filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
YeonwooSung committed Feb 16, 2025
1 parent 8cb9716 commit 6ea100d
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion LLMs/training/train_grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def strict_format_reward_func(completions, **kwargs) -> list[float]:

def soft_format_reward_func(completions, **kwargs) -> list[float]:
    """Reward completions that start with a reasoning block followed by an answer block.

    A completion scores 0.5 when its text begins with
    ``<reasoning>...</reasoning>``, optionally followed by whitespace, and then
    ``<answer>...</answer>``. Anything else scores 0.0.

    Args:
        completions: Iterable of completions. Each one is indexed as
            ``completion[0]["content"]`` to obtain the text to check.
        **kwargs: Accepted for call-signature compatibility; not used here.

    Returns:
        One float per completion, in input order: 0.5 on a match, else 0.0.
    """
    # ``re.match`` anchors only at the start of the string, so text before
    # ``<reasoning>`` prevents a match while text after ``</answer>`` is allowed.
    # ``re.DOTALL`` lets ``.`` span newlines, so the answer body may be multi-line;
    # ``[\s\S]`` matches any character (newlines included) regardless of flags.
    pattern = r"<reasoning>[\s\S]*</reasoning>\s*<answer>.*?</answer>"
    responses = [completion[0]["content"] for completion in completions]
    return [
        0.5 if re.match(pattern, response, flags=re.DOTALL) else 0.0
        for response in responses
    ]
Expand Down

0 comments on commit 6ea100d

Please sign in to comment.