Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 94 additions & 6 deletions src/game/agent_tools/vote_tools.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,46 @@
from typing import Dict
from typing import Dict, Optional
from langchain.tools import tool

from src.game.state import GameState, alive_players, get_player_context
from src.game.state import GameState, PlayerMindset, alive_players
from src.game.strategy.serialization import normalize_mindset, to_plain_dict


def vote_tools(state: GameState):
def decide_player_vote(state: GameState, player_id: str) -> str:
def vote_tools(
state: GameState, mindset_overrides: Optional[Dict[str, PlayerMindset]] = None
):
"""
Bind voting tools against the shared state.

The optional mindset_overrides allows callers (e.g., llm_decide_vote) to provide
freshly inferred player mindsets before the reducer persists them back into state.
This keeps the heuristic scoring in the tools aligned with the LLM's most recent
analysis and avoids voting on stale beliefs.
"""
mindset_overrides = mindset_overrides or {}

def _get_player_mindset(player_id: str):
    """
    Return the normalized mindset to use for ``player_id``.

    An entry in ``mindset_overrides`` takes precedence; otherwise the
    "playerMindset" stored in the player's private state is used, falling
    back to an empty mapping when nothing is recorded.
    """
    if player_id not in mindset_overrides:
        # No override supplied: read whatever the shared state currently holds.
        private_states = state.get("player_private_states", {})
        private_state = private_states.get(player_id, {})
        raw_mindset = private_state.get("playerMindset", {})
    else:
        raw_mindset = mindset_overrides[player_id]

    # Both sources go through the same normalization step.
    return normalize_mindset(raw_mindset)

@tool(description="vote for the most suspicion")
def decide_player_vote(player_id: str) -> str:
"""
Simplified vote decision logic:
1. Determine own role (use opposite if confidence < 50%)
2. Calculate scores for other players based on suspicions
3. Vote for player with the highest score
3. Vote for the second most suspicious player to avoid obvious patterns
"""

mindset_state = normalize_mindset(updated_mindset)
# Get player's mindset from state
mindset_state = _get_player_mindset(player_id)
alive = alive_players(state)

# Determine own role: if confidence > 50%, use current role, otherwise use opposite
Expand Down Expand Up @@ -57,3 +84,64 @@ def decide_player_vote(state: GameState, player_id: str) -> str:
raise ValueError("No alive players to vote for.")

return voted_target

@tool(description="vote for the second suspicion")
def decide_player_vote_second_best(player_id: str) -> str:
    """
    Pick a vote target, preferring the second most distrusted player.

    Steps:
    1. Determine the acting role: the self-believed role, flipped between
       "spy" and "civilian" when self confidence is below 0.5.
    2. Score every other alive player: +confidence when they are suspected
       of sharing the acting role (trusted), -confidence otherwise
       (distrusted), 0.0 when there is no suspicion entry.
    3. Vote for the runner-up in distrust to avoid obvious patterns, unless
       that runner-up is positively trusted, in which case vote for the
       most distrusted player instead.

    Args:
        player_id: ID of the player casting the vote.

    Returns:
        The ID of the chosen target; ``player_id`` itself when no other
        player is alive.

    Raises:
        ValueError: If no player is alive at all.
    """
    mindset_state = _get_player_mindset(player_id)
    alive = alive_players(state)

    # Confidence of exactly 0.5 keeps the current role; below that, flip it.
    # ``or`` guards keep a None self_belief/confidence from raising.
    my_self_belief = mindset_state.get("self_belief", {}) or {}
    my_role = my_self_belief.get("role", "civilian")
    if (my_self_belief.get("confidence") or 0.0) < 0.5:
        my_role = "spy" if my_role == "civilian" else "civilian"

    suspicions = mindset_state.get("suspicions", {}) or {}
    player_scores: Dict[str, float] = {}
    for other_player_id in alive:
        if other_player_id == player_id:
            continue

        score = 0.0
        suspicion = suspicions.get(other_player_id)
        if suspicion:
            suspicion_data = to_plain_dict(suspicion, lambda: {})
            suspicion_role = suspicion_data.get("role", "civilian")
            # A missing or None confidence counts as no evidence either way.
            suspicion_conf = suspicion_data.get("confidence") or 0.0
            # Positive = same role alignment (trust); negative = distrust.
            score = suspicion_conf if my_role == suspicion_role else -suspicion_conf
        player_scores[other_player_id] = score

    if not player_scores:
        # Scores are empty only when nobody other than the voter is alive,
        # so there is no "first other alive player" to fall back to.
        if alive:
            return player_id
        raise ValueError("No alive players to vote for.")

    # Ascending order: most distrusted first. sorted() is stable, so ties
    # keep the order in which players appear in ``alive``.
    sorted_targets = sorted(player_scores, key=player_scores.get)
    if len(sorted_targets) >= 2 and player_scores[sorted_targets[1]] <= 0.0:
        return sorted_targets[1]  # Second most suspicious

    # Only one candidate, or the runner-up is someone we actively trust:
    # voting for a believed ally would be self-defeating, so take the top one.
    return sorted_targets[0]

return [decide_player_vote, decide_player_vote_second_best]
72 changes: 10 additions & 62 deletions src/game/nodes/player.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from ..state import (
GameState,
alive_players,
Vote,
create_speech_record,
Speech,
PlayerPrivateState,
Expand All @@ -40,8 +39,9 @@
from ..strategy import (
llm_update_player_mindset,
llm_generate_speech,
llm_decide_vote,
)
from ..strategy.serialization import normalize_mindset, to_plain_dict
from ..strategy.serialization import normalize_mindset


def _get_llm_client():
Expand Down Expand Up @@ -187,64 +187,6 @@ def player_speech(state: GameState, player_id: str) -> Dict[str, Any]:
}


def _decide_player_vote(
    state: GameState,
    player_id: str,
    updated_mindset: Dict[str, Any],
) -> str:
    """
    Choose whom ``player_id`` votes for, based on the supplied mindset.

    The acting role is the self-believed role, flipped between "spy" and
    "civilian" when self confidence is below 0.5. Every other alive player
    is scored +confidence when suspected of sharing that role, -confidence
    otherwise, and 0.0 when no suspicion is recorded. The player with the
    lowest score (most distrust) receives the vote.

    Returns the chosen player ID, or ``player_id`` itself when nobody else is
    alive. Raises ValueError when no player is alive.
    """
    beliefs = normalize_mindset(updated_mindset)
    living = alive_players(state)

    # Low self-confidence means we act as the opposite role.
    self_view = beliefs.get("self_belief", {})
    acting_role = self_view.get("role", "civilian")
    unsure = self_view.get("confidence", 0.0) < 0.5
    if unsure:
        acting_role = "civilian" if acting_role != "civilian" else "spy"

    recorded = beliefs.get("suspicions", {}) or {}
    trust_by_player: Dict[str, float] = {}
    for candidate in living:
        if candidate == player_id:
            continue

        entry = recorded.get(candidate)
        if not entry:
            # Nothing known about this player: neutral score.
            trust_by_player[candidate] = 0.0
            continue

        details = to_plain_dict(entry, lambda: {})
        believed_role = details.get("role", "civilian")
        weight = details.get("confidence", 0.0)
        # Same alignment counts as trust (+), different alignment as distrust (-).
        trust_by_player[candidate] = weight if acting_role == believed_role else -weight

    if trust_by_player:
        # Lowest score = most distrusted player.
        return min(trust_by_player, key=trust_by_player.get)

    # No scored candidates: fall back through the remaining options.
    others = [p for p in living if p != player_id]
    if others:
        return others[0]
    if living:
        return player_id
    raise ValueError("No alive players to vote for.")


def player_vote(state: GameState, player_id: str) -> Dict[str, Any]:
"""
Player node for casting a vote.
Expand Down Expand Up @@ -287,8 +229,14 @@ def player_vote(state: GameState, player_id: str) -> Dict[str, Any]:
existing_player_mindset=existing_player_mindset,
)
updated_mindset_state = normalize_mindset(updated_mindset)
# Decide the player's vote and infer PlayerMindset using LLM
voted_target = _decide_player_vote(state, player_id, updated_mindset_state)
# Decide on a vote target using the LLM with bound voting tools
voted_target = llm_decide_vote(
llm_client=llm_client,
state=state,
me=player_id,
my_word=my_word,
current_mindset=updated_mindset_state,
)

print(f"🗳️ PLAYER VOTE: {player_id} votes for: {voted_target}")
print(f" Self belief: {updated_mindset_state.get('self_belief')}")
Expand Down
3 changes: 2 additions & 1 deletion src/game/strategy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from src.game.strategy.strategy_core import (
llm_update_player_mindset,
llm_generate_speech,
llm_decide_vote,
)

__all__ = ["llm_update_player_mindset", "llm_generate_speech"]
__all__ = ["llm_update_player_mindset", "llm_generate_speech", "llm_decide_vote"]
57 changes: 57 additions & 0 deletions src/game/strategy/builders/context_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,60 @@ def build_speech_user_context(
"<response_guidance>Return exactly one line of speech; avoid emojis, labels, or extra commentary.</response_guidance>"
"</speech_context>"
)


def build_vote_user_context(
    alive: List[str],
    me: str,
    current_mindset: PlayerMindset,
    current_round: int,
) -> str:
    """Assemble the XML-like user context used when choosing a voting strategy.

    The result lists the other alive players, every recorded suspicion about
    players other than ``me`` (role, confidence to two decimals, and a reason
    trimmed to 120 characters), and round-specific guidance text.
    """

    def _suspect_tag(suspect_id, raw_suspicion) -> str:
        # Render one <suspect> element from a single suspicion entry.
        details = _as_mapping(raw_suspicion)
        role = details.get("role", "civilian")
        confidence = _as_float(details.get("confidence", 0.0))
        reason = trim_text_for_prompt(details.get("reason", ""), limit=120)
        return (
            f'<suspect id="{escape(suspect_id)}" role="{escape(role)}" '
            f'confidence="{confidence:.2f}">{escape(reason)}</suspect>'
        )

    recorded = _as_mapping(current_mindset).get("suspicions", {}) or {}

    # The voter never appears in either list.
    other_players = [f'<player id="{escape(pid)}" />' for pid in alive if pid != me]
    alive_block = "".join(other_players) or "<none />"

    suspect_block = (
        "".join(
            _suspect_tag(pid, suspicion)
            for pid, suspicion in recorded.items()
            if pid != me
        )
        or "<none />"
    )

    guidance = (
        f"It is currently round {current_round}. "
        "During rounds 1 or 2, you may prefer the slightly conservative strategy "
        "(decide_player_vote_second_best) to stay flexible and harder to read. "
        "If you feel one player clearly stands out as more suspicious, or the game has moved into later rounds, "
        "choose decide_player_vote for a direct accusation. "
        "Call exactly one tool, then return the final target via the VoteDecision structured response."
    )

    sections = [
        "<vote_context>",
        f'<me id="{escape(me)}" />',
        f'<round index="{current_round}" />',
        f"<alive>{alive_block}</alive>",
        f"<suspicions>{suspect_block}</suspicions>",
        f"<guidance>{escape(guidance)}</guidance>",
        "</vote_context>",
    ]
    return "".join(sections)
17 changes: 17 additions & 0 deletions src/game/strategy/builders/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@
- Avoid brands, numbers, and rare trivia unless essential.
Reply now with your single-line speech."""

# System prompt template for the voting step.
# Placeholders: {my_word}, {alive_count}, {current_round}; they are filled in
# by format_vote_system_prompt via str.format, so any literal brace added to
# this text must be doubled.
# The tool names mentioned below are the ones returned by vote_tools
# (decide_player_vote, decide_player_vote_second_best); keep them in sync.
_VOTE_PROMPT_PREFIX = """You are playing "Who is the Spy" and it is time to vote.
Your secret word is "{my_word}".
Decide between two voting strategies, and call exactly one tool:
- `decide_player_vote`: Use when one player feels clearly more suspicious or the game is already in later rounds.
- `decide_player_vote_second_best`: Use when suspicions are close together, you are still in the first two rounds, or you want to stay less predictable.
Do not call both tools. Make your internal choice, invoke the tool, then return only the player ID via the VoteDecision structured response.
(Alive players: {alive_count}, current round: {current_round})"""


def determine_clarity(
role: str, self_confidence: float, current_round: int
Expand Down Expand Up @@ -149,3 +157,12 @@ def format_inference_system_prompt(
return _INFERENCE_PROMPT_PREFIX.format(
my_word=my_word, player_count=player_count, spy_count=spy_count
)


def format_vote_system_prompt(
    my_word: str, alive_count: int, current_round: int
) -> str:
    """Fill the vote system-prompt template with this turn's values."""
    placeholders = {
        "my_word": my_word,
        "alive_count": alive_count,
        "current_round": current_round,
    }
    return _VOTE_PROMPT_PREFIX.format(**placeholders)
6 changes: 6 additions & 0 deletions src/game/strategy/llm_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ class PlayerMindsetModel(BaseModel):

self_belief: SelfBeliefModel
suspicions: Dict[str, SuspicionModel] = Field(default_factory=dict)


class VoteDecisionModel(BaseModel):
"""Structured output model capturing a player's vote target."""
# NOTE(review): presumably a pydantic model used for LLM structured output;
# if so, the docstring above and the Field description below may be emitted
# in the generated schema, so treat both as prompt text when editing them.

# Single field holding the chosen player's ID. The Ellipsis default is the
# pydantic convention for "required, no default" — confirm BaseModel/Field
# come from pydantic in this module.
target: str = Field(..., description="ID of the player to vote for.")
Loading
Loading