Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 77 additions & 32 deletions src/game/agent_tools/vote_tools.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,53 @@
from typing import Dict
from typing import Dict, Optional
from langchain.tools import tool

from src.game.state import GameState, alive_players, get_player_context
from src.game.state import GameState, PlayerMindset, alive_players
from src.game.strategy.serialization import normalize_mindset, to_plain_dict


def vote_tools(state: GameState):
def decide_player_vote(state: GameState, player_id: str) -> str:
def vote_tools(
state: GameState,
bound_player_id: str,
mindset_overrides: Optional[Dict[str, PlayerMindset]] = None,
):
"""
Bind voting tools against the shared state.

The optional mindset_overrides allows callers (e.g., llm_decide_vote) to provide
freshly inferred player mindsets before the reducer persists them back into state.
This keeps the heuristic scoring in the tools aligned with the LLM's most recent
analysis and avoids voting on stale beliefs.

The returned tools are zero-argument and always operate on the bound player, so
downstream LLMs cannot accidentally vote using another player's mindset.
"""
mindset_overrides = mindset_overrides or {}

def _resolve_mindset() -> PlayerMindset:
"""
Simplified vote decision logic:
1. Determine own role (use opposite if confidence < 50%)
2. Calculate scores for other players based on suspicions
3. Vote for player with the highest score
Resolve the latest mindset for the bound player from overrides or shared state.
Normalization keeps downstream logic consistent.
"""
if bound_player_id in mindset_overrides:
return normalize_mindset(mindset_overrides[bound_player_id])

mindset_state = normalize_mindset(updated_mindset)
alive = alive_players(state)
player_private_state = state.get("player_private_states", {}).get(
bound_player_id, {}
)
player_mindset = player_private_state.get("playerMindset", {})
return normalize_mindset(player_mindset)

# Determine own role: if confidence > 50%, use current role, otherwise use opposite
def _score_players(mindset_state: PlayerMindset) -> Dict[str, float]:
alive = alive_players(state)
my_self_belief = mindset_state.get("self_belief", {})
my_role = my_self_belief.get("role", "civilian")
if my_self_belief.get("confidence", 0.0) < 0.5:
# Use opposite role
my_role = "spy" if my_role == "civilian" else "civilian"

suspicions = mindset_state.get("suspicions", {}) or {}
player_scores: Dict[str, float] = {}
for other_player_id in alive:
if other_player_id == player_id:
if other_player_id == bound_player_id:
continue

score = 0.0
Expand All @@ -35,25 +56,49 @@ def decide_player_vote(state: GameState, player_id: str) -> str:
suspicion_data = to_plain_dict(suspicion, lambda: {})
suspicion_role = suspicion_data.get("role", "civilian")
suspicion_conf = suspicion_data.get("confidence", 0.0)
if my_role == suspicion_role:
# Positive score means we trust them (same role alignment)
score = suspicion_conf
else:
# Negative score means we distrust them (different role alignment)
score = -suspicion_conf
score = suspicion_conf if my_role == suspicion_role else -suspicion_conf
player_scores[other_player_id] = score
return player_scores

@tool(description="vote for the most suspicion")
def decide_player_vote() -> str:
"""
Simplified vote decision logic (player id pre-bound).
"""

mindset_state = _resolve_mindset()
alive = alive_players(state)
player_scores = _score_players(mindset_state)

if player_scores:
# Pick the lowest score (most distrust) to target suspected opponents
voted_target = min(player_scores, key=player_scores.get)
else:
# Fallback if no other players to score (e.g., only self is alive)
other_alive = [p for p in alive if p != player_id]
if other_alive:
voted_target = other_alive[0] # Vote for the first other alive player
elif alive: # Only self is alive
voted_target = player_id
else: # Should not happen in a valid game state
raise ValueError("No alive players to vote for.")

return voted_target
return min(player_scores, key=player_scores.get)

other_alive = [p for p in alive if p != bound_player_id]
if other_alive:
return other_alive[0]
if alive:
return bound_player_id
raise ValueError("No alive players to vote for.")

@tool(description="vote for the second suspicion")
def decide_player_vote_second_best() -> str:
    """
    Pick the runner-up vote target for the pre-bound player.

    Candidates are ranked by ascending score as produced by
    ``_score_players``. The second-ranked candidate is returned, or the
    first-ranked one when only a single candidate was scored.

    When no candidate was scored, fall back to the first alive player other
    than the bound player, then to the bound player itself.

    Raises:
        ValueError: if nobody is alive.
    """
    # Keep the helper call order of the original: mindset, alive list, scores.
    mindset = _resolve_mindset()
    living = alive_players(state)
    scores = _score_players(mindset)

    ranked = sorted(scores, key=scores.get)
    if ranked:
        # Index 1 when at least two candidates exist, otherwise index 0.
        return ranked[min(1, len(ranked) - 1)]

    for candidate in living:
        if candidate != bound_player_id:
            return candidate
    if living:
        return bound_player_id
    raise ValueError("No alive players to vote for.")

return [decide_player_vote, decide_player_vote_second_best]
72 changes: 10 additions & 62 deletions src/game/nodes/player.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from ..state import (
GameState,
alive_players,
Vote,
create_speech_record,
Speech,
PlayerPrivateState,
Expand All @@ -40,8 +39,9 @@
from ..strategy import (
llm_update_player_mindset,
llm_generate_speech,
llm_decide_vote,
)
from ..strategy.serialization import normalize_mindset, to_plain_dict
from ..strategy.serialization import normalize_mindset


def _get_llm_client():
Expand Down Expand Up @@ -187,64 +187,6 @@ def player_speech(state: GameState, player_id: str) -> Dict[str, Any]:
}


def _decide_player_vote(
    state: GameState,
    player_id: str,
    updated_mindset: Dict[str, Any],
) -> str:
    """
    Pick a vote target for ``player_id`` from its (normalized) mindset.

    1. Determine the role to reason from: the self-believed role (default
       "civilian"), or the opposite role when the self-belief confidence is
       below 0.5.
    2. Score every other alive player: ``+confidence`` when the suspected
       role matches that role, ``-confidence`` when it differs, and ``0.0``
       when no suspicion is recorded for them.
    3. Vote for the player with the LOWEST score, i.e. the most distrusted
       one. On ties ``min`` keeps the first candidate in alive order.

    Args:
        state: Shared game state, used to list the alive players.
        player_id: The voting player; never scored as a candidate.
        updated_mindset: The player's latest mindset, normalized before use.

    Returns:
        The id of the player to vote for. If the voter is the only alive
        player, its own id is returned.

    Raises:
        ValueError: if no player is alive at all.
    """
    mindset_state = normalize_mindset(updated_mindset)
    alive = alive_players(state)

    # Below 0.5 confidence the player does not trust its own role belief,
    # so it reasons as the opposite role. Exactly 0.5 keeps the role.
    my_self_belief = mindset_state.get("self_belief", {}) or {}
    my_role = my_self_belief.get("role", "civilian")
    if my_self_belief.get("confidence", 0.0) < 0.5:
        my_role = "spy" if my_role == "civilian" else "civilian"

    suspicions = mindset_state.get("suspicions", {}) or {}
    player_scores: Dict[str, float] = {}
    for other_player_id in alive:
        if other_player_id == player_id:
            continue

        score = 0.0
        suspicion = suspicions.get(other_player_id)
        if suspicion:
            suspicion_data = to_plain_dict(suspicion, lambda: {})
            suspicion_role = suspicion_data.get("role", "civilian")
            suspicion_conf = suspicion_data.get("confidence", 0.0)
            # Positive = same alignment (trust), negative = distrust.
            score = suspicion_conf if my_role == suspicion_role else -suspicion_conf
        player_scores[other_player_id] = score

    if player_scores:
        # Lowest score = most distrusted player.
        return min(player_scores, key=player_scores.get)

    # player_scores holds an entry for every alive player other than the
    # voter, so an empty dict means there is no other alive player to pick.
    if alive:
        return player_id
    raise ValueError("No alive players to vote for.")


def player_vote(state: GameState, player_id: str) -> Dict[str, Any]:
"""
Player node for casting a vote.
Expand Down Expand Up @@ -287,8 +229,14 @@ def player_vote(state: GameState, player_id: str) -> Dict[str, Any]:
existing_player_mindset=existing_player_mindset,
)
updated_mindset_state = normalize_mindset(updated_mindset)
# Decide the player's vote and infer PlayerMindset using LLM
voted_target = _decide_player_vote(state, player_id, updated_mindset_state)
# Decide on a vote target using the LLM with bound voting tools
voted_target = llm_decide_vote(
llm_client=llm_client,
state=state,
me=player_id,
my_word=my_word,
current_mindset=updated_mindset_state,
)

print(f"🗳️ PLAYER VOTE: {player_id} votes for: {voted_target}")
print(f" Self belief: {updated_mindset_state.get('self_belief')}")
Expand Down
3 changes: 2 additions & 1 deletion src/game/strategy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from src.game.strategy.strategy_core import (
llm_update_player_mindset,
llm_generate_speech,
llm_decide_vote,
)

__all__ = ["llm_update_player_mindset", "llm_generate_speech"]
__all__ = ["llm_update_player_mindset", "llm_generate_speech", "llm_decide_vote"]
57 changes: 57 additions & 0 deletions src/game/strategy/builders/context_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,60 @@ def build_speech_user_context(
"<response_guidance>Return exactly one line of speech; avoid emojis, labels, or extra commentary.</response_guidance>"
"</speech_context>"
)


def build_vote_user_context(
    alive: List[str],
    me: str,
    current_mindset: PlayerMindset,
    current_round: int,
) -> str:
    """
    Render the user-side context for choosing a voting strategy.

    The result is a single ``<vote_context>`` string containing, in order:
    the voter (``<me>``), the round index, the alive players other than the
    voter, one ``<suspect>`` element per recorded suspicion (excluding the
    voter), and a guidance paragraph. Empty player or suspicion lists are
    rendered as ``<none />``.

    Suspicion entries are taken from the mindset as-is; they are not
    filtered against ``alive``. Each reason is trimmed to 120 characters
    via ``trim_text_for_prompt`` and the confidence is printed with two
    decimals.
    """
    recorded = _as_mapping(current_mindset).get("suspicions", {}) or {}

    def _render_suspect(suspect_id, raw_entry) -> str:
        # One <suspect> element; defaults mirror a missing role/confidence/reason.
        entry = _as_mapping(raw_entry)
        role = entry.get("role", "civilian")
        confidence = _as_float(entry.get("confidence", 0.0))
        reason = trim_text_for_prompt(entry.get("reason", ""), limit=120)
        return (
            f'<suspect id="{escape(suspect_id)}" '
            f'role="{escape(role)}" '
            f'confidence="{confidence:.2f}">'
            f"{escape(reason)}"
            "</suspect>"
        )

    others = [f'<player id="{escape(pid)}" />' for pid in alive if pid != me]
    alive_tags = "".join(others) or "<none />"

    suspects = [
        _render_suspect(pid, raw) for pid, raw in recorded.items() if pid != me
    ]
    suspicions_block = "".join(suspects) or "<none />"

    # Sentences are joined by single spaces, yielding the same guidance text.
    guidance_text = " ".join(
        (
            f"It is currently round {current_round}.",
            "During rounds 1 or 2, you may prefer the slightly conservative strategy "
            "(decide_player_vote_second_best) to stay flexible and harder to read.",
            "If you feel one player clearly stands out as more suspicious, or the game has moved into later rounds, "
            "choose decide_player_vote for a direct accusation.",
            "Call exactly one tool, then return the final target via the VoteDecision structured response.",
        )
    )

    sections = [
        "<vote_context>",
        f'<me id="{escape(me)}" />',
        f'<round index="{current_round}" />',
        f"<alive>{alive_tags}</alive>",
        f"<suspicions>{suspicions_block}</suspicions>",
        f"<guidance>{escape(guidance_text)}</guidance>",
        "</vote_context>",
    ]
    return "".join(sections)
17 changes: 17 additions & 0 deletions src/game/strategy/builders/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@
- Avoid brands, numbers, and rare trivia unless essential.
Reply now with your single-line speech."""

# System prompt template for the voting step. It is a str.format template with
# three placeholders: {my_word}, {alive_count} and {current_round}; it is
# rendered by format_vote_system_prompt. The backticked names are the two
# voting tools the model is told to choose between.
_VOTE_PROMPT_PREFIX = """You are playing "Who is the Spy" and it is time to vote.
Your secret word is "{my_word}".
Decide between two voting strategies, and call exactly one tool:
- `decide_player_vote`: Use when one player feels clearly more suspicious or the game is already in later rounds.
- `decide_player_vote_second_best`: Use when suspicions are close together, you are still in the first two rounds, or you want to stay less predictable.
Do not call both tools. Make your internal choice, invoke the tool, then return only the player ID via the VoteDecision structured response.
(Alive players: {alive_count}, current round: {current_round})"""


def determine_clarity(
role: str, self_confidence: float, current_round: int
Expand Down Expand Up @@ -149,3 +157,12 @@ def format_inference_system_prompt(
return _INFERENCE_PROMPT_PREFIX.format(
my_word=my_word, player_count=player_count, spy_count=spy_count
)


def format_vote_system_prompt(
    my_word: str, alive_count: int, current_round: int
) -> str:
    """
    Render the voting system prompt.

    Substitutes the player's secret word, the number of alive players and
    the current round into ``_VOTE_PROMPT_PREFIX`` and returns the result.
    """
    placeholders = {
        "my_word": my_word,
        "alive_count": alive_count,
        "current_round": current_round,
    }
    return _VOTE_PROMPT_PREFIX.format(**placeholders)
6 changes: 6 additions & 0 deletions src/game/strategy/llm_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ class PlayerMindsetModel(BaseModel):

self_belief: SelfBeliefModel
suspicions: Dict[str, SuspicionModel] = Field(default_factory=dict)


class VoteDecisionModel(BaseModel):
    """Structured output model capturing a player's vote target.

    Holds a single required field, ``target``.
    NOTE(review): presumably this is the schema the vote prompts refer to as
    the "VoteDecision structured response" - confirm against the caller.
    """

    # Required field (``...`` means no default); the description string is
    # part of the field metadata, so keep its wording deliberate.
    target: str = Field(..., description="ID of the player to vote for.")
Loading
Loading