leslieo2 · leslieo2 · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025 · Oct 31, 2025
diff --git a/src/game/agent_tools/vote_tools.py b/src/game/agent_tools/vote_tools.py
@@ -1,32 +1,53 @@
-from typing import Dict
+from typing import Dict, Optional
+from langchain.tools import tool
 
-from src.game.state import GameState, alive_players, get_player_context
+from src.game.state import GameState, PlayerMindset, alive_players
 from src.game.strategy.serialization import normalize_mindset, to_plain_dict
 
 
-def vote_tools(state: GameState):
-    def decide_player_vote(state: GameState, player_id: str) -> str:
+def vote_tools(
+    state: GameState,
+    bound_player_id: str,
+    mindset_overrides: Optional[Dict[str, PlayerMindset]] = None,
+):
+    """
+    Bind voting tools against the shared state.
+
+    The optional mindset_overrides allows callers (e.g., llm_decide_vote) to provide
+    freshly inferred player mindsets before the reducer persists them back into state.
+    This keeps the heuristic scoring in the tools aligned with the LLM's most recent
+    analysis and avoids voting on stale beliefs.
+
+    The returned tools are zero-argument and always operate on the bound player, so
+    downstream LLMs cannot accidentally vote using another player's mindset.
+    """
+    mindset_overrides = mindset_overrides or {}
+
+    def _resolve_mindset() -> PlayerMindset:
         """
-        Simplified vote decision logic:
-        1. Determine own role (use opposite if confidence < 50%)
-        2. Calculate scores for other players based on suspicions
-        3. Vote for player with the highest score
+        Return the bound player's normalized mindset, taking mindset_overrides
+        first and shared state second; a missing entry normalizes an empty dict.
         """
+        if bound_player_id in mindset_overrides:
+            return normalize_mindset(mindset_overrides[bound_player_id])
 
-        mindset_state = normalize_mindset(updated_mindset)
-        alive = alive_players(state)
+        player_private_state = state.get("player_private_states", {}).get(
+            bound_player_id, {}
+        )
+        player_mindset = player_private_state.get("playerMindset", {})
+        return normalize_mindset(player_mindset)
 
-        # Determine own role: if confidence > 50%, use current role, otherwise use opposite
+    def _score_players(mindset_state: PlayerMindset) -> Dict[str, float]:
+        alive = alive_players(state)
         my_self_belief = mindset_state.get("self_belief", {})
         my_role = my_self_belief.get("role", "civilian")
         if my_self_belief.get("confidence", 0.0) < 0.5:
-            # Use opposite role
             my_role = "spy" if my_role == "civilian" else "civilian"
 
         suspicions = mindset_state.get("suspicions", {}) or {}
         player_scores: Dict[str, float] = {}
         for other_player_id in alive:
-            if other_player_id == player_id:
+            if other_player_id == bound_player_id:
                 continue
 
             score = 0.0
@@ -35,25 +56,49 @@ def decide_player_vote(state: GameState, player_id: str) -> str:
                 suspicion_data = to_plain_dict(suspicion, lambda: {})
                 suspicion_role = suspicion_data.get("role", "civilian")
                 suspicion_conf = suspicion_data.get("confidence", 0.0)
-                if my_role == suspicion_role:
-                    # Positive score means we trust them (same role alignment)
-                    score = suspicion_conf
-                else:
-                    # Negative score means we distrust them (different role alignment)
-                    score = -suspicion_conf
+                score = suspicion_conf if my_role == suspicion_role else -suspicion_conf
             player_scores[other_player_id] = score
+        return player_scores
+
+    @tool(description="vote for the most suspicion")
+    def decide_player_vote() -> str:
+        """
+        Return the id of the alive opponent with the lowest (most distrusted) score.
+
+        Falls back to the bound player when nobody else is alive and raises
+        ValueError when no player is alive at all.
+        """
+        player_scores = _score_players(_resolve_mindset())
 
         if player_scores:
-            # Pick the lowest score (most distrust) to target suspected opponents
-            voted_target = min(player_scores, key=player_scores.get)
-        else:
-            # Fallback if no other players to score (e.g., only self is alive)
-            other_alive = [p for p in alive if p != player_id]
-            if other_alive:
-                voted_target = other_alive[0]  # Vote for the first other alive player
-            elif alive:  # Only self is alive
-                voted_target = player_id
-            else:  # Should not happen in a valid game state
-                raise ValueError("No alive players to vote for.")
-
-        return voted_target
+            # min() keeps the first minimal key, so ties resolve in scoring order.
+            return min(player_scores, key=player_scores.get)
+
+        # _score_players rates every other alive player, so an empty result
+        # means the bound player has no opponent left to vote for.
+        if alive_players(state):
+            return bound_player_id
+        raise ValueError("No alive players to vote for.")
+
+    @tool(description="vote for the second suspicion")
+    def decide_player_vote_second_best() -> str:
+        """
+        Return the id of the second most distrusted alive opponent.
+
+        Opponents are ranked by ascending score; the runner-up is returned, or
+        the top entry when only one exists. With no opponent alive the bound
+        player is returned; ValueError is raised when nobody is alive.
+        """
+
+        player_scores = _score_players(_resolve_mindset())
+        if player_scores:
+            # sorted() is stable, so tied scores keep their scoring order.
+            ranked = sorted(player_scores, key=player_scores.get)
+            return ranked[1] if len(ranked) > 1 else ranked[0]
+
+        # _score_players rates every other alive player: empty means no opponent.
+        if alive_players(state):
+            return bound_player_id
+        raise ValueError("No alive players to vote for.")
+
+    return [decide_player_vote, decide_player_vote_second_best]
diff --git a/src/game/nodes/player.py b/src/game/nodes/player.py
@@ -29,7 +29,6 @@
 from ..state import (
     GameState,
     alive_players,
-    Vote,
     create_speech_record,
     Speech,
     PlayerPrivateState,
@@ -40,8 +39,9 @@
 from ..strategy import (
     llm_update_player_mindset,
     llm_generate_speech,
+    llm_decide_vote,
 )
-from ..strategy.serialization import normalize_mindset, to_plain_dict
+from ..strategy.serialization import normalize_mindset
 
 
 def _get_llm_client():
@@ -187,64 +187,6 @@ def player_speech(state: GameState, player_id: str) -> Dict[str, Any]:
     }
 
 
-def _decide_player_vote(
-    state: GameState,
-    player_id: str,
-    updated_mindset: Dict[str, Any],
-) -> str:
-    """
-    Simplified vote decision logic:
-    1. Determine own role (use opposite if confidence < 50%)
-    2. Calculate scores for other players based on suspicions
-    3. Vote for player with the highest score
-    """
-
-    mindset_state = normalize_mindset(updated_mindset)
-    alive = alive_players(state)
-
-    # Determine own role: if confidence > 50%, use current role, otherwise use opposite
-    my_self_belief = mindset_state.get("self_belief", {})
-    my_role = my_self_belief.get("role", "civilian")
-    if my_self_belief.get("confidence", 0.0) < 0.5:
-        # Use opposite role
-        my_role = "spy" if my_role == "civilian" else "civilian"
-
-    suspicions = mindset_state.get("suspicions", {}) or {}
-    player_scores: Dict[str, float] = {}
-    for other_player_id in alive:
-        if other_player_id == player_id:
-            continue
-
-        score = 0.0
-        suspicion = suspicions.get(other_player_id)
-        if suspicion:
-            suspicion_data = to_plain_dict(suspicion, lambda: {})
-            suspicion_role = suspicion_data.get("role", "civilian")
-            suspicion_conf = suspicion_data.get("confidence", 0.0)
-            if my_role == suspicion_role:
-                # Positive score means we trust them (same role alignment)
-                score = suspicion_conf
-            else:
-                # Negative score means we distrust them (different role alignment)
-                score = -suspicion_conf
-        player_scores[other_player_id] = score
-
-    if player_scores:
-        # Pick the lowest score (most distrust) to target suspected opponents
-        voted_target = min(player_scores, key=player_scores.get)
-    else:
-        # Fallback if no other players to score (e.g., only self is alive)
-        other_alive = [p for p in alive if p != player_id]
-        if other_alive:
-            voted_target = other_alive[0]  # Vote for the first other alive player
-        elif alive:  # Only self is alive
-            voted_target = player_id
-        else:  # Should not happen in a valid game state
-            raise ValueError("No alive players to vote for.")
-
-    return voted_target
-
-
 def player_vote(state: GameState, player_id: str) -> Dict[str, Any]:
     """
     Player node for casting a vote.
@@ -287,8 +229,14 @@ def player_vote(state: GameState, player_id: str) -> Dict[str, Any]:
         existing_player_mindset=existing_player_mindset,
     )
     updated_mindset_state = normalize_mindset(updated_mindset)
-    # Decide the player's vote and infer PlayerMindset using LLM
-    voted_target = _decide_player_vote(state, player_id, updated_mindset_state)
+    # Decide on a vote target using the LLM with bound voting tools
+    voted_target = llm_decide_vote(
+        llm_client=llm_client,
+        state=state,
+        me=player_id,
+        my_word=my_word,
+        current_mindset=updated_mindset_state,
+    )
 
     print(f"🗳️  PLAYER VOTE: {player_id} votes for: {voted_target}")
     print(f"   Self belief: {updated_mindset_state.get('self_belief')}")

diff --git a/src/game/strategy/__init__.py b/src/game/strategy/__init__.py
@@ -14,6 +14,7 @@
 from src.game.strategy.strategy_core import (
     llm_update_player_mindset,
     llm_generate_speech,
+    llm_decide_vote,
 )
 
-__all__ = ["llm_update_player_mindset", "llm_generate_speech"]
+__all__ = ["llm_update_player_mindset", "llm_generate_speech", "llm_decide_vote"]
diff --git a/src/game/strategy/builders/context_builder.py b/src/game/strategy/builders/context_builder.py
@@ -182,3 +182,60 @@ def build_speech_user_context(
         "<response_guidance>Return exactly one line of speech; avoid emojis, labels, or extra commentary.</response_guidance>"
         "</speech_context>"
     )
+
+
+def build_vote_user_context(
+    alive: List[str],
+    me: str,
+    current_mindset: PlayerMindset,
+    current_round: int,
+) -> str:
+    """Render the XML-style user message that accompanies a vote request.
+
+    The message names the voter, the round, every other alive player and each
+    recorded suspicion about another player, followed by tool-choice guidance.
+    Empty player or suspicion lists are rendered as ``<none />``.
+    """
+    suspicion_map = _as_mapping(current_mindset).get("suspicions", {}) or {}
+
+    opponent_tags = [
+        f'<player id="{escape(pid)}" />' for pid in alive if pid != me
+    ]
+    alive_block = "".join(opponent_tags) or "<none />"
+
+    suspect_tags = []
+    for suspect_id, raw_suspicion in suspicion_map.items():
+        if suspect_id != me:  # the voter's own entry is never listed
+            details = _as_mapping(raw_suspicion)
+            role = details.get("role", "civilian")
+            confidence = _as_float(details.get("confidence", 0.0))
+            reason = trim_text_for_prompt(details.get("reason", ""), limit=120)
+            opening = (
+                f'<suspect id="{escape(suspect_id)}" '
+                f'role="{escape(role)}" '
+                f'confidence="{confidence:.2f}">'
+            )
+            suspect_tags.append(f"{opening}{escape(reason)}</suspect>")
+    suspicions_block = "".join(suspect_tags) or "<none />"
+
+    guidance_text = "".join(
+        [
+            f"It is currently round {current_round}. ",
+            "During rounds 1 or 2, you may prefer the slightly conservative strategy ",
+            "(decide_player_vote_second_best) to stay flexible and harder to read. ",
+            "If you feel one player clearly stands out as more suspicious, or the game has moved into later rounds, ",
+            "choose decide_player_vote for a direct accusation. ",
+            "Call exactly one tool, then return the final target via the VoteDecision structured response.",
+        ]
+    )
+
+    parts = [
+        "<vote_context>",
+        f'<me id="{escape(me)}" />',
+        f'<round index="{current_round}" />',
+        f"<alive>{alive_block}</alive>",
+        f"<suspicions>{suspicions_block}</suspicions>",
+        f"<guidance>{escape(guidance_text)}</guidance>",
+        "</vote_context>",
+    ]
+    return "".join(parts)
diff --git a/src/game/strategy/builders/prompt_builder.py b/src/game/strategy/builders/prompt_builder.py
@@ -95,6 +95,14 @@
 - Avoid brands, numbers, and rare trivia unless essential.
 Reply now with your single-line speech."""
 
+_VOTE_PROMPT_PREFIX = """You are playing "Who is the Spy" and it is time to vote.
+Your secret word is "{my_word}".
+Decide between two voting strategies, and call exactly one tool:
+- `decide_player_vote`: Use when one player feels clearly more suspicious or the game is already in later rounds.
+- `decide_player_vote_second_best`: Use when suspicions are close together, you are still in the first two rounds, or you want to stay less predictable.
+Do not call both tools. Make your internal choice, invoke the tool, then return only the player ID via the VoteDecision structured response.
+(Alive players: {alive_count}, current round: {current_round})"""
+
 
 def determine_clarity(
     role: str, self_confidence: float, current_round: int
@@ -149,3 +157,12 @@ def format_inference_system_prompt(
     return _INFERENCE_PROMPT_PREFIX.format(
         my_word=my_word, player_count=player_count, spy_count=spy_count
     )
+
+
+def format_vote_system_prompt(
+    my_word: str, alive_count: int, current_round: int
+) -> str:
+    """Return the vote system prompt filled with the word and game counters."""
+    # Keyword names must match the placeholders in _VOTE_PROMPT_PREFIX.
+    prompt_fields = {"my_word": my_word, "alive_count": alive_count}
+    return _VOTE_PROMPT_PREFIX.format(current_round=current_round, **prompt_fields)
diff --git a/src/game/strategy/llm_schemas.py b/src/game/strategy/llm_schemas.py
@@ -31,3 +31,9 @@ class PlayerMindsetModel(BaseModel):
 
     self_belief: SelfBeliefModel
     suspicions: Dict[str, SuspicionModel] = Field(default_factory=dict)
+
+
+class VoteDecisionModel(BaseModel):
+    """Structured output model capturing a player's vote target."""
+    # Ellipsis default marks the field as required: there is no fallback target.
+    target: str = Field(..., description="ID of the player to vote for.")