From e16667977b7f6138b2893d05ee52ee46d8d6471c Mon Sep 17 00:00:00 2001 From: leslie Date: Fri, 31 Oct 2025 16:10:51 +0800 Subject: [PATCH 1/3] feat: implement LLM-driven voting strategy with structured tools - Create builders/ and utils/ directories for better organization - Move context_builder.py and prompt_builder.py to builders/ - Move logging_utils.py and text_utils.py to utils/ - Add serialization.py with mindset normalization utilities - Remove duplicate mindset normalization logic from player.py - Update all imports to reflect new module structure --- src/game/agent_tools/vote_tools.py | 76 +++++++++++++++- src/game/nodes/player.py | 72 +++------------- src/game/strategy/__init__.py | 3 +- src/game/strategy/builders/context_builder.py | 57 ++++++++++++ src/game/strategy/builders/prompt_builder.py | 17 ++++ src/game/strategy/llm_schemas.py | 6 ++ src/game/strategy/strategy_core.py | 86 ++++++++++++++++++- tests/test_player_nodes.py | 25 +++--- 8 files changed, 259 insertions(+), 83 deletions(-) diff --git a/src/game/agent_tools/vote_tools.py b/src/game/agent_tools/vote_tools.py index 55d3116..f09fe16 100644 --- a/src/game/agent_tools/vote_tools.py +++ b/src/game/agent_tools/vote_tools.py @@ -1,19 +1,24 @@ from typing import Dict +from langchain.tools import tool -from src.game.state import GameState, alive_players, get_player_context +from src.game.state import GameState, alive_players from src.game.strategy.serialization import normalize_mindset, to_plain_dict def vote_tools(state: GameState): - def decide_player_vote(state: GameState, player_id: str) -> str: + @tool(description="vote for the most suspicion") + def decide_player_vote(player_id: str) -> str: """ Simplified vote decision logic: 1. Determine own role (use opposite if confidence < 50%) 2. Calculate scores for other players based on suspicions - 3. Vote for player with the highest score + 3. Vote for the second most suspicious player to avoid obvious patterns """ - mindset_state = normalize_mindset(updated_mindset) + # Get player's mindset from state + player_private_state = state.get("player_private_states", {}).get(player_id, {}) + player_mindset = player_private_state.get("playerMindset", {}) + mindset_state = normalize_mindset(player_mindset) alive = alive_players(state) # Determine own role: if confidence > 50%, use current role, otherwise use opposite @@ -57,3 +62,66 @@ def decide_player_vote(state: GameState, player_id: str) -> str: raise ValueError("No alive players to vote for.") return voted_target + + @tool(description="vote for the second suspicion") + def decide_player_vote_second_best(player_id: str) -> str: + """ + Vote decision logic that targets the second most suspicious player: + 1. Determine own role (use opposite if confidence < 50%) + 2. Calculate scores for other players based on suspicions + 3. Vote for the second most suspicious player to avoid obvious patterns + """ + + # Get player's mindset from state + player_private_state = state.get("player_private_states", {}).get(player_id, {}) + player_mindset = player_private_state.get("playerMindset", {}) + mindset_state = normalize_mindset(player_mindset) + alive = alive_players(state) + + # Determine own role: if confidence > 50%, use current role, otherwise use opposite + my_self_belief = mindset_state.get("self_belief", {}) + my_role = my_self_belief.get("role", "civilian") + if my_self_belief.get("confidence", 0.0) < 0.5: + # Use opposite role + my_role = "spy" if my_role == "civilian" else "civilian" + + suspicions = mindset_state.get("suspicions", {}) or {} + player_scores: Dict[str, float] = {} + for other_player_id in alive: + if other_player_id == player_id: + continue + + score = 0.0 + suspicion = suspicions.get(other_player_id) + if suspicion: + suspicion_data = to_plain_dict(suspicion, lambda: {}) + suspicion_role = suspicion_data.get("role", "civilian") + suspicion_conf = suspicion_data.get("confidence", 0.0) + if my_role == suspicion_role: + # Positive score means we trust them (same role alignment) + score = suspicion_conf + else: + # Negative score means we distrust them (different role alignment) + score = -suspicion_conf + player_scores[other_player_id] = score + + if player_scores: + # Pick the second-lowest score (second most distrust) to avoid obvious voting patterns + sorted_targets = sorted(player_scores, key=player_scores.get) + if len(sorted_targets) >= 2: + voted_target = sorted_targets[1] # Second most suspicious + else: + voted_target = sorted_targets[0] # Only one target available + else: + # Fallback if no other players to score (e.g., only self is alive) + other_alive = [p for p in alive if p != player_id] + if other_alive: + voted_target = other_alive[0] # Vote for the first other alive player + elif alive: # Only self is alive + voted_target = player_id + else: # Should not happen in a valid game state + raise ValueError("No alive players to vote for.") + + return voted_target + + return [decide_player_vote, decide_player_vote_second_best] diff --git a/src/game/nodes/player.py b/src/game/nodes/player.py index f8e00ef..2f0d48f 100644 --- a/src/game/nodes/player.py +++ b/src/game/nodes/player.py @@ -29,7 +29,6 @@ from ..state import ( GameState, alive_players, - Vote, create_speech_record, Speech, PlayerPrivateState, @@ -40,8 +39,9 @@ from ..strategy import ( llm_update_player_mindset, llm_generate_speech, + llm_decide_vote, ) -from ..strategy.serialization import normalize_mindset, to_plain_dict +from ..strategy.serialization import normalize_mindset def _get_llm_client(): @@ -187,64 +187,6 @@ def player_speech(state: GameState, player_id: str) -> Dict[str, Any]: } -def _decide_player_vote( - state: GameState, - player_id: str, - updated_mindset: Dict[str, Any], -) -> str: - """ - Simplified vote decision logic: - 1. Determine own role (use opposite if confidence < 50%) - 2. Calculate scores for other players based on suspicions - 3. Vote for player with the highest score - """ - - mindset_state = normalize_mindset(updated_mindset) - alive = alive_players(state) - - # Determine own role: if confidence > 50%, use current role, otherwise use opposite - my_self_belief = mindset_state.get("self_belief", {}) - my_role = my_self_belief.get("role", "civilian") - if my_self_belief.get("confidence", 0.0) < 0.5: - # Use opposite role - my_role = "spy" if my_role == "civilian" else "civilian" - - suspicions = mindset_state.get("suspicions", {}) or {} - player_scores: Dict[str, float] = {} - for other_player_id in alive: - if other_player_id == player_id: - continue - - score = 0.0 - suspicion = suspicions.get(other_player_id) - if suspicion: - suspicion_data = to_plain_dict(suspicion, lambda: {}) - suspicion_role = suspicion_data.get("role", "civilian") - suspicion_conf = suspicion_data.get("confidence", 0.0) - if my_role == suspicion_role: - # Positive score means we trust them (same role alignment) - score = suspicion_conf - else: - # Negative score means we distrust them (different role alignment) - score = -suspicion_conf - player_scores[other_player_id] = score - - if player_scores: - # Pick the lowest score (most distrust) to target suspected opponents - voted_target = min(player_scores, key=player_scores.get) - else: - # Fallback if no other players to score (e.g., only self is alive) - other_alive = [p for p in alive if p != player_id] - if other_alive: - voted_target = other_alive[0] # Vote for the first other alive player - elif alive: # Only self is alive - voted_target = player_id - else: # Should not happen in a valid game state - raise ValueError("No alive players to vote for.") - - return voted_target - - def player_vote(state: GameState, player_id: str) -> Dict[str, Any]: """ Player node for casting a vote. @@ -287,8 +229,14 @@ def player_vote(state: GameState, player_id: str) -> Dict[str, Any]: existing_player_mindset=existing_player_mindset, ) updated_mindset_state = normalize_mindset(updated_mindset) - # Decide the player's vote and infer PlayerMindset using LLM - voted_target = _decide_player_vote(state, player_id, updated_mindset_state) + # Decide on a vote target using the LLM with bound voting tools + voted_target = llm_decide_vote( + llm_client=llm_client, + state=state, + me=player_id, + my_word=my_word, + current_mindset=updated_mindset_state, + ) print(f"🗳️ PLAYER VOTE: {player_id} votes for: {voted_target}") print(f" Self belief: {updated_mindset_state.get('self_belief')}") diff --git a/src/game/strategy/__init__.py b/src/game/strategy/__init__.py index 3452d0a..e3fb97e 100644 --- a/src/game/strategy/__init__.py +++ b/src/game/strategy/__init__.py @@ -14,6 +14,7 @@ from src.game.strategy.strategy_core import ( llm_update_player_mindset, llm_generate_speech, + llm_decide_vote, ) -__all__ = ["llm_update_player_mindset", "llm_generate_speech"] +__all__ = ["llm_update_player_mindset", "llm_generate_speech", "llm_decide_vote"] diff --git a/src/game/strategy/builders/context_builder.py b/src/game/strategy/builders/context_builder.py index 378d0c9..bcc657b 100644 --- a/src/game/strategy/builders/context_builder.py +++ b/src/game/strategy/builders/context_builder.py @@ -182,3 +182,60 @@ def build_speech_user_context( "Return exactly one line of speech; avoid emojis, labels, or extra commentary." "" ) + + +def build_vote_user_context( + alive: List[str], + me: str, + current_mindset: PlayerMindset, + current_round: int, +) -> str: + """Build the minimal context required for picking a voting strategy.""" + mindset_dict = _as_mapping(current_mindset) + suspicions = mindset_dict.get("suspicions", {}) or {} + + alive_tags = ( + "".join(f'' for pid in alive if pid != me) + or "" + ) + + suspicion_tags = [] + for pid, suspicion in suspicions.items(): + if pid == me: + continue + suspicion_dict = _as_mapping(suspicion) + suspicion_role = suspicion_dict.get("role", "civilian") + suspicion_conf = _as_float(suspicion_dict.get("confidence", 0.0)) + trimmed_reason = trim_text_for_prompt( + suspicion_dict.get("reason", ""), limit=120 + ) + suspicion_tags.append( + ( + f'' + f"{escape(trimmed_reason)}" + "" + ) + ) + + suspicions_block = "".join(suspicion_tags) or "" + + guidance_text = ( + f"It is currently round {current_round}. " + "During rounds 1 or 2, you may prefer the slightly conservative strategy " + "(decide_player_vote_second_best) to stay flexible and harder to read. " + "If you feel one player clearly stands out as more suspicious, or the game has moved into later rounds, " + "choose decide_player_vote for a direct accusation. " + "Call exactly one tool, then return the final target via the VoteDecision structured response." + ) + + return ( + "" + f'' + f'' + f"{alive_tags}" + f"{suspicions_block}" + f"{escape(guidance_text)}" + "" + ) diff --git a/src/game/strategy/builders/prompt_builder.py b/src/game/strategy/builders/prompt_builder.py index d3cc141..345ad2e 100644 --- a/src/game/strategy/builders/prompt_builder.py +++ b/src/game/strategy/builders/prompt_builder.py @@ -95,6 +95,14 @@ - Avoid brands, numbers, and rare trivia unless essential. Reply now with your single-line speech.""" +_VOTE_PROMPT_PREFIX = """You are playing "Who is the Spy" and it is time to vote. +Your secret word is "{my_word}". +Decide between two voting strategies, and call exactly one tool: +- `decide_player_vote`: Use when one player feels clearly more suspicious or the game is already in later rounds. +- `decide_player_vote_second_best`: Use when suspicions are close together, you are still in the first two rounds, or you want to stay less predictable. +Do not call both tools. Make your internal choice, invoke the tool, then return only the player ID via the VoteDecision structured response. +(Alive players: {alive_count}, current round: {current_round})""" + def determine_clarity( role: str, self_confidence: float, current_round: int @@ -149,3 +157,12 @@ def format_inference_system_prompt( return _INFERENCE_PROMPT_PREFIX.format( my_word=my_word, player_count=player_count, spy_count=spy_count ) + + +def format_vote_system_prompt( + my_word: str, alive_count: int, current_round: int +) -> str: + """Format system prompt for voting decisions.""" + return _VOTE_PROMPT_PREFIX.format( + my_word=my_word, alive_count=alive_count, current_round=current_round + ) diff --git a/src/game/strategy/llm_schemas.py b/src/game/strategy/llm_schemas.py index bc4cbf5..aa6748a 100644 --- a/src/game/strategy/llm_schemas.py +++ b/src/game/strategy/llm_schemas.py @@ -31,3 +31,9 @@ class PlayerMindsetModel(BaseModel): self_belief: SelfBeliefModel suspicions: Dict[str, SuspicionModel] = Field(default_factory=dict) + + +class VoteDecisionModel(BaseModel): + """Structured output model capturing a player's vote target.""" + + target: str = Field(..., description="ID of the player to vote for.") diff --git a/src/game/strategy/strategy_core.py b/src/game/strategy/strategy_core.py index 783204b..018104c 100644 --- a/src/game/strategy/strategy_core.py +++ b/src/game/strategy/strategy_core.py @@ -12,16 +12,29 @@ from langchain.agents import create_agent from langchain.agents.structured_output import ToolStrategy -from src.game.state import Speech, PlayerMindset, SelfBelief +from src.game.agent_tools.vote_tools import vote_tools +from src.game.state import ( + Speech, + PlayerMindset, + SelfBelief, + GameState, + alive_players, +) from src.game.strategy.builders.context_builder import ( build_inference_user_context, build_speech_user_context, + build_vote_user_context, ) from src.game.strategy.utils.logging_utils import log_self_belief_update -from src.game.strategy.llm_schemas import PlayerMindsetModel, SelfBeliefModel +from src.game.strategy.llm_schemas import ( + PlayerMindsetModel, + SelfBeliefModel, + VoteDecisionModel, +) from src.game.strategy.builders.prompt_builder import ( format_inference_system_prompt, format_speech_system_prompt, + format_vote_system_prompt, ) from src.game.strategy.utils.text_utils import sanitize_speech_output @@ -180,3 +193,72 @@ def llm_generate_speech( raw_text = response.content if hasattr(response, "content") else response return sanitize_speech_output(raw_text) + + +def llm_decide_vote( + llm_client: Any, + state: GameState, + me: str, + my_word: str, + current_mindset: PlayerMindset, +) -> str: + """ + Use LLM with voting tools to decide which player to vote for. + + Args: + llm_client: Language model client + state: Current shared game state + me: Current player's ID + my_word: Player's assigned word + current_mindset: Player's latest mindset state + + Returns: + Player ID selected as the vote target + """ + tools = vote_tools(state) + response_format = ToolStrategy( + schema=VoteDecisionModel, + tool_message_content="Vote decision captured.", + ) + + agent = create_agent( + model=llm_client, + tools=tools, + response_format=response_format, + ) + + alive_now = alive_players(state) + system_prompt = format_vote_system_prompt( + my_word=my_word, + alive_count=len(alive_now), + current_round=state.get("current_round", 0), + ) + vote_context = build_vote_user_context( + alive=alive_now, + me=me, + current_mindset=current_mindset, + current_round=state.get("current_round", 0), + ) + + try: + result = agent.invoke( + { + "messages": [ + SystemMessage(content=system_prompt), + HumanMessage(content=vote_context), + ] + } + ) + structured = result.get("structured_response") + if structured: + if not isinstance(structured, VoteDecisionModel): + structured = VoteDecisionModel.model_validate(structured) + return structured.target + except Exception as exc: + logger.exception("LLM vote decision failed: %s", exc) + + # Fallback: choose the first other alive player or self if alone + alternatives = [pid for pid in alive_now if pid != me] + if alternatives: + return alternatives[0] + return me diff --git a/tests/test_player_nodes.py b/tests/test_player_nodes.py index 53e6243..b022ca4 100644 --- a/tests/test_player_nodes.py +++ b/tests/test_player_nodes.py @@ -10,6 +10,7 @@ SelfBelief, Suspicion, ) +from src.game.strategy.serialization import normalize_mindset def make_self_belief(role: str = "civilian", confidence: float = 0.5) -> SelfBelief: @@ -139,9 +140,9 @@ def test_player_speech( @patch("src.game.nodes.player._get_llm_client") @patch("src.game.nodes.player.llm_update_player_mindset") -@patch("langchain.agents.create_agent") +@patch("src.game.nodes.player.llm_decide_vote") def test_player_vote( - mock_create_agent, mock_infer, mock_get_llm, player_id, base_player_state: GameState + mock_decide_vote, mock_infer, mock_get_llm, player_id, base_player_state: GameState ): """Tests the player_vote node with mocked LLM calls.""" # Arrange: Configure mocks @@ -153,17 +154,7 @@ def test_player_vote( suspicions={"c": make_suspicion("spy", 0.8, "very vague")}, ) - # Mock the agent to return a vote for "c" - from langchain_core.messages import AIMessage, HumanMessage - - mock_agent = MagicMock() - mock_agent.invoke.return_value = { - "messages": [ - HumanMessage(content="test context"), - AIMessage(content="I vote for c"), - ] - } - mock_create_agent.return_value = mock_agent + mock_decide_vote.return_value = "c" voting_state = base_player_state | { "game_phase": "voting", @@ -184,9 +175,15 @@ def test_player_vote( assert private_update["playerMindset"]["self_belief"]["role"] == "civilian" # Verify mocks - # LLM client is called once for the agent mock_get_llm.assert_called_once() mock_infer.assert_called_once() + mock_decide_vote.assert_called_once_with( + llm_client=mock_llm_client, + state=voting_state, + me=player_id, + my_word=base_player_state["player_private_states"][player_id]["assigned_word"], + current_mindset=normalize_mindset(mock_infer.return_value), + ) def test_player_speech_not_in_speaking_phase(base_player_state: GameState): From 9c7fab508c72c47c6b5c327522a3d2bd5c5c6692 Mon Sep 17 00:00:00 2001 From: leslie Date: Fri, 31 Oct 2025 16:27:12 +0800 Subject: [PATCH 2/3] feat: implement LLM-driven voting strategy with structured tools - Create builders/ and utils/ directories for better organization - Move context_builder.py and prompt_builder.py to builders/ - Move logging_utils.py and text_utils.py to utils/ - Add serialization.py with mindset normalization utilities - Remove duplicate mindset normalization logic from player.py - Update all imports to reflect new module structure --- src/game/agent_tools/vote_tools.py | 38 +++++++++++++++++++++++------- src/game/strategy/strategy_core.py | 3 ++- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/game/agent_tools/vote_tools.py b/src/game/agent_tools/vote_tools.py index f09fe16..b6d0388 100644 --- a/src/game/agent_tools/vote_tools.py +++ b/src/game/agent_tools/vote_tools.py @@ -1,11 +1,35 @@ -from typing import Dict +from typing import Dict, Optional from langchain.tools import tool -from src.game.state import GameState, alive_players +from src.game.state import GameState, PlayerMindset, alive_players from src.game.strategy.serialization import normalize_mindset, to_plain_dict -def vote_tools(state: GameState): +def vote_tools( + state: GameState, mindset_overrides: Optional[Dict[str, PlayerMindset]] = None +): + """ + Bind voting tools against the shared state. + + The optional mindset_overrides allows callers (e.g., llm_decide_vote) to provide + freshly inferred player mindsets before the reducer persists them back into state. + This keeps the heuristic scoring in the tools aligned with the LLM's most recent + analysis and avoids voting on stale beliefs. + """ + mindset_overrides = mindset_overrides or {} + + def _get_player_mindset(player_id: str): + """ + Resolve the latest mindset for the player from overrides or shared state. + Normalization keeps downstream logic consistent. + """ + if player_id in mindset_overrides: + return normalize_mindset(mindset_overrides[player_id]) + + player_private_state = state.get("player_private_states", {}).get(player_id, {}) + player_mindset = player_private_state.get("playerMindset", {}) + return normalize_mindset(player_mindset) + @tool(description="vote for the most suspicion") def decide_player_vote(player_id: str) -> str: """ @@ -16,9 +40,7 @@ def decide_player_vote(player_id: str) -> str: """ # Get player's mindset from state - player_private_state = state.get("player_private_states", {}).get(player_id, {}) - player_mindset = player_private_state.get("playerMindset", {}) - mindset_state = normalize_mindset(player_mindset) + mindset_state = _get_player_mindset(player_id) alive = alive_players(state) # Determine own role: if confidence > 50%, use current role, otherwise use opposite @@ -73,9 +95,7 @@ def decide_player_vote_second_best(player_id: str) -> str: """ # Get player's mindset from state - player_private_state = state.get("player_private_states", {}).get(player_id, {}) - player_mindset = player_private_state.get("playerMindset", {}) - mindset_state = normalize_mindset(player_mindset) + mindset_state = _get_player_mindset(player_id) alive = alive_players(state) # Determine own role: if confidence > 50%, use current role, otherwise use opposite diff --git a/src/game/strategy/strategy_core.py b/src/game/strategy/strategy_core.py index 018104c..9da8ed9 100644 --- a/src/game/strategy/strategy_core.py +++ b/src/game/strategy/strategy_core.py @@ -215,7 +215,8 @@ def llm_decide_vote( Returns: Player ID selected as the vote target """ - tools = vote_tools(state) + # Pass the freshly inferred mindset so vote heuristics reflect the latest suspicions. + tools = vote_tools(state, mindset_overrides={me: current_mindset}) response_format = ToolStrategy( schema=VoteDecisionModel, tool_message_content="Vote decision captured.", From 3850d40213df39dd85e9396155638ee73a61b885 Mon Sep 17 00:00:00 2001 From: leslie Date: Fri, 31 Oct 2025 16:49:33 +0800 Subject: [PATCH 3/3] feat: implement LLM-driven voting strategy with structured tools - Create builders/ and utils/ directories for better organization - Move context_builder.py and prompt_builder.py to builders/ - Move logging_utils.py and text_utils.py to utils/ - Add serialization.py with mindset normalization utilities - Remove duplicate mindset normalization logic from player.py - Update all imports to reflect new module structure --- src/game/agent_tools/vote_tools.py | 137 ++++++++++------------------- src/game/strategy/strategy_core.py | 6 +- 2 files changed, 52 insertions(+), 91 deletions(-) diff --git a/src/game/agent_tools/vote_tools.py b/src/game/agent_tools/vote_tools.py index b6d0388..02f234f 100644 --- a/src/game/agent_tools/vote_tools.py +++ b/src/game/agent_tools/vote_tools.py @@ -6,7 +6,9 @@ def vote_tools( - state: GameState, mindset_overrides: Optional[Dict[str, PlayerMindset]] = None + state: GameState, + bound_player_id: str, + mindset_overrides: Optional[Dict[str, PlayerMindset]] = None, ): """ Bind voting tools against the shared state. @@ -15,45 +17,37 @@ def vote_tools( freshly inferred player mindsets before the reducer persists them back into state. This keeps the heuristic scoring in the tools aligned with the LLM's most recent analysis and avoids voting on stale beliefs. + + The returned tools are zero-argument and always operate on the bound player, so + downstream LLMs cannot accidentally vote using another player's mindset. """ mindset_overrides = mindset_overrides or {} - def _get_player_mindset(player_id: str): + def _resolve_mindset() -> PlayerMindset: """ - Resolve the latest mindset for the player from overrides or shared state. + Resolve the latest mindset for the bound player from overrides or shared state. Normalization keeps downstream logic consistent. """ - if player_id in mindset_overrides: - return normalize_mindset(mindset_overrides[player_id]) + if bound_player_id in mindset_overrides: + return normalize_mindset(mindset_overrides[bound_player_id]) - player_private_state = state.get("player_private_states", {}).get(player_id, {}) + player_private_state = state.get("player_private_states", {}).get( + bound_player_id, {} + ) player_mindset = player_private_state.get("playerMindset", {}) return normalize_mindset(player_mindset) - @tool(description="vote for the most suspicion") - def decide_player_vote(player_id: str) -> str: - """ - Simplified vote decision logic: - 1. Determine own role (use opposite if confidence < 50%) - 2. Calculate scores for other players based on suspicions - 3. Vote for the second most suspicious player to avoid obvious patterns - """ - - # Get player's mindset from state - mindset_state = _get_player_mindset(player_id) + def _score_players(mindset_state: PlayerMindset) -> Dict[str, float]: alive = alive_players(state) - - # Determine own role: if confidence > 50%, use current role, otherwise use opposite my_self_belief = mindset_state.get("self_belief", {}) my_role = my_self_belief.get("role", "civilian") if my_self_belief.get("confidence", 0.0) < 0.5: - # Use opposite role my_role = "spy" if my_role == "civilian" else "civilian" suspicions = mindset_state.get("suspicions", {}) or {} player_scores: Dict[str, float] = {} for other_player_id in alive: - if other_player_id == player_id: + if other_player_id == bound_player_id: continue score = 0.0 @@ -62,86 +56,49 @@ def decide_player_vote(player_id: str) -> str: suspicion_data = to_plain_dict(suspicion, lambda: {}) suspicion_role = suspicion_data.get("role", "civilian") suspicion_conf = suspicion_data.get("confidence", 0.0) - if my_role == suspicion_role: - # Positive score means we trust them (same role alignment) - score = suspicion_conf - else: - # Negative score means we distrust them (different role alignment) - score = -suspicion_conf + score = suspicion_conf if my_role == suspicion_role else -suspicion_conf player_scores[other_player_id] = score + return player_scores - if player_scores: - # Pick the lowest score (most distrust) to target suspected opponents - voted_target = min(player_scores, key=player_scores.get) - else: - # Fallback if no other players to score (e.g., only self is alive) - other_alive = [p for p in alive if p != player_id] - if other_alive: - voted_target = other_alive[0] # Vote for the first other alive player - elif alive: # Only self is alive - voted_target = player_id - else: # Should not happen in a valid game state - raise ValueError("No alive players to vote for.") - - return voted_target - - @tool(description="vote for the second suspicion") - def decide_player_vote_second_best(player_id: str) -> str: + @tool(description="vote for the most suspicion") + def decide_player_vote() -> str: """ - Vote decision logic that targets the second most suspicious player: - 1. Determine own role (use opposite if confidence < 50%) - 2. Calculate scores for other players based on suspicions - 3. Vote for the second most suspicious player to avoid obvious patterns + Simplified vote decision logic (player id pre-bound). """ - # Get player's mindset from state - mindset_state = _get_player_mindset(player_id) + mindset_state = _resolve_mindset() alive = alive_players(state) + player_scores = _score_players(mindset_state) - # Determine own role: if confidence > 50%, use current role, otherwise use opposite - my_self_belief = mindset_state.get("self_belief", {}) - my_role = my_self_belief.get("role", "civilian") - if my_self_belief.get("confidence", 0.0) < 0.5: - # Use opposite role - my_role = "spy" if my_role == "civilian" else "civilian" + if player_scores: + return min(player_scores, key=player_scores.get) - suspicions = mindset_state.get("suspicions", {}) or {} - player_scores: Dict[str, float] = {} - for other_player_id in alive: - if other_player_id == player_id: - continue + other_alive = [p for p in alive if p != bound_player_id] + if other_alive: + return other_alive[0] + if alive: + return bound_player_id + raise ValueError("No alive players to vote for.") - score = 0.0 - suspicion = suspicions.get(other_player_id) - if suspicion: - suspicion_data = to_plain_dict(suspicion, lambda: {}) - suspicion_role = suspicion_data.get("role", "civilian") - suspicion_conf = suspicion_data.get("confidence", 0.0) - if my_role == suspicion_role: - # Positive score means we trust them (same role alignment) - score = suspicion_conf - else: - # Negative score means we distrust them (different role alignment) - score = -suspicion_conf - player_scores[other_player_id] = score + @tool(description="vote for the second suspicion") + def decide_player_vote_second_best() -> str: + """ + Vote decision logic targeting the second most suspicious player (player id pre-bound). + """ + + mindset_state = _resolve_mindset() + alive = alive_players(state) + player_scores = _score_players(mindset_state) if player_scores: - # Pick the second-lowest score (second most distrust) to avoid obvious voting patterns sorted_targets = sorted(player_scores, key=player_scores.get) - if len(sorted_targets) >= 2: - voted_target = sorted_targets[1] # Second most suspicious - else: - voted_target = sorted_targets[0] # Only one target available - else: - # Fallback if no other players to score (e.g., only self is alive) - other_alive = [p for p in alive if p != player_id] - if other_alive: - voted_target = other_alive[0] # Vote for the first other alive player - elif alive: # Only self is alive - voted_target = player_id - else: # Should not happen in a valid game state - raise ValueError("No alive players to vote for.") - - return voted_target + return sorted_targets[1] if len(sorted_targets) >= 2 else sorted_targets[0] + + other_alive = [p for p in alive if p != bound_player_id] + if other_alive: + return other_alive[0] + if alive: + return bound_player_id + raise ValueError("No alive players to vote for.") return [decide_player_vote, decide_player_vote_second_best] diff --git a/src/game/strategy/strategy_core.py b/src/game/strategy/strategy_core.py index 9da8ed9..4608b88 100644 --- a/src/game/strategy/strategy_core.py +++ b/src/game/strategy/strategy_core.py @@ -216,7 +216,11 @@ def llm_decide_vote( Player ID selected as the vote target """ # Pass the freshly inferred mindset so vote heuristics reflect the latest suspicions. - tools = vote_tools(state, mindset_overrides={me: current_mindset}) + tools = vote_tools( + state, + me, + mindset_overrides={me: current_mindset}, + ) response_format = ToolStrategy( schema=VoteDecisionModel, tool_message_content="Vote decision captured.",