diff --git a/.env.template b/.env.template index c35b830..a5327f3 100644 --- a/.env.template +++ b/.env.template @@ -1,5 +1,8 @@ LLM_PROVIDER=deepseek +# Logging +LIEGRAPH_LOG_LEVEL=INFO + # Core API keys OPENAI_API_KEY=your-key LANGSMITH_TRACING=true diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..b68518f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,209 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +LieGraph is an AI-powered implementation of the social deduction game "Who Is Spy" built with LangGraph. It features autonomous AI agents that use LLM reasoning to find the spy among them. + +- **Main Language**: Python 3.12+ +- **Core Framework**: LangGraph for workflow orchestration +- **AI Integration**: LangChain with structured LLM outputs +- **Frontend**: React 19.2 with LangGraph SDK + +## Development Commands + +### Initial Setup +```bash +# Install Python dependencies (uses uv package manager) +uv sync + +# Create .env file from template +cp .env.template .env +# Edit .env with your API keys (OpenAI, DeepSeek, or OpenRouter) + +# Install frontend dependencies +cd ui-web/frontend && npm install +``` + +### Running the Application +```bash +# Terminal 1: Start LangGraph backend (from project root) +langgraph dev --config langgraph.json --port 8124 --allow-blocking + +# Terminal 2: Start React frontend (from ui-web/frontend) +npm start + +# Access UI at: http://localhost:3000 +``` + +### Testing +```bash +# Run all tests +python -m pytest tests/ -v + +# Run specific test file +python -m pytest tests/test_game_rules.py -v + +# Run specific test +python -m pytest tests/test_game_rules.py::test_assign_roles -v +``` + +### Linting/Formatting +```bash +# Format Python code +black src/ tests/ +``` + +## Architecture Overview + +### LangGraph Workflow Flow +``` +START → host_setup → host_stage_switch + ↓ + speaking phase (sequential player_speech 
nodes) + ↓ + voting phase (concurrent player_vote nodes) + ↓ + check_votes_and_transition + ↓ + host_result → (continue or END) +``` + +### Key Architectural Patterns + +1. **State Management**: TypedDict-based GameState with private state separation + - `GameState`: Shared public state (speech history, votes, game status) + - `HostState`: Private host mindset (invariant after setup) + - `PlayerState`: Private player mindsets (evolving beliefs about identities) + +2. **Concurrent Voting**: Multiple players vote in parallel using LangGraph reducers + - Reducer: `merge_votes` handles timestamp-based conflict resolution + - Each vote node is independent but writes to shared state + +3. **AI Strategy System** (`src/game/strategy/`): + - `strategy_core.py`: Main LLM coordination + - `builders/`: Context and prompt builders for speech/voting/inference + - `llm_schemas.py`: Pydantic models for structured LLM outputs + +4. **Agent Tools** (`src/game/agent_tools/`): + - `speech_tools.py`: Structured reasoning for speech generation + - `vote_tools.py`: Evidence-based voting decisions + - Uses TrustCall for reliable structured output extraction + +5. **Reducers**: State conflict resolution functions + - `merge_private_states`: Combine incremental mindset updates + - `merge_votes`: Handle concurrent vote submissions + - Use `add` for append-only collections (speeches, votes) + +6. **Conditional Routing**: `src/game/graph.py` uses dynamic edge routing + - `host_stage_switch`: Routes between speaking and voting phases + - Checks `current_speaker` and vote readiness + - Returns edge names for graph transitions + +7. 
**PyDict Structured Output**: Uses dict exports for serialization + - GameState uses `PyDict` export methods for LangGraph checkpoints + - Ensure all Pydantic models in state have proper serialization + +8. **Private State Updates**: Player/host nodes return private state deltas + - Add "_" prefix: `{player_name: PlayerState}` → `{"_" + player_name: PlayerState}` + - Host returns `f"_{HOST_NAME}": HostState` + - Graph middleware merges private states using configured reducers + +9. **Channel Configuration**: Define channels for each node in graph + - Player nodes: `"_" + player_name` channels + - Host node: `"_" + HOST_NAME` channel + - Channels must match reducer keys + +## Configuration + +**LLM Configuration** (`.env`): +- Supports OpenAI, DeepSeek, and OpenRouter providers +- Set provider-specific API keys and model names +- Example models: `gpt-4o-mini`, `deepseek-chat`, `anthropic/claude-sonnet-4.5` + +**Game Configuration** (`config.yaml`): +- `player_count`: Number of players (3-8) +- `vocabulary`: Word pairs for civilian/spy assignments +- `player_names`: Pool of available player names +- `metrics.enabled`: Toggle metrics collection on/off + +## Testing Strategy + +**Test Coverage** (50 tests across 6 modules): +- `test_game_rules.py`: Core game logic, role assignment, win conditions +- `test_state.py`: State management and reducer functions +- `test_host_nodes.py`: Host node behavior and phase transitions +- `test_player_nodes.py`: Player speech and voting nodes +- `test_llm_strategy.py`: AI strategy builders and prompt generation +- `agents/test_speech_tools.py`: LLM tool behavior and structured outputs + +**Key Testing Patterns**: +- Use fixtures for common GameState configurations +- Mock LLM responses for deterministic AI behavior tests +- Test both sequential (speaking) and concurrent (voting) nodes +- Verify private state updates and mindset evolution + +## Metrics and Quality Tracking + +**Built-in Metrics** (`src/game/metrics.py`): +- Win balance 
tracking (civilian vs spy win rates) +- Identification accuracy (role inference quality) +- Speech diversity (lexical variety measurement) +- Auto-saves to `logs/metrics/{game_id}.json` +- Overall summary at `logs/metrics/overall.json` + +**Quality Scoring**: +```python +from src.game.metrics import metrics_collector + +# Get quality score +deterministic_score = metrics_collector.compute_quality_score() + +# Or use LLM-based evaluation +llm_score = metrics_collector.compute_quality_score(method="llm", llm=client) +``` + +**Metrics History**: Track prompts and configurations in `docs/metrics-history.md` + +## Common Development Tasks + +### Adding a New Game Phase +1. Add node function to `src/game/nodes/` +2. Register node in graph with `graph.add_node(node_name, node_function)` +3. Add conditional routing logic in transition nodes +4. Update state types if adding new fields + +### Modifying AI Strategy +1. Update prompt builders in `src/game/strategy/builders/` +2. Modify Pydantic schemas in `src/game/strategy/llm_schemas.py` if changing output structure +3. Adjust strategy coordination in `src/game/strategy/strategy_core.py` +4. Test with `pytest tests/test_llm_strategy.py` + +### Debugging Game Flow +1. Enable LangSmith tracing: `LANGSMITH_TRACING=true` in `.env` +2. Check LangGraph Studio at `http://localhost:8124` (the port passed to `langgraph dev` above) +3. Review game logs in `logs/metrics/` +4. Use the module logger (`get_logger(__name__)` from `src/game/logger.py`) instead of `print()` in nodes to debug state; set the `LIEGRAPH_LOG_LEVEL` environment variable to `DEBUG` to see `logger.debug()` output + +### Adding New Metrics +1. Add metric collection hooks in `src/game/metrics.py` +2. Update quality scoring computation +3. Add metric tests in `tests/test_metrics_history.py` +4. 
Document in `docs/metrics-history.md` + +### Working with Player-Specific Hooks (callbacks) for Metrics +When implementing player-specific behaviors that need to track metrics per player: +- Use the `metrics_collector.on_player_speech(player_name, is_spy, round_num, speech)` hook within player speech nodes to collect speech diversity metrics +- Use the `metrics_collector.on_vote_cast()` hook in player vote nodes to collect voting pattern data. +- Metrics collection respects the `metrics.enabled` flag in `config.yaml` and will be no-ops when metrics are disabled. + +## LangGraph Development Notes + +**Checkpointing**: State is automatically checkpointed between nodes - you don't need to manually persist + +**State Mutation**: Always return new state dicts rather than mutating existing state in nodes + +**Error Handling**: LangGraph nodes should handle exceptions gracefully to prevent workflow crashes + +**See**: [ARCHITECTURE.md](ARCHITECTURE.md) for detailed system design and [README.md](README.md) for project overview \ No newline at end of file diff --git a/src/game/agent_tools/speech_tools.py b/src/game/agent_tools/speech_tools.py index 2604faa..31ef50e 100644 --- a/src/game/agent_tools/speech_tools.py +++ b/src/game/agent_tools/speech_tools.py @@ -13,6 +13,7 @@ from langchain.tools import tool +from src.game.logger import get_logger from src.game.state import GameState, PlayerMindset, alive_players from src.game.strategy.builders.prompt_builder import determine_clarity from src.game.strategy.serialization import normalize_mindset, to_plain_dict @@ -20,6 +21,8 @@ SelfBeliefDict = Dict[str, Any] SuspicionDict = Dict[str, Any] +logger = get_logger(__name__) + def speech_planning_tools( state: GameState, @@ -157,12 +160,12 @@ def plan_speech() -> Dict[str, Any]: "top_suspicions": suspects_payload, } - print( - "🛠️ SPEECH PLAN TOOL:", - f"player={bound_player_id}", - f"round={current_round}", - f"clarity={clarity_code}", - f"goal={goal.get('label')}", + logger.info( 
+ "Speech plan tool executed for %s round %d clarity=%s goal=%s", + bound_player_id, + current_round, + clarity_code, + goal.get("label"), ) return plan diff --git a/src/game/config.py b/src/game/config.py index 9c782b9..7ec0f70 100644 --- a/src/game/config.py +++ b/src/game/config.py @@ -22,6 +22,8 @@ import yaml from pydantic import BaseModel, Field, ValidationError, model_validator +from .logger import get_logger + class ConfigurationError(RuntimeError): """Raised when configuration cannot be loaded or validated.""" @@ -256,12 +258,13 @@ def validate_config(self) -> bool: raise ValueError("Name generation failed") return True except Exception as exc: - print(f"Configuration validation failed: {exc}") + logger.error("Configuration validation failed: %s", exc) return False # Global configuration instance _config_instance: GameConfig | None = None +logger = get_logger(__name__) def get_config(config_path: str | Path | None = None) -> GameConfig: diff --git a/src/game/graph.py b/src/game/graph.py index e9d3cd0..1c12ad6 100644 --- a/src/game/graph.py +++ b/src/game/graph.py @@ -26,6 +26,7 @@ from langgraph.constants import START from langgraph.graph import END, StateGraph +from src.game.logger import get_logger from src.game.nodes.host import host_setup, host_stage_switch, host_result from src.game.nodes.player import player_speech, player_vote from src.game.nodes.transition import check_votes_and_transition @@ -33,6 +34,8 @@ from src.tools import save_graph_image from src.game.config import get_config +logger = get_logger(__name__) + def route_from_stage(state: GameState) -> list[str] | str: """Route to appropriate nodes based on current game phase. 
@@ -170,7 +173,7 @@ def build_workflow(config=None): # Generate player names based on configuration players = game_config.generate_player_names() - print(f"🎮 Building workflow with {len(players)} players: {players}") + logger.info("Building workflow with %d players: %s", len(players), players) return build_workflow_with_players(players) @@ -183,10 +186,10 @@ def main(): # Generate player names based on configuration players = config.generate_player_names() - print(f"Game Configuration:") - print(f" Player count: {config.player_count}") - print(f" Players: {players}") - print(f" Vocabulary pairs: {len(config.vocabulary)}") + logger.info("Game configuration loaded") + logger.info("Player count: %d", config.player_count) + logger.debug("Players: %s", players) + logger.info("Vocabulary pairs: %d", len(config.vocabulary)) # Build and run the workflow app = build_workflow_with_players(players) @@ -206,7 +209,7 @@ async def _run_workflow(): return await app.ainvoke(initial_state, config=langgraph_config) result = asyncio.run(_run_workflow()) - print(result) + logger.info("Workflow result: %s", result) if __name__ == "__main__": diff --git a/src/game/logger.py b/src/game/logger.py new file mode 100644 index 0000000..8e80b39 --- /dev/null +++ b/src/game/logger.py @@ -0,0 +1,36 @@ +""" +Centralized logging utilities for the LieGraph game engine. + +Ensures every module uses a consistent logger configuration, while +still allowing runtime control via the ``LIEGRAPH_LOG_LEVEL`` env var. 
+""" + +from __future__ import annotations + +import logging +import os +from typing import Optional + +_IS_CONFIGURED = False + + +def _configure_logging() -> None: + """Configure the standard logging module once.""" + global _IS_CONFIGURED + if _IS_CONFIGURED: + return + + level_name = os.getenv("LIEGRAPH_LOG_LEVEL", "INFO").upper() + level = getattr(logging, level_name, logging.INFO) + + logging.basicConfig( + level=level, + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + ) + _IS_CONFIGURED = True + + +def get_logger(name: Optional[str] = None) -> logging.Logger: + """Return a configured logger scoped to the provided name.""" + _configure_logging() + return logging.getLogger(name or "liegraph") diff --git a/src/game/metrics.py b/src/game/metrics.py index aeaa54a..c3b8f26 100644 --- a/src/game/metrics.py +++ b/src/game/metrics.py @@ -29,6 +29,7 @@ from pathlib import Path from threading import Lock +from .logger import get_logger from .state import PlayerMindset @@ -694,6 +695,7 @@ def aggregate_from_summaries( # Global collector used by the rest of the codebase. 
metrics_collector = GameMetrics() +logger = get_logger(__name__) MULTILINGUAL_VOCABULARY_BATCH: List[tuple[str, tuple[str, str]]] = [ ("english", ("lighthouse", "windmill")), @@ -722,8 +724,12 @@ def _run_single_multilingual_game( app = build_workflow_with_players(player_list) game_id = f"metrics-{idx}-{language_tag}" - print( - f"\nStarting game {idx}/5 ({language_tag}) with civilian='{civilian_word}' and spy='{spy_word}'" + logger.info( + "Starting metrics game %d/5 (%s) civilian='%s' spy='%s'", + idx, + language_tag, + civilian_word, + spy_word, ) initial_state = { @@ -794,10 +800,13 @@ def run_multilingual_metrics_batch( overall_metrics = metrics_collector.get_overall_metrics() quality_score = metrics_collector.compute_quality_score() - print("\nOverall metrics:") - print(json.dumps(overall_metrics, ensure_ascii=False, indent=2)) - print("\nQuality score:") - print(json.dumps(quality_score, ensure_ascii=False, indent=2)) + logger.info( + "Overall metrics:\n%s", + json.dumps(overall_metrics, ensure_ascii=False, indent=2), + ) + logger.info( + "Quality score:\n%s", json.dumps(quality_score, ensure_ascii=False, indent=2) + ) result = {"metrics": overall_metrics, "quality_score": quality_score} metrics_collector.set_enabled(previous_state) @@ -913,10 +922,14 @@ def main(): metrics_dir=metrics_dir, output_path=output_path, ) - print("\nAggregated historical metrics:") - print(json.dumps(result["metrics"], ensure_ascii=False, indent=2)) - print("\nQuality score:") - print(json.dumps(result["quality_score"], ensure_ascii=False, indent=2)) + logger.info( + "Aggregated historical metrics:\n%s", + json.dumps(result["metrics"], ensure_ascii=False, indent=2), + ) + logger.info( + "Quality score:\n%s", + json.dumps(result["quality_score"], ensure_ascii=False, indent=2), + ) return result # Default: run multilingual batch diff --git a/src/game/nodes/host.py b/src/game/nodes/host.py index 9ad2051..243b3ad 100644 --- a/src/game/nodes/host.py +++ b/src/game/nodes/host.py @@ 
-8,8 +8,11 @@ calculate_eliminated_player, determine_winner, ) +from ..logger import get_logger from .helpers import get_assigned_word +logger = get_logger(__name__) + def host_setup(state: GameState) -> Dict[str, Any]: """Initializes the game, assigning roles and words.""" @@ -29,12 +32,11 @@ def host_setup(state: GameState) -> Dict[str, Any]: player_list, host_private_state=host_private_state ) - # Debug output - print(f"🎮 Host: Initializing game, {len(player_list)} players") - print(f" Players: {player_list}") + logger.info("Host initializing game with %d players", len(player_list)) + logger.debug("Initial player list: %s", player_list) for player_id, private_state in assignments["player_private_states"].items(): assigned_word = get_assigned_word(private_state) - print(f" Player {player_id}: Assigned word = {assigned_word}") + logger.debug("Assigned word for %s: %s", player_id, assigned_word) if metrics_collector.enabled: metrics_collector.on_game_start( @@ -68,13 +70,13 @@ def host_stage_switch(state: GameState) -> Dict[str, Any]: next_player = next_alive_player(state) if next_player: # There's another player to speak - print(f"🎮 Stage switch: Next speaker is {next_player}") + logger.info("Stage switch selecting next speaker: %s", next_player) return ( {} ) # No state update needed - next_alive_player is computed dynamically else: # All players have spoken, transition to voting - print(f"🎮 Stage switch: All players have spoken, starting voting") + logger.info("Stage switch detected all speeches complete; starting voting") updates = {"game_phase": "voting", "current_votes": {}} updates["phase_id"] = generate_phase_id( state | updates @@ -91,10 +93,12 @@ def host_result(state: GameState) -> Dict[str, Any]: """ eliminated_player = calculate_eliminated_player(state) - print( - f"🎮 Host Round {state['current_round']} - Voted out player: {eliminated_player}" + logger.info( + "Host round %d voted out player: %s", + state["current_round"], + eliminated_player, ) - 
print(f" Current votes: {state.get('current_votes', {})}") + logger.debug("Current votes: %s", state.get("current_votes", {})) # Create a temporary state to check for a winner after the potential elimination temp_state = cast(GameState, state.copy()) @@ -107,7 +111,7 @@ def host_result(state: GameState) -> Dict[str, Any]: winner = determine_winner(temp_state, state["host_private_state"]) if winner: - print(f"🎮 Host announces result: Game over! Winner: {winner}") + logger.info("Winner determined: %s", winner) if metrics_collector.enabled: metrics_collector.on_game_end( game_id=state.get("game_id"), @@ -119,7 +123,7 @@ def host_result(state: GameState) -> Dict[str, Any]: return update # No winner, advance to the next round - print(f"🎮 Game not over: Round {state['current_round'] + 1}") + logger.info("No winner; advancing to round %d", state["current_round"] + 1) return _prepare_next_round(state, eliminated_player) @@ -136,7 +140,7 @@ def _prepare_next_round(state: GameState, eliminated: str | None) -> Dict[str, A if eliminated: updates["eliminated_players"] = [eliminated] - print(f"🎮 ADVANCE ROUND: Moving to round {updates['current_round']}") + logger.info("Advancing to round %d", updates["current_round"]) if eliminated: - print(f" Voted out player: {eliminated}") + logger.info("Eliminated player carried forward: %s", eliminated) return updates diff --git a/src/game/nodes/player.py b/src/game/nodes/player.py index 3874b23..00a5c86 100644 --- a/src/game/nodes/player.py +++ b/src/game/nodes/player.py @@ -43,12 +43,15 @@ plan_player_speech, ) from ..strategy.serialization import normalize_mindset +from ..logger import get_logger from .helpers import ( get_assigned_word, get_private_state, get_normalized_player_mindset, ) +logger = get_logger(__name__) + def _get_llm_client(): """Create and return an LLM client instance. 
@@ -110,10 +113,10 @@ async def player_speech(state: GameState, player_id: str) -> Dict[str, Any]: # Get player-specific context _, existing_private_state, my_word = _get_player_context(state, player_id) - print( - f"🎤 PLAYER SPEECH: {player_id} is generating speech for round {state['current_round']}" + logger.info( + "Player %s generating speech for round %d", player_id, state["current_round"] ) - print(f" Assigned word: {my_word}") + logger.debug("Player %s assigned word: %s", player_id, my_word) # Generate playerMindset using LLM config = get_config() @@ -136,8 +139,10 @@ async def player_speech(state: GameState, player_id: str) -> Dict[str, Any]: try: speech_plan = plan_player_speech(state, player_id, updated_mindset_state) except Exception as exc: - print( - f"⚠️ SPEECH PLAN TOOL failed for {player_id}: {exc}, falling back without plan." + logger.warning( + "Speech planning tool failed for %s; proceeding without plan: %s", + player_id, + exc, ) speech_plan = None @@ -154,9 +159,9 @@ async def player_speech(state: GameState, player_id: str) -> Dict[str, Any]: speech_plan=speech_plan, ) - print(f'🎤 PLAYER SPEECH: {player_id} says: "{new_speech_text}"') - print(f" Self belief: {updated_mindset_state.get('self_belief')}") - print(f" Suspicions: {updated_mindset_state.get('suspicions')}") + logger.info('Player %s speech: "%s"', player_id, new_speech_text) + logger.debug("Self belief: %s", updated_mindset_state.get("self_belief")) + logger.debug("Suspicions: %s", updated_mindset_state.get("suspicions")) # Prepare the state updates based on the generated speech and PlayerMindset speech_record: Speech = create_speech_record(state, player_id, new_speech_text) @@ -201,10 +206,10 @@ async def player_vote(state: GameState, player_id: str) -> Dict[str, Any]: # Get player-specific context for voting _, existing_private_state, my_word = _get_player_context(state, player_id) - print( - f"🗳️ PLAYER VOTE: {player_id} is deciding vote for round {state['current_round']}" + 
logger.info( + "Player %s deciding vote for round %d", player_id, state["current_round"] ) - print(f" Assigned word: {my_word}") + logger.debug("Player %s assigned word: %s", player_id, my_word) # Generate playerMindset using LLM config = get_config() @@ -231,9 +236,9 @@ async def player_vote(state: GameState, player_id: str) -> Dict[str, Any]: current_mindset=updated_mindset_state, ) - print(f"🗳️ PLAYER VOTE: {player_id} votes for: {voted_target}") - print(f" Self belief: {updated_mindset_state.get('self_belief')}") - print(f" Suspicions: {updated_mindset_state.get('suspicions')}") + logger.info("Player %s votes for %s", player_id, voted_target) + logger.debug("Self belief: %s", updated_mindset_state.get("self_belief")) + logger.debug("Suspicions: %s", updated_mindset_state.get("suspicions")) # Prepare the state updates based on the decided vote and PlayerMindset ts = int(datetime.now().timestamp() * 1000) diff --git a/src/game/rules.py b/src/game/rules.py index 3a9ffd9..e7e474b 100644 --- a/src/game/rules.py +++ b/src/game/rules.py @@ -23,6 +23,7 @@ from typing import List, Dict, Any from .config import get_config, calculate_spy_count +from .logger import get_logger from .state import ( GameState, alive_players, @@ -32,6 +33,8 @@ SelfBelief, ) +logger = get_logger(__name__) + def assign_roles_and_words( players: List[str], @@ -60,8 +63,10 @@ def assign_roles_and_words( ): civilian_word = host_private_state["civilian_word"] spy_word = host_private_state["spy_word"] - print( - f"🎮 Using custom words from host_private_state: civilian='{civilian_word}', spy='{spy_word}'" + logger.info( + "Using custom words from host_private_state civilian='%s' spy='%s'", + civilian_word, + spy_word, ) elif word_list: civilian_word, spy_word = random.choice(word_list) @@ -127,7 +132,9 @@ def calculate_eliminated_player(state: GameState) -> str | None: if tied_players: eliminated = random.choice(tied_players) - print(f"🎮 Break tie:from {tied_players} chose {eliminated}") + 
logger.info( + "Tie detected among %s; randomly eliminated %s", tied_players, eliminated + ) return eliminated return None diff --git a/src/tools/graph_viz.py b/src/tools/graph_viz.py index 9b0b656..b339ec5 100644 --- a/src/tools/graph_viz.py +++ b/src/tools/graph_viz.py @@ -3,6 +3,8 @@ from pathlib import Path from typing import Protocol +from src.game.logger import get_logger + try: # Imported lazily so pure-Python usage works without IPython. from IPython import get_ipython from IPython.display import Image, display @@ -25,7 +27,7 @@ def save_png(png_bytes: bytes, filename: str | Path = "graph.png") -> Path: output_path = Path(filename) output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_bytes(png_bytes) - print(f"Graph saved to {output_path}") + logger.info("Graph saved to %s", output_path) if get_ipython and Image and display and get_ipython(): display(Image(png_bytes)) @@ -41,6 +43,9 @@ def save_graph_image( try: png_bytes = graph.draw_mermaid_png() except ValueError as exc: # Mermaid service may be unavailable in offline runs. - print(f"Skipping graph image generation: {exc}") + logger.warning("Skipping graph image generation: %s", exc) return None return save_png(png_bytes, filename) + + +logger = get_logger(__name__)