Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,16 @@ START → host_setup → host_stage_switch

**Quality Scoring**:
```python
from src.game.metrics import metrics_collector
from src.game.dependencies import build_dependencies

deps = build_dependencies()
collector = deps.metrics

# Get quality score
deterministic_score = metrics_collector.compute_quality_score()
deterministic_score = collector.compute_quality_score()

# Or use LLM-based evaluation
llm_score = metrics_collector.compute_quality_score(method="llm", llm=client)
llm_score = collector.compute_quality_score(method="llm", llm=client)
```

**Metrics History**: Track prompts and configurations in `docs/metrics-history.md`
Expand Down Expand Up @@ -194,8 +197,8 @@ llm_score = metrics_collector.compute_quality_score(method="llm", llm=client)

### Working with Player-Specific Hooks (callbacks) for Metrics
When implementing player-specific behaviors that need to track metrics per player:
- Use the `metrics_collector.on_player_speech(player_name, is_spy, round_num, speech)` hook within player speech nodes to collect speech diversity metrics
- Use the `metrics_collector.on_vote_cast()` hook in player vote nodes to collect voting pattern data.
- Access the injected collector via the dependencies bundle (e.g., the `metrics` argument supplied to LangGraph nodes).
- Use the `metrics.on_player_speech()` and related hooks within player speech/vote nodes to collect lexical diversity and voting pattern data.
- Metrics collection respects the `metrics.enabled` flag in `config.yaml`; the hooks become no-ops when metrics are disabled.

## LangGraph Development Notes
Expand All @@ -206,4 +209,4 @@ When implementing player-specific behaviors that need to track metrics per playe

**Error Handling**: LangGraph nodes should handle exceptions gracefully to prevent workflow crashes

**See**: [ARCHITECTURE.md](ARCHITECTURE.md) for detailed system design and [README.md](README.md) for project overview
**See**: [ARCHITECTURE.md](ARCHITECTURE.md) for detailed system design and [README.md](README.md) for project overview
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,18 @@ Metrics are streamed to memory during play and automatically persisted when a ga
- Per-game summaries: `logs/metrics/{game_id}.json`
- Rolling aggregate + functional quality score: `logs/metrics/overall.json`

You can also access the live collector from code:
You can also access the live collector from code by building a dependency bundle
for each game instance:

```python
from src.game.metrics import metrics_collector
from src.game.dependencies import build_dependencies

audit = metrics_collector.get_overall_metrics()
score = metrics_collector.compute_quality_score() # deterministic
# metrics_collector.compute_quality_score(method="llm", llm=client) for LLM-based review
deps = build_dependencies()
collector = deps.metrics

audit = collector.get_overall_metrics()
score = collector.compute_quality_score() # deterministic
# collector.compute_quality_score(method="llm", llm=client) for LLM-based review
```

These outputs are ready to feed into downstream prompt-evaluation or offline analysis pipelines.
Expand Down
13 changes: 8 additions & 5 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,17 @@ game:
- 单局摘要:`logs/metrics/{game_id}.json`
- 全局聚合与函数版总分:`logs/metrics/overall.json`

在代码中可直接访问实时指标
在代码中可通过依赖容器访问实时指标

```python
from src.game.metrics import metrics_collector
from src.game.dependencies import build_dependencies

report = metrics_collector.get_overall_metrics()
score = metrics_collector.compute_quality_score() # 函数评分
# metrics_collector.compute_quality_score(method="llm", llm=client) 可获取 LLM 评价
deps = build_dependencies()
collector = deps.metrics

report = collector.get_overall_metrics()
score = collector.compute_quality_score() # 函数评分
# collector.compute_quality_score(method="llm", llm=client) 可获取 LLM 评价
```

这些数据可作为后续提示词评估或离线分析的直接输入。
Expand Down
45 changes: 16 additions & 29 deletions src/game/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

Configuration precedence:
1. Built-in defaults defined in ``DEFAULT_CONFIG``.
2. Values provided in ``config.yaml`` (or a custom path passed to ``get_config``),
2. Values provided in ``config.yaml`` (or a custom path passed to ``load_config``),
merged over the defaults.
3. Pydantic model defaults for any fields still unset after the merge.
"""
Expand Down Expand Up @@ -262,46 +262,33 @@ def validate_config(self) -> bool:
return False


# Global configuration instance
_config_instance: GameConfig | None = None
logger = get_logger(__name__)


def get_config(config_path: str | Path | None = None) -> GameConfig:
"""
Get the global configuration instance.
def default_config_path() -> Path:
    """Return the default ``config.yaml`` location.

    The path is derived from this module's resolved file location: the
    third ancestor directory (``parents[2]``) joined with ``config.yaml``.
    """
    this_module = Path(__file__).resolve()
    base_dir = this_module.parents[2]
    return base_dir / "config.yaml"

Args:
config_path: Path to configuration file. If None, uses default location.

Returns:
GameConfig instance
def load_config(config_path: str | Path | None = None) -> GameConfig:
"""
global _config_instance

if _config_instance is None:
if config_path is None:
project_root = Path(__file__).resolve().parents[2]
config_path = project_root / "config.yaml"
Build a new GameConfig instance from the provided path.

_config_instance = GameConfig(config_path)

return _config_instance
Args:
config_path: Optional override path. When omitted, uses ``config.yaml`` at
the project root.
"""
resolved_path = (
Path(config_path).expanduser() if config_path else default_config_path()
)
return GameConfig(resolved_path)


def reload_config(config_path: str | Path | None = None) -> GameConfig:
"""
Reload the configuration from file.

Args:
config_path: Path to configuration file. If None, uses default location.

Returns:
GameConfig instance
Compatibility shim for legacy callers. Returns a freshly loaded config.
"""
global _config_instance
_config_instance = None
return get_config(config_path)
return load_config(config_path)


def calculate_spy_count(total_players: int) -> int:
Expand Down
44 changes: 44 additions & 0 deletions src/game/dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
Lightweight dependency container for wiring runtime services into the game.

Instead of relying on module-level singletons (e.g., global config instances or
metrics collectors), we bundle the required collaborators into a simple data
class and pass them explicitly where needed. This makes it trivial to spin up
multiple, isolated games for tests or concurrent executions.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

from .config import GameConfig, load_config
from .metrics import GameMetrics


@dataclass(slots=True)
class GameDependencies:
"""Container object that holds the runtime services a game needs."""

config: GameConfig
metrics: GameMetrics


def build_dependencies(
    *,
    config: GameConfig | None = None,
    metrics: GameMetrics | None = None,
    config_path: str | Path | None = None,
) -> GameDependencies:
    """
    Construct a ``GameDependencies`` instance.

    Args:
        config: Optional pre-built ``GameConfig``. When ``None``, a config is
            loaded with ``load_config(config_path)``.
        metrics: Optional ``GameMetrics`` instance (useful for sharing
            collectors). When ``None``, a new ``GameMetrics()`` is created.
        config_path: Optional config path; only consulted when ``config`` is
            ``None``.

    Returns:
        A ``GameDependencies`` holding the resolved config and collector.
    """
    # Test against ``None`` explicitly instead of relying on truthiness: an
    # object the caller supplied must be used as-is even if it evaluates as
    # falsy (e.g. a type defining ``__len__``/``__bool__``), otherwise a
    # shared collector could be silently swapped for a fresh one.
    cfg = load_config(config_path) if config is None else config
    collector = GameMetrics() if metrics is None else metrics
    return GameDependencies(config=cfg, metrics=collector)
85 changes: 73 additions & 12 deletions src/game/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,26 @@
from src.game.nodes.transition import check_votes_and_transition
from src.game.state import GameState, votes_ready, next_alive_player
from src.tools import save_graph_image
from src.game.config import get_config
from src.game.dependencies import GameDependencies, build_dependencies

logger = get_logger(__name__)


def _resolve_dependencies(
*,
dependencies: GameDependencies | None = None,
config=None,
metrics=None,
) -> GameDependencies:
if dependencies is not None and (config is not None or metrics is not None):
raise ValueError(
"Provide either `dependencies` or individual `config`/`metrics`, not both."
)
if dependencies is not None:
return dependencies
return build_dependencies(config=config, metrics=metrics)


def route_from_stage(state: GameState) -> list[str] | str:
"""Route to appropriate nodes based on current game phase.

Expand Down Expand Up @@ -84,7 +99,14 @@ def should_continue(state: GameState) -> str:
return "end" if state.get("winner") else "continue"


def build_workflow_with_players(players: list[str], *, checkpointer=None):
def build_workflow_with_players(
players: list[str],
*,
dependencies: GameDependencies | None = None,
config=None,
metrics=None,
checkpointer=None,
):
"""Build the complete LangGraph workflow for a specific set of players.

This function constructs the entire state machine with all nodes and edges
Expand All @@ -102,20 +124,46 @@ def build_workflow_with_players(players: list[str], *, checkpointer=None):
- Player nodes: speech and vote nodes for each player
- Transition nodes: vote counting and phase transitions
"""
deps = _resolve_dependencies(
dependencies=dependencies,
config=config,
metrics=metrics,
)
cfg = deps.config
collector = deps.metrics

workflow = StateGraph(GameState)

# Register nodes
workflow.add_node("host_setup", host_setup)
workflow.add_node(
"host_setup", partial(host_setup, game_config=cfg, metrics=collector)
)
workflow.add_node(
"host_stage_switch", host_stage_switch
) # Responsible for writing phase/next_* pointers
workflow.add_node("host_result", host_result)
workflow.add_node("host_result", partial(host_result, metrics=collector))

workflow.add_node("check_votes_and_transition", check_votes_and_transition)

for pid in players:
workflow.add_node(f"player_speech_{pid}", partial(player_speech, player_id=pid))
workflow.add_node(f"player_vote_{pid}", partial(player_vote, player_id=pid))
workflow.add_node(
f"player_speech_{pid}",
partial(
player_speech,
player_id=pid,
game_config=cfg,
metrics=collector,
),
)
workflow.add_node(
f"player_vote_{pid}",
partial(
player_vote,
player_id=pid,
game_config=cfg,
metrics=collector,
),
)

# Basic skeleton
workflow.add_edge(START, "host_setup")
Expand Down Expand Up @@ -161,27 +209,40 @@ def build_workflow_with_players(players: list[str], *, checkpointer=None):
return app


def build_workflow(config=None):
def build_workflow(
*,
dependencies: GameDependencies | None = None,
config=None,
metrics=None,
):
"""Build workflow for LangGraph Server - accepts RunnableConfig parameter.

For LangGraph Server, we build a workflow using the player count from config.yaml.
The frontend will get the actual player list from the game state.
"""
# Load configuration to get the configured player count
game_config = get_config()
deps = _resolve_dependencies(
dependencies=dependencies,
config=config,
metrics=metrics,
)
game_config = deps.config

# Generate player names based on configuration
players = game_config.generate_player_names()

logger.info("Building workflow with %d players: %s", len(players), players)

return build_workflow_with_players(players)
return build_workflow_with_players(
players,
dependencies=deps,
)


def main():
"""Main execution function using configuration."""
# Load configuration
config = get_config()
deps = build_dependencies()
config = deps.config

# Generate player names based on configuration
players = config.generate_player_names()
Expand All @@ -192,7 +253,7 @@ def main():
logger.info("Vocabulary pairs: %d", len(config.vocabulary))

# Build and run the workflow
app = build_workflow_with_players(players)
app = build_workflow_with_players(players, dependencies=deps)
save_graph_image(app, filename="artifacts/agent_with_router.png")

initial_state = {
Expand Down
Loading
Loading