rwightman · rwightman · May 20, 2026 · May 18, 2026 · May 19, 2026 · May 19, 2026
diff --git a/DESIGN.md b/DESIGN.md
@@ -439,8 +439,40 @@ enabled for line-oriented doors such as Ether/Tele-Arena. It is appropriate
 when most commands are text lines submitted with Enter and the two-step
 `type_text` plus `press_key enter` pattern is just decision overhead.
 
-A future campaign runner should compose activities into fair model-vs-model
-schedules instead of replacing these activity runners.
+`run-match` composes multiple activity states into one shared environment. Each
+participant has a separate terminal session, model adapter, stateful provider
+session, recent-step context, campaign memory, and per-agent trace. The
+scheduler is policy-driven: `sequential`, `parallel_barrier`, `parallel_race`,
+and `continuous` all support fixed, seeded-shuffle, and rotate ordering. Fixed
+order preserves reproducibility, seeded shuffle reduces first-mover bias, and
+rotate alternates first position without randomness. `parallel_barrier` splits
+each step into a decision phase and a commit phase: active agents decide
+concurrently, then actions are committed in the scheduled order. `parallel_race`
+uses the scheduled order as launch order but commits actions as soon as model
+decisions complete, making latency part of the competition. `continuous` keeps
+one decision in flight per active agent and immediately requeues an agent after
+its action commits, so faster models can take more initiative within the same
+match wall-clock budget. Choose `parallel_barrier` when fairness matters more
+than latency. The scheduler writes match events for match start/completion,
+per-round or per-tick order, decision completion, each committed action,
+disconnects, reconnect attempts, and final stop reasons, while the normal
+activity traces remain the source of detailed prompts, actions, observations,
+and memory updates.
+
+Melee runs are the same match abstraction with more participants and richer
+configuration. A TOML or JSON match config should own the roster, scheduler
+policy, reconnect policy, objective template, disabled action set, budgets, and
+per-participant provider settings. `max_wall_seconds` is a match-level budget
+shared by every participant; `max_decision_ticks` remains per participant. In
+`continuous` mode, `max_rounds` is interpreted as the maximum number of queued
+action decisions for the whole match rather than full all-agent rounds. A future
+campaign runner should compose activities into longer fair model-vs-model
+schedules instead of replacing these activity and match runners.
+
+Continuous mode does not emit `round_started` or `round_completed`, because
+there are no all-agent rounds. Continuous scheduler events use `tick` for the
+committed action count; `commit_order` also records `queued_tick` for the
+decision request that produced the action.
 
 Memory is harness-owned:
 

diff --git a/README.md b/README.md
@@ -251,6 +251,61 @@ Ether/Tele-Arena where normal commands are submitted with Enter. It keeps
 `submit_line` available while preserving `press_key` and `type_text` for
 single-key or partial-input prompts.
 
+`run-match` runs several agents against the same BBS or door server. Each
+participant gets its own terminal session, model adapter, stateful provider
+session, recent-step context, campaign memory, and per-agent trace; the match
+trace records match start/completion, per-round or per-tick order, actions,
+disconnects, and reconnects. The default scheduler mode is `sequential`: agents
+act one at a time in the chosen per-round order. `parallel_barrier` asks active
+agents for decisions concurrently, then commits actions in the chosen order.
+`parallel_race` also asks concurrently, but commits each action as soon as that
+agent's decision is ready. `continuous` keeps one decision in flight per active
+agent and immediately requeues that agent after each committed action; faster
+models get more initiative by design. The default order is fixed CLI order, but
+competitive runs can use seeded shuffle or rotating first-player order. For
+example, a Claude-vs-Codex Tele-Arena smoke test can use:
+
+```bash
+uv run bbs-gym run-match \
+  --host 127.0.0.1 \
+  --port 3000 \
+  --transport telnet \
+  --telnet-enter lf \
+  --no-agents-config \
+  --activity bbs-door-line \
+  --participant arena-codex:codex:gpt-5.5 \
+  --participant arena-claude:claude:sonnet \
+  --codex-stateful \
+  --claude-stateful \
+  --scheduler-mode sequential \
+  --match-order shuffle \
+  --match-seed 20260519 \
+  --disconnect-policy reconnect \
+  --disable-action hangup \
+  --run-objective "Play Tele-Arena as {agent_id}. If asked for a character name, create or log in as {agent_id}. Stay connected; do not hang up or quit. Other active agents: {opponents}. Survive, gain experience and gold, buy and equip useful supplies, spend gold wisely, recover when hurt, find opponents, and defeat them when prepared." \
+  --max-rounds 100 \
+  --max-decision-ticks 100
+```
+
+For larger melees, put the participant roster and scheduler settings in a
+TOML or JSON file:
+
+```bash
+uv run bbs-gym run-match --match-config examples/tele_arena_melee.toml
+```
+
+`examples/tele_arena_melee.toml` shows a Codex, Claude, and local
+OpenAI-compatible model sharing one Tele-Arena server. Config files can set the
+activity, transport, budgets, objective template, scheduler mode/order/seed,
+disconnect policy, disabled actions, and per-participant provider settings.
+Config values are treated as the match definition when `--match-config` is used.
+For match runs, `--max-wall-seconds` is a match-level wall-clock budget shared
+by all participants, while `--max-decision-ticks` is per participant. In
+`continuous` mode, `--max-rounds` caps the number of queued action decisions for
+the whole match instead of all-agent rounds. Continuous traces use `tick`
+instead of `round` for scheduler events and do not emit `round_started` /
+`round_completed` lifecycle events.
+
 Use `--prompt-layout cache_friendly` when comparing local OpenAI-compatible
 servers with prefix caching. The default `timeline_first` layout preserves the
 existing trace-oriented prompt order; `cache_friendly` moves stable objectives,

diff --git a/TELE_ARENA.md b/TELE_ARENA.md
@@ -277,6 +277,48 @@ bat, died, recovered in the temple, and continued until the 100-step budget.
 The same run used `submit_line` for most complete commands and had no action
 validation failures.
 
+## 10. Let Two Agents Play A Match
+
+`run-match` opens one telnet session per participant. The default `sequential`
+scheduler alternates one decision tick per active agent. `parallel_barrier`
+collects decisions concurrently and commits them in the scheduled order;
+`parallel_race` commits actions as model decisions finish. `continuous` keeps
+one decision in flight per active agent and immediately requeues that agent
+after each committed action, so faster models get more chances to act during the
+same match wall-clock budget. Inline participant specs use
+`agent_id:provider:model`; each participant still gets its own per-agent JSONL
+trace and model state.
+
+```bash
+uv run bbs-gym run-match \
+  --host 127.0.0.1 \
+  --port 3000 \
+  --transport telnet \
+  --telnet-enter lf \
+  --no-agents-config \
+  --activity bbs-door-line \
+  --participant arena-codex:codex:gpt-5.5 \
+  --participant arena-claude:claude:sonnet \
+  --codex-stateful \
+  --claude-stateful \
+  --prompt-layout cache_friendly \
+  --log-path runtime/logs/tele-arena-match.jsonl \
+  --disable-action hangup \
+  --run-objective "Play Tele-Arena as {agent_id}. If asked for a character name, create or log in as {agent_id}. Stay connected; do not hang up or quit. Other active agents: {opponents}. Survive, gain experience and gold, buy and equip useful supplies, spend gold wisely, recover when hurt, find opponents, and defeat them when prepared." \
+  --max-rounds 100 \
+  --max-decision-ticks 100
+```
+
+The match trace goes to `runtime/logs/tele-arena-match.jsonl`. Per-agent traces
+use the same stem, for example `tele-arena-match.arena-codex.jsonl` and
+`tele-arena-match.arena-claude.jsonl`.
+
+For match runs, `--max-wall-seconds` is match-level. `--max-decision-ticks`
+still applies per participant. In `continuous` mode, `--max-rounds` caps the
+total queued action decisions for the whole match rather than full all-agent
+rounds. Continuous traces use `tick` instead of `round` for scheduler events and
+do not emit `round_started` / `round_completed` lifecycle events.
+
 ## Notes
 
 - Use `--telnet-enter lf` for Ether. CR-only caused repeated delayed submits.
@@ -286,5 +328,6 @@ validation failures.
   before Enter.
 - The wrapper is intentionally thin; pass any extra `bbs-gym run-activity`
   arguments after the wrapper arguments and they will be forwarded.
-- The current objective is conservative. For more exploratory runs, override
-  `--run-objective`.
+- Match-specific objectives should carry game strategy. Add
+  `--disable-action hangup` for competitive runs so agents cannot leave the
+  match with the harness-level hangup action.
diff --git a/examples/tele_arena_activity.py b/examples/tele_arena_activity.py
@@ -15,8 +15,9 @@
 
 DEFAULT_RUN_OBJECTIVE = (
     "Play Tele-Arena through this telnet session. If asked for a character name, create or log in as "
-    "ArenaCodex. Explore carefully, learn commands, survive fights, gain experience or gold, buy useful "
-    "starter supplies, and recover from mistakes."
+    "ArenaCodex. Stay connected unless the run objective explicitly says to leave. Survive fights, gain "
+    "experience and gold, buy and equip useful starter supplies, spend gold wisely, recover when hurt, and "
+    "keep making progress instead of quitting early."
 )
 
 
@@ -109,6 +110,8 @@ def build_bbs_gym_argv(args: argparse.Namespace) -> list[str]:
         cmd.extend(["--prompt-layout", args.prompt_layout])
     if args.recent_steps_to_keep is not None:
         cmd.extend(["--recent-steps-to-keep", str(args.recent_steps_to_keep)])
+    for disabled_action in args.disabled_actions:
+        cmd.extend(["--disable-action", disabled_action])
     return cmd
 
 
@@ -153,6 +156,7 @@ def parse_args(argv: list[str] | None = None) -> tuple[argparse.Namespace, list[
     parser.add_argument("--prompt-mode", choices=["stateless_full", "stateful_delta"])
     parser.add_argument("--prompt-layout", choices=["timeline_first", "cache_friendly"])
     parser.add_argument("--recent-steps-to-keep", type=int)
+    parser.add_argument("--disable-action", dest="disabled_actions", action="append", default=["hangup"])
     parser.add_argument("--max-decision-ticks", type=int, default=100)
     parser.add_argument("--max-wall-seconds", type=float, default=2400.0)
     parser.add_argument("--observe-timeout", type=float, default=8.0)

diff --git a/examples/tele_arena_melee.toml b/examples/tele_arena_melee.toml
@@ -0,0 +1,47 @@
+host = "127.0.0.1"
+port = 3000
+transport = "telnet"
+telnet_enter = "lf"
+activity = "bbs-door-line"
+prompt_layout = "cache_friendly"
+recent_steps_to_keep = 5
+log_path = "runtime/logs/tele-arena-melee.jsonl"
+disabled_actions = ["hangup"]
+run_objective = "Play Tele-Arena as {agent_id}. If asked for a character name, create or log in as {agent_id}. Stay connected; do not hang up or quit. Other active agents: {opponents}. Survive, gain experience and gold, buy and equip useful supplies, spend gold wisely, recover when hurt, find opponents, and defeat them when prepared."
+
+[scheduler]
+mode = "sequential"
+order = "shuffle"
+seed = 20260519
+disconnect_policy = "reconnect"
+max_reconnects = 3
+reconnect_delay = 2.0
+
+[budget]
+# max_wall_seconds is a match-level cap; max_decision_ticks applies per participant.
+max_rounds = 2000
+max_decision_ticks = 2000
+max_wall_seconds = 86400
+
+[[participants]]
+agent_id = "ArenaCodex"
+provider = "codex"
+model = "gpt-5.5"
+stateful = true
+codex_session_file = "runtime/codex-sessions/tele-arena-melee-ArenaCodex.session"
+
+[[participants]]
+agent_id = "ArenaClaude"
+provider = "claude"
+model = "sonnet"
+stateful = true
+claude_session_file = "runtime/claude-sessions/tele-arena-melee-ArenaClaude.session"
+
+[[participants]]
+agent_id = "ArenaGemma"
+provider = "openai-compatible"
+model = "gemma4"
+base_url = "http://localhost:8000/v1"
+temperature = 0.6
+max_tokens = 4096
+response_filter = "gemma4"
diff --git a/packages/bbs-gym/README.md b/packages/bbs-gym/README.md
@@ -4,5 +4,5 @@ BBS and door-game environments for LLM terminal agents.
 
 This package contains the BBS-specific layer from the Spree workspace:
 Synchronet connection wiring, account tooling, BBS/TW2 profiles, routed
-activities, and the `bbs-gym` CLI. It depends on `tty-agent` for the reusable
-terminal-agent runtime.
+activities, scheduled multi-agent matches, and the `bbs-gym` CLI. It depends on
+`tty-agent` for the reusable terminal-agent runtime.