Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ jobs:
dockerfile: src/envs/atari_env/server/Dockerfile
- name: git-env
dockerfile: src/envs/git_env/server/Dockerfile
- name: textarena-env
dockerfile: src/envs/textarena_env/server/Dockerfile

steps:
- name: Checkout code
Expand Down
87 changes: 87 additions & 0 deletions examples/textarena_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Quickstart example for the generic TextArena environment."""

from __future__ import annotations

import sys
from pathlib import Path

# Add project src/ to import path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from envs.textarena_env import TextArenaEnv, TextArenaAction


def main() -> None:
    """Run a short GuessTheNumber episode against a Dockerised TextArena server.

    Starts the ``textarena-env`` container, plays up to five guesses with a
    simple higher/lower heuristic, then prints the final reward and a server
    state snapshot.  Intended as a copy-paste quickstart, not a solver.
    """
    print("=" * 60)
    print("💬 TextArena Hello World - GuessTheNumber-v0")
    print("=" * 60)

    env = TextArenaEnv.from_docker_image(
        "textarena-env:latest",
        env_vars={
            "TEXTARENA_ENV_ID": "GuessTheNumber-v0",
            "TEXTARENA_NUM_PLAYERS": "1",
        },
        ports={8000: 8000},
    )

    try:
        print("\n📍 Resetting environment...")
        result = env.reset()
        print(f" Prompt:\n{result.observation.prompt}\n")

        # Simple heuristic: if prompt mentions a range, start with midpoint
        guess = "[10]"

        for step in range(5):
            print(f"🎯 Step {step + 1}: sending guess {guess}")
            result = env.step(TextArenaAction(message=guess))

            for message in result.observation.messages:
                print(f" [{message.category}] {message.content}")

            if result.done:
                break

            # Basic update: look for 'higher' or 'lower' hints.  Normalise to
            # lowercase so capitalised feedback (e.g. "Higher!") still matches.
            feedback = " ".join(
                msg.content for msg in result.observation.messages
            ).lower()
            if "higher" in feedback:
                guess = "[15]"
            elif "lower" in feedback:
                guess = "[5]"
            else:
                guess = "[10]"

        print("\n✅ Episode finished!")
        print(f" Reward: {result.reward}")
        print(f" Done: {result.done}")

        state = env.state()
        print("\n📊 Server State Snapshot:")
        print(f" Episode ID: {state.episode_id}")
        print(f" Step count: {state.step_count}")
        print(f" Env ID: {state.env_id}")

    except Exception as exc:  # pragma: no cover - demonstration script
        print(f"\n❌ Error: {exc}")
        print("\nMake sure you have built the Docker image first:")
        print(" docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .")
        print("\nAlternatively run the server manually:")
        print(" python -m envs.textarena_env.server.app")

    finally:
        # Always stop the container, even after an error or KeyboardInterrupt.
        env.close()
        print("\n👋 Done!")


if __name__ == "__main__":
    main()

174 changes: 174 additions & 0 deletions examples/textarena_wordle_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/env python3
"""Play TextArena Wordle with a hosted LLM via Hugging Face Inference Providers.

This script mirrors the structure of the Kuhn Poker inference sample but targets
the Wordle environment. We deploy the generic TextArena server (wrapped in
OpenEnv) inside a local Docker container and query a single hosted model using
the OpenAI-compatible API provided by Hugging Face's router.

Prerequisites
-------------
1. Build the TextArena Docker image::

docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .

2. Set your Hugging Face token::

export HF_TOKEN=your_token_here

3. Run this script::

python examples/wordle_inference.py

By default we ask the DeepSeek Terminus model to play ``Wordle-v0``. Adjust the
``MODEL`` constant if you'd like to experiment with another provider-compatible
model.
"""

from __future__ import annotations

import os
import re
from typing import Iterable, List

from openai import OpenAI

from envs.textarena_env import TextArenaAction, TextArenaEnv
from envs.textarena_env.models import TextArenaMessage

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

API_BASE_URL = "https://router.huggingface.co/v1"
# Prefer an explicit API_KEY override, falling back to the conventional HF_TOKEN.
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")

MODEL = "openai/gpt-oss-120b:novita"
MAX_TURNS = 8  # hard cap on guesses per game
VERBOSE = True  # print prompts, raw model replies, and feedback each turn

SYSTEM_PROMPT = (
    "You are an expert Wordle solver."
    " Always respond with a single guess inside square brackets, e.g. [crane]."
    " Use lowercase letters, exactly one five-letter word per reply."
    " Reason about prior feedback before choosing the next guess."
    " Words must be 5 letters long and real English words."
    # Fixed doubled word ("Do not not include") in the instruction below.
    " Do not include any other text in your response."
    " Do not repeat the same guess twice."
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def format_history(messages: Iterable[TextArenaMessage]) -> str:
    """Render a TextArena message history as newline-separated plain text.

    Each message becomes one ``[CATEGORY] content`` line; messages with a
    falsy category are tagged ``MESSAGE``.
    """
    return "\n".join(
        f"[{message.category or 'MESSAGE'}] {message.content}"
        for message in messages
    )


def extract_guess(text: str) -> str:
    """Return the first Wordle-style guess enclosed in square brackets.

    Prefers an explicit ``[abcde]`` token (lowercased).  Otherwise salvages
    the first five letters found anywhere in *text*; if fewer than five
    letters exist, falls back to the placeholder ``[dunno]``.
    """
    bracketed = re.search(r"\[[A-Za-z]{5}\]", text)
    if bracketed is not None:
        return bracketed.group(0).lower()
    # Fallback: strip everything but letters and take the first five.
    letters = re.sub(r"[^a-zA-Z]", "", text).lower()
    return f"[{letters[:5]}]" if len(letters) >= 5 else "[dunno]"


def make_user_prompt(prompt_text: str, messages: Iterable[TextArenaMessage]) -> str:
    """Combine the TextArena prompt and feedback history into one user turn."""
    sections = [
        f"Current prompt:\n{prompt_text}",
        f"Conversation so far:\n{format_history(messages)}",
        "Reply with your next guess enclosed in square brackets.",
    ]
    return "\n\n".join(sections)


# ---------------------------------------------------------------------------
# Gameplay
# ---------------------------------------------------------------------------

def play_wordle(env: TextArenaEnv, client: OpenAI) -> None:
    """Drive one Wordle episode, querying the hosted LLM for each guess.

    Resets ``env``, then loops for at most ``MAX_TURNS`` turns: build a prompt
    from the latest observation, ask ``client`` for a completion, parse the
    bracketed guess, and step the environment with it.  Stops early once the
    environment reports ``done``.  Prints progress when ``VERBOSE`` is set.
    """
    result = env.reset()
    observation = result.observation

    if VERBOSE:
        print("📜 Initial Prompt:\n" + observation.prompt)

    for turn in range(1, MAX_TURNS + 1):
        # ``result`` here is still the outcome of the *previous* step, so a
        # terminal state is detected before wasting another model call.
        if result.done:
            break

        user_prompt = make_user_prompt(observation.prompt, observation.messages)

        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=2048,
            temperature=0.7,
        )

        raw_output = response.choices[0].message.content.strip()
        # The model may wrap its guess in extra prose; keep only the guess.
        guess = extract_guess(raw_output)

        if VERBOSE:
            print(f"\n🎯 Turn {turn}: model replied with -> {raw_output}")
            print(f" Parsed guess: {guess}")

        result = env.step(TextArenaAction(message=guess))
        observation = result.observation

        if VERBOSE:
            print(" Feedback messages:")
            for message in observation.messages:
                print(f" [{message.category}] {message.content}")

    print("\n✅ Game finished")
    print(f" Reward: {result.reward}")
    print(f" Done: {result.done}")


# ---------------------------------------------------------------------------
# Entrypoint
# ---------------------------------------------------------------------------

def main() -> None:
    """Launch the TextArena Wordle container and play one game with a hosted LLM.

    Requires ``HF_TOKEN`` (or ``API_KEY``) in the environment; the Docker
    container is always shut down on exit.
    """
    if not API_KEY:
        raise SystemExit("HF_TOKEN (or API_KEY) must be set to query the model.")

    llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    wordle_env = TextArenaEnv.from_docker_image(
        "textarena-env:latest",
        env_vars={
            "TEXTARENA_ENV_ID": "Wordle-v0",
            "TEXTARENA_NUM_PLAYERS": "1",
        },
        ports={8000: 8000},
    )

    try:
        play_wordle(wordle_env, llm_client)
    finally:
        wordle_env.close()


if __name__ == "__main__":
    main()


8 changes: 6 additions & 2 deletions src/core/containers/runtime/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,12 @@ def start_container(
cmd.append(image)

# Run container
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
self._container_id = result.stdout.strip()
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
self._container_id = result.stdout.strip()
except subprocess.CalledProcessError as e:
error_msg = f"Failed to start Docker container.\nCommand: {' '.join(cmd)}\nExit code: {e.returncode}\nStderr: {e.stderr}\nStdout: {e.stdout}"
raise RuntimeError(error_msg) from e

# Wait a moment for container to start
time.sleep(1)
Expand Down
46 changes: 46 additions & 0 deletions src/envs/textarena_env/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# TextArena Environment

Generic wrapper for any [TextArena](https://www.textarena.ai/docs/overview) game inside OpenEnv. This module exposes the TextArena `Env` interface through the standard HTTP server/client APIs used by other OpenEnv environments, enabling quick experimentation with the full suite of word, reasoning, and multi-agent games.

## Features
- Works with any registered TextArena game (e.g. `Wordle-v0`, `GuessTheNumber-v0`, `Chess-v0`, ...).
- Transparent access to TextArena message streams, rewards, and state snapshots.
- Docker image for easy deployment with Python 3.11 and preinstalled dependencies.
- Example client demonstrating end-to-end interaction.

## Docker

Build the container from the project root:

```bash
docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .
```

Run it with your desired game (default is `Wordle-v0`). Environment configuration is handled via env vars:

```bash
docker run -p 8000:8000 \
-e TEXTARENA_ENV_ID=GuessTheNumber-v0 \
-e TEXTARENA_NUM_PLAYERS=1 \
textarena-env:latest
```

Additional environment arguments can be passed using the `TEXTARENA_KW_` prefix. For example, to enable `hardcore=True`:

```bash
docker run -p 8000:8000 \
-e TEXTARENA_ENV_ID=Wordle-v0 \
-e TEXTARENA_KW_hardcore=true \
textarena-env:latest
```

## Python Example

The repository ships with a simple client script that connects to a running server (local or Docker) and plays a few turns. Run it from the repo root:

```bash
python examples/textarena_simple.py
```

The script uses `TextArenaEnv.from_docker_image` to automatically start and connect to the container — build the `textarena-env:latest` image first (see the Docker section above). Review the source (`examples/textarena_simple.py`) for more details and to customize the gameplay loop.

26 changes: 26 additions & 0 deletions src/envs/textarena_env/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""TextArena environment integration for OpenEnv."""

# Client-side environment wrapper (HTTP/Docker entry point for callers).
from .client import TextArenaEnv

# Shared action/message/observation/state types used by client and server.
from .models import (
    TextArenaAction,
    TextArenaMessage,
    TextArenaObservation,
    TextArenaState,
)

# Reward-provider helpers; see ``.rewards`` for available providers.
from .rewards import RewardProvider, build_reward_providers

# Public API of the package, re-exported for ``from envs.textarena_env import ...``.
__all__ = [
    "TextArenaEnv",
    "TextArenaAction",
    "TextArenaObservation",
    "TextArenaState",
    "TextArenaMessage",
    "RewardProvider",
    "build_reward_providers",
]
Loading