Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ jobs:
dockerfile: src/envs/atari_env/server/Dockerfile
- name: git-env
dockerfile: src/envs/git_env/server/Dockerfile
- name: textarena-env
dockerfile: src/envs/textarena_env/server/Dockerfile

steps:
- name: Checkout code
Expand Down
87 changes: 87 additions & 0 deletions examples/textarena_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Quickstart example for the generic TextArena environment."""

from __future__ import annotations

import sys
from pathlib import Path

# Add project src/ to import path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from envs.textarena_env import TextArenaEnv, TextArenaAction


def main() -> None:
    """Run a short GuessTheNumber episode against a Dockerised TextArena server.

    Starts the ``textarena-env`` container, plays up to five guesses with a
    simple higher/lower heuristic, then prints the final reward and a server
    state snapshot.  Intended as a copy-paste quickstart, not a solver.
    """
    print("=" * 60)
    print("💬 TextArena Hello World - GuessTheNumber-v0")
    print("=" * 60)

    env = TextArenaEnv.from_docker_image(
        "textarena-env:latest",
        env_vars={
            "TEXTARENA_ENV_ID": "GuessTheNumber-v0",
            "TEXTARENA_NUM_PLAYERS": "1",
        },
        ports={8000: 8000},
    )

    try:
        print("\n📍 Resetting environment...")
        result = env.reset()
        print(f" Prompt:\n{result.observation.prompt}\n")

        # Simple heuristic: if prompt mentions a range, start with midpoint
        guess = "[10]"

        for step in range(5):
            print(f"🎯 Step {step + 1}: sending guess {guess}")
            result = env.step(TextArenaAction(message=guess))

            for message in result.observation.messages:
                print(f" [{message.category}] {message.content}")

            if result.done:
                break

            # Basic update: look for 'higher' or 'lower' hints.  Normalise to
            # lowercase so capitalised feedback (e.g. "Higher!") still matches.
            feedback = " ".join(
                msg.content for msg in result.observation.messages
            ).lower()
            if "higher" in feedback:
                guess = "[15]"
            elif "lower" in feedback:
                guess = "[5]"
            else:
                guess = "[10]"

        print("\n✅ Episode finished!")
        print(f" Reward: {result.reward}")
        print(f" Done: {result.done}")

        state = env.state()
        print("\n📊 Server State Snapshot:")
        print(f" Episode ID: {state.episode_id}")
        print(f" Step count: {state.step_count}")
        print(f" Env ID: {state.env_id}")

    except Exception as exc:  # pragma: no cover - demonstration script
        print(f"\n❌ Error: {exc}")
        print("\nMake sure you have built the Docker image first:")
        print(" docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .")
        print("\nAlternatively run the server manually:")
        print(" python -m envs.textarena_env.server.app")

    finally:
        # Always stop the container, even after an error or KeyboardInterrupt.
        env.close()
        print("\n👋 Done!")


if __name__ == "__main__":
    main()

174 changes: 174 additions & 0 deletions examples/textarena_wordle_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/env python3
"""Play TextArena Wordle with a hosted LLM via Hugging Face Inference Providers.

This script mirrors the structure of the Kuhn Poker inference sample but targets
the Wordle environment. We deploy the generic TextArena server (wrapped in
OpenEnv) inside a local Docker container and query a single hosted model using
the OpenAI-compatible API provided by Hugging Face's router.

Prerequisites
-------------
1. Build the TextArena Docker image::

docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .

2. Set your Hugging Face token::

export HF_TOKEN=your_token_here

3. Run this script::

python examples/wordle_inference.py

By default we ask the DeepSeek Terminus model to play ``Wordle-v0``. Adjust the
``MODEL`` constant if you'd like to experiment with another provider-compatible
model.
"""

from __future__ import annotations

import os
import re
from typing import Iterable, List

from openai import OpenAI

from envs.textarena_env import TextArenaAction, TextArenaEnv
from envs.textarena_env.models import TextArenaMessage

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

API_BASE_URL = "https://router.huggingface.co/v1"
# Prefer an explicit API_KEY override, falling back to the conventional HF_TOKEN.
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")

MODEL = "openai/gpt-oss-120b:novita"
MAX_TURNS = 8  # hard cap on guesses per game
VERBOSE = True  # print prompts, raw model replies, and feedback each turn

SYSTEM_PROMPT = (
    "You are an expert Wordle solver."
    " Always respond with a single guess inside square brackets, e.g. [crane]."
    " Use lowercase letters, exactly one five-letter word per reply."
    " Reason about prior feedback before choosing the next guess."
    " Words must be 5 letters long and real English words."
    # Fixed doubled word ("Do not not include") in the instruction below.
    " Do not include any other text in your response."
    " Do not repeat the same guess twice."
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def format_history(messages: Iterable[TextArenaMessage]) -> str:
    """Render a TextArena message history as newline-separated plain text.

    Each message becomes one ``[CATEGORY] content`` line; messages with a
    falsy category are tagged ``MESSAGE``.
    """
    return "\n".join(
        f"[{message.category or 'MESSAGE'}] {message.content}"
        for message in messages
    )


def extract_guess(text: str) -> str:
    """Return the first Wordle-style guess enclosed in square brackets.

    Prefers an explicit ``[abcde]`` token (lowercased).  Otherwise salvages
    the first five letters found anywhere in *text*; if fewer than five
    letters exist, falls back to the placeholder ``[dunno]``.
    """
    bracketed = re.search(r"\[[A-Za-z]{5}\]", text)
    if bracketed is not None:
        return bracketed.group(0).lower()
    # Fallback: strip everything but letters and take the first five.
    letters = re.sub(r"[^a-zA-Z]", "", text).lower()
    return f"[{letters[:5]}]" if len(letters) >= 5 else "[dunno]"


def make_user_prompt(prompt_text: str, messages: Iterable[TextArenaMessage]) -> str:
    """Combine the TextArena prompt and feedback history into one user turn."""
    sections = [
        f"Current prompt:\n{prompt_text}",
        f"Conversation so far:\n{format_history(messages)}",
        "Reply with your next guess enclosed in square brackets.",
    ]
    return "\n\n".join(sections)


# ---------------------------------------------------------------------------
# Gameplay
# ---------------------------------------------------------------------------

def play_wordle(env: TextArenaEnv, client: OpenAI) -> None:
    """Drive one Wordle episode, querying the hosted LLM for each guess.

    Resets ``env``, then loops for at most ``MAX_TURNS`` turns: build a prompt
    from the latest observation, ask ``client`` for a completion, parse the
    bracketed guess, and step the environment with it.  Stops early once the
    environment reports ``done``.  Prints progress when ``VERBOSE`` is set.
    """
    result = env.reset()
    observation = result.observation

    if VERBOSE:
        print("📜 Initial Prompt:\n" + observation.prompt)

    for turn in range(1, MAX_TURNS + 1):
        # ``result`` here is still the outcome of the *previous* step, so a
        # terminal state is detected before wasting another model call.
        if result.done:
            break

        user_prompt = make_user_prompt(observation.prompt, observation.messages)

        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=2048,
            temperature=0.7,
        )

        raw_output = response.choices[0].message.content.strip()
        # The model may wrap its guess in extra prose; keep only the guess.
        guess = extract_guess(raw_output)

        if VERBOSE:
            print(f"\n🎯 Turn {turn}: model replied with -> {raw_output}")
            print(f" Parsed guess: {guess}")

        result = env.step(TextArenaAction(message=guess))
        observation = result.observation

        if VERBOSE:
            print(" Feedback messages:")
            for message in observation.messages:
                print(f" [{message.category}] {message.content}")

    print("\n✅ Game finished")
    print(f" Reward: {result.reward}")
    print(f" Done: {result.done}")


# ---------------------------------------------------------------------------
# Entrypoint
# ---------------------------------------------------------------------------

def main() -> None:
    """Launch the TextArena Wordle container and play one game with a hosted LLM.

    Requires ``HF_TOKEN`` (or ``API_KEY``) in the environment; the Docker
    container is always shut down on exit.
    """
    if not API_KEY:
        raise SystemExit("HF_TOKEN (or API_KEY) must be set to query the model.")

    llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    wordle_env = TextArenaEnv.from_docker_image(
        "textarena-env:latest",
        env_vars={
            "TEXTARENA_ENV_ID": "Wordle-v0",
            "TEXTARENA_NUM_PLAYERS": "1",
        },
        ports={8000: 8000},
    )

    try:
        play_wordle(wordle_env, llm_client)
    finally:
        wordle_env.close()


if __name__ == "__main__":
    main()


8 changes: 6 additions & 2 deletions src/core/containers/runtime/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,12 @@ def start_container(
cmd.append(image)

# Run container
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
self._container_id = result.stdout.strip()
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
self._container_id = result.stdout.strip()
except subprocess.CalledProcessError as e:
error_msg = f"Failed to start Docker container.\nCommand: {' '.join(cmd)}\nExit code: {e.returncode}\nStderr: {e.stderr}\nStdout: {e.stdout}"
raise RuntimeError(error_msg) from e

# Wait a moment for container to start
time.sleep(1)
Expand Down
46 changes: 46 additions & 0 deletions src/envs/textarena_env/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# TextArena Environment

Generic wrapper for any [TextArena](https://www.textarena.ai/docs/overview) game inside OpenEnv. This module exposes the TextArena `Env` interface through the standard HTTP server/client APIs used by other OpenEnv environments, enabling quick experimentation with the full suite of word, reasoning, and multi-agent games.

## Features
- Works with any registered TextArena game (e.g. `Wordle-v0`, `GuessTheNumber-v0`, `Chess-v0`, ...).
- Transparent access to TextArena message streams, rewards, and state snapshots.
- Docker image for easy deployment with Python 3.11 and preinstalled dependencies.
- Example client demonstrating end-to-end interaction.

## Docker

Build the container from the project root:

```bash
docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .
```

Run it with your desired game (default is `Wordle-v0`). Environment configuration is handled via env vars:

```bash
docker run -p 8000:8000 \
-e TEXTARENA_ENV_ID=GuessTheNumber-v0 \
-e TEXTARENA_NUM_PLAYERS=1 \
textarena-env:latest
```

Additional environment arguments can be passed using the `TEXTARENA_KW_` prefix. For example, to enable `hardcore=True`:

```bash
docker run -p 8000:8000 \
-e TEXTARENA_ENV_ID=Wordle-v0 \
-e TEXTARENA_KW_hardcore=true \
textarena-env:latest
```

## Python Example

The repository ships with a simple client script that connects to a running server (local or Docker) and plays a few turns. Run it from the repo root:

```bash
python examples/textarena_simple.py
```

The script uses `TextArenaEnv.from_docker_image` to automatically start and connect to the container — build the `textarena-env:latest` image first (see the Docker section above). Review the source (`examples/textarena_simple.py`) for more details and to customize the gameplay loop.

26 changes: 26 additions & 0 deletions src/envs/textarena_env/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""TextArena environment integration for OpenEnv."""

# Client-side environment wrapper (HTTP/Docker entry point for callers).
from .client import TextArenaEnv

# Shared action/message/observation/state types used by client and server.
from .models import (
    TextArenaAction,
    TextArenaMessage,
    TextArenaObservation,
    TextArenaState,
)

# Reward-provider helpers; see ``.rewards`` for available providers.
from .rewards import RewardProvider, build_reward_providers

# Public API of the package, re-exported for ``from envs.textarena_env import ...``.
__all__ = [
    "TextArenaEnv",
    "TextArenaAction",
    "TextArenaObservation",
    "TextArenaState",
    "TextArenaMessage",
    "RewardProvider",
    "build_reward_providers",
]
Loading