From 69872f17d809d4d56aab5299f2794cb4ac6b5860 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 16:24:24 +0530 Subject: [PATCH 01/13] feat: add shared runtime provider mapping --- swe_af/runtime/__init__.py | 15 +++++++++++++++ swe_af/runtime/providers.py | 37 +++++++++++++++++++++++++++++++++++++ tests/test_model_config.py | 25 +++++++++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 swe_af/runtime/__init__.py create mode 100644 swe_af/runtime/providers.py diff --git a/swe_af/runtime/__init__.py b/swe_af/runtime/__init__.py new file mode 100644 index 0000000..69c8d7e --- /dev/null +++ b/swe_af/runtime/__init__.py @@ -0,0 +1,15 @@ +"""Runtime mapping helpers.""" + +from .providers import ( + RUNTIME_VALUES, + normalize_runtime_provider, + runtime_to_harness_adapter, + runtime_to_harness_provider, +) + +__all__ = [ + "RUNTIME_VALUES", + "normalize_runtime_provider", + "runtime_to_harness_adapter", + "runtime_to_harness_provider", +] diff --git a/swe_af/runtime/providers.py b/swe_af/runtime/providers.py new file mode 100644 index 0000000..8784253 --- /dev/null +++ b/swe_af/runtime/providers.py @@ -0,0 +1,37 @@ +"""Shared runtime/provider normalization and mapping utilities.""" + +from __future__ import annotations + +RUNTIME_VALUES = ("claude_code", "open_code", "codex") + + +def normalize_runtime_provider(runtime: str) -> str: + """Normalize user/runtime aliases to canonical runtime values.""" + value = (runtime or "").strip().lower() + if value in {"claude_code", "claude", "claude-code"}: + return "claude_code" + if value in {"open_code", "opencode"}: + return "open_code" + if value == "codex": + return "codex" + raise ValueError(f"Unsupported runtime provider: {runtime}") + + +def runtime_to_harness_provider(runtime: str) -> str: + """Map canonical runtime to harness provider value.""" + normalized = normalize_runtime_provider(runtime) + if normalized == "claude_code": + return "claude" + if normalized == "open_code": + return 
"opencode" + return "codex" + + +def runtime_to_harness_adapter(runtime: str) -> str: + """Map runtime aliases to AgentField harness adapter values.""" + normalized = normalize_runtime_provider(runtime) + if normalized == "claude_code": + return "claude-code" + if normalized == "open_code": + return "opencode" + return "codex" diff --git a/tests/test_model_config.py b/tests/test_model_config.py index 086f020..fff5745 100644 --- a/tests/test_model_config.py +++ b/tests/test_model_config.py @@ -367,5 +367,30 @@ def test_ci_gate_caps_round_trip(self) -> None: self.assertEqual(exec_cfg.ci_poll_seconds, 15) +class TestRuntimeProviderMapping(unittest.TestCase): + def test_runtime_to_harness_adapter_maps_all_supported_runtimes(self) -> None: + from swe_af.runtime.providers import runtime_to_harness_adapter + + self.assertEqual(runtime_to_harness_adapter("claude_code"), "claude-code") + self.assertEqual(runtime_to_harness_adapter("claude"), "claude-code") + self.assertEqual(runtime_to_harness_adapter("claude-code"), "claude-code") + self.assertEqual(runtime_to_harness_adapter("open_code"), "opencode") + self.assertEqual(runtime_to_harness_adapter("opencode"), "opencode") + self.assertEqual(runtime_to_harness_adapter("codex"), "codex") + + def test_runtime_to_harness_provider_maps_all_supported_runtimes(self) -> None: + from swe_af.runtime.providers import runtime_to_harness_provider + + self.assertEqual(runtime_to_harness_provider("claude_code"), "claude") + self.assertEqual(runtime_to_harness_provider("open_code"), "opencode") + self.assertEqual(runtime_to_harness_provider("codex"), "codex") + + def test_unknown_runtime_provider_raises(self) -> None: + from swe_af.runtime.providers import normalize_runtime_provider + + with self.assertRaises(ValueError): + normalize_runtime_provider("bad_runtime") + + if __name__ == "__main__": unittest.main() From 66700fab11dcabdc4f7411a8061f2886f2473b4a Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 16:26:49 +0530 Subject: 
[PATCH 02/13] feat: support codex runtime in planner config --- swe_af/execution/schemas.py | 26 +++++++++++--------------- tests/test_model_config.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/swe_af/execution/schemas.py b/swe_af/execution/schemas.py index 43ecf32..a6550a6 100644 --- a/swe_af/execution/schemas.py +++ b/swe_af/execution/schemas.py @@ -16,6 +16,7 @@ field_validator, model_validator, ) +from swe_af.runtime.providers import RUNTIME_VALUES, runtime_to_harness_provider # Global default for all agent max_turns. Change this one value to adjust everywhere. DEFAULT_AGENT_MAX_TURNS: int = 150 @@ -460,8 +461,6 @@ class QASynthesisResult(BaseModel): # Model configuration: runtime + flat role map # --------------------------------------------------------------------------- -RUNTIME_VALUES: tuple[str, str] = ("claude_code", "open_code") - ROLE_TO_MODEL_FIELD: dict[str, str] = { "pm": "pm_model", "architect": "architect_model", @@ -511,20 +510,17 @@ class QASynthesisResult(BaseModel): "open_code": { **{field: "openrouter/minimax/minimax-m2.5" for field in ALL_MODEL_FIELDS}, }, + "codex": { + **{field: "gpt-5.3-codex" for field in ALL_MODEL_FIELDS}, + }, } -def _runtime_to_provider(runtime: str) -> Literal["claude", "opencode"]: - if runtime == "claude_code": - return "claude" - if runtime == "open_code": - return "opencode" - raise ValueError( - f"Unsupported runtime {runtime!r}. Valid runtimes: {', '.join(RUNTIME_VALUES)}" - ) +def _runtime_to_provider(runtime: str) -> Literal["claude", "opencode", "codex"]: + return runtime_to_harness_provider(runtime) # type: ignore[return-value] -def _default_runtime() -> Literal["claude_code", "open_code"]: +def _default_runtime() -> Literal["claude_code", "open_code", "codex"]: """Default runtime, honoring the ``SWE_DEFAULT_RUNTIME`` env var. 
Lets the deployer pick the runtime without every caller having to pass @@ -685,7 +681,7 @@ class BuildConfig(BaseModel): model_config = ConfigDict(extra="forbid") - runtime: Literal["claude_code", "open_code"] = Field(default_factory=_default_runtime) + runtime: Literal["claude_code", "open_code", "codex"] = Field(default_factory=_default_runtime) models: dict[str, str] | None = None max_review_iterations: int = 2 @@ -800,7 +796,7 @@ def model_post_init(self, __context: Any) -> None: _validate_flat_models(self.models) @property - def ai_provider(self) -> Literal["claude", "opencode"]: + def ai_provider(self) -> Literal["claude", "opencode", "codex"]: return _runtime_to_provider(self.runtime) @property @@ -989,7 +985,7 @@ class ExecutionConfig(BaseModel): model_config = ConfigDict(extra="forbid") - runtime: Literal["claude_code", "open_code"] = Field(default_factory=_default_runtime) + runtime: Literal["claude_code", "open_code", "codex"] = Field(default_factory=_default_runtime) models: dict[str, str] | None = None _resolved_models: dict[str, str] = PrivateAttr(default_factory=dict) @@ -1039,7 +1035,7 @@ def _model_for(self, field_name: str) -> str: return self._resolved_models[field_name] @property - def ai_provider(self) -> Literal["claude", "opencode"]: + def ai_provider(self) -> Literal["claude", "opencode", "codex"]: return _runtime_to_provider(self.runtime) @property diff --git a/tests/test_model_config.py b/tests/test_model_config.py index fff5745..f2e02ff 100644 --- a/tests/test_model_config.py +++ b/tests/test_model_config.py @@ -136,6 +136,35 @@ def test_empty_env_uses_claude_code(self) -> None: self.assertEqual(BuildConfig().runtime, "claude_code") +class TestCodexRuntimeConfig(unittest.TestCase): + def test_codex_runtime_provider_and_defaults(self) -> None: + cfg = BuildConfig(runtime="codex") + self.assertEqual(cfg.ai_provider, "codex") + resolved = cfg.resolved_models() + for field in ALL_MODEL_FIELDS: + self.assertEqual(resolved[field], 
"gpt-5.3-codex") + + def test_codex_execution_config_provider_and_defaults(self) -> None: + cfg = ExecutionConfig(runtime="codex") + self.assertEqual(cfg.ai_provider, "codex") + self.assertEqual(cfg.coder_model, "gpt-5.3-codex") + self.assertEqual(cfg.verifier_model, "gpt-5.3-codex") + + def test_env_codex_runtime_overrides_default(self) -> None: + with mock.patch.dict(os.environ, {"SWE_DEFAULT_RUNTIME": "codex"}): + self.assertEqual(BuildConfig().runtime, "codex") + self.assertEqual(ExecutionConfig().runtime, "codex") + self.assertEqual(BuildConfig().ai_provider, "codex") + + def test_codex_models_default_and_role_override(self) -> None: + cfg = ExecutionConfig( + runtime="codex", + models={"default": "gpt-5.3-codex", "coder": "gpt-5.3-codex-spark"}, + ) + self.assertEqual(cfg.coder_model, "gpt-5.3-codex-spark") + self.assertEqual(cfg.qa_model, "gpt-5.3-codex") + + class TestDefaultModelFromEnv(unittest.TestCase): """`SWE_DEFAULT_MODEL` lets the deployer pin a single model id without code changes or threading config through every caller. 
Caller-supplied From 3a057d7b288d026a2676035871e000a48e167a66 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 16:30:11 +0530 Subject: [PATCH 03/13] feat: support codex runtime in fast agent --- swe_af/fast/app.py | 3 ++- swe_af/fast/schemas.py | 12 ++++++++++-- tests/fast/test_app.py | 13 ++++++++++++- ...t_fast_init_executor_planner_verifier_routing.py | 6 ++++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/swe_af/fast/app.py b/swe_af/fast/app.py index c1f2641..52d70be 100644 --- a/swe_af/fast/app.py +++ b/swe_af/fast/app.py @@ -20,6 +20,7 @@ from swe_af.execution.envelope import unwrap_call_result as _unwrap from swe_af.fast import fast_router from swe_af.fast.schemas import FastBuildConfig, FastBuildResult, fast_resolve_models +from swe_af.runtime.providers import runtime_to_harness_provider NODE_ID = os.getenv("NODE_ID", "swe-fast") @@ -48,7 +49,7 @@ def _repo_name_from_url(url: str) -> str: def _runtime_to_provider(runtime: str) -> str: """Map runtime string to ai_provider string.""" - return "claude" if runtime == "claude_code" else "opencode" + return runtime_to_harness_provider(runtime) @app.reasoner() diff --git a/swe_af/fast/schemas.py b/swe_af/fast/schemas.py index cd7c86d..7358eac 100644 --- a/swe_af/fast/schemas.py +++ b/swe_af/fast/schemas.py @@ -2,9 +2,11 @@ from __future__ import annotations +import os from typing import Literal -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field +from swe_af.runtime.providers import RUNTIME_VALUES # --------------------------------------------------------------------------- # Runtime default model strings @@ -12,10 +14,12 @@ _CLAUDE_CODE_DEFAULT = "haiku" _OPEN_CODE_DEFAULT = "qwen/qwen-2.5-coder-32b-instruct" +_CODEX_DEFAULT = "gpt-5.3-codex" _RUNTIME_DEFAULTS: dict[str, str] = { "claude_code": _CLAUDE_CODE_DEFAULT, "open_code": _OPEN_CODE_DEFAULT, + "codex": _CODEX_DEFAULT, } # All four roles resolved by fast_resolve_models() @@ -89,13 
+93,17 @@ class FastVerificationResult(BaseModel): # Build-level schemas # --------------------------------------------------------------------------- +def _default_fast_runtime() -> str: + value = os.getenv("SWE_DEFAULT_RUNTIME", "claude_code") + return value if value in RUNTIME_VALUES else "claude_code" + class FastBuildConfig(BaseModel): """Configuration for a fast single-pass build run.""" model_config = ConfigDict(extra="forbid") - runtime: Literal["claude_code", "open_code"] = "claude_code" + runtime: Literal["claude_code", "open_code", "codex"] = Field(default_factory=_default_fast_runtime) models: dict[str, str] | None = None max_tasks: int = 10 task_timeout_seconds: int = 300 diff --git a/tests/fast/test_app.py b/tests/fast/test_app.py index dc9ae35..4d3a5b3 100644 --- a/tests/fast/test_app.py +++ b/tests/fast/test_app.py @@ -491,7 +491,18 @@ def test_runtime_to_provider_helper(self) -> None: assert _runtime_to_provider("claude_code") == "claude" assert _runtime_to_provider("open_code") == "opencode" - assert _runtime_to_provider("other") == "opencode" + assert _runtime_to_provider("codex") == "codex" + + +def test_fast_build_config_accepts_codex_runtime() -> None: + from swe_af.fast.schemas import FastBuildConfig, fast_resolve_models + + cfg = FastBuildConfig(runtime="codex") + resolved = fast_resolve_models(cfg) + assert resolved["pm_model"] == "gpt-5.3-codex" + assert resolved["coder_model"] == "gpt-5.3-codex" + assert resolved["verifier_model"] == "gpt-5.3-codex" + assert resolved["git_model"] == "gpt-5.3-codex" class TestBuildNonFatalPaths: diff --git a/tests/fast/test_fast_init_executor_planner_verifier_routing.py b/tests/fast/test_fast_init_executor_planner_verifier_routing.py index 89d2458..09d6a29 100644 --- a/tests/fast/test_fast_init_executor_planner_verifier_routing.py +++ b/tests/fast/test_fast_init_executor_planner_verifier_routing.py @@ -996,3 +996,9 @@ def test_verifier_ai_provider_param_present(self) -> None: assert "ai_provider" in 
sig.parameters, ( "fast_verify must accept 'ai_provider' parameter" ) + + +def test_runtime_to_provider_codex_runtime_maps_to_codex() -> None: + import swe_af.fast.app as fast_app + + assert fast_app._runtime_to_provider("codex") == "codex" From ad9b2b804c88467b50148d9e43931147cd34d04c Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 16:33:43 +0530 Subject: [PATCH 04/13] refactor: route harness providers through shared adapter --- swe_af/execution/_replanner_compat.py | 3 ++- swe_af/fast/planner.py | 4 +-- swe_af/reasoners/execution_agents.py | 37 +++++++++++++------------- swe_af/reasoners/pipeline.py | 9 ++++--- tests/test_runtime_provider_routing.py | 34 +++++++++++++++++++++++ 5 files changed, 62 insertions(+), 25 deletions(-) create mode 100644 tests/test_runtime_provider_routing.py diff --git a/swe_af/execution/_replanner_compat.py b/swe_af/execution/_replanner_compat.py index 14b9fdc..ad88a0b 100644 --- a/swe_af/execution/_replanner_compat.py +++ b/swe_af/execution/_replanner_compat.py @@ -15,6 +15,7 @@ ) from swe_af.prompts.replanner import SYSTEM_PROMPT, replanner_task_prompt from swe_af.reasoners import router +from swe_af.runtime.providers import runtime_to_harness_adapter async def invoke_replanner( @@ -39,7 +40,7 @@ async def invoke_replanner( if dag_state.artifacts_dir else None ) - provider = "claude-code" if config.ai_provider == "claude" else config.ai_provider + provider = runtime_to_harness_adapter(config.ai_provider) try: result = await router.harness( diff --git a/swe_af/fast/planner.py b/swe_af/fast/planner.py index 9cb87c3..845c9b9 100644 --- a/swe_af/fast/planner.py +++ b/swe_af/fast/planner.py @@ -12,6 +12,7 @@ from swe_af.fast import fast_router from swe_af.fast.prompts import FAST_PLANNER_SYSTEM_PROMPT, fast_planner_task_prompt from swe_af.fast.schemas import FastPlanResult, FastTask +from swe_af.runtime.providers import runtime_to_harness_adapter logger = logging.getLogger(__name__) @@ -96,8 +97,7 @@ async def 
fast_plan_tasks( additional_context=additional_context, ) - # Map 'claude' to 'claude-code' for AgentField router compatibility - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: res = await fast_router.harness( prompt=task_prompt, diff --git a/swe_af/reasoners/execution_agents.py b/swe_af/reasoners/execution_agents.py index 481a56c..195a64c 100644 --- a/swe_af/reasoners/execution_agents.py +++ b/swe_af/reasoners/execution_agents.py @@ -36,6 +36,7 @@ VerificationResult, WorkspaceInfo, ) +from swe_af.runtime.providers import runtime_to_harness_adapter from swe_af.prompts.ci_fixer import SYSTEM_PROMPT as CI_FIXER_SYSTEM_PROMPT from swe_af.prompts.ci_fixer import ci_fixer_task_prompt from swe_af.prompts.pr_resolver import SYSTEM_PROMPT as PR_RESOLVER_SYSTEM_PROMPT @@ -161,7 +162,7 @@ async def run_retry_advisor( workspace_manifest=ws_manifest, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -244,7 +245,7 @@ async def run_issue_advisor( ) cwd = worktree_path or dag_state_summary.get("repo_path", ".") - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -324,7 +325,7 @@ async def run_replanner( ) log_dir = os.path.join(state.artifacts_dir, "logs") if state.artifacts_dir else None - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) current_prompt = task_prompt for attempt in range(2): @@ -440,7 +441,7 @@ class IssueWriterOutput(BaseModel): issue_file_path: str success: bool - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -507,7 +508,7 @@ async def run_verifier( 
workspace_manifest=ws_manifest, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -592,7 +593,7 @@ async def run_git_init( "- If the error indicates a parsing issue, ensure your output is valid JSON\n" ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -668,7 +669,7 @@ class WorkspaceSetupResult(BaseModel): workspaces: list[WorkspaceInfo] success: bool - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -733,7 +734,7 @@ async def run_merger( architecture_summary=architecture_summary, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -809,7 +810,7 @@ async def run_integration_tester( workspace_manifest=ws_manifest, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -878,7 +879,7 @@ class WorkspaceCleanupResult(BaseModel): success: bool cleaned: list[str] = [] - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -955,7 +956,7 @@ async def run_coder( target_repo=target_repo, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -1032,7 +1033,7 @@ async def run_qa( target_repo=target_repo, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -1112,7 +1113,7 @@ async def 
run_code_reviewer( target_repo=target_repo, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -1295,7 +1296,7 @@ class FixGeneratorOutput(BaseModel): debt_items: list[dict] = [] summary: str = "" - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -1362,7 +1363,7 @@ async def run_repo_finalize( task_prompt = repo_finalize_task_prompt(repo_path=repo_path) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -1436,7 +1437,7 @@ async def run_github_pr( accumulated_debt=accumulated_debt, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -1579,7 +1580,7 @@ async def run_ci_fixer( previous_attempts=previous_attempts, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( @@ -1679,7 +1680,7 @@ async def run_pr_resolver( additional_context=additional_context, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) try: result = await router.harness( diff --git a/swe_af/reasoners/pipeline.py b/swe_af/reasoners/pipeline.py index a38007f..b65f60d 100644 --- a/swe_af/reasoners/pipeline.py +++ b/swe_af/reasoners/pipeline.py @@ -22,6 +22,7 @@ PRD, ReviewResult, ) +from swe_af.runtime.providers import runtime_to_harness_adapter from . 
import router @@ -191,7 +192,7 @@ async def run_product_manager( additional_context=additional_context, workspace_manifest=ws_manifest, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) result = await router.harness( prompt=task_prompt, schema=PRD, @@ -251,7 +252,7 @@ async def run_architect( feedback=feedback or None, workspace_manifest=ws_manifest, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) result = await router.harness( prompt=task_prompt, schema=Architecture, @@ -306,7 +307,7 @@ async def run_tech_lead( revision_number=revision_number, workspace_manifest=ws_manifest, ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) result = await router.harness( prompt=task_prompt, schema=ReviewResult, @@ -383,7 +384,7 @@ class SprintPlanOutput(BaseModel): prd_path=paths["prd"], architecture_path=paths["architecture"], ) - provider = "claude-code" if ai_provider == "claude" else ai_provider + provider = runtime_to_harness_adapter(ai_provider) result = await router.harness( prompt=task_prompt, schema=SprintPlanOutput, diff --git a/tests/test_runtime_provider_routing.py b/tests/test_runtime_provider_routing.py new file mode 100644 index 0000000..a618509 --- /dev/null +++ b/tests/test_runtime_provider_routing.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from swe_af.runtime.providers import runtime_to_harness_adapter + + +def test_runtime_to_harness_adapter_supports_codex() -> None: + assert runtime_to_harness_adapter("codex") == "codex" + + +def test_execution_agents_source_uses_shared_runtime_adapter() -> None: + import inspect + import swe_af.reasoners.execution_agents as execution_agents + + source = inspect.getsource(execution_agents) + assert "runtime_to_harness_adapter" in source + assert 'provider = "claude-code" if ai_provider 
== "claude" else ai_provider' not in source + + +def test_pipeline_source_uses_shared_runtime_adapter() -> None: + import inspect + import swe_af.reasoners.pipeline as pipeline + + source = inspect.getsource(pipeline) + assert "runtime_to_harness_adapter" in source + assert 'provider = "claude-code" if ai_provider == "claude" else ai_provider' not in source + + +def test_fast_planner_source_uses_shared_runtime_adapter() -> None: + import inspect + import swe_af.fast.planner as planner + + source = inspect.getsource(planner) + assert "runtime_to_harness_adapter" in source + assert 'provider = "claude-code" if ai_provider == "claude" else ai_provider' not in source From d0905ee28808d4d01113a42bd363d6129d0272a2 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 16:37:15 +0530 Subject: [PATCH 05/13] fix: preserve fast runtime fallback mapping --- swe_af/fast/app.py | 9 ++++++--- tests/fast/test_app.py | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/swe_af/fast/app.py b/swe_af/fast/app.py index 52d70be..cb1252e 100644 --- a/swe_af/fast/app.py +++ b/swe_af/fast/app.py @@ -20,7 +20,6 @@ from swe_af.execution.envelope import unwrap_call_result as _unwrap from swe_af.fast import fast_router from swe_af.fast.schemas import FastBuildConfig, FastBuildResult, fast_resolve_models -from swe_af.runtime.providers import runtime_to_harness_provider NODE_ID = os.getenv("NODE_ID", "swe-fast") @@ -48,8 +47,12 @@ def _repo_name_from_url(url: str) -> str: def _runtime_to_provider(runtime: str) -> str: - """Map runtime string to ai_provider string.""" - return runtime_to_harness_provider(runtime) + """Map runtime string to ai_provider string, preserving legacy fast fallback.""" + if runtime == "claude_code": + return "claude" + if runtime == "codex": + return "codex" + return "opencode" @app.reasoner() diff --git a/tests/fast/test_app.py b/tests/fast/test_app.py index 4d3a5b3..7a8ffaf 100644 --- a/tests/fast/test_app.py +++ b/tests/fast/test_app.py @@ -492,6 
+492,7 @@ def test_runtime_to_provider_helper(self) -> None: assert _runtime_to_provider("claude_code") == "claude" assert _runtime_to_provider("open_code") == "opencode" assert _runtime_to_provider("codex") == "codex" + assert _runtime_to_provider("other") == "opencode" def test_fast_build_config_accepts_codex_runtime() -> None: From 1662c6e99bfb684834489e5acece134272a34cf1 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 16:51:49 +0530 Subject: [PATCH 06/13] feat: patch agentfield codex structured output --- swe_af/fast/__init__.py | 3 + swe_af/reasoners/__init__.py | 3 + swe_af/runtime/__init__.py | 2 + swe_af/runtime/codex_harness_patch.py | 234 ++++++++++++++++++++++++++ tests/test_codex_harness_patch.py | 36 ++++ 5 files changed, 278 insertions(+) create mode 100644 swe_af/runtime/codex_harness_patch.py create mode 100644 tests/test_codex_harness_patch.py diff --git a/swe_af/fast/__init__.py b/swe_af/fast/__init__.py index 0ac3693..78d875e 100644 --- a/swe_af/fast/__init__.py +++ b/swe_af/fast/__init__.py @@ -17,6 +17,9 @@ from __future__ import annotations from agentfield import AgentRouter +from swe_af.runtime.codex_harness_patch import apply_codex_harness_patch + +apply_codex_harness_patch() fast_router = AgentRouter(tags=["swe-fast"]) diff --git a/swe_af/reasoners/__init__.py b/swe_af/reasoners/__init__.py index 25d2090..22dbf58 100644 --- a/swe_af/reasoners/__init__.py +++ b/swe_af/reasoners/__init__.py @@ -1,4 +1,7 @@ from agentfield import AgentRouter +from swe_af.runtime.codex_harness_patch import apply_codex_harness_patch + +apply_codex_harness_patch() router = AgentRouter(tags=["swe-planner"]) diff --git a/swe_af/runtime/__init__.py b/swe_af/runtime/__init__.py index 69c8d7e..1b399bf 100644 --- a/swe_af/runtime/__init__.py +++ b/swe_af/runtime/__init__.py @@ -1,5 +1,6 @@ """Runtime mapping helpers.""" +from swe_af.runtime.codex_harness_patch import apply_codex_harness_patch from .providers import ( RUNTIME_VALUES, 
normalize_runtime_provider, @@ -12,4 +13,5 @@ "normalize_runtime_provider", "runtime_to_harness_adapter", "runtime_to_harness_provider", + "apply_codex_harness_patch", ] diff --git a/swe_af/runtime/codex_harness_patch.py b/swe_af/runtime/codex_harness_patch.py new file mode 100644 index 0000000..39fa936 --- /dev/null +++ b/swe_af/runtime/codex_harness_patch.py @@ -0,0 +1,234 @@ +from __future__ import annotations + +import asyncio +import json +import os +from pathlib import Path +from typing import Any + +_PATCHED = False + + +def _codex_strict_json_schema(schema: dict[str, Any]) -> dict[str, Any]: + if not isinstance(schema, dict): + return schema + strict = dict(schema) + schema_type = strict.get("type") + if schema_type == "object": + properties = strict.get("properties") + if isinstance(properties, dict): + cleaned: dict[str, Any] = {} + for key, value in properties.items(): + if isinstance(value, dict): + child = dict(value) + child.pop("default", None) + cleaned[key] = _codex_strict_json_schema(child) + else: + cleaned[key] = value + strict["properties"] = cleaned + strict["required"] = list(cleaned.keys()) + strict["additionalProperties"] = False + if schema_type == "array": + items = strict.get("items") + if isinstance(items, dict): + strict["items"] = _codex_strict_json_schema(items) + for key in ("allOf", "anyOf", "oneOf"): + branch = strict.get(key) + if isinstance(branch, list): + strict[key] = [ + _codex_strict_json_schema(item) if isinstance(item, dict) else item + for item in branch + ] + return strict + + +def _augment_codex_error_message(message: str, detail: str) -> str: + lower = f"{message}\n{detail}".lower() + hints = ( + ".git/index.lock", + ".git/refs", + "repository metadata is read-only", + ) + if any(token in lower for token in hints): + return ( + f"{message}\n\n" + "Codex tried to mutate git metadata under workspace-write; " + "git must be host-managed." 
+ ) + return message + + +async def _run_codex_cli_with_stdin( + cmd: list[str], + prompt_for_codex: str, + *, + env: dict[str, str] | None, + cwd: str | None, +) -> tuple[str, str, int]: + proc = await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + env=env, + cwd=cwd, + ) + stdout_bytes, stderr_bytes = await proc.communicate(prompt_for_codex.encode("utf-8")) + stdout = stdout_bytes.decode("utf-8", errors="replace") + stderr = stderr_bytes.decode("utf-8", errors="replace") + return stdout, stderr, int(proc.returncode) + + +def apply_codex_harness_patch() -> None: + global _PATCHED + if _PATCHED: + return + try: + from agentfield.harness import _runner, _schema + from agentfield.harness._cli import ( + estimate_cli_cost, + extract_final_text, + parse_jsonl, + strip_ansi, + ) + from agentfield.harness._result import FailureType, Metrics, RawResult + from agentfield.harness.providers.codex import CodexProvider + except Exception: + return + + original_build_prompt_suffix = _schema.build_prompt_suffix + + def build_prompt_suffix_with_schema_file(schema: Any, cwd: str) -> str: + schema_json = json.dumps( + _codex_strict_json_schema(_schema.schema_to_json_schema(schema)), + indent=2, + ) + _schema.write_schema_file(schema_json, cwd) + return original_build_prompt_suffix(schema, cwd) + + async def execute_with_native_structured_output(self: Any, prompt: str, options: dict[str, object]) -> Any: + cwd = str(options.get("cwd")) if isinstance(options.get("cwd"), str) else None + model = options.get("model") + permission_mode = options.get("permission_mode") + env_value = options.get("env") + merged_env = {**os.environ} + if isinstance(env_value, dict): + merged_env.update({str(k): str(v) for k, v in env_value.items() if isinstance(k, str)}) + + cmd = [self._bin, "exec", "--json", "--skip-git-repo-check"] + if cwd: + cmd.extend(["-C", cwd]) + if model: + cmd.extend(["-m", 
str(model)]) + + if permission_mode == "auto": + cmd.append("--dangerously-bypass-approvals-and-sandbox") + elif permission_mode in {"read-only", "workspace-write", "danger-full-access"}: + cmd.extend(["--sandbox", str(permission_mode)]) + else: + cmd.extend(["--sandbox", "workspace-write"]) + + prompt_for_codex = prompt + if cwd: + schema_path = _schema.get_schema_path(cwd) + output_path = _schema.get_output_path(cwd) + if Path(schema_path).exists(): + cmd.extend(["--output-schema", schema_path]) + cmd.extend(["--output-last-message", output_path]) + prompt_for_codex += ( + "\n\n---\n" + "CODEX STRUCTURED OUTPUT CONTRACT:\n" + f"The Codex CLI will save your final response to: {output_path}\n" + f"Your final response MUST be a single JSON object conforming to: {schema_path}\n" + "Do not make the missing output file the subject of the task. " + "Complete the user's task, then return the required JSON object as your final response." + ) + + try: + start = asyncio.get_running_loop().time() + timeout_seconds = options.get("timeout_seconds") + if isinstance(timeout_seconds, (int, float)) and timeout_seconds > 0: + stdout, stderr, returncode = await asyncio.wait_for( + _run_codex_cli_with_stdin(cmd, prompt_for_codex, env=merged_env, cwd=cwd), + timeout=float(timeout_seconds), + ) + else: + stdout, stderr, returncode = await _run_codex_cli_with_stdin( + cmd, prompt_for_codex, env=merged_env, cwd=cwd + ) + duration_ms = int((asyncio.get_running_loop().time() - start) * 1000) + except FileNotFoundError as exc: + return RawResult( + result="", + messages=[], + metrics=Metrics( + duration_api_ms=0, + num_turns=1, + total_cost_usd=0.0, + session_id="", + ), + is_error=True, + error_message=str(exc), + failure_type=FailureType.CRASH, + returncode=-1, + ) + except asyncio.TimeoutError: + return RawResult( + result="", + messages=[], + metrics=Metrics( + duration_api_ms=0, + num_turns=1, + total_cost_usd=0.0, + session_id="", + ), + is_error=True, + error_message="Codex CLI timed 
out", + failure_type=FailureType.TIMEOUT, + returncode=-1, + ) + + stderr_clean = strip_ansi(stderr or "") + records = parse_jsonl(stdout or "") + result_text = extract_final_text(records) or "" + + if not result_text and cwd: + output_path = _schema.get_output_path(cwd) + output_file = Path(output_path) + if output_file.exists(): + try: + result_text = output_file.read_text(encoding="utf-8") + except Exception: + result_text = "" + + is_error = returncode != 0 + error_message = "" + failure_type = FailureType.NONE + if is_error: + base_error = stderr_clean or "Codex CLI failed" + error_message = _augment_codex_error_message(base_error, base_error) + failure_type = FailureType.CRASH + + return RawResult( + result=result_text, + messages=records if isinstance(records, list) else [], + metrics=Metrics( + duration_api_ms=duration_ms, + num_turns=1, + total_cost_usd=estimate_cli_cost( + model=str(options.get("model", "")), + prompt=prompt_for_codex, + result_text=result_text, + ), + session_id="", + ), + is_error=is_error, + error_message=error_message, + failure_type=failure_type, + returncode=returncode, + ) + + _schema.build_prompt_suffix = build_prompt_suffix_with_schema_file + _runner.build_prompt_suffix = build_prompt_suffix_with_schema_file + CodexProvider.execute = execute_with_native_structured_output + _PATCHED = True diff --git a/tests/test_codex_harness_patch.py b/tests/test_codex_harness_patch.py new file mode 100644 index 0000000..260aec1 --- /dev/null +++ b/tests/test_codex_harness_patch.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from swe_af.runtime.codex_harness_patch import ( + _augment_codex_error_message, + _codex_strict_json_schema, +) + + +def test_codex_strict_json_schema_requires_all_object_properties() -> None: + schema = { + "type": "object", + "properties": { + "summary": {"type": "string", "default": ""}, + "files_changed": {"type": "array", "items": {"type": "string"}}, + }, + } + + strict = _codex_strict_json_schema(schema) 
+ + assert strict["required"] == ["summary", "files_changed"] + assert strict["additionalProperties"] is False + assert "default" not in strict["properties"]["summary"] + + +def test_codex_git_metadata_error_gets_actionable_hint() -> None: + message = _augment_codex_error_message( + "fatal: cannot create .git/index.lock", + "fatal: cannot create .git/index.lock", + ) + + assert "Codex tried to mutate git metadata under workspace-write" in message + assert "git must be host-managed" in message + + +def test_codex_unrelated_error_is_unchanged() -> None: + assert _augment_codex_error_message("plain error", "plain error") == "plain error" From 13661039c1bca2b7afd51d9bbdc3c57a43d4dad6 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 16:59:13 +0530 Subject: [PATCH 07/13] chore: install codex cli in docker images --- Dockerfile | 33 +++++++++++++++++++++++++++- docker-compose.local.yml | 5 +++++ docker-compose.yml | 9 ++++++++ tests/fast/test_docker_config.py | 37 ++++++++++++++++++++++++++------ tests/test_dockerfile.py | 11 ++++++++++ 5 files changed, 88 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6663192..9687f53 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ WORKDIR /app # System deps: git (worktrees, branches), curl (healthcheck), jq (agent bash), # openssh-client (optional SSH git), gh CLI (draft PRs) RUN apt-get update && apt-get install -y --no-install-recommends \ - git curl openssh-client jq && \ + git curl openssh-client jq nodejs npm && \ # Install GitHub CLI curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \ @@ -17,6 +17,37 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ apt-get update && apt-get install -y --no-install-recommends gh && \ # Install OpenCode CLI v1.2+ for opencode provider (with run --model support) curl -fsSL https://opencode.ai/install | bash && \ + # Install Codex CLI for codex 
runtime provider + npm install -g @openai/codex && \ + codex_path="$(command -v codex)" && \ + mv "${codex_path}" /usr/local/bin/codex-real && \ + printf '%s\n' \ + '#!/usr/bin/env bash' \ + 'set -euo pipefail' \ + '' \ + 'auth_mode="${SWE_CODEX_AUTH_MODE:-auto}"' \ + '' \ + 'case "${auth_mode}" in' \ + ' chatgpt)' \ + ' unset OPENAI_API_KEY' \ + ' ;;' \ + ' api_key)' \ + ' if [ -z "${OPENAI_API_KEY:-}" ]; then' \ + ' echo "SWE_CODEX_AUTH_MODE=api_key requires OPENAI_API_KEY to be set" >&2' \ + ' exit 2' \ + ' fi' \ + ' ;;' \ + ' auto)' \ + ' ;;' \ + ' *)' \ + ' echo "Invalid SWE_CODEX_AUTH_MODE: ${auth_mode}. Expected one of: auto, chatgpt, api_key" >&2' \ + ' exit 2' \ + ' ;;' \ + 'esac' \ + '' \ + 'exec /usr/local/bin/codex-real "$@"' \ + > /usr/local/bin/codex && \ + chmod +x /usr/local/bin/codex && \ rm -rf /var/lib/apt/lists/* # Add OpenCode to PATH for non-interactive shells diff --git a/docker-compose.local.yml b/docker-compose.local.yml index c7d9da0..bd00942 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -19,10 +19,15 @@ services: - PORT=8003 # Callback URL for control plane to reach this agent - AGENT_CALLBACK_URL=http://localhost:8003 + - SWE_DEFAULT_RUNTIME=${SWE_DEFAULT_RUNTIME:-claude_code} + - SWE_DEFAULT_MODEL=${SWE_DEFAULT_MODEL:-} + - SWE_CODEX_AUTH_MODE=${SWE_CODEX_AUTH_MODE:-auto} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} ports: - "8003:8003" volumes: - workspaces:/workspaces + - ${HOME}/.codex:/root/.codex extra_hosts: - "host.docker.internal:host-gateway" diff --git a/docker-compose.yml b/docker-compose.yml index c476a44..a4a75d3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,10 +19,15 @@ services: - NODE_ID=swe-planner - PORT=8003 - AGENT_CALLBACK_URL=http://swe-agent:8003 + - SWE_DEFAULT_RUNTIME=${SWE_DEFAULT_RUNTIME:-claude_code} + - SWE_DEFAULT_MODEL=${SWE_DEFAULT_MODEL:-} + - SWE_CODEX_AUTH_MODE=${SWE_CODEX_AUTH_MODE:-auto} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} ports: - "8003:8003" volumes: - 
workspaces:/workspaces + - ${HOME}/.codex:/root/.codex depends_on: - control-plane deploy: @@ -45,10 +50,14 @@ services: - OPENAI_API_KEY=${OPENAI_API_KEY:-} - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} - OPENCODE_MODEL=${OPENCODE_MODEL:-} + - SWE_DEFAULT_RUNTIME=${SWE_DEFAULT_RUNTIME:-claude_code} + - SWE_DEFAULT_MODEL=${SWE_DEFAULT_MODEL:-} + - SWE_CODEX_AUTH_MODE=${SWE_CODEX_AUTH_MODE:-auto} ports: - "8004:8004" volumes: - workspaces:/workspaces + - ${HOME}/.codex:/root/.codex depends_on: - control-plane diff --git a/tests/fast/test_docker_config.py b/tests/fast/test_docker_config.py index d01085c..27b1776 100644 --- a/tests/fast/test_docker_config.py +++ b/tests/fast/test_docker_config.py @@ -18,6 +18,16 @@ def load_pyproject(): return tomllib.load(f) +def _service_environment(service_name: str) -> list[str]: + compose = load_docker_compose() + return compose["services"][service_name]["environment"] + + +def _service_volumes(service_name: str) -> list[str]: + compose = load_docker_compose() + return compose["services"][service_name]["volumes"] + + def test_swe_fast_service_exists(): """swe-fast service is present in docker-compose.yml.""" compose = load_docker_compose() @@ -26,15 +36,13 @@ def test_swe_fast_service_exists(): def test_swe_fast_node_id_env_var(): """swe-fast service has NODE_ID=swe-fast in environment.""" - compose = load_docker_compose() - env = compose["services"]["swe-fast"]["environment"] + env = _service_environment("swe-fast") assert "NODE_ID=swe-fast" in env, "NODE_ID=swe-fast must be in swe-fast environment" def test_swe_fast_port_env_var(): """swe-fast service has PORT=8004 in environment.""" - compose = load_docker_compose() - env = compose["services"]["swe-fast"]["environment"] + env = _service_environment("swe-fast") assert "PORT=8004" in env, "PORT=8004 must be in swe-fast environment" @@ -54,8 +62,7 @@ def test_swe_fast_depends_on_control_plane(): def test_swe_fast_agentfield_server_env_var(): """swe-fast service has 
AGENTFIELD_SERVER=http://control-plane:8080 in environment.""" - compose = load_docker_compose() - env = compose["services"]["swe-fast"]["environment"] + env = _service_environment("swe-fast") assert "AGENTFIELD_SERVER=http://control-plane:8080" in env, ( "AGENTFIELD_SERVER=http://control-plane:8080 must be in swe-fast environment" ) @@ -72,6 +79,24 @@ def test_swe_agent_service_unchanged(): assert "8003:8003" in swe_agent["ports"], "swe-agent port mapping must remain 8003:8003" +def test_codex_volume_mount_in_swe_agent_and_swe_fast(): + expected = "${HOME}/.codex:/root/.codex" + assert expected in _service_volumes("swe-agent") + assert expected in _service_volumes("swe-fast") + + +def test_codex_auth_mode_env_in_swe_agent_and_swe_fast(): + expected = "SWE_CODEX_AUTH_MODE=${SWE_CODEX_AUTH_MODE:-auto}" + assert expected in _service_environment("swe-agent") + assert expected in _service_environment("swe-fast") + + +def test_default_runtime_env_in_swe_agent_and_swe_fast(): + expected = "SWE_DEFAULT_RUNTIME=${SWE_DEFAULT_RUNTIME:-claude_code}" + assert expected in _service_environment("swe-agent") + assert expected in _service_environment("swe-fast") + + def test_pyproject_swe_fast_script(): """pyproject.toml [project.scripts] contains swe-fast = 'swe_af.fast.app:main'.""" pyproject = load_pyproject() diff --git a/tests/test_dockerfile.py b/tests/test_dockerfile.py index 00292a3..a4d40f6 100644 --- a/tests/test_dockerfile.py +++ b/tests/test_dockerfile.py @@ -54,3 +54,14 @@ def test_workspaces_created_before_expose(self, dockerfile_content: str) -> None "/workspaces must be created before EXPOSE to ensure it's part of " "the image layer before any volume mount" ) + + +def test_dockerfile_installs_codex_cli(dockerfile_content: str) -> None: + assert "npm install -g @openai/codex" in dockerfile_content + assert "SWE_CODEX_AUTH_MODE" in dockerfile_content + assert "codex-real" in dockerfile_content + + +def test_dockerfile_preserves_opencode_install(dockerfile_content: 
str) -> None: + assert "https://opencode.ai/install" in dockerfile_content + assert "OPENROUTER_API_KEY" in dockerfile_content From bf207c706d17bb381f5a1f823b4931ead1dc3157 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 17:07:39 +0530 Subject: [PATCH 08/13] docs: document codex runtime configuration --- .env.example | 18 ++++++++++++--- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++-- docs/ARCHITECTURE.md | 3 ++- docs/CONTRIBUTING.md | 2 +- docs/SKILL.md | 8 ++++++- docs/deployment.md | 10 ++++++++ 6 files changed, 87 insertions(+), 8 deletions(-) diff --git a/.env.example b/.env.example index e91fdc1..1e99be4 100644 --- a/.env.example +++ b/.env.example @@ -14,9 +14,17 @@ ANTHROPIC_API_KEY=sk-ant-api03-... # OpenRouter (recommended - 200+ models including DeepSeek, Qwen, Llama, MiniMax) # OPENROUTER_API_KEY=sk-or-v1-... -# OpenAI (GPT-4, GPT-4o, etc.) +# OpenAI API-platform billing for OpenAI models and Codex api_key mode. # OPENAI_API_KEY=sk-... +# Codex CLI runtime auth. Values: +# auto Use OPENAI_API_KEY when set; otherwise use local Codex login. +# chatgpt Use ChatGPT Free/Plus/Pro/Team login. Run `codex login` on the +# host, keep OPENAI_API_KEY unset for this process, and Docker will +# mount ~/.codex into both swe-planner and swe-fast. +# api_key Use OpenAI API-platform billing. Set OPENAI_API_KEY=sk-... +# SWE_CODEX_AUTH_MODE=auto + # Google Gemini # GOOGLE_API_KEY=... @@ -66,7 +74,7 @@ GH_TOKEN=ghp_... # Lets the deployer pick the runtime once instead of every caller threading # a config through. Falls back to claude_code if unset; an invalid value is # logged as a warning and ignored. -# SWE_DEFAULT_RUNTIME=claude_code # or: open_code +# SWE_DEFAULT_RUNTIME=claude_code # or: open_code, codex # Default model when callers don't pass `models` in the request config. # Applies to all 16 agent roles for whichever runtime is active. Caller @@ -78,7 +86,7 @@ GH_TOKEN=ghp_... 
# Runtime/model selection is configured via API request config (V2): # { -# "runtime": "claude_code" | "open_code", +# "runtime": "claude_code" | "open_code" | "codex", # "models": { # "default": "sonnet or provider/model-id", # "coder": "provider/model-id", @@ -89,6 +97,7 @@ GH_TOKEN=ghp_... # Runtime mapping: # claude_code -> Claude backend # open_code -> OpenCode backend +# codex -> OpenAI Codex CLI backend # # Legacy keys are removed: ai_provider, preset, model, and all *_model fields. # @@ -97,6 +106,9 @@ GH_TOKEN=ghp_... # # Example Claude runtime request config: # {"runtime": "claude_code", "models": {"default": "sonnet", "coder": "opus"}} +# +# Example Codex runtime request config: +# {"runtime": "codex", "models": {"default": "gpt-5.3-codex"}} # Available open runtime model IDs (format: provider/model-name): # deepseek/deepseek-chat # DeepSeek via OpenRouter diff --git a/README.md b/README.md index d755a51..8f48e46 100644 --- a/README.md +++ b/README.md @@ -188,9 +188,10 @@ Most agent frameworks wrap a single coder loop. SWE-AF is a coordinated engineer -**Claude & open-source models supported**: Run builds with either runtime and tune models per role in one flat config map. +**Claude, open-source, and Codex models supported**: Run builds with any runtime and tune models per role in one flat config map. - `runtime: "claude_code"` maps to Claude backend. - `runtime: "open_code"` maps to OpenCode backend (OpenRouter/OpenAI/Google/Anthropic model IDs). +- `runtime: "codex"` maps to the OpenAI Codex CLI backend. 
## Adaptive Factory Control @@ -279,6 +280,42 @@ curl -X POST http://localhost:8080/api/v1/execute/async/swe-planner.build \ } JSON +# With Codex CLI runtime +curl -X POST http://localhost:8080/api/v1/execute/async/swe-planner.build \ + -H "Content-Type: application/json" \ + -d @- <<'JSON' +{ + "input": { + "goal": "Add JWT auth", + "repo_url": "https://github.com/user/my-project", + "config": { + "runtime": "codex", + "models": { + "default": "gpt-5.3-codex" + } + } + } +} +JSON + +# Fast mode with Codex CLI runtime +curl -X POST http://localhost:8080/api/v1/execute/async/swe-fast.build \ + -H "Content-Type: application/json" \ + -d @- <<'JSON' +{ + "input": { + "goal": "Add a focused bug fix", + "repo_url": "https://github.com/user/my-project", + "config": { + "runtime": "codex", + "models": { + "default": "gpt-5.3-codex" + } + } + } +} +JSON + # Local workspace mode (repo_path) + targeted role override curl -X POST http://localhost:8080/api/v1/execute/async/swe-planner.build \ -H "Content-Type: application/json" \ @@ -303,6 +340,8 @@ JSON For OpenRouter with `open_code`, use model IDs in `openrouter/<provider>/<model>` format (for example `openrouter/minimax/minimax-m2.5`). +For Codex with ChatGPT subscription auth, install the Codex CLI on the host, run `codex login`, leave `OPENAI_API_KEY` unset for this process, and set `SWE_CODEX_AUTH_MODE=chatgpt` or `auto`. For OpenAI API-platform billing, set `SWE_CODEX_AUTH_MODE=api_key` and `OPENAI_API_KEY`. + ### Optional: web search Coding and review agents can look up external documentation, library APIs, error messages, and version/deprecation status during a build. This is opt-in via two env vars on the deployment: @@ -611,7 +650,7 @@ Pass `config` to `build` or `execute`. Full schema: [`swe_af/execution/schemas.p | Key | Default | Description | | ------------------------- | --------------- | ----------------------------------------------------- | -| `runtime` | `"claude_code"` | Model runtime: `"claude_code"` or `"open_code"`.
The default also honors the `SWE_DEFAULT_RUNTIME` env var when no `runtime` is passed in `config` — set it on the deployment so callers don't need to plumb a config through. | +| `runtime` | `"claude_code"` | Model runtime: `"claude_code"`, `"open_code"`, or `"codex"`. The default also honors the `SWE_DEFAULT_RUNTIME` env var when no `runtime` is passed in `config` — set it on the deployment so callers don't need to plumb a config through. | | `models` | `null` | Flat role-model map (`default` + role keys below). Without a caller-supplied value, the `SWE_DEFAULT_MODEL` env var is used as the default for all roles — set it on the deployment to pin a model without code changes. Caller `models.default` or per-role keys still win. | | `max_coding_iterations` | `5` | Inner-loop retry budget | | `max_advisor_invocations` | `2` | Middle-loop advisor budget | @@ -655,6 +694,17 @@ Minimal: } ``` +Codex: + +```json +{ + "runtime": "codex", + "models": { + "default": "gpt-5.3-codex" + } +} +``` + Fully customized: ```json diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index d59dc22..7454588 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -422,7 +422,7 @@ SWE-AF orchestrates 22 specialized agents across four phases. Each agent is a re Every build now uses a single V2 model contract: -- `runtime`: `claude_code` or `open_code` +- `runtime`: `claude_code`, `open_code`, or `codex` - `models`: flat role map (`default` + explicit role keys) Supported role keys: @@ -442,3 +442,4 @@ Runtime defaults: |---|---|---| | `claude_code` | `sonnet` | `qa_synthesizer=haiku` | | `open_code` | `minimax/minimax-m2.5` | none | +| `codex` | `gpt-5.3-codex` | none | diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index f33a363..cda4563 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -6,7 +6,7 @@ Thanks for contributing to SWE-AF. 
- Python 3.12+ - AgentField control plane (`af`) -- Access to an AI runtime used by your run (`claude_code` or `open_code`) +- Access to an AI runtime used by your run (`claude_code`, `open_code`, or `codex`) ## Local setup diff --git a/docs/SKILL.md b/docs/SKILL.md index 2e28557..8b95fa4 100644 --- a/docs/SKILL.md +++ b/docs/SKILL.md @@ -75,7 +75,7 @@ curl -X POST http://localhost:8080/api/v1/execute/async/swe-planner.build \ | Key | Values | Description | |-----|--------|-------------| -| `runtime` | `"claude_code"`, `"open_code"` | AI backend to use | +| `runtime` | `"claude_code"`, `"open_code"`, `"codex"` | AI backend to use | | `models.default` | model ID string | Default model for all agents | | `models.coder` | model ID string | Override for coder role | | `models.qa` | model ID string | Override for QA role | @@ -148,6 +148,12 @@ curl -X POST http://localhost:8080/api/v1/execute/async/swe-planner.build \ 2. Model provider credentials configured in OpenCode (e.g., `OPENAI_API_KEY` for z.ai) 3. Model ID format matches what OpenCode expects +## Requirements for codex Runtime + +1. Codex CLI installed and in PATH. +2. For ChatGPT subscription auth: run `codex login` on the host, set `SWE_CODEX_AUTH_MODE=chatgpt` or `auto`, and leave `OPENAI_API_KEY` unset for the agent process. +3. For OpenAI API-platform billing: set `SWE_CODEX_AUTH_MODE=api_key` and `OPENAI_API_KEY`. 
+ ## Monitoring ```bash diff --git a/docs/deployment.md b/docs/deployment.md index 3d42c3d..c8e9704 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -35,6 +35,13 @@ cp .env.example .env | `OPENAI_API_KEY` | OpenAI API key | | `GOOGLE_API_KEY` | Google Gemini API key | +**For Codex CLI runtime:** + +| Variable | Purpose | +|---|---| +| `SWE_CODEX_AUTH_MODE` | `auto`, `chatgpt`, or `api_key`; defaults to `auto` in Docker | +| `OPENAI_API_KEY` | Required only when `SWE_CODEX_AUTH_MODE=api_key` | + **Optional:** | Variable | Purpose | Default | @@ -51,6 +58,7 @@ cp .env.example .env | `agentfield` | 0.1.67+ | Python SDK (includes opencode v1.4+ fix) | | `claude-agent-sdk` | 0.1.20+ | Claude runtime | | opencode CLI | 1.4+ | Only if using `open_code` runtime (see Known Issues) | +| Codex CLI | latest | Installed in the Docker image; required on host only to run `codex login` for ChatGPT subscription auth | ## Quick Start @@ -68,6 +76,8 @@ This starts: - **swe-agent** on `:8003` — SWE-AF full pipeline (`swe-planner` node) - **swe-fast** on `:8004` — SWE-AF fast mode (`swe-fast` node) +To use Codex with a ChatGPT subscription, run `codex login` on the host before starting Docker and leave `OPENAI_API_KEY` unset for this process. The compose files mount `~/.codex` into both agent containers. To use OpenAI API billing instead, set `SWE_CODEX_AUTH_MODE=api_key` and `OPENAI_API_KEY`. 
+ ### Agent Only (connect to existing control plane) If you already have an AgentField control plane running: From 43417159bfbe081e710a2530fbe1f7804a778429 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 17:23:03 +0530 Subject: [PATCH 09/13] fix: stabilize docker codex runtime startup --- docker-compose.yml | 1 + requirements-docker.txt | 3 +++ tests/fast/test_docker_config.py | 6 ++++++ tests/test_dockerfile.py | 7 +++++++ 4 files changed, 17 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index a4a75d3..e6f0a1f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,6 +38,7 @@ services: context: . dockerfile: Dockerfile command: ["python", "-m", "swe_af.fast"] + env_file: .env environment: - AGENTFIELD_SERVER=http://control-plane:8080 - NODE_ID=swe-fast diff --git a/requirements-docker.txt b/requirements-docker.txt index d98f9aa..82353aa 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -7,3 +7,6 @@ pydantic>=2.0 claude-agent-sdk==0.1.20 hax-sdk>=0.2.0 python-dotenv>=1.0 +# cryptography 48.0.0 currently crashes with SIGILL on some Linux/aarch64 +# Docker hosts when AgentField imports Ed25519 for DID registration. 
+cryptography<46 diff --git a/tests/fast/test_docker_config.py b/tests/fast/test_docker_config.py index 27b1776..97f355b 100644 --- a/tests/fast/test_docker_config.py +++ b/tests/fast/test_docker_config.py @@ -68,6 +68,12 @@ def test_swe_fast_agentfield_server_env_var(): ) +def test_swe_fast_loads_env_file_like_swe_agent(): + """swe-fast should receive .env values the same way as swe-agent.""" + compose = load_docker_compose() + assert compose["services"]["swe-fast"].get("env_file") == ".env" + + def test_swe_agent_service_unchanged(): """Existing swe-agent service is present and unchanged.""" compose = load_docker_compose() diff --git a/tests/test_dockerfile.py b/tests/test_dockerfile.py index a4d40f6..9158b2d 100644 --- a/tests/test_dockerfile.py +++ b/tests/test_dockerfile.py @@ -15,6 +15,7 @@ import pytest DOCKERFILE = Path(__file__).resolve().parent.parent / "Dockerfile" +REQUIREMENTS_DOCKER = Path(__file__).resolve().parent.parent / "requirements-docker.txt" @pytest.fixture(scope="module") @@ -65,3 +66,9 @@ def test_dockerfile_installs_codex_cli(dockerfile_content: str) -> None: def test_dockerfile_preserves_opencode_install(dockerfile_content: str) -> None: assert "https://opencode.ai/install" in dockerfile_content assert "OPENROUTER_API_KEY" in dockerfile_content + + +def test_docker_requirements_pin_cryptography_below_sigill_version() -> None: + """Docker image should avoid cryptography 48 SIGILL on Linux/aarch64.""" + content = REQUIREMENTS_DOCKER.read_text() + assert "cryptography<46" in content From b8bec447023f8418d5afad638d5dfbd38d68a1b1 Mon Sep 17 00:00:00 2001 From: ivasuy Date: Sun, 10 May 2026 17:35:42 +0530 Subject: [PATCH 10/13] fix: make codex structured output schema-valid --- swe_af/runtime/codex_harness_patch.py | 37 +++++++++++++++++++--- tests/test_codex_harness_patch.py | 45 +++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/swe_af/runtime/codex_harness_patch.py 
b/swe_af/runtime/codex_harness_patch.py index 39fa936..42a7288 100644 --- a/swe_af/runtime/codex_harness_patch.py +++ b/swe_af/runtime/codex_harness_patch.py @@ -39,6 +39,18 @@ def _codex_strict_json_schema(schema: dict[str, Any]) -> dict[str, Any]: _codex_strict_json_schema(item) if isinstance(item, dict) else item for item in branch ] + defs = strict.get("$defs") + if isinstance(defs, dict): + strict["$defs"] = { + key: _codex_strict_json_schema(value) if isinstance(value, dict) else value + for key, value in defs.items() + } + definitions = strict.get("definitions") + if isinstance(definitions, dict): + strict["definitions"] = { + key: _codex_strict_json_schema(value) if isinstance(value, dict) else value + for key, value in definitions.items() + } return strict @@ -96,15 +108,30 @@ def apply_codex_harness_patch() -> None: except Exception: return - original_build_prompt_suffix = _schema.build_prompt_suffix - def build_prompt_suffix_with_schema_file(schema: Any, cwd: str) -> str: + """Use Codex-native structured output instead of AgentField's Write-tool suffix. + + AgentField's default suffix asks the model to create + ``.agentfield_output.json`` with a Write tool. Codex CLI executions may + run under read-only sandboxing and do not have AgentField's Write tool, + so that instruction causes no final output. The Codex provider below + passes ``--output-schema`` and ``--output-last-message`` to the CLI; this + suffix only needs to create the schema file and ask for final JSON. 
+ """ schema_json = json.dumps( _codex_strict_json_schema(_schema.schema_to_json_schema(schema)), indent=2, ) _schema.write_schema_file(schema_json, cwd) - return original_build_prompt_suffix(schema, cwd) + schema_path = _schema.get_schema_path(cwd) + return ( + "\n\n---\n" + "CRITICAL CODEX STRUCTURED OUTPUT REQUIREMENTS:\n" + f"Return a single final JSON object conforming to: {schema_path}\n" + "Do not use markdown fences, comments, or surrounding prose.\n" + "Do not try to create .agentfield_output.json yourself; the Codex " + "CLI will persist your final JSON response for AgentField." + ) async def execute_with_native_structured_output(self: Any, prompt: str, options: dict[str, object]) -> Any: cwd = str(options.get("cwd")) if isinstance(options.get("cwd"), str) else None @@ -140,8 +167,8 @@ async def execute_with_native_structured_output(self: Any, prompt: str, options: "CODEX STRUCTURED OUTPUT CONTRACT:\n" f"The Codex CLI will save your final response to: {output_path}\n" f"Your final response MUST be a single JSON object conforming to: {schema_path}\n" - "Do not make the missing output file the subject of the task. " - "Complete the user's task, then return the required JSON object as your final response." + "Return the JSON object as your final answer. Do not write " + "the output file yourself or make the output file the task." 
) try: diff --git a/tests/test_codex_harness_patch.py b/tests/test_codex_harness_patch.py index 260aec1..a94e154 100644 --- a/tests/test_codex_harness_patch.py +++ b/tests/test_codex_harness_patch.py @@ -3,6 +3,7 @@ from swe_af.runtime.codex_harness_patch import ( _augment_codex_error_message, _codex_strict_json_schema, + apply_codex_harness_patch, ) @@ -22,6 +23,32 @@ def test_codex_strict_json_schema_requires_all_object_properties() -> None: assert "default" not in strict["properties"]["summary"] +def test_codex_strict_json_schema_recurses_into_defs() -> None: + schema = { + "$defs": { + "Item": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "count": {"type": "integer", "default": 1}, + }, + "required": ["name"], + } + }, + "type": "object", + "properties": { + "items": {"type": "array", "items": {"$ref": "#/$defs/Item"}}, + }, + } + + strict = _codex_strict_json_schema(schema) + + item = strict["$defs"]["Item"] + assert item["required"] == ["name", "count"] + assert item["additionalProperties"] is False + assert "default" not in item["properties"]["count"] + + def test_codex_git_metadata_error_gets_actionable_hint() -> None: message = _augment_codex_error_message( "fatal: cannot create .git/index.lock", @@ -34,3 +61,21 @@ def test_codex_git_metadata_error_gets_actionable_hint() -> None: def test_codex_unrelated_error_is_unchanged() -> None: assert _augment_codex_error_message("plain error", "plain error") == "plain error" + + +def test_codex_prompt_suffix_uses_final_json_not_write_tool(tmp_path) -> None: + from agentfield.harness import _schema + + apply_codex_harness_patch() + + suffix = _schema.build_prompt_suffix( + { + "type": "object", + "properties": {"summary": {"type": "string"}}, + }, + str(tmp_path), + ) + + assert "Return a single final JSON object" in suffix + assert "Write tool" not in suffix + assert (tmp_path / ".agentfield_schema.json").exists() From 7a593ebde9588acf384c7ac666f1712e98a08fb4 Mon Sep 17 00:00:00 2001 From: 
Abir Abbas Date: Mon, 11 May 2026 12:02:30 -0400 Subject: [PATCH 11/13] fix(codex): gate prompt-suffix patch on active provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The codex harness patch was replacing _schema.build_prompt_suffix and _runner.build_prompt_suffix globally at import time, so claude_code and open_code runs were also receiving the codex-specific instruction: "Do not try to create .agentfield_output.json yourself; the Codex CLI will persist your final JSON response for AgentField." That instruction is wrong for those providers — Claude / OpenCode are supposed to use their Write tool to create the output file (the fast path the runner expects), and forcing them onto the stdout-parse fallback costs latency, drops the inline schema for small schemas, and sends a confusing instruction referencing a Codex CLI that isn't in the loop. Use a contextvars.ContextVar set by a wrapped Agent.harness so that the suffix dispatcher returns the codex-native suffix only when the active call is for codex, and falls back to the original AgentField suffix for every other provider. Co-Authored-By: Claude Opus 4.7 (1M context) --- swe_af/runtime/codex_harness_patch.py | 46 +++++++++++++++++++++++++-- tests/test_codex_harness_patch.py | 35 ++++++++++++++++++-- 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/swe_af/runtime/codex_harness_patch.py b/swe_af/runtime/codex_harness_patch.py index 42a7288..96eaa3f 100644 --- a/swe_af/runtime/codex_harness_patch.py +++ b/swe_af/runtime/codex_harness_patch.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import contextvars import json import os from pathlib import Path @@ -8,6 +9,16 @@ _PATCHED = False +# Set by the wrapped Agent.harness for the duration of a harness call. 
+# Read by the dispatching build_prompt_suffix so that claude_code / open_code +# calls keep the original AgentField "use Write tool" instruction and only +# codex calls get the Codex-native structured-output instruction. +active_provider: contextvars.ContextVar[str | None] = contextvars.ContextVar( + "swe_af_codex_active_provider", default=None +) + +_ORIGINAL_BUILD_PROMPT_SUFFIX: Any = None + def _codex_strict_json_schema(schema: dict[str, Any]) -> dict[str, Any]: if not isinstance(schema, dict): @@ -92,10 +103,11 @@ async def _run_codex_cli_with_stdin( def apply_codex_harness_patch() -> None: - global _PATCHED + global _PATCHED, _ORIGINAL_BUILD_PROMPT_SUFFIX if _PATCHED: return try: + from agentfield.agent import Agent from agentfield.harness import _runner, _schema from agentfield.harness._cli import ( estimate_cli_cost, @@ -108,6 +120,8 @@ def apply_codex_harness_patch() -> None: except Exception: return + _ORIGINAL_BUILD_PROMPT_SUFFIX = _schema.build_prompt_suffix + def build_prompt_suffix_with_schema_file(schema: Any, cwd: str) -> str: """Use Codex-native structured output instead of AgentField's Write-tool suffix. @@ -255,7 +269,33 @@ async def execute_with_native_structured_output(self: Any, prompt: str, options: returncode=returncode, ) - _schema.build_prompt_suffix = build_prompt_suffix_with_schema_file - _runner.build_prompt_suffix = build_prompt_suffix_with_schema_file + def build_prompt_suffix_dispatching(schema: Any, cwd: str) -> str: + """Route to codex-native suffix only when the active call is for codex. + + Without this gate, every claude_code / open_code harness call would + also receive the codex-specific instruction "Do not try to create + .agentfield_output.json yourself; the Codex CLI will persist your + final JSON response" — which is wrong for those providers and forces + their runner into the slower stdout-parse fallback path. 
+ """ + if active_provider.get() == "codex": + return build_prompt_suffix_with_schema_file(schema, cwd) + return _ORIGINAL_BUILD_PROMPT_SUFFIX(schema, cwd) + + _orig_agent_harness = Agent.harness + + async def _harness_with_provider_context( + self: Any, prompt: str, *args: Any, **kwargs: Any + ) -> Any: + provider_value = kwargs.get("provider") + token = active_provider.set(str(provider_value) if provider_value else None) + try: + return await _orig_agent_harness(self, prompt, *args, **kwargs) + finally: + active_provider.reset(token) + + _schema.build_prompt_suffix = build_prompt_suffix_dispatching + _runner.build_prompt_suffix = build_prompt_suffix_dispatching CodexProvider.execute = execute_with_native_structured_output + Agent.harness = _harness_with_provider_context _PATCHED = True diff --git a/tests/test_codex_harness_patch.py b/tests/test_codex_harness_patch.py index a94e154..3919e97 100644 --- a/tests/test_codex_harness_patch.py +++ b/tests/test_codex_harness_patch.py @@ -3,6 +3,7 @@ from swe_af.runtime.codex_harness_patch import ( _augment_codex_error_message, _codex_strict_json_schema, + active_provider, apply_codex_harness_patch, ) @@ -68,6 +69,35 @@ def test_codex_prompt_suffix_uses_final_json_not_write_tool(tmp_path) -> None: apply_codex_harness_patch() + token = active_provider.set("codex") + try: + suffix = _schema.build_prompt_suffix( + { + "type": "object", + "properties": {"summary": {"type": "string"}}, + }, + str(tmp_path), + ) + finally: + active_provider.reset(token) + + assert "Return a single final JSON object" in suffix + assert "Write tool" not in suffix + assert (tmp_path / ".agentfield_schema.json").exists() + + +def test_non_codex_prompt_suffix_keeps_agentfield_write_tool_default(tmp_path) -> None: + """For claude_code / open_code calls, build_prompt_suffix must return the + original AgentField suffix that instructs the agent to use its Write tool. 
+ + Without this gate the codex-native suffix would leak into every harness + call, forcing claude/opencode runs onto the slower stdout-parse fallback. + """ + from agentfield.harness import _schema + + apply_codex_harness_patch() + + # No active provider set ⇒ default suffix. suffix = _schema.build_prompt_suffix( { "type": "object", @@ -76,6 +106,5 @@ def test_codex_prompt_suffix_uses_final_json_not_write_tool(tmp_path) -> None: str(tmp_path), ) - assert "Return a single final JSON object" in suffix - assert "Write tool" not in suffix - assert (tmp_path / ".agentfield_schema.json").exists() + assert "Write tool" in suffix + assert "Codex CLI" not in suffix From c1782fb53777fd8fdd37e4291aad3165cdb4a6e8 Mon Sep 17 00:00:00 2001 From: Abir Abbas Date: Mon, 11 May 2026 12:02:35 -0400 Subject: [PATCH 12/13] fix(fast-planner): force fallback_used=False on successful parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The codex strict-schema patch strips `default` from properties and marks every field as required, so when FastPlanResult flows through Codex the model has to invent a value for `fallback_used`. Despite the prompt example showing `false`, Codex sometimes returns `true` alongside a perfectly valid task list — making the flag meaningless for any downstream consumer that gates on it. `fallback_used` is planner-side state, not an LLM self-assessment: it should be True iff the planner's `_fallback_plan(...)` path ran. Override it back to False after a successful parse so the flag reflects what actually happened, regardless of what the model wrote. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- swe_af/fast/planner.py | 9 +++++++++ tests/fast/test_planner.py | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/swe_af/fast/planner.py b/swe_af/fast/planner.py index 845c9b9..4a89a5a 100644 --- a/swe_af/fast/planner.py +++ b/swe_af/fast/planner.py @@ -125,6 +125,15 @@ async def fast_plan_tasks( ) return _fallback_plan(goal).model_dump() + # `fallback_used` is a planner-side flag, not an LLM self-assessment. + # The codex strict-schema patch strips `default` and forces the field to + # be required, so the model has to invent a value and sometimes invents + # `true` despite the prompt example showing `false`. Anything reaching + # this point parsed cleanly through the harness, so the flag must be + # False — only the `_fallback_plan(...)` paths above set it to True. + if plan.fallback_used: + plan = plan.model_copy(update={"fallback_used": False}) + # Truncate to max_tasks using model_copy to avoid class-identity issues if len(plan.tasks) > max_tasks: plan = plan.model_copy(update={"tasks": plan.tasks[:max_tasks]}) diff --git a/tests/fast/test_planner.py b/tests/fast/test_planner.py index a4bddeb..07c857b 100644 --- a/tests/fast/test_planner.py +++ b/tests/fast/test_planner.py @@ -158,6 +158,33 @@ def test_valid_llm_response_produces_fast_plan_result(self) -> None: assert result["tasks"][0]["name"] == "step-one" assert result["fallback_used"] is False + def test_successful_parse_forces_fallback_used_false_even_if_llm_set_true(self) -> None: + """If the LLM (e.g. 
codex with stripped schema defaults) returns + fallback_used=True alongside a valid task list, the planner must + treat the parse as successful and reset the flag to False — the + flag is planner-side state, not an LLM self-assessment.""" + from swe_af.fast.planner import fast_plan_tasks + + plan = FastPlanResult( + tasks=[_make_fast_task("real-task")], + rationale="Codex filled fallback_used=true by mistake.", + fallback_used=True, + ) + mock_response = _make_mock_response(plan) + + with patch("swe_af.fast.planner._note"), \ + patch("swe_af.fast.planner.fast_router") as mock_router: + mock_router.harness = AsyncMock(return_value=mock_response) + mock_router.note = MagicMock() + + result = _run(fast_plan_tasks( + goal="Add a /health endpoint", + repo_path="/tmp/repo", + )) + + assert result["fallback_used"] is False + assert [t["name"] for t in result["tasks"]] == ["real-task"] + def test_llm_parsed_none_triggers_fallback(self) -> None: """When parsed=None the fallback plan with 'implement-goal' is returned.""" from swe_af.fast.planner import fast_plan_tasks From 54b0737bf56419f3a8aa53ee58615a5e9738963b Mon Sep 17 00:00:00 2001 From: Abir Abbas Date: Mon, 11 May 2026 14:54:31 -0400 Subject: [PATCH 13/13] docs(codex): flag SWE_DEFAULT_MODEL requirement and bwrap host needs Two gotchas surfaced when actually running a full main-mode build with the codex runtime that weren't covered in the existing setup notes: 1. The Docker image bakes ENV HARNESS_MODEL=openrouter/moonshotai/kimi-k2.6 as an OpenCode-side fallback, and SWE-AF's model-resolution env cascade reads HARNESS_MODEL. So a codex deployment that only sets SWE_DEFAULT_RUNTIME=codex (without SWE_DEFAULT_MODEL) hands an OpenRouter Kimi model id to the Codex CLI and the Product Manager reasoner fails in ~13s. Document that SWE_DEFAULT_MODEL=gpt-5.3-codex (or per-build models map) is required to pin the Codex model. 2. 
Codex CLI's workspace-write sandbox uses bubblewrap (`bwrap`) and needs Linux user namespaces enabled on the host. Docker-on-WSL2 and hardened environments refuse with "bwrap: No permissions to create a new namespace", and the coder agents return success while writing no files. Document the symptom so operators can recognize and fix it. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 8f48e46..ee666b9 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,10 @@ For OpenRouter with `open_code`, use model IDs in `openrouter// For Codex with ChatGPT subscription auth, install the Codex CLI on the host, run `codex login`, leave `OPENAI_API_KEY` unset for this process, and set `SWE_CODEX_AUTH_MODE=chatgpt` or `auto`. For OpenAI API-platform billing, set `SWE_CODEX_AUTH_MODE=api_key` and `OPENAI_API_KEY`. +> **Codex deployments using the Docker image must set `SWE_DEFAULT_MODEL=gpt-5.3-codex` on the environment** (or pass `models: {"default": "gpt-5.3-codex"}` in every build's `config`). The image bakes `HARNESS_MODEL=openrouter/moonshotai/kimi-k2.6` as an OpenCode fallback, and SWE-AF's model-resolution env cascade reads `HARNESS_MODEL` — so without `SWE_DEFAULT_MODEL` set, the Codex CLI receives an OpenRouter model id it can't handle and the Product Manager reasoner fails in ~13s. Setting `SWE_DEFAULT_MODEL` makes the cascade pin every role to the Codex model. + +> Codex CLI's `workspace-write` sandbox uses bubblewrap (`bwrap`) and needs Linux user namespaces enabled on the host. Most production Linux hosts and managed container runtimes (Railway, etc.) allow this by default, but local Docker on WSL2 or hardened environments may refuse with `bwrap: No permissions to create a new namespace`. If the verifier reports that error, the coder ran but couldn't write files — enable user namespaces on the host before relying on the codex runtime there. 
+ ### Optional: web search Coding and review agents can look up external documentation, library APIs, error messages, and version/deprecation status during a build. This is opt-in via two env vars on the deployment: