Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,508 changes: 786 additions & 722 deletions swe_af/app.py

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions swe_af/hitl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,39 @@
format_prior_user_responses,
request_user_input_and_pause,
)
from swe_af.hitl.credentials_store import (
clear_scoped_credentials,
get_scoped_credentials,
inject_credentials_into_env,
store_scoped_credentials,
)
from swe_af.hitl.scout_schema import ScoutResult
from swe_af.hitl.services import (
KNOWN_SERVICES,
ServiceCredentialSpec,
detect_services_from_repo,
known_service_summary_for_prompt,
)
from swe_af.hitl.wrapper import AskUserBudget, run_with_ask_user

# Public names of this package: the names ``from swe_af.hitl import *`` exposes.
__all__ = [
    "AskUserForm",
    "AskUserFormField",
    "AskUserResponse",
    "AskUserBudget",
    "KNOWN_SERVICES",
    "ScoutResult",
    "ServiceCredentialSpec",
    "approval_webhook_url",
    "build_form_builder",
    "build_hax_client_from_env",
    "clear_scoped_credentials",
    "detect_services_from_repo",
    "format_prior_user_responses",
    "get_scoped_credentials",
    "inject_credentials_into_env",
    "known_service_summary_for_prompt",
    "request_user_input_and_pause",
    "run_with_ask_user",
    "store_scoped_credentials",
]
96 changes: 96 additions & 0 deletions swe_af/hitl/credentials_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Process-local, execution-scoped store for credentials the scout negotiates.

Why a module-level dict instead of ``BuildConfig`` or ``app.memory``:

* ``BuildConfig`` is serialized through ``to_execution_config_dict()`` and
passed to ``execute()`` via ``app.call``. The control plane logs all
``app.call`` input data, which would persist the credentials.
* ``app.memory`` (scope=``run``) is synced to the control plane DB by design
— also persists.
* Filesystem under ``artifacts_dir`` is written to disk and archived.

The scout's negotiation produces credentials that should *only* live in the
agent process's memory for the duration of the build, then be cleared. A
module-level dict keyed by execution_id is the simplest way to achieve that
while keeping concurrent builds (which share the Python process) isolated.

Security boundary:

* Values are never logged.
* Values are never written to disk.
* Values are not serialized through ``app.call`` (use this store from inside
the receiving reasoner, not as a kwarg).
* ``build()``'s ``finally`` block MUST call ``clear_scoped_credentials`` so
the store is emptied on every exit path, error paths included.
"""

from __future__ import annotations

import threading

# Module-level. Keyed by execution_id (each build has its own).
# Each inner dict maps a credential name to its secret value; the names end up
# as env var names when inject_credentials_into_env merges them into an env.
_STORE: dict[str, dict[str, str]] = {}
# Held around each _STORE access by the store/get/clear functions below.
_LOCK = threading.Lock()


def store_scoped_credentials(execution_id: str, creds: dict[str, str]) -> None:
    """Set the credential mapping held for ``execution_id`` to ``creds``.

    Only entries whose value is a string with at least one non-whitespace
    character are kept. ``None``, empty, whitespace-only and non-string values
    are dropped, so a form field the user skipped never reaches a subprocess
    as an empty env var. Kept values are stored exactly as given (they are
    not stripped).

    Any mapping previously held for ``execution_id`` is replaced wholesale.
    When nothing survives the filter, the entry is removed rather than
    storing an empty dict. A falsy ``execution_id`` makes the call a no-op.
    """
    if not execution_id:
        return

    usable: dict[str, str] = {}
    for name, secret in (creds or {}).items():
        if isinstance(secret, str) and secret.strip():
            usable[name] = secret

    with _LOCK:
        if not usable:
            # Nothing worth keeping: make sure no stale entry lingers.
            _STORE.pop(execution_id, None)
            return
        _STORE[execution_id] = usable


def get_scoped_credentials(execution_id: str) -> dict[str, str]:
    """Fetch the credentials held for ``execution_id`` as a fresh dict.

    The returned dict is a copy, so mutating it does not affect the store.
    An empty dict comes back when ``execution_id`` is falsy or nothing is
    stored for it; callers should read that as "no credentials negotiated;
    rely on os.environ only".
    """
    if not execution_id:
        return {}
    with _LOCK:
        # ``or {}`` folds the missing-entry case into the same copy path.
        return dict(_STORE.get(execution_id) or {})


def clear_scoped_credentials(execution_id: str) -> None:
    """Drop whatever credentials are held in process memory for ``execution_id``.

    Does nothing when ``execution_id`` is falsy or has no stored entry.
    """
    if execution_id:
        with _LOCK:
            # pop() with a default keeps repeated clears harmless.
            _STORE.pop(execution_id, None)


def inject_credentials_into_env(
    base_env: dict[str, str] | None, execution_id: str
) -> dict[str, str]:
    """Build a new env dict from ``base_env`` overlaid with scoped credentials.

    ``base_env`` itself is never modified; ``None`` is treated as an empty
    env. On a name clash the scoped credential wins, so a freshly-minted
    token from the scout replaces a stale value that was already present
    (e.g. an expired RAILWAY_TOKEN left in os.environ by a previous build).

    Intended to be called right before each ``router.harness(...)`` call,
    with the result passed as ``env=``. The base is normally
    ``dict(os.environ)`` so the subprocess keeps everything the parent has
    and only gains or overrides the scoped credentials.
    """
    env: dict[str, str] = dict(base_env or {})
    # Later assignment wins, which is what gives scoped values precedence.
    for name, secret in get_scoped_credentials(execution_id).items():
        env[name] = secret
    return env
59 changes: 59 additions & 0 deletions swe_af/hitl/scout_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Structured output schema for ``run_environment_scout``.

The scout is a two-pass reasoner driven by ``run_with_ask_user``:

* Pass 1 (no ``prior_user_responses``): scan the repo, populate
``detected_services`` and ``ask_user_form`` with one optional text field
per service. ``scoped_credentials`` stays empty.
* Pass 2 (after the user submits): take the values from
``prior_user_responses[-1]['values']`` and surface them as
``scoped_credentials``. ``ask_user_form`` is cleared.

If no services are detected on pass 1, the scout returns
``ask_user_form=None`` immediately and the wrapper short-circuits — no pause,
no second pass.
"""

from __future__ import annotations

from pydantic import BaseModel, Field

from swe_af.hitl.ask_user import AskUserForm
from swe_af.hitl.services import ServiceCredentialSpec


class ScoutResult(BaseModel):
    """Structured output the scout LLM emits."""

    # Services the scout decided are relevant; per the description, on pass 1
    # there is one form field per entry.
    detected_services: list[ServiceCredentialSpec] = Field(
        default_factory=list,
        description=(
            "Third-party services the scout believes the PRD work touches. "
            "On pass 1 this matches the form's fields one-for-one."
        ),
    )
    # Secret values keyed by env var name. Empty by default; the description
    # restricts population to pass 2. Treat as sensitive.
    scoped_credentials: dict[str, str] = Field(
        default_factory=dict,
        description=(
            "Populated on pass 2 ONLY. Keys are env var names (matching "
            "ServiceCredentialSpec.env_var_name); values are the secrets the "
            "user provided. Must NOT be logged or persisted."
        ),
    )
    # Env var names (not values) for credentials the user chose not to supply.
    skipped_services: list[str] = Field(
        default_factory=list,
        description=(
            "Env var names the user explicitly left blank (informed opt-out). "
            "Surfaced so downstream code can warn early if a critical "
            "credential is missing."
        ),
    )
    # Human-readable recap; the description requires it to be free of secrets.
    summary: str = Field(
        default="",
        description=(
            "One-line summary the scout writes — e.g. 'Negotiated 2 "
            "credentials: RAILWAY_TOKEN, SENTRY_AUTH_TOKEN. User skipped: "
            "DATADOG_API_KEY.' Safe to log; never includes secret values."
        ),
    )
    # Form to show the user, or None (the default) when there is nothing to ask.
    ask_user_form: AskUserForm | None = None
169 changes: 169 additions & 0 deletions swe_af/hitl/services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""Knowledge base of common third-party services + how to mint a scoped token.

Used by ``run_environment_scout`` to recognize signal files in a repo (e.g.
``railway.toml``, ``fly.toml``, ``sentry.properties``) and ask the user for
the matching scoped credential. The LLM inside the scout reasoner consumes
``KNOWN_SERVICES`` as a hint list; ``detect_services_from_repo`` provides a
deterministic pre-pass that the LLM can build on.
"""

from __future__ import annotations

import os
from typing import Iterable

from pydantic import BaseModel, Field


class ServiceCredentialSpec(BaseModel):
    """One row in the knowledge base; also returned by the scout."""

    # Display name of the service.
    service_name: str = Field(
        description="Human-readable service name shown to the user."
    )
    # Name of the environment variable that carries the credential.
    env_var_name: str = Field(
        description=(
            "Env var the build expects (becomes the ask_user_form field id). "
            "Match what the service's CLI / SDK looks for by default."
        )
    )
    # Page where the user can create the token.
    mint_url: str = Field(
        description=(
            "URL where the user mints a scoped/temporary token. Surfaced in "
            "the form description so the user can click through and paste back."
        )
    )
    # Guidance on scope and lifetime for the token.
    permissions_hint: str = Field(
        description=(
            "Short hint on what scope / TTL to request when minting. Shown to "
            "the user in the form so they don't over-grant access."
        )
    )
    # Paths relative to the repo root. detect_services_from_repo tests each one
    # for plain existence (no glob expansion), so the description says "paths"
    # rather than "glob-ish".
    signal_files: list[str] = Field(
        default_factory=list,
        description=(
            "Repo-relative paths (files or directories) whose presence in the "
            "repo strongly implies this service is in use. Used by "
            "detect_services_from_repo."
        ),
    )
    # The placeholder is documented with single braces to match the templates
    # in KNOWN_SERVICES, which are written for str.format-style substitution;
    # doubled braces would be an escaped literal there and never be replaced.
    evidence_template: str = Field(
        default="",
        description=(
            "Sentence template explaining WHY the build needs this credential, "
            "used in the form description. Use {signal} as the placeholder."
        ),
    )


# Static knowledge base of services the scout knows how to ask credentials for.
# Order is significant: detect_services_from_repo returns hits in this order.
# Entries with an empty ``signal_files`` list can never be matched by that
# file-existence scan.
KNOWN_SERVICES: list[ServiceCredentialSpec] = [
    ServiceCredentialSpec(
        service_name="Railway",
        env_var_name="RAILWAY_TOKEN",
        mint_url="https://railway.com/account/tokens",
        permissions_hint="Project token, read-only if possible, set expiry to 1 day.",
        signal_files=["railway.toml", "railway.json", ".railway/config.json"],
        evidence_template="Saw {signal} — build likely needs Railway access to deploy or query services.",
    ),
    ServiceCredentialSpec(
        service_name="Fly.io",
        env_var_name="FLY_API_TOKEN",
        mint_url="https://fly.io/user/personal_access_tokens",
        permissions_hint="Deploy token scoped to this app, 1-day expiry.",
        signal_files=["fly.toml", "fly.io.toml", ".fly/config.toml"],
        evidence_template="Saw {signal} — build may need Fly.io access for deploys.",
    ),
    ServiceCredentialSpec(
        service_name="Vercel",
        env_var_name="VERCEL_TOKEN",
        mint_url="https://vercel.com/account/tokens",
        permissions_hint="Scope to this team only, 1-day expiry.",
        signal_files=["vercel.json", ".vercel/project.json"],
        evidence_template="Saw {signal} — build may need Vercel access.",
    ),
    ServiceCredentialSpec(
        service_name="Supabase",
        env_var_name="SUPABASE_ACCESS_TOKEN",
        mint_url="https://supabase.com/dashboard/account/tokens",
        permissions_hint="Personal access token, 1-day expiry — required only if migrations or schema changes are part of the work.",
        signal_files=["supabase/config.toml", "supabase/.gitignore", "supabase/migrations"],
        evidence_template="Saw {signal} — Supabase project detected.",
    ),
    ServiceCredentialSpec(
        service_name="Sentry",
        env_var_name="SENTRY_AUTH_TOKEN",
        mint_url="https://sentry.io/settings/account/api/auth-tokens/",
        permissions_hint="Auth token scoped to project:read + project:releases, 1-day expiry.",
        signal_files=["sentry.properties", ".sentryclirc", "sentry.io.json"],
        evidence_template="Saw {signal} — Sentry integration detected.",
    ),
    ServiceCredentialSpec(
        service_name="Datadog",
        env_var_name="DATADOG_API_KEY",
        mint_url="https://app.datadoghq.com/organization-settings/api-keys",
        permissions_hint="Application API key (NOT a client token), restricted to read scopes if possible.",
        signal_files=["datadog.yaml", ".datadog/conf.yaml"],
        evidence_template="Saw {signal} — Datadog integration detected.",
    ),
    ServiceCredentialSpec(
        service_name="GitHub",
        env_var_name="GH_TOKEN",
        mint_url="https://github.com/settings/personal-access-tokens/new",
        permissions_hint="Fine-grained PAT scoped to THIS repo only, repo:contents+pull-requests, 1-day expiry.",
        signal_files=[".github/workflows", "CODEOWNERS"],
        evidence_template="Saw {signal} — work likely needs GitHub API beyond what gh CLI provides anonymously.",
    ),
    ServiceCredentialSpec(
        service_name="OpenAI",
        env_var_name="OPENAI_API_KEY",
        mint_url="https://platform.openai.com/api-keys",
        permissions_hint="Restricted API key with low usage cap; 1-day expiry.",
        signal_files=[],  # Detected via dependency manifests, not signal files.
        evidence_template="Project depends on the OpenAI SDK.",
    ),
    ServiceCredentialSpec(
        service_name="Anthropic",
        env_var_name="ANTHROPIC_API_KEY",
        mint_url="https://console.anthropic.com/settings/keys",
        permissions_hint="Restricted API key, set monthly spend limit, 1-day expiry.",
        signal_files=[],
        evidence_template="Project depends on the Anthropic SDK.",
    ),
]


def detect_services_from_repo(repo_path: str) -> list[ServiceCredentialSpec]:
    """Static scan: report which known services have a signal file in the repo.

    Each entry of ``KNOWN_SERVICES`` is a hit when at least one of its
    ``signal_files``, joined onto ``repo_path``, exists on disk. The result
    is only a hint for the LLM scout, which makes the final call on what to
    ask for and can use PRD context this scan cannot see.

    Notes:
    * Signal entries are checked as literal paths under ``repo_path``; there
      is no recursive search or glob expansion. A file and a directory both
      count as a hit.
    * A falsy ``repo_path`` or one that is not an existing directory yields
      an empty list instead of raising.
    * Hits come back in ``KNOWN_SERVICES`` order, each service at most once.
    """
    if not (repo_path and os.path.isdir(repo_path)):
        return []

    def _has_signal(spec: ServiceCredentialSpec) -> bool:
        # any() stops at the first existing path, so later ones are not probed.
        return any(
            os.path.exists(os.path.join(repo_path, relative))
            for relative in spec.signal_files
        )

    return [spec for spec in KNOWN_SERVICES if _has_signal(spec)]


def known_service_summary_for_prompt(specs: Iterable[ServiceCredentialSpec]) -> str:
    """Format service specs as a markdown bullet list suitable for a prompt.

    One bullet per spec, in iteration order, joined by newlines. Each bullet
    shows the service name, env var, signal files (or a placeholder when the
    spec has none), mint URL and permissions hint. An empty iterable yields
    an empty string.
    """

    def _bullet(spec: ServiceCredentialSpec) -> str:
        quoted = [f"`{path}`" for path in spec.signal_files]
        signal_text = ", ".join(quoted) if quoted else "(no static signal)"
        return (
            f"- **{spec.service_name}** — env `{spec.env_var_name}`; "
            f"signals: {signal_text}; mint at {spec.mint_url}; "
            f"hint: {spec.permissions_hint}"
        )

    return "\n".join(_bullet(spec) for spec in specs)
Loading
Loading