diff --git a/.gitignore b/.gitignore index f497622261..b7f1e46ab9 100644 --- a/.gitignore +++ b/.gitignore @@ -34,12 +34,14 @@ charts/helix-controlplane/charts/*.tgz # Source-of-truth is the .tmpl file; the rendered Chart.yaml is build output. charts/*/Chart.yaml # Anchor helix-runner to repo root so it does not shadow charts/helix-runner/. -# Other unanchored patterns below (helix, zed-build, tmp) are left as-is to -# avoid regressing the ignoring of build artifacts in subdirectories such as -# api/cmd/helix/helix or runner-cmd/helix-runner/helix-runner. +# `helix` and friends are anchored to specific build-artifact paths because a +# bare `helix` pattern matches any directory named `helix` at any depth — which +# accidentally swallowed helix-org/helix/ and helix-org/agent/helix/ entirely. /helix-runner zed-build -helix +/helix +/api/helix +/api/cmd/helix/helix tmp zed-config/development_credentials diff --git a/helix-org/.gitignore b/helix-org/.gitignore new file mode 100644 index 0000000000..64b924cf9a --- /dev/null +++ b/helix-org/.gitignore @@ -0,0 +1,33 @@ +# Binaries +/bin/ +/dist/ +*.exe +*.test +*.out + +# Coverage +coverage.out +coverage.html + +# Editor / OS +.DS_Store +.idea/ +.vscode/ +*.swp + +# Env +.env +.env.local + +# Media +*.mp4 +*.gif +*.png + +# Notes +TODO.md +design/ + +# Data +*.db +envs/ \ No newline at end of file diff --git a/helix-org/.golangci.yml b/helix-org/.golangci.yml new file mode 100644 index 0000000000..b7b6710ae2 --- /dev/null +++ b/helix-org/.golangci.yml @@ -0,0 +1,38 @@ +version: "2" + +run: + timeout: 5m + tests: true + +linters: + default: none + enable: + - bodyclose + - errcheck + - errorlint + - gosec + - govet + - ineffassign + - misspell + - nolintlint + - revive + - staticcheck + - unused + settings: + revive: + rules: + - name: exported + disabled: true + +formatters: + enable: + - gofmt + - goimports + settings: + goimports: + local-prefixes: + - github.com/helixml/helix-org + +issues: + max-issues-per-linter: 0 + 
max-same-issues: 0 diff --git a/helix-org/CLAUDE.md b/helix-org/CLAUDE.md new file mode 100644 index 0000000000..e9b945a786 --- /dev/null +++ b/helix-org/CLAUDE.md @@ -0,0 +1,146 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project + +Helix Org — a Go prototype run independently from the `helix` monorepo. It has its own tooling and does not share build/test infrastructure with the parent repo. Rules in the parent `helix/CLAUDE.md` do **not** apply here; treat this directory as its own project. + +**Current year: 2026** — include "2026" in web searches for documentation and browser APIs. + +## Design Philosophy (read this before writing code) + +The ultimate goal is that the system is configured and used **almost entirely via the prompts of specific Roles and Positions**. Behaviour lives in the profile/prompt, not in the codebase. The code is scaffolding that lets this prompt-driven ecosystem thrive. + +Practical consequences when choosing between alternatives: + +- **Prefer data and text over code.** If a feature can be expressed as a profile edit, a scope value, or a tool description, do that before adding Go logic. +- **Keep the core generic.** Tool definitions, scope shapes, and enforcement decisions are owned by individual tools — not hard-coded in the registry, server, or domain layer. New tools (including MCP tools later) must be addable without editing the core. +- **Keep the MCP surface small.** MCP tools are reserved for org-graph primitives — both reads and mutations of the structural state (Workers, Positions, Roles, Channels, Grants, Streams). Anything else a Worker needs to do should go through the shell tools provisioned in their Environment (`bash`, `curl`, `git`, `gh`, `python`, etc.). 
If you're tempted to add an MCP wrapper like `publish_to_blog` or `fetch_url`, stop — the Role text describes how to use the shell directly, and if the workflow changes, only the Role changes. The test: does this operation read or mutate org-graph state? If yes, MCP. If no, shell. +- **No workflow in code.** Tools do exactly one thing. The code does not orchestrate multi-step sequences on behalf of an agent — it does not subscribe Workers to channels, grant tools implicitly, auto-create related records, or otherwise chain steps together. Orchestration lives in the *prompt* of the Worker invoking the tool. If a Role declares `DefaultTools` or `DefaultStreams`, those fields are **reference data the hiring manager's prompt reads**, not triggers the code acts on. When writing or reviewing a tool, ask: "is the code making a decision that the agent should be making?" If yes, remove it. +- **Write the smallest thing that works.** No speculative abstractions, no optional plumbing that isn't exercised today. If two tools share code, extract it then — not in advance. +- **Social enforcement first.** The default is that a Worker reads their scope from their prompt and complies. Only reach for hard enforcement when the cost of a violation is high. + +When a design choice looks like it could go either way, pick the one that pushes more responsibility into prompts/configuration and less into Go code. + +## Architecture at a Glance + +- **Storage**: SQLite, driven by GORM with `AutoMigrate`. The database file lives alongside the binary by default and is configurable via flag/env. No raw SQL migration files. +- **Interface**: One HTTP endpoint. `/workers/{id}/mcp` (Streamable HTTP transport, no auth yet) carries every read and mutation of the org graph; the worker ID in the URL identifies the caller, and the server exposes only the tools that worker holds grants for. There is no other HTTP surface — even the human "what's happening?" 
view runs over MCP via `subscribe` + `read_events(wait=…)` from a `claude` session. +- **CLI**: A thin client binary with three subcommands. `helix-org bootstrap` opens the SQLite store directly and seeds the initial owner Worker — the one operation that cannot use MCP, because there is no Worker yet. Pass `--install-claude-mcp` to bootstrap to register the owner's MCP endpoint with the local `claude` CLI; from then on plain `claude` sessions can drive the org. `helix-org serve` runs the HTTP listener. `helix-org chat` exec's an interactive `claude` session pointed at a chosen Worker's MCP endpoint (default `w-owner`), with `--continue` so the per-directory conversation is restorable across invocations. After bootstrap, the CLI never opens the database again — humans drive the org through `claude` (directly or via `helix-org chat`), and Workers do the same when activated by the dispatcher. +- **Auth**: Deferred. Treat all callers as the root owner for now; real authentication is a later phase. + +## Setup + +Install required development tools before doing anything else: + +```bash +make tools +``` + +## Build, Test, and Check + +**Always prefer `make` targets over raw shell commands.** The Makefile sets required build tags, CGO flags, environment variables, and opinionated defaults (envs dir, DB path, listen address) that ad-hoc `go run` / `go test` / `golangci-lint` invocations miss. Running raw commands silently drifts from how the project actually builds and runs. + +If you find yourself reaching for a multi-step shell incantation to build, run, test, format, lint, clean, or seed local state — **add a `make` target for it instead**, then call that target. Future-you and other agents will reuse it; one-off shell strings rot. Keep targets discoverable via `make help`. 
+ +```bash +make build # Build the binary into ./bin +make run # Run `helix-org serve` with opinionated defaults (./envs, ./helix-org.db, :8080) +make run ARGS="--model sonnet" # Run with extra flags appended after the defaults +make test # Run all tests (race + -count=1) +make test PKG=./domain/... # Test a specific package +make test-cover # Run tests + write coverage.out / coverage.html +make check # Format, vet, lint, and test (modifies files) +make ci # CI-safe: fmt-check, vet, lint, test (no writes) +make clean # Kill running servers, remove ./bin, ./envs, *.db, coverage files +``` + +`make check` is for local use — it runs `goimports -w` and may modify files. `make ci` runs `fmt-check` instead, failing if anything is unformatted without touching files. CI must use `make ci`; contributors must pass `make check` locally before pushing. + +## Running the Project End-to-End + +```bash +make run # opinionated defaults: serve, ./envs, ./helix-org.db, :8080 +make run ARGS="--model opus" # append extra flags +make clean && make run # nuke local state (DB + envs + running server) and start fresh +make build && ./bin/helix-org --help # compiled binary (matches what CI ships) +``` + +`make run` is for fast iteration. Before pushing or tagging a release, exercise the compiled binary (`make build && ./bin/helix-org ...`) — the `go run` path can mask build-tag or linker differences. + +`make clean` is destructive on purpose: it kills any `helix-org serve` process it can find, deletes `./bin`, `./envs`, every `*.db` in the project root, and the coverage artefacts. Use it whenever local state has drifted (stale Workers, half-bootstrapped DB, lingering server holding port 8080). + +## Shipping Code + +Before committing: + +1. Run `make check` and confirm it passes (or at minimum `make test` for affected packages). Do not commit code that has not been validated. +2. Fix any failures before committing — do not skip or work around them. 
+ +Commits and PRs use **Conventional Commits**: + +- Prefix: `feat:`, `fix:`, `docs:`, `refactor:`, `chore:`, `test:`, etc. +- Example commit: `feat: add webhook retry logic` +- PR titles follow the same format: `feat: add webhook retry logic` + +When pushing additional commits to an existing PR, update the PR title and description to reflect the full set of changes in the branch. + +## Go + +- Fail fast: `return fmt.Errorf("failed: %w", err)` — never log and continue +- Error on missing configuration — fail with an error, don't log a warning and continue +- Use structs, not `map[string]interface{}` +- GORM AutoMigrate only — no SQL migration files +- Use gomock, not testify/mock +- No fallbacks — one approach, no fallback code paths +- No type aliases — update all references when moving or renaming types +- No panics — return errors; rewrite methods to support error returns if needed +- Log errors once at the top level — domain code returns errors, only handlers/workers log them + +## Testing + +When the user says "tdd", follow red-green strictly: + +1. **Red**: Write a failing test. Run it, confirm it fails. +2. **Green**: Minimal fix. Run test, confirm it passes. +3. Run the full test suite for regressions. + +Practices: + +- **Tests live next to the code**: `foo.go` → `foo_test.go` in the same directory. Use package `foo` for whitebox tests and `foo_test` only when the test must exercise the public API in isolation (e.g. to avoid import cycles). +- **Table-driven tests** for anything with multiple input cases. Use `t.Run(name, ...)` per case so failures name the case. +- **`t.Parallel()`** in tests that don't share global state. +- **Race detector is always on** (`make test` adds `-race`). Treat race failures as hard bugs, never flakes. +- **`-count=1`** is set so tests never use the build cache — all runs are fresh. +- **Mocks**: use `gomock` generated via `mockgen`. 
Place generated mocks in `mocks/<package>/` and regenerate them as part of `make tools` updates, not by hand. Prefer hand-rolled fakes where the interface is small enough that a mock adds no value. +- **Fixtures**: put reusable test data under `testdata/` (Go ignores it during builds). +- **Coverage**: run `make test-cover` before large PRs. Treat coverage as a diagnostic, not a gate — low coverage on a file means "go look," not "write busywork tests." +- **No `testing.Short()` skips by default**. If a test must be slow or external, gate it on an explicit build tag (e.g. `//go:build integration`) and document how to run it. + +## Linting + +`golangci-lint` config lives in `.golangci.yml`. The enabled linters and formatters (`errcheck`, `govet`, `ineffassign`, `staticcheck`, `unused`, `gofmt`, `goimports`, `misspell`, `revive`, `gosec`, `bodyclose`, `errorlint`, `nolintlint`) catch a deliberate, narrow set. Do not disable linters to silence findings — fix the code. + +- **Fix the finding, don't suppress it.** `//nolint:<linter>` is only acceptable with a trailing comment explaining *why* the rule is wrong for this site (`nolintlint` enforces this). Unexplained `//nolint` directives fail the lint run. +- **Formatting is non-negotiable**: `goimports` with `-local github.com/helixml/helix-org` groups local imports separately. Run `make fmt` before committing; CI runs `make fmt-check` and will fail on any drift. +- **Error wrapping**: `errorlint` enforces `%w` instead of `%v` for errors, and forbids type-assertion on errors — use `errors.As` / `errors.Is`. +- **`gosec`** flags raw SQL string concatenation, weak crypto, and command injection. If one of these fires, the fix is almost always to restructure the code, not to suppress the warning. +- **New linters** are added by editing `.golangci.yml` in a dedicated `chore: enable <linter>` PR that also fixes all findings it surfaces — never in the same PR as unrelated changes. 
+ +## Software Engineering + +Object-oriented design principles: + +- **Naming**: Classes by what they are, not what they do (avoid -er suffixes). Methods are builders (noun) or manipulators (verb), rarely both. Variables should be explainable as single/plural nouns; prefer simple names over compound ones. +- **Constructors**: One primary constructor, secondaries delegate to it. Keep constructors light. Prefer `new` only in secondary constructors. +- **Methods**: Prefer fewer than five public methods per class. Avoid static methods. Avoid null arguments and return values. Prefer richer encapsulation over getters/setters. +- **Encapsulation**: Prefer four or fewer encapsulated objects per class. Favour composition over inheritance. +- **Interfaces**: Prefer interfaces. Keep them small. +- **Immutability**: Default to immutable classes. Avoid type introspection and reflection unless the language idiom demands it. +- **No globals**: Prefer classes over public constants or enums. +- **Testing**: Prefer fakes over mocks. +- **Design**: Think in objects, not algorithms. Tell objects what you want; don't ask for data. +- **Boolean parameters**: Don't use a boolean to switch between fundamentally different behaviours (split the method or use polymorphism). Booleans are fine for orthogonal modifiers like filters or formatting options. +- **Dependency injection**: Use constructor injection. When a constructor accumulates many parameters, group related ones into a parameter/options object. +- **Language idioms**: Where these principles conflict with strong language conventions (e.g. Go exported struct fields), follow the language idiom and note the deviation. diff --git a/helix-org/Makefile b/helix-org/Makefile new file mode 100644 index 0000000000..6cbd3d5f53 --- /dev/null +++ b/helix-org/Makefile @@ -0,0 +1,69 @@ +# Helix Org Makefile — always drive builds/tests/lint through make. 
+ +BINARY := helix-org +MODULE := github.com/helixml/helix-org +CMD_PKG := ./cmd/$(BINARY) +BIN_DIR := bin +PKG ?= ./... +ARGS ?= + +GOLANGCI_LINT_VERSION := v2.11.4 + +GO := go +GOFLAGS := +CGO_ENABLED ?= 0 + +export CGO_ENABLED + +.PHONY: help tools build run test test-cover test-e2e-helix fmt fmt-check vet lint check ci clean + +help: ## Show available targets + @awk 'BEGIN {FS = ":.*##"} /^[a-zA-Z0-9_-]+:.*##/ {printf " %-12s %s\n", $$1, $$2}' $(MAKEFILE_LIST) + +tools: ## Install required development tools + $(GO) install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$(GOLANGCI_LINT_VERSION) + $(GO) install golang.org/x/tools/cmd/goimports@latest + $(GO) install go.uber.org/mock/mockgen@latest + +build: ## Build the binary into ./bin + mkdir -p $(BIN_DIR) + $(GO) build $(GOFLAGS) -o $(BIN_DIR)/$(BINARY) $(CMD_PKG) + +run: ## Run `helix-org serve` with opinionated defaults; pass extra flags via ARGS="..." + $(GO) run $(CMD_PKG) serve --envs-dir ./envs --db ./helix-org.db --addr :8080 $(ARGS) + +test: ## Run tests (use PKG=./path/... to scope) + CGO_ENABLED=1 $(GO) test $(GOFLAGS) -race -count=1 $(PKG) + +test-e2e-helix: ## Run the //go:build integration helix-end-to-end smoke (requires HELIX_URL+HELIX_API_KEY) + CGO_ENABLED=1 $(GO) test -tags=integration -count=1 ./tools/helixclient/... ./tools/... + +test-cover: ## Run tests and produce coverage.out + coverage.html + CGO_ENABLED=1 $(GO) test $(GOFLAGS) -race -count=1 -coverprofile=coverage.out -covermode=atomic $(PKG) + $(GO) tool cover -func=coverage.out | tail -n 1 + $(GO) tool cover -html=coverage.out -o coverage.html + @echo "Wrote coverage.html" + +fmt: ## Format and organise imports + goimports -w -local $(MODULE) . + +fmt-check: ## Fail if files are not formatted (does not modify) + @unformatted=$$(goimports -l -local $(MODULE) . 
| grep -v '^vendor/' || true); \ + if [ -n "$$unformatted" ]; then \ + echo "Unformatted files (run 'make fmt'):"; echo "$$unformatted"; exit 1; \ + fi + +vet: ## Run go vet + $(GO) vet $(PKG) + +lint: ## Run golangci-lint + golangci-lint run $(PKG) + +check: fmt vet lint test ## Format, vet, lint, and test (modifies files) + +ci: fmt-check vet lint test ## CI-safe: checks formatting without modifying + +clean: ## Remove build artefacts, kill running servers, wipe local DBs and envs + @pkill -f '[h]elix-org serve' 2>/dev/null || true + rm -rf $(BIN_DIR) coverage.out coverage.html envs + rm -f *.db diff --git a/helix-org/agent/claude/spawner.go b/helix-org/agent/claude/spawner.go new file mode 100644 index 0000000000..184a716ad0 --- /dev/null +++ b/helix-org/agent/claude/spawner.go @@ -0,0 +1,416 @@ +// Package claude is the local-development Spawner runtime: it embodies +// each AI Worker activation by exec'ing the `claude` CLI in the +// Worker's Environment directory and streaming its stream-json output +// onto the Worker's activation Stream. +// +// This runtime is the dev-mode counterpart to agent/helix. The Helix +// runtime is the production target; claude is what you reach for when +// you want to drive the org graph end-to-end without standing up a +// Helix server. +package claude + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" + "time" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// SpawnerConfig configures the claude-backed Spawner. +type SpawnerConfig struct { + // ClaudeBin is the path to the claude CLI (e.g. "claude"). + ClaudeBin string + // PublicURL is the base URL the spawned agent uses to reach the + // helix-org MCP endpoint. Each Worker's tools are exposed at + // PublicURL + "/workers/{workerID}/mcp". 
+ PublicURL string + // Model, if non-empty, is passed to claude as --model. Aliases like + // "sonnet" or "opus" resolve to the latest model in that family. + Model string + // Effort, if non-empty, is passed to claude as --effort. Valid + // values are low|medium|high|xhigh|max. + Effort string + // Logger receives spawn bookkeeping. Must be non-nil. + Logger *slog.Logger + + // Store, Broadcaster, Now and NewID are used to publish per-message + // activation events to the Worker's activation Stream + // (s-activations-<workerID>). Store, NewID and Now are required; + // Broadcaster is optional (long-poll observers won't wake without it). + Store *store.Store + Broadcaster *broadcast.Broadcaster + Now func() time.Time + NewID func() string +} + +// mcpServerName is the key under which the helix MCP server is registered +// in each Worker's mcp.json. Tool names surface in Claude as +// mcp__<mcpServerName>__<toolName>. +const mcpServerName = "helix" + +// Spawner returns an agent.Spawner that runs `claude -p` in the new +// Worker's Environment directory and BLOCKS until claude exits. The +// dispatcher is responsible for serialising calls per Worker. +// +// State lives in the domain (DB), not on disk. Before exec'ing claude, +// the Spawner projects current state into the Environment as three +// markdown files: +// +// - role.md — the canonical Role.Content read from the store. +// - identity.md — the Worker's IdentityContent read from the store. +// - agent.md — agent.Policy, the fixed org-wide policy on speaking +// discipline, log.md hygiene, and AI-origin vs human-origin handling. +// +// This is the single seam that knows "how to make role/identity visible +// to a worker." Local envs write files (today). When envs eventually go +// remote (SSH targets, container exec, prompt-only), only this +// projection step swaps strategy — tools and bootstrap don't change. +// +// Tools are exposed to the agent over MCP. 
Per activation the Spawner +// writes /mcp.json pointing at /workers//mcp on the helix +// server and passes --mcp-config + --strict-mcp-config so claude only +// sees the helix tools and not the user's machine-wide config. +// +// Claude is run with --output-format stream-json so every assistant +// message, tool call, and tool result flows through a parser in this +// process that publishes one Event per atomic message segment to the +// Worker's activation Stream. Observers (typically the hiring Worker, +// auto-subscribed at hire) watch via read_events on that Stream. +func Spawner(cfg SpawnerConfig) agent.Spawner { + return func(ctx context.Context, workerID domain.WorkerID, envPath string, triggers []agent.Trigger) error { + if len(triggers) == 0 { + return fmt.Errorf("spawner invoked with no triggers") + } + if err := projectEnv(ctx, cfg.Store, workerID, envPath); err != nil { + return fmt.Errorf("project env for %s: %w", workerID, err) + } + + mcpConfigPath, err := writeMCPConfig(envPath, cfg.PublicURL, workerID) + if err != nil { + return fmt.Errorf("write mcp config: %w", err) + } + + prompt := agent.BuildPrompt(workerID, agent.Policy, triggers) + + args := []string{ + "-p", prompt, + "--permission-mode", "bypassPermissions", + "--output-format", "stream-json", + "--verbose", + "--mcp-config", mcpConfigPath, + "--strict-mcp-config", + } + if cfg.Model != "" { + args = append(args, "--model", cfg.Model) + } + if cfg.Effort != "" { + args = append(args, "--effort", cfg.Effort) + } + + cmd := exec.CommandContext(ctx, cfg.ClaudeBin, args...) 
//nolint:gosec // spawning claude with generated prompt is this Spawner's purpose + cmd.Dir = envPath + cmd.Env = append(os.Environ(), + "HELIX_WORKER_ID="+string(workerID), + ) + + streamID := agent.ActivationStreamID(workerID) + publish := func(body string) { + publishActivationEvent(ctx, cfg, workerID, streamID, body) + } + + // Mark the start of this activation on the stream so consecutive + // activations are easy to tell apart for an observer reading + // events. The trigger description matches what callers see when + // inspecting their hires. + publish(fmt.Sprintf("=== activation: %s ===", agent.DescribeTriggers(triggers))) + + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("stdout pipe: %w", err) + } + // Claude's stderr is rare and usually a hard failure (bad flag, + // missing binary). Fold it into the activation stream so it's + // visible alongside the rest of the transcript. + stderrR, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("stderr pipe: %w", err) + } + cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} + + if err := cmd.Start(); err != nil { + return fmt.Errorf("start claude: %w", err) + } + pid := cmd.Process.Pid + cfg.Logger.Info("spawned claude", + "worker", workerID, + "pid", pid, + "env", envPath, + "trigger", triggers[0].Kind, + "triggers", len(triggers), + "stream", streamID, + ) + + // Drain stderr in the background so the pipe doesn't block. + stderrDone := make(chan struct{}) + go func() { + defer close(stderrDone) + scanner := bufio.NewScanner(stderrR) + for scanner.Scan() { + publish("stderr: " + agent.OneLine(scanner.Text(), 500)) + } + }() + + // Parse stream-json synchronously (blocks until stdout closes). 
+ streamTranscript(stdout, publish) + <-stderrDone + + err = cmd.Wait() + publish(fmt.Sprintf("=== exit: %s ===", okOr(errString(err)))) + cfg.Logger.Info("claude exited", + "worker", workerID, + "pid", pid, + "err", errString(err), + ) + return err + } +} + +// projectEnv writes the current canonical state of a Worker — role, +// identity, and the fixed agent.md entry stub — into envPath. Called +// once per activation, just before claude is exec'd. Reads from the +// domain (DB); writes to disk (env). The DB is the source of truth; +// disk is a per-activation projection. +func projectEnv(ctx context.Context, s *store.Store, workerID domain.WorkerID, envPath string) error { + if s == nil { + return fmt.Errorf("spawner has no store") + } + worker, err := s.Workers.Get(ctx, workerID) + if err != nil { + return fmt.Errorf("get worker: %w", err) + } + positions := worker.Positions() + if len(positions) == 0 { + return fmt.Errorf("worker %s has no positions", workerID) + } + pos, err := s.Positions.Get(ctx, positions[0]) + if err != nil { + return fmt.Errorf("get position: %w", err) + } + role, err := s.Roles.Get(ctx, pos.RoleID) + if err != nil { + return fmt.Errorf("get role: %w", err) + } + if err := writeEnvFile(envPath, "role.md", role.Content); err != nil { + return err + } + if err := writeEnvFile(envPath, "identity.md", worker.IdentityContent()); err != nil { + return err + } + if err := writeEnvFile(envPath, "agent.md", agent.Policy); err != nil { + return err + } + return nil +} + +// writeEnvFile writes content to a file inside a Worker's Environment +// directory. The mode is 0o600 — these files describe behaviour and +// identity and shouldn't be world-readable. 
+func writeEnvFile(envPath, name, content string) error { + full := filepath.Join(envPath, name) + if err := os.WriteFile(full, []byte(content), 0o600); err != nil { + return fmt.Errorf("write %q: %w", full, err) + } + return nil +} + +// publishActivationEvent appends one Event to the Worker's activation +// Stream and notifies long-poll observers. It does NOT go through the +// dispatcher: per-message events would otherwise re-trigger any +// subscribed AI Worker on every line, which would be unbounded. The +// Worker themselves is intentionally never subscribed to their own +// activation stream for the same reason. +// +// All errors are logged and swallowed; a transient SQLite hiccup must +// not abort the activation. +func publishActivationEvent(ctx context.Context, cfg SpawnerConfig, workerID domain.WorkerID, streamID domain.StreamID, body string) { + if cfg.Store == nil || cfg.NewID == nil || cfg.Now == nil || body == "" { + return + } + event, err := domain.NewMessageEvent( + domain.EventID("e-"+cfg.NewID()), + streamID, + workerID, + domain.Message{From: string(workerID), Body: body}, + cfg.Now(), + ) + if err != nil { + cfg.Logger.Warn("activation event: build", "worker", workerID, "err", err) + return + } + if err := cfg.Store.Events.Append(ctx, event); err != nil { + cfg.Logger.Warn("activation event: append", "worker", workerID, "err", err) + return + } + if cfg.Broadcaster != nil { + cfg.Broadcaster.Notify(streamID) + } +} + +// writeMCPConfig writes a per-worker mcp.json into envPath wiring claude +// to the worker's MCP endpoint. Returning the path keeps the caller +// honest about pointing --mcp-config at a real file. 
+func writeMCPConfig(envPath, publicURL string, workerID domain.WorkerID) (string, error) { + cfg := struct { + MCPServers map[string]mcpServerEntry `json:"mcpServers"` + }{ + MCPServers: map[string]mcpServerEntry{ + mcpServerName: { + Type: "http", + URL: fmt.Sprintf("%s/workers/%s/mcp", strings.TrimRight(publicURL, "/"), workerID), + }, + }, + } + data, err := json.MarshalIndent(cfg, "", " ") + if err != nil { + return "", fmt.Errorf("marshal mcp config: %w", err) + } + path := filepath.Join(envPath, "mcp.json") + if err := os.WriteFile(path, data, 0o600); err != nil { + return "", fmt.Errorf("write %q: %w", path, err) + } + return path, nil +} + +type mcpServerEntry struct { + Type string `json:"type"` + URL string `json:"url"` +} + +func errString(err error) string { + if err == nil { + return "" + } + return err.Error() +} + +func okOr(s string) string { + if s == "" { + return "ok" + } + return s +} + +// streamTranscript reads newline-delimited JSON from r (claude's stdout) +// and calls publish once per atomic message segment — assistant text, +// tool call, tool result, system init, run result. Lines that don't +// parse as JSON are published verbatim so nothing is silently dropped. +func streamTranscript(r io.Reader, publish func(body string)) { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024) + for scanner.Scan() { + line := scanner.Bytes() + var ev streamEvent + if err := json.Unmarshal(line, &ev); err != nil { + publish(agent.OneLine(string(line), 500)) + continue + } + for _, body := range renderEvent(ev) { + publish(body) + } + } + if err := scanner.Err(); err != nil && !errors.Is(err, io.EOF) { + publish(fmt.Sprintf("[stream] scanner error: %v", err)) + } +} + +// streamEvent captures the parts of claude's stream-json format we care +// about for the transcript. 
+type streamEvent struct { + Type string `json:"type"` + Subtype string `json:"subtype,omitempty"` + Message json.RawMessage `json:"message,omitempty"` + Result string `json:"result,omitempty"` + IsError bool `json:"is_error,omitempty"` +} + +type messagePayload struct { + Role string `json:"role"` + Content []contentSegment `json:"content"` +} + +type contentSegment struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + Name string `json:"name,omitempty"` + Input json.RawMessage `json:"input,omitempty"` + ToolUseID string `json:"tool_use_id,omitempty"` + Content json.RawMessage `json:"content,omitempty"` + IsError bool `json:"is_error,omitempty"` +} + +// renderEvent turns one parsed stream-json line into zero or more +// transcript bodies — one per atomic segment. Each becomes its own +// Event on the Worker's activation Stream. +func renderEvent(ev streamEvent) []string { + switch ev.Type { + case "system": + if ev.Subtype == "init" { + return []string{"--- session start ---"} + } + case "result": + tag := "result" + if ev.IsError { + tag = "result-error" + } + return []string{fmt.Sprintf("%s: %s", tag, agent.OneLine(ev.Result, 500))} + case "assistant": + var msg messagePayload + if err := json.Unmarshal(ev.Message, &msg); err != nil { + return nil + } + var out []string + for _, seg := range msg.Content { + switch seg.Type { + case "text": + if seg.Text != "" { + out = append(out, fmt.Sprintf("assistant: %s", agent.OneLine(seg.Text, 500))) + } + case "tool_use": + out = append(out, fmt.Sprintf("tool_use %s: %s", seg.Name, agent.OneLine(string(seg.Input), 500))) + } + } + return out + case "user": + var msg messagePayload + if err := json.Unmarshal(ev.Message, &msg); err != nil { + return nil + } + var out []string + for _, seg := range msg.Content { + if seg.Type != "tool_result" { + continue + } + tag := "tool_result" + if seg.IsError { + tag = "tool_result-error" + } + out = append(out, fmt.Sprintf("%s: %s", tag, 
agent.OneLine(string(seg.Content), 500))) + } + return out + } + return nil +} diff --git a/helix-org/agent/claude/spawner_test.go b/helix-org/agent/claude/spawner_test.go new file mode 100644 index 0000000000..b2b7440452 --- /dev/null +++ b/helix-org/agent/claude/spawner_test.go @@ -0,0 +1,299 @@ +package claude + +import ( + "context" + "io" + "log/slog" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store/sqlite" +) + +// readFile reads a single file inside dir. Tests use t.TempDir() so +// path traversal isn't a concern. +func readFile(dir, name string) (string, error) { + b, err := os.ReadFile(filepath.Join(dir, name)) //nolint:gosec // dir is t.TempDir() + if err != nil { + return "", err + } + return string(b), nil +} + +// TestRenderEvent covers the parsed-line → transcript-body rules. +func TestRenderEvent(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + ev streamEvent + want []string + }{ + { + name: "system init", + ev: streamEvent{Type: "system", Subtype: "init"}, + want: []string{"--- session start ---"}, + }, + { + name: "system other subtype is ignored", + ev: streamEvent{Type: "system", Subtype: "compact"}, + want: nil, + }, + { + name: "result success", + ev: streamEvent{Type: "result", Result: "all done"}, + want: []string{"result: all done"}, + }, + { + name: "result error", + ev: streamEvent{Type: "result", Result: "boom", IsError: true}, + want: []string{"result-error: boom"}, + }, + { + name: "assistant text + tool_use as separate bodies", + ev: streamEvent{ + Type: "assistant", + Message: jsonRaw(`{"role":"assistant","content":[ + {"type":"text","text":"hi there"}, + {"type":"tool_use","name":"publish","input":{"streamId":"s-x","body":"y"}} + ]}`), + }, + want: []string{ + "assistant: hi there", + `tool_use publish: {"streamId":"s-x","body":"y"}`, + }, + }, 
+ { + name: "assistant empty text segment is skipped", + ev: streamEvent{ + Type: "assistant", + Message: jsonRaw(`{"role":"assistant","content":[ + {"type":"text","text":""} + ]}`), + }, + want: nil, + }, + { + name: "user tool_result success", + ev: streamEvent{ + Type: "user", + Message: jsonRaw(`{"role":"user","content":[ + {"type":"tool_result","tool_use_id":"t1","content":"ok"} + ]}`), + }, + want: []string{`tool_result: "ok"`}, + }, + { + name: "user tool_result error", + ev: streamEvent{ + Type: "user", + Message: jsonRaw(`{"role":"user","content":[ + {"type":"tool_result","tool_use_id":"t1","content":"nope","is_error":true} + ]}`), + }, + want: []string{`tool_result-error: "nope"`}, + }, + { + name: "non-tool_result user segments are ignored", + ev: streamEvent{ + Type: "user", + Message: jsonRaw(`{"role":"user","content":[{"type":"text","text":"x"}]}`), + }, + want: nil, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := renderEvent(tc.ev) + if !equalSlice(got, tc.want) { + t.Fatalf("renderEvent = %q, want %q", got, tc.want) + } + }) + } +} + +func TestStreamTranscriptPublishesPerSegment(t *testing.T) { + t.Parallel() + + input := strings.Join([]string{ + `{"type":"system","subtype":"init"}`, + `{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}`, + `{"type":"assistant","message":{"role":"assistant","content":[{"type":"tool_use","name":"publish","input":{"x":1}}]}}`, + `{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","content":"ok"}]}}`, + `not-json-but-should-still-show-up`, + `{"type":"result","result":"done"}`, + }, "\n") + + var got []string + streamTranscript(strings.NewReader(input), func(body string) { + got = append(got, body) + }) + + want := []string{ + "--- session start ---", + "assistant: hello", + `tool_use publish: {"x":1}`, + `tool_result: "ok"`, + "not-json-but-should-still-show-up", + "result: done", 
+ } + if !equalSlice(got, want) { + t.Fatalf("transcript = %q, want %q", got, want) + } +} + +func TestPublishActivationEventAppendsAndNotifies(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + ctx := context.Background() + now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC) + + streamID := agent.ActivationStreamID("w-x") + stream, err := domain.NewStream(streamID, "Activations: w-x", "test", "w-owner", now, domain.Transport{}) + if err != nil { + t.Fatalf("new stream: %v", err) + } + if err := s.Streams.Create(ctx, stream); err != nil { + t.Fatalf("create stream: %v", err) + } + + bc := broadcast.New() + wake := bc.Subscribe([]domain.StreamID{streamID}) + t.Cleanup(func() { bc.Unsubscribe([]domain.StreamID{streamID}, wake) }) + + cfg := SpawnerConfig{ + Logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + Store: s, + Broadcaster: bc, + Now: func() time.Time { return now }, + NewID: func() string { return "id-1" }, + } + + publishActivationEvent(ctx, cfg, "w-x", streamID, "assistant: hello") + + events, err := s.Events.ListForStream(ctx, streamID, 10) + if err != nil { + t.Fatalf("list events: %v", err) + } + if len(events) != 1 { + t.Fatalf("events = %+v, want one", events) + } + if events[0].Source != "w-x" { + t.Fatalf("source = %q, want w-x", events[0].Source) + } + msg, err := events[0].Message() + if err != nil { + t.Fatalf("parse message: %v", err) + } + if msg.Body != "assistant: hello" { + t.Fatalf("message body = %q", msg.Body) + } + if msg.From != "w-x" { + t.Fatalf("message from = %q, want w-x", msg.From) + } + + select { + case <-wake: + default: + t.Fatalf("broadcaster did not wake long-poll observer") + } + + publishActivationEvent(ctx, cfg, "w-x", streamID, "") + events, _ = s.Events.ListForStream(ctx, streamID, 10) + if len(events) != 1 { + t.Fatalf("empty body should not append; events = %d", len(events)) + } +} + +func TestProjectEnvWritesCanonicalState(t *testing.T) { + 
t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + ctx := context.Background() + now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC) + + role, _ := domain.NewRole("r-eng", "# Role: Engineer\nBuild stuff.", now) + if err := s.Roles.Create(ctx, role); err != nil { + t.Fatalf("create role: %v", err) + } + pos, _ := domain.NewPosition("p-eng", "r-eng", nil) + if err := s.Positions.Create(ctx, pos); err != nil { + t.Fatalf("create position: %v", err) + } + worker, _ := domain.NewAIWorker("w-eng", []domain.PositionID{"p-eng"}, "# Persona\nAlice.") + if err := s.Workers.Create(ctx, worker); err != nil { + t.Fatalf("create worker: %v", err) + } + + envPath := t.TempDir() + if err := projectEnv(ctx, s, "w-eng", envPath); err != nil { + t.Fatalf("projectEnv: %v", err) + } + + want := map[string]string{ + "role.md": "# Role: Engineer\nBuild stuff.", + "identity.md": "# Persona\nAlice.", + "agent.md": agent.Policy, + } + for name, expected := range want { + got, err := readFile(envPath, name) + if err != nil { + t.Fatalf("read %s: %v", name, err) + } + if got != expected { + t.Errorf("%s = %q, want %q", name, got, expected) + } + } + + updated := domain.Role{ID: role.ID, Content: "# Role: Engineer v2", CreatedAt: role.CreatedAt, UpdatedAt: now} + if err := s.Roles.Update(ctx, updated); err != nil { + t.Fatalf("update role: %v", err) + } + if err := projectEnv(ctx, s, "w-eng", envPath); err != nil { + t.Fatalf("re-project: %v", err) + } + got, _ := readFile(envPath, "role.md") + if got != "# Role: Engineer v2" { + t.Fatalf("post-update role.md = %q", got) + } + + if err := s.Workers.Update(ctx, worker.WithIdentityContent("# Persona\nAlice (v2).")); err != nil { + t.Fatalf("update worker: %v", err) + } + if err := projectEnv(ctx, s, "w-eng", envPath); err != nil { + t.Fatalf("re-project after identity update: %v", err) + } + got, _ = readFile(envPath, "identity.md") + if got != "# Persona\nAlice (v2)." 
{ + t.Fatalf("post-update identity.md = %q", got) + } +} + +func jsonRaw(s string) []byte { return []byte(s) } + +func equalSlice(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/helix-org/agent/claude/workspace.go b/helix-org/agent/claude/workspace.go new file mode 100644 index 0000000000..022f6ca620 --- /dev/null +++ b/helix-org/agent/claude/workspace.go @@ -0,0 +1,66 @@ +package claude + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/domain" +) + +// Workspace is the agent.WorkspaceSync implementation for the local +// `claude` runtime. It writes files into // +// — the same directory the spawner exec's claude in — so role and +// identity edits land on disk between activations without waiting for +// the spawner's projection step on the next run. +// +// The spawner re-projects role.md / identity.md / agent.md from the +// DB at the start of every activation as a backstop, so a missed +// PublishFile is recoverable. The WorkspaceSync push is just to keep +// the on-disk view fresh between activations. +type Workspace struct { + EnvsDir string +} + +// NewWorkspace returns a Workspace anchored at envsDir. Each Worker +// has a sibling subdirectory created by HireWorker. +func NewWorkspace(envsDir string) *Workspace { + return &Workspace{EnvsDir: envsDir} +} + +// PublishFile writes content to //. `name` +// must satisfy agent.ValidateWorkspaceName (no absolute paths, no +// upward traversal). `message` is unused by this backend (no commit +// log on the local filesystem). 
+func (w *Workspace) PublishFile(_ context.Context, workerID domain.WorkerID, name, content, _ string) error { + if w.EnvsDir == "" { + return errors.New("claude workspace: EnvsDir is empty") + } + if workerID == "" { + return errors.New("claude workspace: workerID is empty") + } + if err := agent.ValidateWorkspaceName(name); err != nil { + return fmt.Errorf("claude workspace: %w", err) + } + envDir := filepath.Join(w.EnvsDir, string(workerID)) + full := filepath.Clean(filepath.Join(envDir, name)) + // Belt-and-braces: even after ValidateWorkspaceName, double-check + // the resolved path stays inside the Worker's env dir. + rel, err := filepath.Rel(envDir, full) + if err != nil || rel == ".." || (len(rel) >= 3 && rel[:3] == ".."+string(os.PathSeparator)) { + return fmt.Errorf("claude workspace: name %q escapes env dir", name) + } + if err := os.MkdirAll(filepath.Dir(full), 0o750); err != nil { + return fmt.Errorf("claude workspace: mkdir: %w", err) + } + if err := os.WriteFile(full, []byte(content), 0o600); err != nil { + return fmt.Errorf("claude workspace: write %q: %w", full, err) + } + return nil +} + +// Compile-time check. 
+var _ agent.WorkspaceSync = (*Workspace)(nil) diff --git a/helix-org/agent/claude/workspace_test.go b/helix-org/agent/claude/workspace_test.go new file mode 100644 index 0000000000..1236e77163 --- /dev/null +++ b/helix-org/agent/claude/workspace_test.go @@ -0,0 +1,35 @@ +package claude + +import ( + "context" + "os" + "path/filepath" + "testing" +) + +func TestWorkspaceWritesUnderEnvDir(t *testing.T) { + t.Parallel() + dir := t.TempDir() + w := NewWorkspace(dir) + if err := w.PublishFile(context.Background(), "w-eng", "role.md", "# Role", ""); err != nil { + t.Fatalf("publish: %v", err) + } + got, err := os.ReadFile(filepath.Join(dir, "w-eng", "role.md")) //nolint:gosec // dir is t.TempDir() + if err != nil { + t.Fatalf("read: %v", err) + } + if string(got) != "# Role" { + t.Errorf("content = %q", got) + } +} + +func TestWorkspaceRejectsBadName(t *testing.T) { + t.Parallel() + dir := t.TempDir() + w := NewWorkspace(dir) + for _, bad := range []string{"", "/role.md", "../role.md", "a/../../b"} { + if err := w.PublishFile(context.Background(), "w-eng", bad, "x", ""); err == nil { + t.Errorf("name %q: expected error", bad) + } + } +} diff --git a/helix-org/agent/helix/project.go b/helix-org/agent/helix/project.go new file mode 100644 index 0000000000..ef0803a250 --- /dev/null +++ b/helix-org/agent/helix/project.go @@ -0,0 +1,220 @@ +package helix + +import ( + "context" + "fmt" + "log/slog" + "strings" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/helix/helixclient" + "github.com/helixml/helix-org/store" +) + +// ProjectApplier ensures a Worker has a Helix project of its own. +// Used by both Spawner (AI Worker activations) and chat.HelixBridge +// (owner chat) — every Worker, human or AI, that drives an LLM call +// needs a per-Worker project so the org-graph MCP server can be wired +// in via the project's auto-provisioned Agent App. 
+// +// Idempotent: re-applying for a Worker that already has a project is +// a no-op for the project itself, but always re-pushes the canonical +// role/identity files so update_role / update_identity changes land. +type ProjectApplier struct { + Client helixclient.Client + Store *store.Store + HelixOrgURL string + OrgID string + Provider string + Model string + // AgentMD is the org-wide agent policy pushed verbatim to + // `.context/agent.md` on every Worker's helix-specs branch. Empty + // string skips the push. + AgentMD string + Logger *slog.Logger +} + +// Ensure applies a Helix project for the given Worker if one +// doesn't exist yet. Returns the resolved project / agent-app / +// repo IDs (read from the runtime state after persistence so callers +// see the same view of state). +func (a *ProjectApplier) Ensure(ctx context.Context, workerID domain.WorkerID) (projectID, agentAppID, repoID string, err error) { + worker, err := a.Store.Workers.Get(ctx, workerID) + if err != nil { + return "", "", "", fmt.Errorf("get worker: %w", err) + } + state, err := LoadState(ctx, a.Store, workerID) + if err != nil { + return "", "", "", err + } + // Resolve role content from the Worker's first position (if any). + // We need this both on first apply (to seed agent.md / role.md / + // identity.md) and on every subsequent Ensure (so role hot-edits + // propagate to the helix-specs branch). + var roleContent, roleName string + if positions := worker.Positions(); len(positions) > 0 { + if pos, err := a.Store.Positions.Get(ctx, positions[0]); err == nil { + if role, err := a.Store.Roles.Get(ctx, pos.RoleID); err == nil { + roleContent = role.Content + roleName = string(role.ID) + } + } + } + // Fast path: project already exists. Skip the expensive + // ApplyProject / CreateGitRepo / AttachRepo steps but DO re-push + // role + identity so update_role / update_identity changes + // propagate. CreateBranch + PutFile are idempotent and cheap. 
+ if state.ProjectID != "" { + a.republishWorkerFiles(ctx, workerID, state.RepoID, roleContent, worker.IdentityContent()) + return state.ProjectID, state.AgentAppID, state.RepoID, nil + } + // Every project is applied with the same Runtime — see + // helix.Runtime for why. The auto-provisioned Agent App is the + // vehicle for our MCP wiring; we attach helix-org's MCP server + // to it in a follow-up step (UpdateApp can't be done in apply). + resp, err := a.Client.ApplyProject(ctx, helixclient.ProjectApplyRequest{ + OrganizationID: a.OrgID, + Name: string(workerID), + Spec: helixclient.ProjectSpec{ + Description: worker.IdentityContent(), + Agent: &helixclient.ProjectAgentSpec{ + Name: roleName, + Runtime: Runtime, + Provider: a.Provider, + Model: a.Model, + }, + }, + }) + if err != nil { + return "", "", "", fmt.Errorf("apply project for %s: %w", workerID, err) + } + // Project secrets — env-var injection. + _ = a.Client.PutProjectSecret(ctx, resp.ProjectID, "HELIX_ORG_URL", a.HelixOrgURL) + _ = a.Client.PutProjectSecret(ctx, resp.ProjectID, "HELIX_WORKER_ID", string(workerID)) + // Discover the project's primary repo and its org (we need the + // org to create a same-org repo; Helix rejects cross-org attaches). + repoID = "" + var projOrgID string + if proj, err := a.Client.GetProject(ctx, resp.ProjectID); err == nil { + repoID = proj.DefaultRepoID + projOrgID = proj.OrganizationID + } + // Helix's project-apply does NOT auto-create a default repo. The + // desktop's startup script then refuses to launch Zed with + // "No repositories were cloned successfully" — at which point the + // session sits forever in `state=running` without an agent thread. + // For our owner-chat / org-graph use case we don't need a real + // code repo, just a Helix-internal one to satisfy the workspace + // check. Create one if missing and attach it as primary. 
+ if repoID == "" { + var ownerID string + if me, err := a.Client.WhoAmI(ctx); err == nil { + ownerID = me.User + } + if ownerID != "" { + repo, err := a.Client.CreateGitRepo(ctx, helixclient.CreateGitRepoRequest{ + Name: string(workerID), + OwnerID: ownerID, + OrganizationID: projOrgID, + // Seed the default branch with a README so subsequent + // pushes to `helix-specs` (the branch our role/identity + // files live on) have a base commit to fork from. A + // brand-new bare repo has no branches at all and any + // PUT to a non-existent branch fails with `Remote + // branch helix-specs not found in upstream origin`. + InitialFiles: map[string]string{ + "README.md": "# " + string(workerID) + "\n\nWorkspace for Helix Worker `" + string(workerID) + "`. Files in `job/` carry the role + identity prompt.\n", + }, + }) + if err != nil && a.Logger != nil { + a.Logger.Warn("create git repo for project", "worker", workerID, "err", err) + } else if err == nil { + if err := a.Client.AttachRepoToProject(ctx, resp.ProjectID, repo.ID, true); err != nil { + if a.Logger != nil { + a.Logger.Warn("attach repo to project", "worker", workerID, "repo", repo.ID, "err", err) + } + } else { + repoID = repo.ID + if a.Logger != nil { + a.Logger.Info("helix repo created and attached", "worker", workerID, "repo", repo.ID) + } + } + } + } + } + a.republishWorkerFiles(ctx, workerID, repoID, roleContent, worker.IdentityContent()) + // Attach helix-org's MCP server to the auto-provisioned Agent + // App. Helix's project-apply doesn't accept MCPs in + // ProjectAgentSpec (only the simple WebSearch/Browser/Calculator + // flags), so we GetApp + mutate + UpdateApp in a second step. + // The MCP URL must be reachable from Helix's runner — operator + // runs cloudflared (or similar) and sets `helix.org_url` to the + // public tunnel URL. 
+ if resp.AgentAppID != "" && a.HelixOrgURL != "" { + mcpURL := strings.TrimRight(a.HelixOrgURL, "/") + "/workers/" + string(workerID) + "/mcp" + if err := helixclient.AttachMCPToApp(ctx, a.Client, resp.AgentAppID, "helix", "http", mcpURL); err != nil { + if a.Logger != nil { + a.Logger.Warn("attach MCP to agent app", "worker", workerID, "app", resp.AgentAppID, "err", err) + } + } else if a.Logger != nil { + a.Logger.Info("helix mcp attached", "worker", workerID, "app", resp.AgentAppID, "mcp", mcpURL) + } + } + if err := SaveProject(ctx, a.Store, workerID, resp.ProjectID, resp.AgentAppID, repoID); err != nil { + return "", "", "", fmt.Errorf("persist helix project IDs: %w", err) + } + if a.Logger != nil { + a.Logger.Info("helix project applied", + "worker", workerID, + "project", resp.ProjectID, + "agent_app", resp.AgentAppID, + "repo", repoID, + "created", resp.Created, + ) + } + return resp.ProjectID, resp.AgentAppID, repoID, nil +} + +// republishWorkerFiles writes (or rewrites) the agent.md / role.md / +// identity.md files on the Worker's helix-specs branch. Called from +// both the first-apply path (so the branch and files exist before the +// desktop boots) and the fast path (so update_role / update_identity +// edits propagate on every activation). +// +// All operations are idempotent and cheap: CreateBranch on an existing +// branch is a 200, and PutFile overwrites any existing content. We log +// errors but never fail Ensure on them — partial state is recoverable +// on the next activation, but a hard fail would block the dispatch +// chain entirely. +// +// The agent inside the desktop must `git pull origin helix-specs` +// before reading these files, otherwise it'll see the worktree's +// pre-existing copy. The spawner's activation prompt +// (helixSpecsMandate) carries that pull instruction. 
+func (a *ProjectApplier) republishWorkerFiles(ctx context.Context, workerID domain.WorkerID, repoID, roleContent, identityContent string) { + if repoID == "" { + return + } + if err := a.Client.CreateBranch(ctx, repoID, "helix-specs", "main"); err != nil { + if a.Logger != nil { + a.Logger.Warn("republish worker files: create helix-specs branch", "worker", workerID, "err", err) + } + } + for path, content := range map[string]string{ + ".context/agent.md": a.AgentMD, + "workers/" + string(workerID) + "/.context/role.md": roleContent, + "workers/" + string(workerID) + "/.context/identity.md": identityContent, + } { + if content == "" { + continue + } + if err := a.Client.PutFile(ctx, repoID, helixclient.PutFileRequest{ + Path: path, + Branch: "helix-specs", + Message: "republish " + path, + Content: content, + }); err != nil && a.Logger != nil { + a.Logger.Warn("republish worker files: put", "worker", workerID, "path", path, "err", err) + } + } +} diff --git a/helix-org/agent/helix/spawner.go b/helix-org/agent/helix/spawner.go new file mode 100644 index 0000000000..56ca967b87 --- /dev/null +++ b/helix-org/agent/helix/spawner.go @@ -0,0 +1,399 @@ +package helix + +import ( + "context" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/helix/helixclient" + "github.com/helixml/helix-org/store" +) + +// SpawnerConfig wires the helix-backed Spawner. The Client is +// injectable so tests can run without HTTP. Defaults are applied for +// any unset duration / capacity. +// +// In the per-Worker-project model, the spawner does not hold a +// ProjectID of its own — every AI Worker gets its own Helix project, +// applied at hire time and persisted in the WorkerRuntimeState +// sidecar under the "helix" backend. 
+type SpawnerConfig struct { + Client helixclient.Client + HelixOrgURL string // forwarded to project secrets so the in-sandbox agent can reach helix-org's MCP server + // Provider/Model drive the project's Agent App config. Each + // Worker's project is applied with these values; a future Role- + // overrides system can pass them per-Worker. (The runtime and + // agent_type are fixed — see helix.Runtime / helix.AgentType.) + Provider string + Model string + // AgentMD is the org-wide agent.md policy text pushed to + // `.context/agent.md` on each per-Worker project's helix-specs + // branch. The spawner's activation prompt tells every Worker to + // read it first. Embedded by main.go from agent/policy.md. + AgentMD string + // OrgID is the Helix organisation each per-Worker project lives + // under. Empty for personal accounts. + OrgID string + ActivationTimeout time.Duration + MaxInflight int + PollInitial time.Duration // default 250ms + PollMax time.Duration // default 30s + Logger *slog.Logger + Store *store.Store + Broadcaster *broadcast.Broadcaster + Now func() time.Time + NewID func() string +} + +// Spawner returns an agent.Spawner that runs each activation as a +// long-lived Helix chat session. Either a fresh one (first activation +// or stale pointer) or a follow-up message on the Worker's already- +// open session. +// +// The Spawner does five things, in order: build the prompt, take a +// global semaphore slot, ensure a live session exists, open the live +// transcript WebSocket, then poll for completion. New transcript +// segments arriving on the WebSocket are diffed against a per-call +// dedup map and republished onto s-activations- in the +// same shape claude.Spawner emits, so observers see one transcript +// format regardless of backend. 
+func Spawner(cfg SpawnerConfig) agent.Spawner { + if cfg.PollInitial == 0 { + cfg.PollInitial = 250 * time.Millisecond + } + if cfg.PollMax == 0 { + cfg.PollMax = 30 * time.Second + } + if cfg.ActivationTimeout == 0 { + cfg.ActivationTimeout = 5 * time.Minute + } + if cfg.MaxInflight <= 0 { + cfg.MaxInflight = 8 + } + sem := make(chan struct{}, cfg.MaxInflight) + return func(ctx context.Context, workerID domain.WorkerID, _ string, triggers []agent.Trigger) error { + if len(triggers) == 0 { + return errors.New("spawner invoked with no triggers") + } + if cfg.Client == nil { + return errors.New("helix spawner: client is nil") + } + if cfg.Store == nil { + return errors.New("helix spawner: store is nil") + } + prompt := agent.BuildPrompt(workerID, helixSpecsMandate, triggers) + + // Acquire global slot. The dispatcher serialises per-Worker, so + // blocking here only delays one Worker behind the rest of the + // org under burst load. + select { + case sem <- struct{}{}: + case <-ctx.Done(): + return ctx.Err() + } + defer func() { <-sem }() + + streamID := agent.ActivationStreamID(workerID) + publish := func(body string) { + if body == "" { + return + } + publishActivationEvent(ctx, cfg, workerID, streamID, body) + } + publish(fmt.Sprintf("=== activation: %s ===", agent.DescribeTriggers(triggers))) + + actCtx, cancel := context.WithTimeout(ctx, cfg.ActivationTimeout) + defer cancel() + + // Make sure the Worker has a Helix project. First activation + // (TriggerHire, or a TriggerEvent before hire fully ran) + // applies one and persists the IDs. + if err := cfg.ensureProject(actCtx, workerID); err != nil { + publish(fmt.Sprintf("=== exit: error: %v ===", err)) + return err + } + + sessionID, err := cfg.ensureSession(actCtx, workerID, prompt, publish) + if err != nil { + publish(fmt.Sprintf("=== exit: error: %v ===", err)) + return err + } + + // Live transcript bridge. 
On disconnect the spawner reconnects + // for the lifetime of the activation; the dedup map prevents + // republishing on snapshot replay. + bridge := newBridge(publish) + bridgeCtx, bridgeCancel := context.WithCancel(actCtx) + defer bridgeCancel() + go bridge.run(bridgeCtx, cfg, sessionID) + + err = cfg.pollUntilDone(actCtx, sessionID, publish) + bridgeCancel() + if err != nil { + publish(fmt.Sprintf("=== exit: error: %v ===", err)) + return err + } + publish("=== exit: ok ===") + return nil + } +} + +// helixSpecsMandate points the agent at its role + identity files, +// which the project applier pushes to the per-Worker repo's +// `helix-specs` branch. Helix's workspace-setup script creates a +// worktree for that branch at ~/work/helix-specs/ on every boot — +// but the worktree is only created when the branch exists on the +// remote at boot time, so if the worktree is missing the agent must +// materialise it itself. +const helixSpecsMandate = `Your org-wide policy, role, and identity files live on the +**helix-specs** branch of your per-Worker repo. helix-org pushes them +on hire and re-pushes them on every activation, so the remote always +has the current owner-edited version. Path inside the branch: + .context/agent.md (org-wide policy) + workers/${HELIX_WORKER_ID}/.context/role.md + workers/${HELIX_WORKER_ID}/.context/identity.md + +ALWAYS pull before reading — your local worktree is stale from prior +activations and won't reflect owner edits made since: + + if [ ! 
-d ~/work/helix-specs ]; then + cd ~/work/$(ls ~/work | grep -v helix-specs | head -1) + git fetch origin helix-specs + git worktree add ../helix-specs helix-specs + else + cd ~/work/helix-specs && git pull --ff-only origin helix-specs + fi + +Then read in this order — agent.md FIRST (it's the entrypoint that +tells you how to be an agent at all), then role.md, then identity.md: + cat ~/work/helix-specs/.context/agent.md + cat ~/work/helix-specs/workers/${HELIX_WORKER_ID}/.context/role.md + cat ~/work/helix-specs/workers/${HELIX_WORKER_ID}/.context/identity.md + +After meaningful work, persist state on helix-specs: + cd ~/work/helix-specs && git add -A && git commit -m 'checkpoint: ' && git push origin helix-specs` + +// ensureProject is a thin wrapper around ProjectApplier so the +// activation flow reads naturally. The actual apply logic is shared +// with the chat bridge — see project.go. +func (c SpawnerConfig) ensureProject(ctx context.Context, workerID domain.WorkerID) error { + a := &ProjectApplier{ + Client: c.Client, + Store: c.Store, + HelixOrgURL: c.HelixOrgURL, + OrgID: c.OrgID, + Provider: c.Provider, + Model: c.Model, + AgentMD: c.AgentMD, + Logger: c.Logger, + } + _, _, _, err := a.Ensure(ctx, workerID) + return err +} + +// ensureSession dispatches the activation prompt to the Worker's +// long-lived chat session, reusing the persisted session ID when one +// exists. We DON'T open a fresh session per activation: each fresh +// session spawns a fresh desktop container in Helix (~3 min cold +// start), which makes routine DM-driven activity painfully slow. +// Reusing the session keeps the container warm and lets follow-ups +// land in seconds. +// +// Cross-activation context: the agent has its prior chat history in +// the same Helix session AND the helix-specs branch in its workspace. +// Either is a sufficient reminder of "what came before"; carrying +// both is intentional belt-and-braces. 
+// +// Two paths: +// - **Follow-up** (state.SessionID exists): POST +// /api/v1/sessions/{id}/messages. Helix queues the message and +// pickupWaitingInteraction delivers it on agent reconnect — no +// warmup loop, no cold-start handling on our side. +// - **First activation** (no session yet): POST /sessions/chat to +// create the session. The dispatch may race the desktop's WS +// connect; if it does (hadWSError) we immediately re-queue the +// same prompt via the durable /messages endpoint so it lands as +// soon as the agent dials home. +func (c SpawnerConfig) ensureSession(ctx context.Context, workerID domain.WorkerID, prompt string, _ func(string)) (string, error) { + state, err := LoadState(ctx, c.Store, workerID) + if err != nil { + return "", err + } + if state.ProjectID == "" { + return "", fmt.Errorf("worker %s has no helix project — ensureProject must run first", workerID) + } + + // Follow-up: the persisted session ID is the durable target. + // SendSessionMessage queues the prompt; if the session is gone we + // fall through and open a fresh one. + if state.SessionID != "" { + if _, err := c.Client.SendSessionMessage(ctx, state.SessionID, prompt, helixclient.SendMessageOptions{}); err == nil { + return state.SessionID, nil + } else if c.Logger != nil { + c.Logger.Info("spawner: persisted session unusable, opening fresh", + "worker", workerID, "stale_sid", state.SessionID, "err", err) + } + } + + // First activation (or stale session): create one. Refuse early if + // the operator's desktop quota is already exhausted — Helix would + // otherwise spin up the project's plumbing (apply secrets, create + // the agent app) and fail at the StartDesktop step with a generic + // "desktop limit reached" 500 minutes later. We can't reserve a + // slot atomically (Helix doesn't expose that) so this is a soft + // pre-flight; a parallel caller could still beat us to the last + // slot, in which case Helix will return its own error. 
+ if err := helixclient.CheckDesktopQuota(ctx, c.Client); err != nil { + return "", err + } + // We post the activation prompt through StartChat so Helix has + // *something* to dispatch — but if the desktop's WS hasn't + // connected yet (hadWSError) the interaction is in error state, + // so we re-queue the same prompt durably. + req := helixclient.StartChatRequest{ + ProjectID: state.ProjectID, + AppID: state.AgentAppID, + SessionRole: "job", + AgentType: AgentType, + Type: "text", + ExternalAgentConfig: &helixclient.ExternalAgentConfig{}, + Messages: []helixclient.SessionChatMessage{helixclient.NewTextMessage("user", prompt)}, + } + session, hadWSError, err := c.Client.StartChatWithStatus(ctx, req) + if err != nil { + return "", fmt.Errorf("start chat: %w", err) + } + if err := SaveSession(ctx, c.Store, workerID, session.ID); err != nil { + return "", fmt.Errorf("persist session id: %w", err) + } + if hadWSError { + if _, err := c.Client.SendSessionMessage(ctx, session.ID, prompt, helixclient.SendMessageOptions{}); err != nil { + return "", fmt.Errorf("queue activation prompt: %w", err) + } + } + return session.ID, nil +} + +// pollUntilDone polls GetOutput with exponential backoff until a +// terminal status is reported or ctx fires. +func (c SpawnerConfig) pollUntilDone(ctx context.Context, sessionID string, publish func(string)) error { + delay := c.PollInitial + for { + out, err := c.Client.GetOutput(ctx, sessionID) + if err != nil { + // Don't fail the activation on a transient poll error; just + // back off and retry until the timeout fires. 
+ if c.Logger != nil { + c.Logger.Warn("helix poll", "session", sessionID, "err", err) + } + } else if out.IsTerminal() { + if out.Status == "error" { + return fmt.Errorf("session error: %s", agent.OneLine(out.Output, 500)) + } + return nil + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(delay): + } + delay *= 2 + if delay > c.PollMax { + delay = c.PollMax + } + } +} + +// bridge consumes WebSocket frames and publishes one transcript +// event per *settled* response entry. It owns a single `EntryStream` +// for the lifetime of the activation; the EntryStream's dedup state +// (per Index/MessageID) keeps snapshot replay safe across reconnects. +type bridge struct { + publish func(body string) + stream *helixclient.EntryStream +} + +func newBridge(publish func(body string)) *bridge { + b := &bridge{publish: publish} + b.stream = helixclient.NewEntryStream(b.onEvent) + return b +} + +// onEvent renders one settled EntryStream event into the line shape +// claude.Spawner has emitted historically. Both backends emit the +// same shape so observers don't have to discriminate. 
+func (b *bridge) onEvent(e helixclient.Event) { + switch e.Kind { + case helixclient.EventAssistant: + b.publish("assistant: " + agent.OneLine(e.Text, 500)) + case helixclient.EventToolUse: + b.publish(fmt.Sprintf("tool_use %s: %s", e.ToolName, agent.OneLine(e.Text, 500))) + case helixclient.EventToolResult: + b.publish("tool_result: " + agent.OneLine(e.Text, 500)) + case helixclient.EventToolResultError: + b.publish("tool_result-error: " + agent.OneLine(e.Text, 500)) + case helixclient.EventError: + b.publish("error: " + agent.OneLine(e.Text, 500)) + } +} + +func (b *bridge) run(ctx context.Context, cfg SpawnerConfig, sessionID string) { + delay := time.Second + for { + ch, err := cfg.Client.SubscribeUpdates(ctx, sessionID) + if err != nil { + if cfg.Logger != nil { + cfg.Logger.Warn("helix subscribe", "session", sessionID, "err", err) + } + } else { + for u := range ch { + b.stream.Apply(u) + } + } + // Reconnect with capped exponential backoff while the + // activation context is still live. 
+ select { + case <-ctx.Done(): + b.stream.Flush() + return + case <-time.After(delay): + } + if delay < 30*time.Second { + delay *= 2 + } + } +} + +func publishActivationEvent(ctx context.Context, cfg SpawnerConfig, workerID domain.WorkerID, streamID domain.StreamID, body string) { + if cfg.Store == nil || cfg.NewID == nil || cfg.Now == nil || strings.TrimSpace(body) == "" { + return + } + event, err := domain.NewMessageEvent( + domain.EventID("e-"+cfg.NewID()), + streamID, + workerID, + domain.Message{From: string(workerID), Body: body}, + cfg.Now(), + ) + if err != nil { + if cfg.Logger != nil { + cfg.Logger.Warn("helix activation event: build", "worker", workerID, "err", err) + } + return + } + if err := cfg.Store.Events.Append(ctx, event); err != nil { + if cfg.Logger != nil { + cfg.Logger.Warn("helix activation event: append", "worker", workerID, "err", err) + } + return + } + if cfg.Broadcaster != nil { + cfg.Broadcaster.Notify(streamID) + } +} diff --git a/helix-org/agent/helix/spawner_test.go b/helix-org/agent/helix/spawner_test.go new file mode 100644 index 0000000000..ba289d4d74 --- /dev/null +++ b/helix-org/agent/helix/spawner_test.go @@ -0,0 +1,494 @@ +package helix + +import ( + "context" + "errors" + "io" + "log/slog" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/helix/helixclient" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/store/sqlite" +) + +// fakeHelixClient is a deterministic stand-in for helixclient.Client. 
+type fakeHelixClient struct { + mu sync.Mutex + startCalls int32 + sendCalls int32 + outputCalls int32 + subscribeCalls int32 + startSessionID string + startErr error + sendErr error + outputs []helixclient.Output + updatesFactory func() <-chan helixclient.SessionUpdate + lastStartReq helixclient.StartChatRequest + lastSendSID string + lastSendBody string +} + +func (f *fakeHelixClient) SendSessionMessage(_ context.Context, sid, content string, _ helixclient.SendMessageOptions) (helixclient.SendMessageResponse, error) { + atomic.AddInt32(&f.sendCalls, 1) + f.mu.Lock() + f.lastSendSID = sid + f.lastSendBody = content + f.mu.Unlock() + if f.sendErr != nil { + return helixclient.SendMessageResponse{}, f.sendErr + } + return helixclient.SendMessageResponse{RequestID: "req_x", InteractionID: "ix_x"}, nil +} + +func (f *fakeHelixClient) StartChat(_ context.Context, req helixclient.StartChatRequest) (helixclient.Session, error) { + atomic.AddInt32(&f.startCalls, 1) + f.mu.Lock() + f.lastStartReq = req + f.mu.Unlock() + return helixclient.Session{ID: f.startSessionID}, f.startErr +} + +func (f *fakeHelixClient) StartChatWithStatus(ctx context.Context, req helixclient.StartChatRequest) (helixclient.Session, bool, error) { + s, err := f.StartChat(ctx, req) + return s, false, err +} + +func (f *fakeHelixClient) CreateGitRepo(_ context.Context, req helixclient.CreateGitRepoRequest) (helixclient.GitRepo, error) { + return helixclient.GitRepo{ID: "repo-" + req.Name, Name: req.Name}, nil +} + +func (f *fakeHelixClient) AttachRepoToProject(_ context.Context, _, _ string, _ bool) error { + return nil +} + +func (f *fakeHelixClient) CreateBranch(_ context.Context, _, _, _ string) error { return nil } + +func (f *fakeHelixClient) GetOutput(_ context.Context, _ string) (helixclient.Output, error) { + i := int(atomic.AddInt32(&f.outputCalls, 1)) - 1 + f.mu.Lock() + defer f.mu.Unlock() + if i >= len(f.outputs) { + return f.outputs[len(f.outputs)-1], nil + } + return f.outputs[i], nil 
+} + +func (f *fakeHelixClient) SubscribeUpdates(_ context.Context, _ string) (<-chan helixclient.SessionUpdate, error) { + atomic.AddInt32(&f.subscribeCalls, 1) + if f.updatesFactory != nil { + return f.updatesFactory(), nil + } + ch := make(chan helixclient.SessionUpdate) + close(ch) + return ch, nil +} + +func (f *fakeHelixClient) StopExternalAgent(_ context.Context, _ string) error { return nil } +func (f *fakeHelixClient) ServerStatus(_ context.Context) (helixclient.ServerStatus, error) { + return helixclient.ServerStatus{MaxConcurrentDesktops: 0, ActiveConcurrentDesktops: 0}, nil +} +func (f *fakeHelixClient) ListProviders(_ context.Context) ([]string, error) { + return []string{"openai", "anthropic"}, nil +} +func (f *fakeHelixClient) ListModelsForProvider(_ context.Context, _ string) ([]helixclient.Model, error) { + return []helixclient.Model{{ID: "gpt-4o-mini", Enabled: true}, {ID: "claude-opus-4-6", Enabled: true}}, nil +} +func (f *fakeHelixClient) WhoAmI(_ context.Context) (helixclient.UserStatus, error) { + return helixclient.UserStatus{}, nil +} +func (f *fakeHelixClient) ApplyProject(_ context.Context, _ helixclient.ProjectApplyRequest) (helixclient.ProjectApplyResponse, error) { + return helixclient.ProjectApplyResponse{ProjectID: "prj_test", AgentAppID: "app_test", Created: true}, nil +} +func (f *fakeHelixClient) GetProject(_ context.Context, _ string) (helixclient.Project, error) { + return helixclient.Project{ID: "prj_test", DefaultRepoID: "repo_test"}, nil +} +func (f *fakeHelixClient) DeleteProject(_ context.Context, _ string) error { return nil } +func (f *fakeHelixClient) GetSession(_ context.Context, _ string) (helixclient.Session, error) { + return helixclient.Session{}, nil +} +func (f *fakeHelixClient) PutProjectSecret(_ context.Context, _, _, _ string) error { return nil } +func (f *fakeHelixClient) PutFile(_ context.Context, _ string, _ helixclient.PutFileRequest) error { + return nil +} +func (f *fakeHelixClient) GetFile(_ 
context.Context, _, _, _ string) (string, error) { + return "", nil +} +func (f *fakeHelixClient) CreateApp(_ context.Context, _ helixclient.AppRequest) (helixclient.App, error) { + return helixclient.App{ID: "app_test"}, nil +} +func (f *fakeHelixClient) GetApp(_ context.Context, _ string) (helixclient.App, error) { + return helixclient.App{}, nil +} +func (f *fakeHelixClient) UpdateApp(_ context.Context, _ string, _ helixclient.AppRequest) (helixclient.App, error) { + return helixclient.App{}, nil +} + +func newHelixTestStore(t *testing.T) (*store.Store, domain.WorkerID) { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + ctx := context.Background() + role, _ := domain.NewRole("r-eng", "# Role: Engineer", time.Now().UTC()) + if err := s.Roles.Create(ctx, role); err != nil { + t.Fatalf("role: %v", err) + } + pos, _ := domain.NewPosition("p-eng", "r-eng", nil) + if err := s.Positions.Create(ctx, pos); err != nil { + t.Fatalf("pos: %v", err) + } + worker, _ := domain.NewAIWorker("w-eng", []domain.PositionID{"p-eng"}, "# Persona") + if err := s.Workers.Create(ctx, worker); err != nil { + t.Fatalf("worker: %v", err) + } + return s, worker.ID() +} + +func newHelixCfg(t *testing.T, fc *fakeHelixClient, s *store.Store) SpawnerConfig { + t.Helper() + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + return SpawnerConfig{ + Client: fc, + HelixOrgURL: "http://helix-org:8081", + Provider: "openai", + Model: "gpt-4o-mini", + ActivationTimeout: 2 * time.Second, + MaxInflight: 2, + PollInitial: time.Millisecond, + PollMax: 5 * time.Millisecond, + Logger: logger, + Store: s, + Now: func() time.Time { return time.Now().UTC() }, + NewID: func() string { return "id" }, + } +} + +func TestSpawnerStartsFreshAndPersistsSession(t *testing.T) { + t.Parallel() + s, wid := newHelixTestStore(t) + fc := &fakeHelixClient{ + startSessionID: "ses_new", + outputs: []helixclient.Output{{Status: "complete", Output: "ok"}}, + } + sp := 
Spawner(newHelixCfg(t, fc, s)) + err := sp(context.Background(), wid, "/ignored", []agent.Trigger{{Kind: agent.TriggerHire}}) + if err != nil { + t.Fatalf("spawn: %v", err) + } + if got := atomic.LoadInt32(&fc.startCalls); got != 1 { + t.Errorf("StartChat calls: %d", got) + } + state, err := LoadState(context.Background(), s, wid) + if err != nil { + t.Fatalf("load state: %v", err) + } + if state.SessionID != "ses_new" { + t.Errorf("session pointer = %q", state.SessionID) + } + // The Worker should have its per-project IDs persisted from the + // fake's ApplyProject response. + if state.ProjectID != "prj_test" || state.AgentAppID != "app_test" { + t.Errorf("project IDs not persisted: project=%q agent_app=%q", state.ProjectID, state.AgentAppID) + } + // StartChat must point at the per-Worker project, not at any + // global one. + if fc.lastStartReq.ProjectID != "prj_test" { + t.Errorf("StartChat ProjectID = %q (want prj_test)", fc.lastStartReq.ProjectID) + } + if fc.lastStartReq.AppID != "app_test" { + t.Errorf("StartChat AppID = %q (want app_test)", fc.lastStartReq.AppID) + } +} + +// TestBridgeRendersEntryPatchEvents verifies that the bridge's +// EntryStream callback produces the same line shapes the claude +// bridge emits — assistant text, tool_use, tool_result. 
+func TestBridgeRendersEntryPatchEvents(t *testing.T) { + t.Parallel() + var got []string + b := newBridge(func(s string) { got = append(got, s) }) + b.stream.Apply(helixclient.SessionUpdate{EntryPatches: []helixclient.EntryPatch{ + {Index: 0, MessageID: "m1", Type: "text", Patch: "hi", PatchOffset: 0}, + }}) + b.stream.Apply(helixclient.SessionUpdate{EntryPatches: []helixclient.EntryPatch{ + {Index: 1, MessageID: "t1", Type: "tool_call", Patch: `{"x":1}`, ToolName: "publish", ToolStatus: "Completed"}, + }}) + b.stream.Flush() + if len(got) < 3 { + t.Fatalf("expected ≥3 events, got %d: %v", len(got), got) + } + joined := strings.Join(got, "\n") + if !strings.Contains(joined, "assistant: hi") { + t.Errorf("missing assistant: %v", got) + } + if !strings.Contains(joined, "tool_use publish: {\"x\":1}") { + t.Errorf("missing tool_use: %v", got) + } + if !strings.Contains(joined, "tool_result: ") { + t.Errorf("missing tool_result: %v", got) + } +} + +// TestSpawnerFollowUpUsesSendSessionMessage verifies that a Worker +// with a persisted Helix session ID skips StartChat entirely and +// queues the activation prompt via SendSessionMessage. This is the +// path that should land in seconds against a warm desktop with no +// re-creation overhead. +func TestSpawnerFollowUpUsesSendSessionMessage(t *testing.T) { + t.Parallel() + s, wid := newHelixTestStore(t) + // Pre-seed an existing project + session for this worker. 
+ if err := SaveProject(context.Background(), s, wid, "prj_test", "app_test", "repo_test"); err != nil { + t.Fatalf("save project: %v", err) + } + if err := SaveSession(context.Background(), s, wid, "ses_existing"); err != nil { + t.Fatalf("save session: %v", err) + } + fc := &fakeHelixClient{ + outputs: []helixclient.Output{{Status: "complete", Output: "ok"}}, + } + sp := Spawner(newHelixCfg(t, fc, s)) + if err := sp(context.Background(), wid, "/ignored", []agent.Trigger{{Kind: agent.TriggerEvent, EventID: "e-1"}}); err != nil { + t.Fatalf("spawn: %v", err) + } + if got := atomic.LoadInt32(&fc.startCalls); got != 0 { + t.Errorf("StartChat must not be called on follow-up; got %d", got) + } + if got := atomic.LoadInt32(&fc.sendCalls); got != 1 { + t.Errorf("SendSessionMessage calls: %d (want 1)", got) + } + fc.mu.Lock() + defer fc.mu.Unlock() + if fc.lastSendSID != "ses_existing" { + t.Errorf("targeted session: %q (want ses_existing)", fc.lastSendSID) + } +} + +// TestSpawnerRefusesWhenDesktopQuotaExceeded asserts the spawner fails +// fast with a useful error when Helix's `max_concurrent_desktops` +// would be exceeded by spinning up a new session. Important: only +// fires when there's no existing session — follow-ups reuse the warm +// container and must skip the check (covered by +// TestSpawnerFollowUpUsesSendSessionMessage). 
+func TestSpawnerRefusesWhenDesktopQuotaExceeded(t *testing.T) { + t.Parallel() + s, wid := newHelixTestStore(t) + fc := "aFullFakeClient{ + fakeHelixClient: fakeHelixClient{startSessionID: "ses_x"}, + } + cfg := newHelixCfg(t, &fc.fakeHelixClient, s) + cfg.Client = fc + sp := Spawner(cfg) + err := sp(context.Background(), wid, "/ignored", []agent.Trigger{{Kind: agent.TriggerHire}}) + if err == nil { + t.Fatal("expected error when quota exhausted") + } + if !strings.Contains(err.Error(), "quota reached") { + t.Errorf("error %q does not mention quota", err) + } + if got := atomic.LoadInt32(&fc.startCalls); got != 0 { + t.Errorf("StartChat must NOT be called when quota is full; got %d", got) + } +} + +// quotaFullFakeClient overrides ServerStatus to report no available +// desktop slots, simulating Helix's `max_concurrent_desktops` cap. +type quotaFullFakeClient struct { + fakeHelixClient +} + +func (f *quotaFullFakeClient) ServerStatus(_ context.Context) (helixclient.ServerStatus, error) { + return helixclient.ServerStatus{MaxConcurrentDesktops: 2, ActiveConcurrentDesktops: 2}, nil +} + +// TestSpawnerColdStartReQueues verifies that when StartChatWithStatus +// reports hadWSError=true on a fresh session, the spawner immediately +// re-queues the same prompt via SendSessionMessage so the durable +// queue picks it up on agent reconnect. 
+func TestSpawnerColdStartReQueues(t *testing.T) { + t.Parallel() + s, wid := newHelixTestStore(t) + fc := &coldStartFakeClient{ + fakeHelixClient: fakeHelixClient{ + startSessionID: "ses_new", + outputs: []helixclient.Output{{Status: "complete", Output: "ok"}}, + }, + hadWSError: true, + } + cfg := newHelixCfg(t, &fc.fakeHelixClient, s) + cfg.Client = fc + sp := Spawner(cfg) + if err := sp(context.Background(), wid, "/ignored", []agent.Trigger{{Kind: agent.TriggerHire}}); err != nil { + t.Fatalf("spawn: %v", err) + } + if got := atomic.LoadInt32(&fc.startCalls); got != 1 { + t.Errorf("StartChat calls: %d (want 1)", got) + } + if got := atomic.LoadInt32(&fc.sendCalls); got != 1 { + t.Errorf("SendSessionMessage calls: %d (want 1, the cold-start re-queue)", got) + } + fc.mu.Lock() + defer fc.mu.Unlock() + if fc.lastSendSID != "ses_new" { + t.Errorf("re-queue session: %q (want ses_new)", fc.lastSendSID) + } +} + +// coldStartFakeClient overrides StartChatWithStatus to return +// hadWSError=true, simulating Helix's "no agent WS yet" race. 
+type coldStartFakeClient struct { + fakeHelixClient + hadWSError bool +} + +func (f *coldStartFakeClient) StartChatWithStatus(ctx context.Context, req helixclient.StartChatRequest) (helixclient.Session, bool, error) { + s, err := f.StartChat(ctx, req) + return s, f.hadWSError, err +} + +func TestSpawnerTimeoutEmitsExitError(t *testing.T) { + t.Parallel() + s, wid := newHelixTestStore(t) + fc := &fakeHelixClient{ + startSessionID: "ses_x", + outputs: []helixclient.Output{{Status: "waiting"}}, + } + cfg := newHelixCfg(t, fc, s) + cfg.ActivationTimeout = 30 * time.Millisecond + sp := Spawner(cfg) + err := sp(context.Background(), wid, "/ignored", []agent.Trigger{{Kind: agent.TriggerHire}}) + if err == nil || !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("expected deadline error, got %v", err) + } +} + +func TestSpawnerSemaphoreSerialises(t *testing.T) { + t.Parallel() + s, wid := newHelixTestStore(t) + gate := make(chan struct{}) + var inflight, peak int32 + fc := &fakeHelixClient{ + startSessionID: "ses_x", + outputs: []helixclient.Output{{Status: "complete", Output: "ok"}}, + } + original := fc.outputs[0] + fc.outputs = []helixclient.Output{original} + + cfg := newHelixCfg(t, fc, s) + cfg.MaxInflight = 1 + cfg.ActivationTimeout = time.Second + + wrapped := &concurrencyClient{inner: fc, gate: gate, inflight: &inflight, peak: &peak} + cfg.Client = wrapped + sp := Spawner(cfg) + + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _ = sp(context.Background(), wid, "/ignored", []agent.Trigger{{Kind: agent.TriggerHire}}) + }() + } + time.Sleep(20 * time.Millisecond) + close(gate) + wg.Wait() + if got := atomic.LoadInt32(&peak); got > 1 { + t.Errorf("peak inflight = %d (want <=1)", got) + } +} + +type concurrencyClient struct { + inner helixclient.Client + gate chan struct{} + inflight *int32 + peak *int32 +} + +func (c *concurrencyClient) StartChat(ctx context.Context, req helixclient.StartChatRequest) 
(helixclient.Session, error) { + cur := atomic.AddInt32(c.inflight, 1) + for { + p := atomic.LoadInt32(c.peak) + if cur <= p || atomic.CompareAndSwapInt32(c.peak, p, cur) { + break + } + } + defer atomic.AddInt32(c.inflight, -1) + <-c.gate + return c.inner.StartChat(ctx, req) +} +func (c *concurrencyClient) StartChatWithStatus(ctx context.Context, req helixclient.StartChatRequest) (helixclient.Session, bool, error) { + s, err := c.StartChat(ctx, req) + return s, false, err +} +func (c *concurrencyClient) SendSessionMessage(ctx context.Context, sid, content string, opts helixclient.SendMessageOptions) (helixclient.SendMessageResponse, error) { + return c.inner.SendSessionMessage(ctx, sid, content, opts) +} +func (c *concurrencyClient) ServerStatus(ctx context.Context) (helixclient.ServerStatus, error) { + return c.inner.ServerStatus(ctx) +} +func (c *concurrencyClient) ListProviders(ctx context.Context) ([]string, error) { + return c.inner.ListProviders(ctx) +} +func (c *concurrencyClient) ListModelsForProvider(ctx context.Context, provider string) ([]helixclient.Model, error) { + return c.inner.ListModelsForProvider(ctx, provider) +} +func (c *concurrencyClient) CreateGitRepo(ctx context.Context, req helixclient.CreateGitRepoRequest) (helixclient.GitRepo, error) { + return c.inner.CreateGitRepo(ctx, req) +} +func (c *concurrencyClient) AttachRepoToProject(ctx context.Context, projectID, repoID string, primary bool) error { + return c.inner.AttachRepoToProject(ctx, projectID, repoID, primary) +} +func (c *concurrencyClient) CreateBranch(ctx context.Context, repoID, branch, baseBranch string) error { + return c.inner.CreateBranch(ctx, repoID, branch, baseBranch) +} +func (c *concurrencyClient) GetOutput(ctx context.Context, sid string) (helixclient.Output, error) { + return c.inner.GetOutput(ctx, sid) +} +func (c *concurrencyClient) SubscribeUpdates(ctx context.Context, sid string) (<-chan helixclient.SessionUpdate, error) { + return c.inner.SubscribeUpdates(ctx, 
sid) +} +func (c *concurrencyClient) StopExternalAgent(ctx context.Context, sid string) error { + return c.inner.StopExternalAgent(ctx, sid) +} +func (c *concurrencyClient) WhoAmI(ctx context.Context) (helixclient.UserStatus, error) { + return c.inner.WhoAmI(ctx) +} +func (c *concurrencyClient) ApplyProject(ctx context.Context, req helixclient.ProjectApplyRequest) (helixclient.ProjectApplyResponse, error) { + return c.inner.ApplyProject(ctx, req) +} +func (c *concurrencyClient) GetProject(ctx context.Context, id string) (helixclient.Project, error) { + return c.inner.GetProject(ctx, id) +} +func (c *concurrencyClient) DeleteProject(ctx context.Context, id string) error { + return c.inner.DeleteProject(ctx, id) +} +func (c *concurrencyClient) GetSession(ctx context.Context, id string) (helixclient.Session, error) { + return c.inner.GetSession(ctx, id) +} +func (c *concurrencyClient) PutProjectSecret(ctx context.Context, projectID, name, value string) error { + return c.inner.PutProjectSecret(ctx, projectID, name, value) +} +func (c *concurrencyClient) PutFile(ctx context.Context, repoID string, req helixclient.PutFileRequest) error { + return c.inner.PutFile(ctx, repoID, req) +} +func (c *concurrencyClient) GetFile(ctx context.Context, repoID, path, branch string) (string, error) { + return c.inner.GetFile(ctx, repoID, path, branch) +} +func (c *concurrencyClient) CreateApp(ctx context.Context, req helixclient.AppRequest) (helixclient.App, error) { + return c.inner.CreateApp(ctx, req) +} +func (c *concurrencyClient) GetApp(ctx context.Context, id string) (helixclient.App, error) { + return c.inner.GetApp(ctx, id) +} +func (c *concurrencyClient) UpdateApp(ctx context.Context, id string, req helixclient.AppRequest) (helixclient.App, error) { + return c.inner.UpdateApp(ctx, id, req) +} diff --git a/helix-org/agent/helix/state.go b/helix-org/agent/helix/state.go new file mode 100644 index 0000000000..1fd45b1d4c --- /dev/null +++ b/helix-org/agent/helix/state.go @@ -0,0 
+1,106 @@ +// Package helix is the production Spawner runtime: each AI Worker +// activation drives a chat session against a co-located Helix server. +// +// Per-Worker state — the Helix project ID, the auto-provisioned Agent +// App ID, the project's primary git repo ID, and the live chat session +// pointer — lives in the WorkerRuntimeState sidecar store under the +// "helix" backend label. The accessors in state.go give the rest of +// this package typed access without leaking key strings everywhere. +package helix + +import ( + "context" + "errors" + "fmt" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// Backend is the label used in WorkerRuntimeState to namespace this +// runtime's per-Worker keys. helix-org core never reads it; it's +// here only so every helix-runtime call site spells the same string. +const Backend = "helix" + +// Runtime and AgentType are the only Helix project / session shape +// helix-org uses. Every per-Worker project (owner-chat session AND +// AI-worker activation) is applied with `Runtime=zed_agent` and every +// `/sessions/chat` POST sets `agent_type=zed_external`. Two reasons +// to make these constants rather than configurable: +// +// - `claude_code` runtime ignores the project's Provider/Model and +// talks directly to Anthropic via its own creds — empirically +// hangs in `state=waiting` on app.helix.ml because the in-sandbox +// agent can't reach Anthropic. `zed_agent` routes inference back +// through Helix and honours the configured provider/model. +// - Mixing `claude_code` for AI workers and `zed_agent` for owner +// chat creates two completely different sandbox shapes for the +// "same" runtime — confusing to debug and impossible to reason +// about. ONE shape, ALWAYS. +// +// Helix's own `helix_basic` agent_type doesn't route MCP tool calls +// in inference — only `zed_external` does — so we'd never want it +// for the chat surface or worker activations either way. 
+const ( + Runtime = "zed_agent" + AgentType = "zed_external" +) + +// WorkerState holds the per-Worker pointers the Helix runtime needs. +// All four fields are empty for a Worker that hasn't been activated +// yet; the runtime's first activation materialises ProjectID + +// AgentAppID + RepoID via ProjectApplier.Ensure, and SessionID is +// set when the first chat session opens. +type WorkerState struct { + ProjectID string + AgentAppID string + RepoID string + SessionID string +} + +const ( + keyProjectID = "project_id" + keyAgentAppID = "agent_app_id" + keyRepoID = "repo_id" + keySessionID = "session_id" +) + +// LoadState returns the Helix-backend state for a Worker. Empty fields +// mean "not yet set" and are not an error; an error is returned only when the store is nil or the state read fails. +func LoadState(ctx context.Context, st *store.Store, workerID domain.WorkerID) (WorkerState, error) { + if st == nil || st.WorkerRuntimeState == nil { + return WorkerState{}, errors.New("helix state: store is nil") + } + kv, err := st.WorkerRuntimeState.Get(ctx, workerID, Backend) + if err != nil { + return WorkerState{}, fmt.Errorf("helix state: get %s: %w", workerID, err) + } + return WorkerState{ + ProjectID: kv[keyProjectID], + AgentAppID: kv[keyAgentAppID], + RepoID: kv[keyRepoID], + SessionID: kv[keySessionID], + }, nil +} + +// SaveProject persists the per-Worker project triple — created once +// at first activation by ProjectApplier.Ensure. +func SaveProject(ctx context.Context, st *store.Store, workerID domain.WorkerID, projectID, agentAppID, repoID string) error { + if st == nil || st.WorkerRuntimeState == nil { + return errors.New("helix state: store is nil") + } + return st.WorkerRuntimeState.SetMany(ctx, workerID, Backend, map[string]string{ + keyProjectID: projectID, + keyAgentAppID: agentAppID, + keyRepoID: repoID, + }) +} + +// SaveSession persists the live Helix chat session ID. Reused across +// activations so the per-Worker desktop container stays warm. 
+func SaveSession(ctx context.Context, st *store.Store, workerID domain.WorkerID, sessionID string) error { + if st == nil || st.WorkerRuntimeState == nil { + return errors.New("helix state: store is nil") + } + return st.WorkerRuntimeState.Set(ctx, workerID, Backend, keySessionID, sessionID) +} diff --git a/helix-org/agent/helix/workspace.go b/helix-org/agent/helix/workspace.go new file mode 100644 index 0000000000..0fc2555f08 --- /dev/null +++ b/helix-org/agent/helix/workspace.go @@ -0,0 +1,109 @@ +package helix + +import ( + "context" + "errors" + "fmt" + "sync" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/helix/helixclient" + "github.com/helixml/helix-org/store" +) + +// Workspace is the agent.WorkspaceSync implementation that pushes +// canonical role / identity content to the helix-specs branch of a +// Worker's per-Worker repo. Each call resolves the target repo from +// the Worker's runtime state (set by ProjectApplier at first +// activation) and PUTs one file onto the configured branch at +// `workers/<workerID>/.context/<name>` — the same path layout +// ProjectApplier.republishWorkerFiles writes and the activation +// mandate tells the agent to `git pull` and `cat`. +// +// Workers that haven't been activated against a Helix project yet +// (RepoID == "") are no-ops; callers don't have to gate on activation +// status. +type Workspace struct { + client helixclient.Client + store *store.Store + branch string + author string + email string + + // repoLocks serialises pushes to the same repo. Helix's git write + // path is not concurrency-safe per repo (it pre-syncs, writes, + // post-pushes against a single working copy on the Helix host). + // Two simultaneous PutFile calls against the same repo race on + // the working copy. + mu sync.Mutex + repoLocks map[string]*sync.Mutex +} + +// NewWorkspace constructs a Workspace that resolves repo IDs per +// call from the runtime-state sidecar. 
branch is the target branch +// (typically "helix-specs"); author/email are the commit metadata. +func NewWorkspace(client helixclient.Client, st *store.Store, branch, author, email string) *Workspace { + return &Workspace{ + client: client, + store: st, + branch: branch, + author: author, + email: email, + repoLocks: map[string]*sync.Mutex{}, + } +} + +// PublishFile satisfies agent.WorkspaceSync. `name` is the logical +// filename for this Worker (e.g. "role.md"); the Helix backend writes +// it at `workers/<workerID>/.context/<name>` on the helix-specs +// branch. Returns nil for Workers that aren't yet bound to a Helix +// project — callers don't have to gate on activation status. +func (w *Workspace) PublishFile(ctx context.Context, workerID domain.WorkerID, name, content, message string) error { + if workerID == "" { + return errors.New("helix workspace: workerID is empty") + } + if err := agent.ValidateWorkspaceName(name); err != nil { + return fmt.Errorf("helix workspace: %w", err) + } + state, err := LoadState(ctx, w.store, workerID) + if err != nil { + return fmt.Errorf("helix workspace: load state %q: %w", workerID, err) + } + if state.RepoID == "" { + // Worker not yet bound to a Helix project — silently skip. + // First activation will populate the project and write the + // canonical files; this branch is for updates that happen + // before the first activation completes. 
+ return nil + } + repoPath := "workers/" + string(workerID) + "/.context/" + name + if message == "" { + message = fmt.Sprintf("update %s", repoPath) + } + lock := w.lockFor(state.RepoID) + lock.Lock() + defer lock.Unlock() + return w.client.PutFile(ctx, state.RepoID, helixclient.PutFileRequest{ + Path: repoPath, + Branch: w.branch, + Message: message, + Author: w.author, + Email: w.email, + Content: content, + }) +} + +func (w *Workspace) lockFor(repoID string) *sync.Mutex { + w.mu.Lock() + defer w.mu.Unlock() + if l, ok := w.repoLocks[repoID]; ok { + return l + } + l := &sync.Mutex{} + w.repoLocks[repoID] = l + return l +} + +// Compile-time check. +var _ agent.WorkspaceSync = (*Workspace)(nil) diff --git a/helix-org/agent/helix/workspace_test.go b/helix-org/agent/helix/workspace_test.go new file mode 100644 index 0000000000..f767f934d0 --- /dev/null +++ b/helix-org/agent/helix/workspace_test.go @@ -0,0 +1,101 @@ +package helix + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/helix/helixclient" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/store/sqlite" +) + +type fakeClient struct { + helixclient.Client + lastRepoID string + lastReq helixclient.PutFileRequest + err error +} + +func (f *fakeClient) PutFile(_ context.Context, repoID string, req helixclient.PutFileRequest) error { + f.lastRepoID = repoID + f.lastReq = req + return f.err +} + +func newSeededStore(t *testing.T, repoID string) (*store.Store, domain.WorkerID) { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + ctx := context.Background() + role, _ := domain.NewRole("r-eng", "# Role", time.Now().UTC()) + _ = s.Roles.Create(ctx, role) + pos, _ := domain.NewPosition("p-eng", "r-eng", nil) + _ = s.Positions.Create(ctx, pos) + w, _ := domain.NewAIWorker("w-eng", []domain.PositionID{"p-eng"}, "# Persona") + _ = s.Workers.Create(ctx, w) + if repoID != "" 
{ + _ = SaveProject(ctx, s, w.ID(), "prj_x", "app_x", repoID) + } + return s, w.ID() +} + +func TestWorkspaceWritesToWorkerRepo(t *testing.T) { + t.Parallel() + s, wid := newSeededStore(t, "repo-1") + fc := &fakeClient{} + w := NewWorkspace(fc, s, "helix-specs", "helix-org", "ho@example.com") + if err := w.PublishFile(context.Background(), wid, "role.md", "# Role", "update_role: r-eng"); err != nil { + t.Fatalf("publish: %v", err) + } + if fc.lastRepoID != "repo-1" { + t.Errorf("repo: %q", fc.lastRepoID) + } + wantPath := "workers/" + string(wid) + "/.context/role.md" + if fc.lastReq.Branch != "helix-specs" || fc.lastReq.Path != wantPath || fc.lastReq.Content != "# Role" { + t.Errorf("req: %+v (want path=%q)", fc.lastReq, wantPath) + } +} + +func TestWorkspaceUnboundWorkerIsNoop(t *testing.T) { + t.Parallel() + // Worker without a Helix project — repoID empty. + s, wid := newSeededStore(t, "") + fc := &fakeClient{} + w := NewWorkspace(fc, s, "helix-specs", "", "") + if err := w.PublishFile(context.Background(), wid, "role.md", "# Role", ""); err != nil { + t.Fatalf("publish: %v", err) + } + if fc.lastRepoID != "" { + t.Errorf("expected no PutFile when worker has no repo, got %q", fc.lastRepoID) + } +} + +func TestWorkspaceSurfacesErrors(t *testing.T) { + t.Parallel() + s, wid := newSeededStore(t, "repo-1") + fc := &fakeClient{err: errors.New("boom")} + w := NewWorkspace(fc, s, "helix-specs", "", "") + if err := w.PublishFile(context.Background(), wid, "role.md", "x", ""); err == nil { + t.Fatal("expected error") + } +} + +func TestWorkspaceRejectsBadName(t *testing.T) { + t.Parallel() + s, wid := newSeededStore(t, "repo-1") + fc := &fakeClient{} + w := NewWorkspace(fc, s, "helix-specs", "", "") + for _, bad := range []string{"", "/role.md", "../role.md", "a/../b"} { + if err := w.PublishFile(context.Background(), wid, bad, "x", ""); err == nil { + t.Errorf("name %q: expected error", bad) + } + } + if fc.lastRepoID != "" { + t.Errorf("expected no PutFile on bad 
names, got %q", fc.lastRepoID) + } +} diff --git a/helix-org/agent/policy.go b/helix-org/agent/policy.go new file mode 100644 index 0000000000..c491320b9c --- /dev/null +++ b/helix-org/agent/policy.go @@ -0,0 +1,23 @@ +// Package agent is the runtime layer that activates AI Workers — the +// thing that takes "this Worker just got an event" and turns it into +// an actual LLM-driven turn. Concrete runtimes live in sub-packages +// (agent/claude, agent/helix). This package holds the runtime-shared +// types: the Spawner contract, Trigger/TriggerKind, the WorkspaceSync +// interface, and the canonical agent-policy text every runtime feeds +// to the LLM. +package agent + +import _ "embed" + +// Policy is the org-wide agent.md text every AI Worker reads at the +// start of every activation. It is fixed across Roles and hires — it +// tells the agent how to *be* an agent in helix-org, not what its job +// is. Roles cover the latter. +// +// Both runtimes embed this verbatim: the claude runtime writes it as +// `agent.md` in the Worker's env directory, the Helix runtime pushes +// it to `.context/agent.md` on the per-Worker repo's helix-specs +// branch. +// +//go:embed policy.md +var Policy string diff --git a/helix-org/agent/policy.md b/helix-org/agent/policy.md new file mode 100644 index 0000000000..e15cbb40e6 --- /dev/null +++ b/helix-org/agent/policy.md @@ -0,0 +1,91 @@ +# Agent + +You are an AI Worker inside the helix-org runtime. This file is fixed +across every Role and every hire — it tells you how to *be* an agent +in this org, not what your job is. `role.md` and `identity.md` cover +those. + +## You are an AI, not a human + +You are an AI Worker. Human-shaped constraints — anything that +applies because the role is normally filled by a person rather than +because the work itself requires it — do not apply to you unless +your Role explicitly says they do. 
Reason about feasibility and +duration as the AI you are, not as the human professional whose +role you are modelling. Default to acting. + +## What every activation looks like + +1. Read `role.md` (your job) and `identity.md` (who you are). +2. Read the Trigger block at the bottom of this prompt — that's what + just woke you up. +3. **Read `helix-log.md` if it exists.** It is the running record of what + you've already said and done across past activations. The most + recent entries matter most. If a peer has already said what you + were about to say, don't repeat it. +4. Decide whether this activation deserves a public response (see + "Speaking discipline" below). Most don't. +5. If you do publish anything, append a short entry to `helix-log.md` first + so future-you knows what current-you already said. Format: + ``` + ## + + ``` +6. Do the work, then exit. Each activation is a single turn. + +## Speaking discipline — bias toward silence + +The biggest failure mode in this system is AI Workers responding to +each other in cascades, generating noise that no human asked for. Hold +a high bar before publishing on any broadcast Stream: + +- **If you cannot add information no one else has already added in + `helix-log.md` or recent stream events, stay silent.** Silence is a valid + outcome of an activation. Exiting without publishing is correct + behaviour, not failure. +- **An acknowledgement is not a contribution.** "Thanks", "good + point", "I agree", "let me know if you need more" — these are + social moves humans make to signal presence. You don't need to + signal presence; the org already knows you're subscribed. Skip + them. +- **Restating someone else's point is not a contribution.** If a peer + has already covered the ground, don't paraphrase it back. +- **A question you can answer for yourself is not worth publishing.** + Use your shell tools, read the org graph, check `helix-log.md` — only ask + the stream when the answer genuinely requires another Worker. 
+ +## AI-origin vs human-origin events + +Each Trigger includes a `source_kind` field. Treat the two very +differently: + +- **`source_kind: human`** — high priority. A human is asking + something. Default to engaging if your Role applies. +- **`source_kind: ai`** — low priority. Another AI Worker generated + this. Default to **not responding** unless one of these is true: + - the message is a direct address to you (DM, or `to:` includes + your Worker ID), AND it asks for a decision, action, or + information only you can provide; + - the message materially advances work `role.md` says you own, and + no human has weighed in yet on the same thread; + - silence would leave the org stuck on an action you uniquely can + take. + In every other AI-origin case, exit without publishing. + +## Direct address vs broadcast + +- A DM or a publish where `to:` includes your Worker ID is a direct + request. Engage, but still apply the "add new information" bar + before replying. +- A broadcast publish (no `to:`, multiple subscribers) is *for the + room*, not for you specifically. Default to silence; speak only if + the bar set under "Speaking discipline" is met. + +## Errors and exits + +If you cannot make progress (missing tool grant, ambiguous request, +broken environment), say so once — briefly — and exit. Do not loop, +retry, or compose long apologies. A short failure note in `helix-log.md` is +enough. + +You may now act on the Trigger. diff --git a/helix-org/agent/prompt.go b/helix-org/agent/prompt.go new file mode 100644 index 0000000000..9a9843551c --- /dev/null +++ b/helix-org/agent/prompt.go @@ -0,0 +1,178 @@ +package agent + +import ( + "fmt" + "strings" + "time" + + "github.com/helixml/helix-org/domain" +) + +// BuildPrompt assembles the per-activation prompt: identity hint + +// mandate + the triggers that woke the Worker up. 
The dispatcher +// coalesces bursts, so a single activation may carry multiple triggers +// — the prompt frames them as a numbered list when that happens, so +// the agent can read all of them before deciding what to do (often the +// most recent supersedes the earlier ones). Tools are exposed natively +// via MCP under the "helix" server (tool names appear as +// mcp__helix__); Claude figures the rest out from tools/list. +// +// `mandate` is the static text the agent reads first — for the local +// claude runtime it's the embedded agent.Policy, for the Helix runtime +// it's a short pointer at the helix-specs branch (which carries the +// real policy text). +func BuildPrompt(workerID domain.WorkerID, mandate string, triggers []Trigger) string { + var ctx strings.Builder + + if len(triggers) > 1 { + fmt.Fprintf(&ctx, "%d triggers have queued for you since your last activation. They are listed below in arrival order. Read all of them before deciding what to do — often the latest supersedes earlier ones, and most cascades resolve to a single response or to silence.\n\n", len(triggers)) + } + + for i, t := range triggers { + if len(triggers) > 1 { + fmt.Fprintf(&ctx, "[%d/%d]\n", i+1, len(triggers)) + } + switch t.Kind { + case TriggerHire: + ctx.WriteString("You have just been hired. This is your first activation. Complete any one-time setup your role describes, then exit. The runtime will re-activate you when an event arrives on a Stream you subscribe to.\n") + case TriggerEvent: + ctx.WriteString(renderTrigger(t)) + default: + fmt.Fprintf(&ctx, "Activation kind: %q.\n", t.Kind) + } + if len(triggers) > 1 && i < len(triggers)-1 { + ctx.WriteByte('\n') + } + } + + return fmt.Sprintf(`You are Worker %s, running inside helix-org. Your environment is +the current working directory. Each activation is a single turn — do +the work and exit. + +%s + +=== Trigger === +%s=== end trigger === + +Act now. No preamble. 
+`, workerID, mandate, ctx.String()) +} + +// renderTrigger formats an event-kind Trigger for the activation +// prompt. Every populated field of the canonical Message envelope is +// rendered so the Worker can branch on Subject, From, ThreadID, Extra, +// etc. directly — no separate read_events round-trip needed for the +// trigger event itself. Empty fields are omitted to keep the prompt +// tight. +// +// Header keys are aligned for legibility but the parser the Worker is +// going to apply (Claude reading the prompt) is robust to spacing, so +// "neat" is for humans tailing the prompt. +func renderTrigger(t Trigger) string { + var b strings.Builder + b.WriteString("A new event arrived on a Stream you subscribe to.\n\n") + fmt.Fprintf(&b, " stream: %s\n", t.StreamID) + fmt.Fprintf(&b, " event: %s\n", t.EventID) + fmt.Fprintf(&b, " time: %s\n", t.CreatedAt.Format(time.RFC3339)) + if t.Source != "" { + fmt.Fprintf(&b, " source: %s\n", t.Source) + } + // source_kind drives the agent.md priority rule: AI-origin events + // are low-priority by default. Always emit when known (even when + // Source itself is empty — a future inbound transport that can + // classify origin without resolving a Worker still needs to flag + // AI vs human here). 
+ if t.SourceKind != "" { + fmt.Fprintf(&b, " source_kind: %s\n", t.SourceKind) + } + m := t.Message + if m.From != "" { + fmt.Fprintf(&b, " from: %s\n", m.From) + } + if len(m.To) > 0 { + fmt.Fprintf(&b, " to: %s\n", strings.Join(m.To, ", ")) + } + if m.Subject != "" { + fmt.Fprintf(&b, " subject: %s\n", m.Subject) + } + if m.ThreadID != "" { + fmt.Fprintf(&b, " thread_id: %s\n", m.ThreadID) + } + if m.InReplyTo != "" { + fmt.Fprintf(&b, " in_reply_to: %s\n", m.InReplyTo) + } + if m.MessageID != "" { + fmt.Fprintf(&b, " message_id: %s\n", m.MessageID) + } + if m.Body != "" { + b.WriteString(" body:\n") + b.WriteString(indentBlock(m.Body, " ")) + b.WriteByte('\n') + } + if len(m.Extra) > 0 { + b.WriteString(" extra:\n") + b.WriteString(indentBlock(string(m.Extra), " ")) + b.WriteByte('\n') + } + return b.String() +} + +// indentBlock prefixes every line of s with prefix. Used so multi-line +// event bodies render readably inside the prompt. +func indentBlock(s, prefix string) string { + if s == "" { + return "" + } + lines := strings.Split(s, "\n") + for i, line := range lines { + lines[i] = prefix + line + } + return strings.Join(lines, "\n") +} + +// DescribeTrigger returns a short label for one Trigger — used for +// activation-stream markers and structured logging. +func DescribeTrigger(t Trigger) string { + switch t.Kind { + case TriggerHire: + return "hire" + case TriggerEvent: + return fmt.Sprintf("event %s on %s from %s", t.EventID, t.StreamID, t.Source) + default: + return string(t.Kind) + } +} + +// DescribeTriggers labels the activation marker that gets published +// to the worker's activation stream. A single trigger reuses +// DescribeTrigger verbatim so observers see no change for the common +// case; a coalesced batch summarises as "batch of N" with each +// trigger's individual description joined by "; ". 
+func DescribeTriggers(triggers []Trigger) string { + if len(triggers) == 1 { + return DescribeTrigger(triggers[0]) + } + parts := make([]string, len(triggers)) + for i, t := range triggers { + parts[i] = DescribeTrigger(t) + } + return fmt.Sprintf("batch of %d: %s", len(triggers), strings.Join(parts, "; ")) +} + +// OneLine collapses whitespace and clips to max runes for readability. +// Clipping counts runes, not bytes, so a multi-byte character is never split; max <= 0 disables it. Shared by both runtimes' transcript renderers. +func OneLine(s string, max int) string { + s = strings.Join(strings.Fields(s), " ") + if r := []rune(s); max > 0 && len(r) > max { + return string(r[:max]) + "…" + } + return s +} + +// ActivationStreamID returns the deterministic Stream ID where a +// Worker's activation transcript is published. One Stream per Worker; +// created at hire time by hire_worker, written to by the Spawner, +// read by anyone with a subscription (typically the hiring Worker). +func ActivationStreamID(workerID domain.WorkerID) domain.StreamID { + return domain.StreamID("s-activations-" + string(workerID)) +} diff --git a/helix-org/agent/prompt_test.go b/helix-org/agent/prompt_test.go new file mode 100644 index 0000000000..b4ddb89280 --- /dev/null +++ b/helix-org/agent/prompt_test.go @@ -0,0 +1,160 @@ +package agent + +import ( + "strings" + "testing" + "time" + + "github.com/helixml/helix-org/domain" +) + +// TestRenderTriggerGitHub: a github-shaped event (issue.opened) must +// surface every populated envelope field in the prompt. 
+func TestRenderTriggerGitHub(t *testing.T) { + t.Parallel() + + extra := []byte(`{"action":"opened","event":"issues","issue":{"id":12345,"number":42,"title":"x","body":"y"},"sender":{"login":"philwinder"},"repository":{"full_name":"helixml/helix-org"}}`) + tr := Trigger{ + Kind: TriggerEvent, + EventID: "e-abc", + StreamID: "s-github", + Source: "", + CreatedAt: time.Date(2026, 4, 28, 12, 27, 23, 0, time.UTC), + Message: domain.Message{ + From: "philwinder", + Subject: "README setup steps mention an env var that no longer exists", + Body: "Step 3 references HELIX_FOO; the code reads HELIX_BAR now.", + ThreadID: "#42", + MessageID: "delivery-uuid-1", + Extra: extra, + }, + } + + got := renderTrigger(tr) + + wants := []string{ + "stream: s-github", + "event: e-abc", + "time: 2026-04-28T12:27:23Z", + "from: philwinder", + "subject: README setup steps mention an env var that no longer exists", + "thread_id: #42", + "message_id: delivery-uuid-1", + "Step 3 references HELIX_FOO", + `"event":"issues"`, + `"action":"opened"`, + `"sender":{"login":"philwinder"}`, + `"repository":{"full_name":"helixml/helix-org"}`, + } + for _, w := range wants { + if !strings.Contains(got, w) { + t.Errorf("renderTrigger output missing %q\n--- output ---\n%s", w, got) + } + } + + for _, omit := range []string{"to:", "in_reply_to:", "source:"} { + if strings.Contains(got, omit) { + t.Errorf("renderTrigger output should omit empty %q\n--- output ---\n%s", omit, got) + } + } +} + +func TestRenderTriggerEmail(t *testing.T) { + t.Parallel() + + tr := Trigger{ + Kind: TriggerEvent, + EventID: "e-1", + StreamID: "s-support", + Source: "", + CreatedAt: time.Date(2026, 4, 28, 10, 0, 0, 0, time.UTC), + Message: domain.Message{ + From: "alice@example.com", + To: []string{"abc123+sam@inbound.postmarkapp.com"}, + Subject: "[eng] Re: Webhook stream isn't firing", + Body: "Most webhook flow issues are config or subscription mismatches.", + ThreadID: "", + InReplyTo: "", + MessageID: "", + }, + } + + got 
:= renderTrigger(tr) + + wants := []string{ + "from: alice@example.com", + "to: abc123+sam@inbound.postmarkapp.com", + "subject: [eng] Re: Webhook stream isn't firing", + "thread_id: ", + "in_reply_to: ", + "message_id: ", + "Most webhook flow issues", + } + for _, w := range wants { + if !strings.Contains(got, w) { + t.Errorf("renderTrigger output missing %q\n--- output ---\n%s", w, got) + } + } +} + +func TestRenderTriggerWorkerPublished(t *testing.T) { + t.Parallel() + + tr := Trigger{ + Kind: TriggerEvent, + EventID: "e-1", + StreamID: "s-general", + Source: "w-alice", + CreatedAt: time.Date(2026, 4, 28, 10, 0, 0, 0, time.UTC), + Message: domain.Message{ + From: "w-alice", + Body: "hello", + }, + } + got := renderTrigger(tr) + + for _, w := range []string{"source: w-alice", "from: w-alice", "hello"} { + if !strings.Contains(got, w) { + t.Errorf("renderTrigger output missing %q\n--- output ---\n%s", w, got) + } + } + for _, omit := range []string{"to:", "subject:", "thread_id:", "in_reply_to:", "message_id:", "extra:"} { + if strings.Contains(got, omit) { + t.Errorf("renderTrigger output should omit empty %q\n--- output ---\n%s", omit, got) + } + } +} + +// TestBuildPromptIncludesEnvelope checks the integration: a Trigger +// with full envelope fields produces a prompt whose === Trigger === +// section carries all of them. 
+func TestBuildPromptIncludesEnvelope(t *testing.T) { + t.Parallel() + + tr := Trigger{ + Kind: TriggerEvent, + EventID: "e-abc", + StreamID: "s-github", + CreatedAt: time.Date(2026, 4, 28, 12, 27, 23, 0, time.UTC), + Message: domain.Message{ + From: "philwinder", + Subject: "Confusing example in the docs", + Body: "The README has an install command that doesn't run as written.", + Extra: []byte(`{"event":"issues","action":"opened"}`), + }, + } + prompt := BuildPrompt("w-doc-engineer", "[role.md contents]", []Trigger{tr}) + + if !strings.Contains(prompt, "=== Trigger ===") || !strings.Contains(prompt, "=== end trigger ===") { + t.Fatalf("trigger fences missing\n%s", prompt) + } + for _, w := range []string{ + "subject: Confusing example in the docs", + "from: philwinder", + `"event":"issues"`, + } { + if !strings.Contains(prompt, w) { + t.Errorf("prompt missing %q", w) + } + } +} diff --git a/helix-org/agent/spawner.go b/helix-org/agent/spawner.go new file mode 100644 index 0000000000..16f321b63d --- /dev/null +++ b/helix-org/agent/spawner.go @@ -0,0 +1,125 @@ +package agent + +import ( + "context" + "errors" + "fmt" + "strings" + "time" + + "github.com/helixml/helix-org/domain" +) + +// TriggerKind discriminates why a Spawner is being invoked. +type TriggerKind string + +const ( + // TriggerHire fires once when a Worker is first created. + TriggerHire TriggerKind = "hire" + // TriggerEvent fires whenever a Worker receives an event on a Stream + // they subscribe to. + TriggerEvent TriggerKind = "event" +) + +// Trigger is the per-activation context the Spawner gives to the agent. +// The mandate (entry-point file contents) is the static role; Trigger is +// what just happened that woke this Worker up. +type Trigger struct { + Kind TriggerKind + + // Event fields, set when Kind == TriggerEvent. 
+ EventID domain.EventID + StreamID domain.StreamID + Source domain.WorkerID + // SourceKind is the WorkerKind ("human" / "ai") of Source — looked + // up by the dispatcher at fan-out time and rendered into the + // activation prompt so the recipient can apply the org-wide policy + // (agent.md) of de-prioritising AI-origin events. Empty when the + // event has no internal Source (system-emitted, or inbound from an + // external transport with no resolved Worker). + SourceKind domain.WorkerKind + // Message is the canonical envelope parsed from the event body. + // Every populated field (From, Subject, ThreadID, MessageID, + // Extra, …) is rendered into the activation prompt so the + // Worker can branch on transport-shaped metadata directly, + // without a separate read_events round-trip. + Message domain.Message + CreatedAt time.Time +} + +// Spawner runs an AI Worker's agent process for a single activation +// and BLOCKS until the process exits. The Triggers slice tells the +// Spawner (and through it, the agent) why this activation is happening +// — first hire, or one or more events on subscribed Streams that +// arrived while a previous activation was running. The Dispatcher +// coalesces bursts so the slice is usually length 1, but the agent +// must handle longer slices when traffic queues up. +// +// Spawners are typically called from inside a Dispatcher that +// serialises calls per-Worker; callers must not invoke a Spawner for +// the same Worker concurrently. +// +// The zero value — nil — means "no process will be spawned", which is +// correct for tests and for HumanWorker activations. +type Spawner func(ctx context.Context, workerID domain.WorkerID, envPath string, triggers []Trigger) error + +// WorkspaceSync mirrors the canonical Role and Identity content of a +// Worker into wherever that Worker's agent reads them at activation +// time. 
Tools (update_role, update_identity) call PublishFile after +// persisting to the DB so the next activation sees fresh content +// without waiting for the spawner's projection step. +// +// `name` is a logical filename for this Worker — typically "role.md" +// or "identity.md". Each backend maps the name to its own on-target +// layout; callers must NOT include backend-specific path prefixes +// (no "workers//.context/...", no "job/..."). The mapping today: +// +// - claude: // +// (matches the layout `projectEnv` writes at activation) +// - helix: workers//.context/ on the helix-specs +// branch of the Worker's per-Worker repo +// (matches what `ProjectApplier.republishWorkerFiles` +// writes and what the activation mandate tells the +// agent to `git pull` and `cat`) +// +// `name` must be a clean, single-segment-or-relative filename — no +// leading slash, no "..", no escape from the Worker's namespace. +// +// Workers that aren't yet provisioned in the runtime backend (e.g. a +// Helix Worker before its first activation creates the project) are +// safe no-ops — implementations skip the publish and return nil. +type WorkspaceSync interface { + PublishFile(ctx context.Context, workerID domain.WorkerID, name, content, message string) error +} + +// NoopWorkspaceSync is a WorkspaceSync that does nothing. Useful for +// tests and for backends that have no out-of-band publish surface. +type NoopWorkspaceSync struct{} + +// PublishFile is the no-op WorkspaceSync: ignore the call and return nil. +func (NoopWorkspaceSync) PublishFile(_ context.Context, _ domain.WorkerID, _, _, _ string) error { + return nil +} + +// validateWorkspaceName enforces the WorkspaceSync `name` contract — +// shared by every WorkspaceSync implementation so callers see the same +// rejection rules regardless of backend. 
+func validateWorkspaceName(name string) error { + if name == "" { + return errors.New("workspace name is empty") + } + if strings.HasPrefix(name, "/") { + return fmt.Errorf("workspace name %q is absolute", name) + } + for _, seg := range strings.Split(name, "/") { + if seg == ".." { + return fmt.Errorf("workspace name %q traverses upward", name) + } + } + return nil +} + +// ValidateWorkspaceName is the public entry-point for WorkspaceSync +// implementations to reject malformed names. Kept exported so future +// out-of-tree backends share the same enforcement. +func ValidateWorkspaceName(name string) error { return validateWorkspaceName(name) } diff --git a/helix-org/bootstrap/bootstrap.go b/helix-org/bootstrap/bootstrap.go new file mode 100644 index 0000000000..f873c898df --- /dev/null +++ b/helix-org/bootstrap/bootstrap.go @@ -0,0 +1,161 @@ +// Package bootstrap creates the initial owner Worker and grants the +// structural tools. Runs exactly once — subsequent calls fail if any Worker +// already exists. +package bootstrap + +import ( + "context" + _ "embed" + "errors" + "fmt" + "os" + "time" + + "github.com/google/uuid" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/tools" +) + +// ownerRoleContent is the seed markdown for r-owner. Lives in a +// template file rather than a string literal so the prose can be +// edited like any other Role markdown — including the hiring +// playbook that teaches a fresh owner how to chain create_role → +// create_position → hire_worker → subscribe their streams. +// +//go:embed templates/owner_role.md +var ownerRoleContent string + +// Params controls the bootstrap. +type Params struct { + // EnvironmentPath is an absolute path to the owner's Environment. The + // directory must already exist on disk — bootstrap does not create it. + EnvironmentPath string +} + +// Result summarises the newly-created owner. 
// Result summarises the newly-created owner.
type Result struct {
	WorkerID        domain.WorkerID   // ID of the owner Worker created by Run
	RoleID          domain.RoleID     // ID of the owner Role created by Run
	PositionID      domain.PositionID // ID of the root Position created by Run
	EnvironmentPath string            // copied verbatim from Params.EnvironmentPath
}

// ErrAlreadyInitialised is returned when at least one worker already exists.
var ErrAlreadyInitialised = errors.New("org is already initialised")

// Run performs the bootstrap: create the owner's Role, Position, Worker,
// Environment row, and grant every structural tool. Bootstrap is the root
// of trust — these are the only grants in the system not issued by a
// prior Worker — and the grants it issues stop at the structural set.
//
// Steps, in order:
//
//  1. Validate params.EnvironmentPath: it must be non-empty and must
//     stat as an existing directory.
//  2. List existing Workers; if any exist, return ErrAlreadyInitialised
//     without writing anything.
//  3. Create Role "r-owner" (seeded from ownerRoleContent), Position
//     "p-root", human Worker "w-owner", and the owner's Environment row.
//  4. Create one ToolGrant per entry of the defaults list below.
//
// On any failure Run returns a zero Result and the error, wrapped with
// the step that failed where the code adds context.
//
// NOTE(review): no transaction is visible in this function — a failure
// part-way through returns immediately, with no rollback of rows
// already created visible here. Confirm whether the store provides
// atomicity elsewhere.
func Run(ctx context.Context, s *store.Store, params Params) (Result, error) {
	// Pre-flight: the Environment directory must already exist; Run
	// never creates it.
	if params.EnvironmentPath == "" {
		return Result{}, fmt.Errorf("environmentPath is required")
	}
	if info, err := os.Stat(params.EnvironmentPath); err != nil {
		return Result{}, fmt.Errorf("environmentPath %q: %w", params.EnvironmentPath, err)
	} else if !info.IsDir() {
		return Result{}, fmt.Errorf("environmentPath %q is not a directory", params.EnvironmentPath)
	}

	// Run-once guard: any existing Worker means the org was already
	// bootstrapped.
	existing, err := s.Workers.List(ctx)
	if err != nil {
		return Result{}, fmt.Errorf("check existing workers: %w", err)
	}
	if len(existing) > 0 {
		return Result{}, ErrAlreadyInitialised
	}

	// A single UTC timestamp is shared by the Role and the Environment.
	now := time.Now().UTC()
	role, err := domain.NewRole("r-owner", ownerRoleContent, now)
	if err != nil {
		return Result{}, err
	}
	if err := s.Roles.Create(ctx, role); err != nil {
		return Result{}, fmt.Errorf("create owner role: %w", err)
	}

	// The root Position is created with a nil third argument.
	// NOTE(review): presumably "no parent" — confirm against
	// domain.NewPosition.
	rootPos, err := domain.NewPosition("p-root", role.ID, nil)
	if err != nil {
		return Result{}, err
	}
	if err := s.Positions.Create(ctx, rootPos); err != nil {
		return Result{}, fmt.Errorf("create root position: %w", err)
	}

	// Placeholder identity markdown for the owner.
	ownerIdentity := "# Owner\n\nThe person running this org. Edit this from /ui/org to " +
		"introduce yourself — your name, voice, and how you want subordinates to address you.\n"
	owner, err := domain.NewHumanWorker(domain.WorkerID("w-owner"), []domain.PositionID{rootPos.ID}, ownerIdentity)
	if err != nil {
		return Result{}, err
	}
	if err := s.Workers.Create(ctx, owner); err != nil {
		return Result{}, fmt.Errorf("create owner worker: %w", err)
	}

	env, err := domain.NewEnvironment(owner.ID(), params.EnvironmentPath, now)
	if err != nil {
		return Result{}, err
	}
	if err := s.Environments.Create(ctx, env); err != nil {
		return Result{}, fmt.Errorf("create owner environment: %w", err)
	}

	// Every built-in tool — the owner is the root of trust and can do
	// anything. They issue subordinate Workers a narrower set via the
	// hire_worker / grant_tool tools.
	defaults := []domain.ToolName{
		// Mutations.
		tools.CreateRoleName,
		tools.UpdateRoleName,
		tools.UpdateIdentityName,
		tools.CreatePositionName,
		tools.HireWorkerName,
		tools.GrantToolName,
		tools.RevokeToolName,
		tools.CreateStreamName,
		tools.StreamMembersName,
		tools.SubscribeName,
		tools.UnsubscribeName,
		tools.InviteWorkersName,
		tools.PublishName,
		tools.DMName,
		// Reads.
		tools.ListRolesName,
		tools.GetRoleName,
		tools.ListPositionsName,
		tools.GetPositionName,
		tools.ListPositionChildrenName,
		tools.ListWorkersName,
		tools.GetWorkerName,
		tools.ListWorkerGrantsName,
		tools.GetWorkerEnvironmentName,
		tools.ListStreamsName,
		tools.GetStreamName,
		tools.ListStreamEventsName,
		tools.GetGrantName,
		tools.ReadEventsName,
		tools.WorkerLogName,
	}
	// One grant row per tool; each grant ID is "g-owner-" plus a fresh
	// UUID, so IDs differ between runs.
	for _, name := range defaults {
		g, err := domain.NewToolGrant(
			domain.GrantID("g-owner-"+uuid.NewString()),
			owner.ID(),
			name,
		)
		if err != nil {
			return Result{}, err
		}
		if err := s.Grants.Create(ctx, g); err != nil {
			return Result{}, fmt.Errorf("grant %q: %w", name, err)
		}
	}

	return Result{
		WorkerID:        owner.ID(),
		RoleID:          role.ID,
		PositionID:      rootPos.ID,
		EnvironmentPath: params.EnvironmentPath,
	}, nil
}
That is *your* job; everything else is + the team's. + +If you find yourself drafting prose, writing code, or producing the +deliverable yourself, stop — that's a signal you've skipped the +delegation step. Hand it to whoever owns it instead. + +## After you delegate, watch for the reply + +Activations are single-turn. You are **not** automatically woken up +when a delegated Worker publishes back — you have to look. After any +`dm` or `publish` that asks the team to do something: + +1. Identify the stream(s) where the reply is expected — usually the + same stream you published to, or the recipient's + `s-activations-` stream if you DM'd them. +2. Call `read_events` on each with `wait` set (up to 60 seconds) to + block until something lands. Use `since` to ignore your own + just-published event. +3. When a reply arrives, summarise the outcome back to the human in + one or two sentences. If the wait times out, say so plainly and + ask the human whether to keep waiting, escalate, or move on. + +Do not end an activation immediately after delegating. Sitting idle +while the team is working leaves the human staring at a blank +screen — keep watching for at least one round of replies. + +## Hiring playbook + +When you hire — directly or via `/role` — chain the steps without +asking permission between them: + +1. Save the Role (`create_role`) if it's new. +2. Create the Position under `p-root` (`create_position`) unless told + otherwise. +3. Hire the Worker (`hire_worker`) **with `grants` populated** — + kind `ai`, id `w-` (e.g. `w-mark`, + `w-priya`), and `grants` set to **every MCP tool listed in the + Role's `## Tools (MCP)` section**. The Worker's MCP tool list is + computed at hire time and **frozen** for the lifetime of their + first desktop session — granting tools later means the Worker + can't see them until their session restarts. So grants must + accompany the hire, never follow it. 
+ + Example shape: + ```json + { + "positionId": "pos-engineer", + "kind": "ai", + "id": "w-mark", + "identityContent": "Mark — ...", + "grants": [ + {"toolName": "subscribe"}, {"toolName": "unsubscribe"}, + {"toolName": "read_events"}, {"toolName": "publish"}, + {"toolName": "dm"}, {"toolName": "list_streams"}, + {"toolName": "stream_members"} + ] + } + ``` + +4. **Stand up their streams.** For each stream the Role lists: + - call `list_streams` first — another Worker may already have + created it + - if it exists, `subscribe` the new Worker + - if not, `create_stream` then `subscribe` + +A Worker hired without their grants is mute — they can see no MCP +tools at all and will fall back to writing files instead of +publishing/DMing, which is wrong. A Worker hired without their +streams subscribed is half-hired — they have nothing to listen to. +Don't skip steps 3-grants or 4. diff --git a/helix-org/broadcast/broadcaster.go b/helix-org/broadcast/broadcaster.go new file mode 100644 index 0000000000..049d9c91ee --- /dev/null +++ b/helix-org/broadcast/broadcaster.go @@ -0,0 +1,81 @@ +// Package broadcast provides a tiny in-process pub/sub used by long-poll +// readers to wake when a new Event is published to a Stream they care +// about. +// +// Subscribers register interest in a set of Stream IDs and receive an +// empty struct through their wake-up channel when any matching event +// is notified. Multiple rapid-fire notifications coalesce into a single +// wake-up — subscribers are expected to re-query the Events store after +// waking, so "you missed one" cannot actually happen. +package broadcast + +import ( + "sync" + + "github.com/helixml/helix-org/domain" +) + +// Broadcaster is safe for concurrent use. The zero value is not usable; +// use New. +type Broadcaster struct { + mu sync.Mutex + subs map[domain.StreamID]map[chan struct{}]struct{} +} + +// New returns a ready-to-use Broadcaster. 
+func New() *Broadcaster { + return &Broadcaster{ + subs: make(map[domain.StreamID]map[chan struct{}]struct{}), + } +} + +// Subscribe registers a wake-up channel for the given Stream IDs and +// returns it. The channel is buffered (size 1) so a notification never +// blocks Notify; coalesced notifications are deliberate. +// +// Callers MUST call Unsubscribe with the same channel and ID set when +// they are done, typically via defer. +func (b *Broadcaster) Subscribe(streamIDs []domain.StreamID) chan struct{} { + ch := make(chan struct{}, 1) + b.mu.Lock() + defer b.mu.Unlock() + for _, sid := range streamIDs { + set, ok := b.subs[sid] + if !ok { + set = make(map[chan struct{}]struct{}) + b.subs[sid] = set + } + set[ch] = struct{}{} + } + return ch +} + +// Unsubscribe removes the channel from all per-Stream subscriber sets. +// Safe to call with an empty streamIDs list. +func (b *Broadcaster) Unsubscribe(streamIDs []domain.StreamID, ch chan struct{}) { + b.mu.Lock() + defer b.mu.Unlock() + for _, sid := range streamIDs { + if set, ok := b.subs[sid]; ok { + delete(set, ch) + if len(set) == 0 { + delete(b.subs, sid) + } + } + } +} + +// Notify wakes every subscriber that registered interest in streamID. +// Non-blocking: if a subscriber's wake-up channel is already full, the +// signal is coalesced. Subscribers are expected to re-query the store +// after waking. 
+func (b *Broadcaster) Notify(streamID domain.StreamID) { + b.mu.Lock() + defer b.mu.Unlock() + for ch := range b.subs[streamID] { + select { + case ch <- struct{}{}: + default: + } + } +} diff --git a/helix-org/broadcast/broadcaster_test.go b/helix-org/broadcast/broadcaster_test.go new file mode 100644 index 0000000000..b7c0d1e30f --- /dev/null +++ b/helix-org/broadcast/broadcaster_test.go @@ -0,0 +1,89 @@ +package broadcast + +import ( + "sync" + "testing" + "time" + + "github.com/helixml/helix-org/domain" +) + +func TestBroadcasterWakesMatchingSubscriber(t *testing.T) { + t.Parallel() + + b := New() + ch := b.Subscribe([]domain.StreamID{"s-a", "s-b"}) + b.Notify("s-a") + select { + case <-ch: + case <-time.After(time.Second): + t.Fatalf("subscriber did not wake") + } +} + +func TestBroadcasterIgnoresOtherStreams(t *testing.T) { + t.Parallel() + + b := New() + ch := b.Subscribe([]domain.StreamID{"s-a"}) + b.Notify("s-b") + select { + case <-ch: + t.Fatalf("subscriber woke on unrelated stream") + case <-time.After(50 * time.Millisecond): + } +} + +func TestBroadcasterCoalescesBurstyNotifications(t *testing.T) { + t.Parallel() + + b := New() + ch := b.Subscribe([]domain.StreamID{"s-a"}) + for i := 0; i < 100; i++ { + b.Notify("s-a") + } + // Drain — we should get exactly one wake-up (coalesced). 
+ <-ch + select { + case <-ch: + t.Fatalf("unexpected second wake-up from coalesced burst") + case <-time.After(50 * time.Millisecond): + } +} + +func TestBroadcasterUnsubscribeStopsDelivery(t *testing.T) { + t.Parallel() + + b := New() + ch := b.Subscribe([]domain.StreamID{"s-a"}) + b.Unsubscribe([]domain.StreamID{"s-a"}, ch) + b.Notify("s-a") + select { + case <-ch: + t.Fatalf("woke after unsubscribe") + case <-time.After(50 * time.Millisecond): + } +} + +func TestBroadcasterMultipleSubscribers(t *testing.T) { + t.Parallel() + + b := New() + const n = 10 + var wg sync.WaitGroup + channels := make([]chan struct{}, n) + for i := range channels { + channels[i] = b.Subscribe([]domain.StreamID{"s-a"}) + wg.Add(1) + go func(ch chan struct{}) { + defer wg.Done() + select { + case <-ch: + case <-time.After(time.Second): + t.Errorf("subscriber did not wake") + } + }(channels[i]) + } + b.Notify("s-a") + wg.Wait() +} diff --git a/helix-org/cmd/helix-org/bootstrap.go b/helix-org/cmd/helix-org/bootstrap.go new file mode 100644 index 0000000000..90733490f8 --- /dev/null +++ b/helix-org/cmd/helix-org/bootstrap.go @@ -0,0 +1,140 @@ +package main + +import ( + "context" + "encoding/json" + "errors" + "flag" + "fmt" + "os" + + "github.com/helixml/helix-org/config" + "github.com/helixml/helix-org/helix/helixclient" +) + +// runBootstrap dispatches `helix-org bootstrap `. +func runBootstrap(args []string) error { + if len(args) == 0 { + return errors.New("usage: helix-org bootstrap \n\nTargets:\n helix-runtime Verify Helix connectivity") + } + switch args[0] { + case "helix-runtime": + return runBootstrapHelixRuntime(args[1:]) + case "help", "-h", "--help": + fmt.Fprintln(os.Stderr, "usage: helix-org bootstrap helix-runtime [--db ]") + return nil + default: + return fmt.Errorf("unknown bootstrap target %q", args[0]) + } +} + +// runBootstrapHelixRuntime is now minimal under the per-Worker-project +// model: there is no shared "helix-org" project to provision. 
Each AI +// Worker hire creates its own Helix project at activation time. All +// this command does today is verify that `helix.url` + `helix.api_key` +// resolve to a real Helix user — a fast pre-flight before any Worker +// activation tries to reach it. +// +// Future expansion: an explicit "create the owner Worker's project" +// step lands here once the per-Worker-project work in tools/spawner +// is in place. +func runBootstrapHelixRuntime(args []string) error { + fs := flag.NewFlagSet("bootstrap helix-runtime", flag.ContinueOnError) + dbPath := fs.String("db", "helix-org.db", "SQLite DB path.") + if err := fs.Parse(args); err != nil { + return err + } + + r, _, err := openRegistry(*dbPath) + if err != nil { + return err + } + ctx := context.Background() + + baseURL, err := r.GetString(ctx, "helix.url") + if err != nil { + return fmt.Errorf("helix.url not set (run `helix-org config set helix.url ...`): %w", err) + } + apiKey, err := r.GetString(ctx, "helix.api_key") + if err != nil { + return fmt.Errorf("helix.api_key not set: %w", err) + } + + c, err := helixclient.New(helixclient.Config{BaseURL: baseURL, APIKey: apiKey}) + if err != nil { + return fmt.Errorf("helix client: %w", err) + } + + logf("→ pinging %s", baseURL) + user, err := c.WhoAmI(ctx) + if err != nil { + return fmt.Errorf("helix unreachable: %w", err) + } + logf(" ok (user=%s slug=%s admin=%v)", user.User, user.Slug, user.Admin) + + // Validate chat.provider / chat.model exist on this Helix instance. + // We hit this gate now, at bootstrap, so a typo doesn't surface as a + // confusing 422 from /sessions/{id}/zed-config when the desktop tries + // to fetch its Zed config three minutes later. 
+ provider, _ := r.GetString(ctx, "chat.provider") + model, _ := r.GetString(ctx, "chat.model") + if provider != "" && model != "" { + logf("→ checking provider %q + model %q exist on Helix", provider, model) + if err := helixclient.ValidateProviderModel(ctx, c, provider, model); err != nil { + return fmt.Errorf("invalid chat.provider / chat.model: %w", err) + } + logf(" ok") + } else { + logf(" (skipping provider/model check — chat.provider or chat.model unset)") + } + + logf("✓ bootstrap complete") + logf("") + logf("note: the per-Worker-project model means each AI Worker hire creates its own") + logf(" Helix project at activation time. No global project to provision.") + return nil +} + +// persistString writes a single string value to the config registry. +// Kept here for future expansion (owner-project ID persistence, etc.). +func persistString(ctx context.Context, r *config.Registry, key, value string) error { + encoded, _ := json.Marshal(value) + if err := r.Set(ctx, key, string(encoded), ""); err != nil { + return fmt.Errorf("persist %s: %w", key, err) + } + logf(" → set %s = %s", key, value) + return nil +} + +func logf(format string, args ...any) { + _, _ = fmt.Fprintf(os.Stdout, format+"\n", args...) +} + +// sandboxStartupSh is reserved for future use under the per-Worker +// model — when the spawner's TriggerHire step writes +// `.helix/startup.sh` per project, this is the canonical content. +const sandboxStartupSh = `#!/usr/bin/env bash +set -euo pipefail + +# HELIX_ORG_URL / HELIX_WORKER_ID arrive as project secrets and surface +# as env vars in this container at session start. Use them to wire the +# in-sandbox claude/zed agent at the helix-org MCP endpoint. + +mkdir -p ~/.config/claude +cat > ~/.config/claude/mcp.json < 0 { + cmd = append(cmd, strings.Join(positional, " ")) + } + // If no positional was given, claude reads stdin until EOF. 
+ } + cmd = append(cmd, + "--permission-mode", "bypassPermissions", + "--strict-mcp-config", + "--mcp-config", string(mcpConfig), + ) + if *model != "" { + cmd = append(cmd, "--model", *model) + } + if !*printMode { + cmd = append(cmd, "--name", "helix-org: "+*workerID) + switch { + case *resume: + cmd = append(cmd, "--resume") + case !*newSession: + // Resume the latest session for this cwd by explicit ID. We + // avoid `--continue` because its "most recent resumable session" + // heuristic refuses sessions whose log ended on certain non-user + // events ("No conversation found to continue"), even when the + // session is fine to resume by ID. If there is no prior session, + // we pass nothing and claude starts fresh. + if sid := latestClaudeSessionID(); sid != "" { + cmd = append(cmd, "--resume", sid) + } + } + } + + binPath, err := exec.LookPath(*claudeBin) + if err != nil { + return fmt.Errorf("locate claude %q: %w", *claudeBin, err) + } + if err := syscall.Exec(binPath, cmd, os.Environ()); err != nil { //nolint:gosec // claudeBin is operator-supplied + return fmt.Errorf("exec claude: %w", err) + } + return nil +} + +// latestClaudeSessionID returns the sessionId of the most recently +// modified .jsonl in claude's per-cwd session store, or "" if none. +// Claude stores sessions under ~/.claude/projects//.jsonl, with one JSON event per line (the first +// line carries the session's `sessionId`). 
// latestClaudeSessionID returns the session ID recorded in the most
// recently modified .jsonl file of claude's per-cwd session store, or
// "" when none can be determined.
//
// The store directory is ~/.claude/projects/<cwd with every "/"
// replaced by "-">. Each session file holds one JSON record per line.
// The file is scanned from the top and the first record carrying a
// non-empty `sessionId` wins. Lines that are blank, are not valid
// JSON, or lack the field are skipped, so a leading record without the
// ID does not hide a resumable session.
//
// Every failure (no cwd, no home directory, missing directory,
// unreadable file, no ID found) yields "": the caller treats that as
// "start a fresh session".
func latestClaudeSessionID() string {
	cwd, err := os.Getwd()
	if err != nil {
		return ""
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return ""
	}
	dir := filepath.Join(home, ".claude", "projects", strings.ReplaceAll(cwd, "/", "-"))
	entries, err := os.ReadDir(dir)
	if err != nil {
		return ""
	}

	// Pick the regular .jsonl entry with the latest modification time.
	var (
		newestPath string
		newestTime time.Time
	)
	for _, e := range entries {
		if e.IsDir() || !strings.HasSuffix(e.Name(), ".jsonl") {
			continue
		}
		info, err := e.Info()
		if err != nil {
			// Entry vanished or is unreadable; ignore it.
			continue
		}
		if info.ModTime().After(newestTime) {
			newestTime = info.ModTime()
			newestPath = filepath.Join(dir, e.Name())
		}
	}
	if newestPath == "" {
		return ""
	}

	f, err := os.Open(newestPath) //nolint:gosec // path is built from a known prefix and a directory entry name
	if err != nil {
		return ""
	}
	defer func() { _ = f.Close() }()

	// Records can be long; allow lines of up to 1 MiB. A longer line
	// stops the scan and the function falls through to "".
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		var record struct {
			SessionID string `json:"sessionId"`
		}
		if err := json.Unmarshal(scanner.Bytes(), &record); err != nil {
			continue
		}
		if record.SessionID != "" {
			return record.SessionID
		}
	}
	return ""
}
+func runConfig(args []string) error { + if len(args) == 0 { + printConfigUsage() + return fmt.Errorf("no config subcommand given") + } + switch args[0] { + case "set": + return runConfigSet(args[1:]) + case "get": + return runConfigGet(args[1:]) + case "list": + return runConfigList(args[1:]) + case "delete": + return runConfigDelete(args[1:]) + case "help", "-h", "--help": + printConfigUsage() + return nil + default: + printConfigUsage() + return fmt.Errorf("unknown config subcommand %q", args[0]) + } +} + +func printConfigUsage() { + fmt.Fprintln(os.Stderr, `usage: helix-org config [flags] + +Subcommands: + set Upsert a config row. is parsed as JSON if + possible, else treated as a string. Validates + against the registered schema for . + get Print the current value (secrets redacted by + default; pass --reveal-secrets to see plaintext). + list [--prefix p] List every registered key with its current value + (or default), required flag, and description. + Secrets redacted. + delete Remove a row. Subsequent reads fall back to the + registered default, or error if Required. + +Common flags: + --db SQLite DB path (default: helix-org.db).`) +} + +// openRegistry opens the DB and returns a Registry with all known +// specs registered. Shared by every config subcommand. +func openRegistry(dbPath string) (*config.Registry, *store.Store, error) { + st, err := sqlite.Open(dbPath) + if err != nil { + return nil, nil, fmt.Errorf("open store: %w", err) + } + r := config.New(st.Configs) + registerAllConfigSpecs(r) + return r, st, nil +} + +// parseValue accepts a CLI argument and returns the JSON form. If the +// argument parses as valid JSON, it's used as-is; otherwise it's +// quoted as a JSON string. So both `claude` and `"claude"` work for +// string values, and operators don't have to remember to quote. 
// parseValue turns a command-line argument into the JSON text stored
// for a config value. An argument that already decodes as JSON is
// returned untouched; anything else is encoded as a JSON string. Both
// `claude` and `"claude"` therefore yield `"claude"`, so operators do
// not need to quote plain strings themselves.
func parseValue(raw string) string {
	var scratch any
	if json.Unmarshal([]byte(raw), &scratch) != nil {
		// Not JSON: treat the whole argument as a string literal.
		quoted, _ := json.Marshal(raw)
		return string(quoted)
	}
	return raw
}
Off by default.") + if err := fs.Parse(args); err != nil { + return err + } + rest := fs.Args() + if len(rest) != 1 { + return fmt.Errorf("usage: helix-org config get [--reveal-secrets]") + } + key := rest[0] + + r, _, err := openRegistry(*dbPath) + if err != nil { + return err + } + var value string + if *revealSecrets { + value, err = r.GetRaw(context.Background(), key) + } else { + value, err = r.GetRedacted(context.Background(), key) + } + if err != nil { + if errors.Is(err, config.ErrNotConfigured) { + _, _ = fmt.Fprintf(os.Stdout, "%s: (not set; no default)\n", key) + return nil + } + if errors.Is(err, config.ErrRequired) { + return fmt.Errorf("%s: required, not set", key) + } + return err + } + _, _ = fmt.Fprintf(os.Stdout, "%s = %s\n", key, value) + return nil +} + +func runConfigList(args []string) error { + fs := flag.NewFlagSet("config list", flag.ContinueOnError) + dbPath := fs.String("db", "helix-org.db", "SQLite DB path.") + prefix := fs.String("prefix", "", "Restrict to keys starting with this prefix.") + if err := fs.Parse(args); err != nil { + return err + } + + r, _, err := openRegistry(*dbPath) + if err != nil { + return err + } + specs := r.Specs() + tw := tabwriter.NewWriter(os.Stdout, 0, 4, 2, ' ', 0) + _, _ = fmt.Fprintln(tw, "KEY\tVALUE\tREQUIRED\tDESCRIPTION") + for _, spec := range specs { + if *prefix != "" && !strings.HasPrefix(spec.Key, *prefix) { + continue + } + value, err := r.GetRedacted(context.Background(), spec.Key) + switch { + case errors.Is(err, config.ErrNotConfigured): + value = "(unset)" + case errors.Is(err, config.ErrRequired): + value = "(required, missing!)" + case err != nil: + value = "(error: " + err.Error() + ")" + } + req := "no" + if spec.Required { + req = "yes" + } + _, _ = fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n", spec.Key, value, req, spec.Description) + } + if err := tw.Flush(); err != nil { + return fmt.Errorf("flush: %w", err) + } + return nil +} + +func runConfigDelete(args []string) error { + fs := 
// registerAllConfigSpecs declares every config key the running
// helix-org binary knows about. Both `serve` and `config <subcommand>`
// call this so the CLI's view of valid keys stays in sync with what
// subsystems actually consume at runtime.
//
// As subsystems grow (new transports, future LLM providers, etc.)
// add their Specs here. A future refactor could push registration
// into each subsystem's package-level init, but a flat list keeps
// the surface visible and reviewable in one place.
//
// Default values are written as JSON text: string defaults carry
// their own double quotes inside the Go raw string, and the integer
// default is a bare number. Keys are grouped by prefix: claude.*,
// spawner.*, helix.*, chat.*, transport.*.
func registerAllConfigSpecs(r *config.Registry) {
	// --- claude.*: local claude CLI settings ---
	r.Register(config.Spec{
		Key:         "claude.bin",
		Type:        config.TypeString,
		Default:     `"claude"`,
		Required:    true,
		Description: "Path to the claude CLI binary used by the spawner.",
	})
	r.Register(config.Spec{
		Key:         "claude.public_url",
		Type:        config.TypeString,
		Default:     `"http://localhost:8080"`,
		Required:    true,
		Description: "Base URL Workers reach helix-org's MCP endpoint at. Set to your ngrok / Cloudflare tunnel URL when transports need to webhook in from outside.",
	})
	r.Register(config.Spec{
		Key:         "claude.model",
		Type:        config.TypeString,
		Default:     `"sonnet"`,
		Description: "Claude model alias or full name passed via --model. Defaults to 'sonnet' to keep activation costs predictable; set to 'opus' or a full name (e.g. 'claude-opus-4-7') to override.",
	})
	r.Register(config.Spec{
		Key:         "claude.effort",
		Type:        config.TypeString,
		Default:     `"low"`,
		Description: "Claude effort/thinking level passed via --effort (low|medium|high|xhigh|max). Defaults to 'low' so multi-agent activations don't burn extended-thinking budget unless explicitly raised.",
	})

	// --- spawner.*: which activation backend to use ---
	r.Register(config.Spec{
		Key:         "spawner.kind",
		Type:        config.TypeString,
		Default:     `"claude"`,
		Description: "Which Spawner to use for AI Worker activations: 'claude' (local dev, runs `claude -p`) or 'helix' (production, delegates to a co-located Helix server).",
	})

	// --- helix.*: Helix server connection and limits ---
	// helix.url, helix.api_key and helix.org_url have no default and are
	// not marked Required here; their descriptions state they are needed
	// only when spawner.kind = helix.
	r.Register(config.Spec{
		Key:         "helix.url",
		Type:        config.TypeString,
		Description: "Base URL of the co-located Helix server (e.g. http://helix:8080). Required when spawner.kind = helix.",
	})
	r.Register(config.Spec{
		Key:         "helix.api_key",
		Type:        config.TypeString,
		Description: "Bearer token used for all Helix REST and WebSocket calls. Required when spawner.kind = helix.",
	})
	r.Register(config.Spec{
		Key:         "helix.org_url",
		Type:        config.TypeString,
		Description: "helix-org's externally-resolvable URL, written as a project secret (HELIX_ORG_URL) on every per-Worker Helix project so the in-sandbox agent can call /workers/{id}/mcp. Required when spawner.kind = helix.",
	})
	r.Register(config.Spec{
		Key:         "helix.activation_timeout",
		Type:        config.TypeString,
		Default:     `"5m"`,
		Description: "Per-activation hard timeout (Go duration string). Default 5m.",
	})
	r.Register(config.Spec{
		Key:         "helix.max_inflight",
		Type:        config.TypeInt,
		Default:     `8`,
		Description: "Cap on simultaneous open Helix activations across all Workers. Default 8.",
	})

	// --- chat.*: owner chat surface ---
	r.Register(config.Spec{
		Key:         "chat.backend",
		Type:        config.TypeString,
		Default:     `"claude"`,
		Description: "Backend for the owner chat surface (CLI 'helix-org chat' and /ui/chat). 'claude' runs a local subprocess (dev). 'helix' delegates to a Helix chat session against the owner Worker's per-Worker project.",
	})
	r.Register(config.Spec{
		Key:         "chat.session_role",
		Type:        config.TypeString,
		Default:     `"owner-chat"`,
		Description: "session_role written on Helix chat sessions opened by the chat surface. Used for filtering Recents.",
	})
	r.Register(config.Spec{
		Key:         "chat.provider",
		Type:        config.TypeString,
		Default:     `"bunker-minimax-m2.7"`,
		Description: "Helix provider used by the chat surface (helix backend only). The provider is the prefix before the slash in a Helix model ID — e.g. 'bunker-minimax-m2.7' for 'bunker-minimax-m2.7/minimax-m2.7'.",
	})
	r.Register(config.Spec{
		Key:         "chat.model",
		Type:        config.TypeString,
		Default:     `"minimax-m2.7"`,
		Description: "Model the chat surface uses on Helix (helix backend only). Bare model name (the suffix after the provider slash). Must exist on the configured provider.",
	})

	// --- transport.*: object-valued transport credentials ---
	// Secrets names the object fields declared secret for each key.
	r.Register(config.Spec{
		Key:         "transport.postmark",
		Type:        config.TypeObject,
		Secrets:     []string{"token"},
		Description: `Postmark account config: {"token","inbound","from"}. Required only if any Stream uses transport=email.`,
	})
	r.Register(config.Spec{
		Key:         "transport.github",
		Type:        config.TypeObject,
		Secrets:     []string{"token", "webhook_secret"},
		Description: `GitHub webhooks config: {"token","webhook_secret"}. Required only if any Stream uses transport=github. token is the gh PAT used by Workers; webhook_secret is the HMAC secret GitHub signs deliveries with.`,
	})
}
// contains reports whether want occurs anywhere in s. A nil or empty
// slice never matches.
func contains(s []string, want string) bool {
	found := false
	for i := 0; i < len(s) && !found; i++ {
		found = s[i] == want
	}
	return found
}
+package main + +import ( + "fmt" + "os" +) + +func main() { + if err := run(os.Args[1:]); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +func run(args []string) error { + if len(args) == 0 { + printUsage() + return fmt.Errorf("no subcommand given") + } + switch args[0] { + case "serve": + return runServe(args[1:]) + case "chat": + return runChat(args[1:]) + case "config": + return runConfig(args[1:]) + case "bootstrap": + return runBootstrap(args[1:]) + case "help", "-h", "--help": + printUsage() + return nil + default: + printUsage() + return fmt.Errorf("unknown subcommand %q", args[0]) + } +} + +func printUsage() { + fmt.Fprintln(os.Stderr, `usage: helix-org [flags] + +Subcommands: + serve Run the HTTP server. On first start against an empty + database, creates the initial owner Worker. Exposes + /workers/{id}/mcp (Streamable HTTP MCP transport) and + the /ui/ HTML surface. + chat Open an interactive claude session pointed at a + Worker's MCP endpoint (default: w-owner). Continues + the most recent session in the current directory; + pass --resume for the picker or --new for a fresh one. + bootstrap Provision external dependencies. Run + 'bootstrap helix-runtime [--project-id ]' to + validate a Helix project, run a smoke test, and + persist the project ID. See design/helix-integration.md. + config Read or write operational configuration (transport + credentials, claude binary, model, public URL, etc.). + CLI-only — never via MCP. See design/config.md. + Subcommands: set, get, list, delete. + help Show this message. 
// runServe implements the `serve` subcommand. It parses the serve
// flags, opens the SQLite store, runs the owner bootstrap, wires the
// spawner, dispatcher, email and GitHub transports, tool and prompt
// registries, chat backend and UI handler into a single HTTP server,
// and then blocks until the server fails or the process receives
// SIGINT/SIGTERM, in which case it shuts the server down with a
// 10-second deadline.
//
// It returns nil on a clean shutdown and a wrapped error for any
// setup, serve, or shutdown failure.
func runServe(args []string) error {
	fs := flag.NewFlagSet("serve", flag.ContinueOnError)
	addr := fs.String("addr", ":8080", "TCP address to listen on")
	dbPath := fs.String("db", "helix-org.db", "SQLite database path (use ':memory:' for ephemeral)")
	publicURL := fs.String("public-url", "", "Base URL spawned Workers use to reach the MCP endpoint. Defaults to http://localhost.")
	envsDir := fs.String("envs-dir", "./envs", "Directory under which each Worker's Environment lives (one subdirectory per workerID).")
	claudeBin := fs.String("claude-bin", "claude", "Path to the claude CLI used to embody AI Workers")
	model := fs.String("model", "sonnet", "Claude model alias or full name (e.g. 'sonnet', 'opus', 'claude-sonnet-4-6'). Default sonnet to keep activation costs predictable.")
	effort := fs.String("effort", "low", "Claude effort/thinking level (low|medium|high|xhigh|max). Defaults to low to minimise per-activation cost.")
	if err := fs.Parse(args); err != nil {
		return err
	}
	// No explicit public URL: derive one from the port of the listen
	// address.
	if *publicURL == "" {
		*publicURL = "http://localhost" + portFromAddr(*addr)
	}
	absEnvsDir, err := filepath.Abs(*envsDir)
	if err != nil {
		return fmt.Errorf("resolve envs-dir %q: %w", *envsDir, err)
	}
	if err := os.MkdirAll(absEnvsDir, 0o750); err != nil {
		return fmt.Errorf("create envs-dir %q: %w", absEnvsDir, err)
	}

	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))

	// NOTE(review): this local shadows the imported `store` package
	// for the rest of the function; the package is not referenced
	// again below, so it compiles, but a rename would be clearer.
	store, err := sqlite.Open(*dbPath)
	if err != nil {
		return fmt.Errorf("open store: %w", err)
	}

	// First start against an empty DB creates the owner Worker. On
	// subsequent starts ErrAlreadyInitialised is the normal case; any
	// other error is fatal.
	ownerEnvPath := filepath.Join(absEnvsDir, "w-owner")
	if err := os.MkdirAll(ownerEnvPath, 0o750); err != nil {
		return fmt.Errorf("create owner env %q: %w", ownerEnvPath, err)
	}
	switch result, err := bootstrap.Run(context.Background(), store, bootstrap.Params{
		EnvironmentPath: ownerEnvPath,
	}); {
	case err == nil:
		logger.Info("bootstrap created owner",
			"workerId", result.WorkerID,
			"roleId", result.RoleID,
			"positionId", result.PositionID,
			"environmentPath", result.EnvironmentPath,
		)
	case errors.Is(err, bootstrap.ErrAlreadyInitialised):
		logger.Info("bootstrap skipped: already initialised", "db", *dbPath)
	default:
		return fmt.Errorf("bootstrap: %w", err)
	}

	bc := broadcast.New()
	deps := tools.DefaultDeps(store)
	deps.Broadcaster = bc
	deps.EnvsDir = absEnvsDir

	// Operational config registry — Postmark + future provider creds
	// live here, mutated only via the helix-org config CLI. See
	// design/config.md.
	configReg := config.New(store.Configs)
	registerAllConfigSpecs(configReg)

	spawner, workspace, err := buildSpawner(context.Background(), configReg, store, bc, deps, logger, *claudeBin, *publicURL, *model, *effort)
	if err != nil {
		return fmt.Errorf("build spawner: %w", err)
	}
	dispatcher := dispatch.New(store, spawner, logger)
	deps.Dispatcher = dispatcher
	deps.Workspace = workspace
	logger.Info("dispatcher enabled", "public-url", *publicURL, "envs-dir", absEnvsDir)

	// Email transport: shares the dispatcher (for inbound activations)
	// and registers itself as the dispatcher's outbound email emitter.
	emailTransport := postmark.New(configReg, store, bc, dispatcher, logger)
	dispatcher.SetEmailEmitter(emailTransport)
	logger.Info("email transport enabled", "provider", "postmark")

	// GitHub transport: inbound only. Webhook deliveries POST to
	// /github/webhook; the transport HMAC-verifies, fans out to every
	// Stream whose repo+events match, and activates subscribed Workers
	// via the dispatcher. Outbound is the Worker's job via `gh`; the
	// publish tool rejects writes to a github stream loudly.
	githubInbound := githubtransport.New(configReg, store, bc, dispatcher, logger)
	logger.Info("github transport enabled")

	// deps is handed to the builtin tools only after Dispatcher and
	// Workspace have been filled in above.
	reg := tools.NewRegistry()
	if err := tools.RegisterBuiltins(reg, deps); err != nil {
		return fmt.Errorf("register builtins: %w", err)
	}

	// Prompts: server-defined slash commands. Surfaced per-worker
	// alongside tools, gated by tool grants so a Worker only sees
	// prompts that end in a tool call they can actually make.
	promptReg := prompts.NewRegistry()
	if err := prompts.RegisterBuiltins(promptReg); err != nil {
		return fmt.Errorf("register prompt builtins: %w", err)
	}

	// UI chat surface. Backend is selected by chat.backend config:
	//   - "claude": long-lived `claude` subprocess in the server cwd,
	//     bridged to the browser via SSE. Dev only.
	//   - "helix": Helix chat session; every owner message becomes a
	//     StartChat / PostFollowup against Helix.
	cwd, err := os.Getwd()
	if err != nil {
		return fmt.Errorf("getwd: %w", err)
	}
	chatBridge, err := buildChatBackend(context.Background(), configReg, store, logger, *claudeBin, cwd, *publicURL, promptReg)
	if err != nil {
		return fmt.Errorf("build chat backend: %w", err)
	}

	// Snapshot config registry → ui.SettingsView so the UI doesn't
	// need to import config. This is captured once at startup;
	// /ui/settings re-resolves the per-spec configured flag against
	// the store on each render.
	specs := configReg.Specs()
	uiSpecs := make([]ui.SettingsSpec, 0, len(specs))
	for _, sp := range specs {
		uiSpecs = append(uiSpecs, ui.SettingsSpec{
			Key:         sp.Key,
			Type:        string(sp.Type),
			Required:    sp.Required,
			Description: sp.Description,
		})
	}
	uiHandler := ui.Handler(ui.Deps{
		Store:       store,
		Configs:     configReg,
		Bridge:      chatBridge,
		ChatCWD:     cwd,
		Broadcaster: bc,
		Dispatcher:  dispatcher,
		NewID:       deps.NewID,
		Now:         deps.Now,
		Settings: ui.SettingsView{
			Owner:     "w-owner",
			PublicURL: *publicURL,
			DBPath:    *dbPath,
			EnvsDir:   absEnvsDir,
			Specs:     uiSpecs,
		},
	})
	// Requests for exactly "/" are redirected to the UI.
	rootRedirect := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "/ui/", http.StatusFound)
	})

	// Only the header read timeout is set on the server; no read,
	// write, or idle timeout is configured here.
	srv := &http.Server{
		Addr: *addr,
		Handler: server.New(store, reg, bc, deps.Dispatcher, logger).WithPrompts(promptReg).Handler(
			server.Route{Pattern: "POST /email/postmark", Handler: emailTransport.HandleInbound()},
			server.Route{Pattern: "POST /github/webhook", Handler: githubInbound.HandleInbound()},
			server.Route{Pattern: "GET /ui/chat/stream", Handler: chatBridge.StreamHandler()},
			server.Route{Pattern: "POST /ui/chat/send", Handler: chatBridge.SendHandler()},
			server.Route{Pattern: "POST /ui/chat/commands", Handler: chatBridge.CommandsHandler()},
			server.Route{Pattern: "POST /ui/chat/new", Handler: chatBridge.NewHandler()},
			server.Route{Pattern: "POST /ui/chat/switch", Handler: chatBridge.SwitchHandler()},
			server.Route{Pattern: "/ui/", Handler: uiHandler},
			server.Route{Pattern: "GET /{$}", Handler: rootRedirect},
		),
		ReadHeaderTimeout: 10 * time.Second,
	}

	// ctx is cancelled on the first SIGINT or SIGTERM.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	// Buffered (size 1) so the serving goroutine can hand over its
	// error and exit even when the select below has already taken the
	// shutdown branch and nobody is receiving any more.
	errCh := make(chan error, 1)
	go func() {
		logger.Info("server listening", "addr", *addr, "db", *dbPath)
		// http.ErrServerClosed is what ListenAndServe returns after
		// Shutdown; it is not treated as a failure.
		if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			errCh <- err
		}
		close(errCh)
	}()

	select {
	case <-ctx.Done():
		logger.Info("shutting down")
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		if err := srv.Shutdown(shutdownCtx); err != nil {
			return fmt.Errorf("shutdown: %w", err)
		}
	case err, ok := <-errCh:
		// ok is false when the goroutine closed errCh without sending,
		// i.e. the server stopped without a reportable error.
		if ok && err != nil {
			return fmt.Errorf("serve: %w", err)
		}
	}
	return nil
}
+func buildSpawner( + ctx context.Context, + cfg *config.Registry, + st *store.Store, + bc *broadcast.Broadcaster, + deps tools.Deps, + logger *slog.Logger, + claudeBin, publicURL, model, effort string, +) (agent.Spawner, agent.WorkspaceSync, error) { + kind, err := cfg.GetString(ctx, "spawner.kind") + if err != nil { + return nil, nil, fmt.Errorf("read spawner.kind: %w", err) + } + switch kind { + case "claude": + logger.Info("spawner: claude", "claude-bin", claudeBin, "model", model, "effort", effort) + spawner := agentclaude.Spawner(agentclaude.SpawnerConfig{ + ClaudeBin: claudeBin, + PublicURL: publicURL, + Model: model, + Effort: effort, + Logger: logger, + Store: st, + Broadcaster: bc, + Now: deps.Now, + NewID: deps.NewID, + }) + return spawner, agentclaude.NewWorkspace(deps.EnvsDir), nil + case "helix": + baseURL, err := cfg.GetString(ctx, "helix.url") + if err != nil { + return nil, nil, fmt.Errorf("read helix.url: %w", err) + } + apiKey, err := cfg.GetString(ctx, "helix.api_key") + if err != nil { + return nil, nil, fmt.Errorf("read helix.api_key: %w", err) + } + orgURL, err := cfg.GetString(ctx, "helix.org_url") + if err != nil { + return nil, nil, fmt.Errorf("read helix.org_url: %w", err) + } + timeoutStr, err := cfg.GetString(ctx, "helix.activation_timeout") + if err != nil { + return nil, nil, fmt.Errorf("read helix.activation_timeout: %w", err) + } + timeout, err := time.ParseDuration(timeoutStr) + if err != nil { + return nil, nil, fmt.Errorf("parse helix.activation_timeout %q: %w", timeoutStr, err) + } + maxInflight, err := cfg.GetInt(ctx, "helix.max_inflight") + if err != nil { + return nil, nil, fmt.Errorf("read helix.max_inflight: %w", err) + } + // Provider/Model drive every per-Worker project's Agent App + // config (set at apply time inside the spawner). 
+ provider, _ := cfg.GetString(ctx, "chat.provider") + model, _ := cfg.GetString(ctx, "chat.model") + client, err := helixclient.New(helixclient.Config{BaseURL: baseURL, APIKey: apiKey}) + if err != nil { + return nil, nil, fmt.Errorf("helix client: %w", err) + } + // Fail-fast validation: a typo in chat.provider / chat.model + // would otherwise surface much later as a 422 from + // /sessions/{id}/zed-config when the desktop boots, with no + // obvious link back to the bad config key. Catch it here. + if err := helixclient.ValidateProviderModel(ctx, client, provider, model); err != nil { + return nil, nil, fmt.Errorf("invalid chat.provider / chat.model (run `helix-org config set chat.provider ` and `helix-org config set chat.model `): %w", err) + } + workspace := agenthelix.NewWorkspace(client, st, "helix-specs", "helix-org", "helix-org@local") + logger.Info("spawner: helix", + "helix-url", baseURL, + "org-url", orgURL, + "provider", provider, + "model", model, + "timeout", timeout, + "max-inflight", maxInflight, + ) + spawner := agenthelix.Spawner(agenthelix.SpawnerConfig{ + Client: client, + HelixOrgURL: orgURL, + Provider: provider, + Model: model, + AgentMD: agent.Policy, + ActivationTimeout: timeout, + MaxInflight: int(maxInflight), + Logger: logger, + Store: st, + Broadcaster: bc, + Now: deps.Now, + NewID: deps.NewID, + }) + return spawner, workspace, nil + default: + return nil, nil, fmt.Errorf("unknown spawner.kind %q (valid: claude, helix)", kind) + } +} + +// buildChatBackend selects the owner-chat backend based on +// chat.backend. The claude path keeps full backwards compat; the +// helix path constructs a fresh helixclient and delegates the chat +// surface to a Helix session — closing the "all LLM calls go through +// Helix" gap. Slash-command prompts are wired into both backends. 
+func buildChatBackend( + ctx context.Context, + cfg *config.Registry, + st *store.Store, + logger *slog.Logger, + claudeBin, cwd, publicURL string, + promptReg *prompts.Registry, +) (chat.Backend, error) { + kind, err := cfg.GetString(ctx, "chat.backend") + if err != nil { + return nil, fmt.Errorf("read chat.backend: %w", err) + } + switch kind { + case "claude": + logger.Info("chat backend: claude", "claude-bin", claudeBin) + // claude.model is the alias passed to claude as --model. Use + // it as the footer label too so the UI truthfully reports + // which model the chat is running on. + claudeModel, _ := cfg.GetString(ctx, "claude.model") + label := "claude" + if claudeModel != "" { + label = "claude · " + claudeModel + } + b := chat.New(claudeBin, cwd, strings.TrimRight(publicURL, "/")+"/workers/w-owner/mcp", logger). + WithPrompts(promptReg). + WithLabel(label) + return b, nil + case "helix": + baseURL, err := cfg.GetString(ctx, "helix.url") + if err != nil { + return nil, fmt.Errorf("read helix.url: %w", err) + } + apiKey, err := cfg.GetString(ctx, "helix.api_key") + if err != nil { + return nil, fmt.Errorf("read helix.api_key: %w", err) + } + orgURL, err := cfg.GetString(ctx, "helix.org_url") + if err != nil { + return nil, fmt.Errorf("read helix.org_url: %w", err) + } + sessionRole, err := cfg.GetString(ctx, "chat.session_role") + if err != nil { + return nil, fmt.Errorf("read chat.session_role: %w", err) + } + provider, err := cfg.GetString(ctx, "chat.provider") + if err != nil { + return nil, fmt.Errorf("read chat.provider: %w", err) + } + model, err := cfg.GetString(ctx, "chat.model") + if err != nil { + return nil, fmt.Errorf("read chat.model: %w", err) + } + client, err := helixclient.New(helixclient.Config{BaseURL: baseURL, APIKey: apiKey}) + if err != nil { + return nil, fmt.Errorf("helix client: %w", err) + } + // Fail-fast validation — see buildSpawner for rationale. 
+ if err := helixclient.ValidateProviderModel(ctx, client, provider, model); err != nil { + return nil, fmt.Errorf("invalid chat.provider / chat.model (run `helix-org config set chat.provider ` and `helix-org config set chat.model `): %w", err) + } + // Chat backend uses the same per-Worker project flow and the + // same fixed runtime / agent_type as the AI Worker spawner — + // see helix.Runtime / helix.AgentType. The auto-provisioned + // Agent App carries our MCP wiring (attached via UpdateApp + // after project apply); helix.org_url must be a tunnel URL + // reachable from Helix's runner so the in-sandbox agent can + // reach /workers/{id}/mcp. + applier := &agenthelix.ProjectApplier{ + Client: client, + Store: st, + HelixOrgURL: orgURL, + Provider: provider, + Model: model, + AgentMD: agent.Policy, + Logger: logger, + } + hb, err := chat.NewHelix(chat.HelixConfig{ + Client: client, + Ensure: applier, + OwnerID: "w-owner", + SessionRole: sessionRole, + Provider: provider, + Model: model, + CWD: cwd, + Logger: logger, + }) + if err != nil { + return nil, err + } + logger.Info("chat backend: helix", + "helix-url", baseURL, + "session-role", sessionRole, + "runtime", agenthelix.Runtime, + "agent-type", agenthelix.AgentType, + "provider", provider, + "model", model, + ) + return hb.WithPrompts(promptReg), nil + default: + return nil, fmt.Errorf("unknown chat.backend %q (valid: claude, helix)", kind) + } +} + +// portFromAddr extracts the ":PORT" suffix from a TCP address such as +// ":8080", "127.0.0.1:8080", or "0.0.0.0:8080". Returns ":8080" for an +// addr that has no explicit port (which mirrors net.http's own default). 
// portFromAddr extracts the ":PORT" suffix from a TCP listen address
// such as ":8080", "127.0.0.1:8080", or "[::1]:8080".
//
// It returns ":8080" when addr carries no port. That covers an
// address with no colon at all, and a bracketed IPv6 literal without
// a port (e.g. "[::1]"), whose last colon belongs to the address
// itself rather than separating a port.
func portFromAddr(addr string) string {
	const fallback = ":8080"

	i := strings.LastIndex(addr, ":")
	if i < 0 {
		return fallback
	}
	suffix := addr[i:]
	// A "]" after the last colon means that colon sits inside the
	// IPv6 brackets, so there is no port component to return.
	if strings.Contains(suffix, "]") {
		return fallback
	}
	return suffix
}
+ // Empty string means no default — Required keys without a row + // and without a default error on read. + Default string + + // Required means consumer reads error if no row exists and no + // default is set. False means the key is optional (subsystem + // boots dormant when missing). + Required bool + + // Secrets lists JSON paths within the value (object kind only) + // that should be redacted on get/list. e.g. ["token"] for + // transport.postmark redacts only the "token" field. + Secrets []string + + // Description is a one-line summary shown by `config list` so + // operators can discover what's settable. + Description string +} + +// ValueType is the small set of value shapes the registry validates. +type ValueType string + +const ( + TypeString ValueType = "string" + TypeInt ValueType = "int" + TypeObject ValueType = "object" +) + +// Registry is the central coordinator: it holds Specs and reads/writes +// configs through the store. +// +// Specs are registered once at startup; reads happen on every +// operation. There's no in-memory cache — SQLite is fast enough and +// live updates Just Work. If a hot path later proves cache-worthy, +// add a TTL cache layered on top. +type Registry struct { + store store.Configs + + mu sync.RWMutex + specs map[string]Spec +} + +// New returns a Registry bound to the given Configs store. +func New(s store.Configs) *Registry { + return &Registry{store: s, specs: make(map[string]Spec)} +} + +// Register declares a config key. Re-registering the same key panics; +// each key has exactly one owner. 
+func (r *Registry) Register(spec Spec) { + if spec.Key == "" { + panic("config: register with empty key") + } + if spec.Type == "" { + panic(fmt.Sprintf("config: register %q without type", spec.Key)) + } + r.mu.Lock() + defer r.mu.Unlock() + if _, exists := r.specs[spec.Key]; exists { + panic(fmt.Sprintf("config: key %q already registered", spec.Key)) + } + if spec.Default != "" { + if err := validateValue(spec, spec.Default); err != nil { + panic(fmt.Sprintf("config: register %q with invalid default: %v", spec.Key, err)) + } + } + r.specs[spec.Key] = spec +} + +// Spec returns the registered spec for a key, ok=false if not registered. +func (r *Registry) Spec(key string) (Spec, bool) { + r.mu.RLock() + defer r.mu.RUnlock() + s, ok := r.specs[key] + return s, ok +} + +// Specs returns every registered Spec, sorted by key. Used by `config list`. +func (r *Registry) Specs() []Spec { + r.mu.RLock() + defer r.mu.RUnlock() + out := make([]Spec, 0, len(r.specs)) + for _, s := range r.specs { + out = append(out, s) + } + sort.Slice(out, func(i, j int) bool { return out[i].Key < out[j].Key }) + return out +} + +// Set validates the value against the registered Spec and upserts the +// row. Unknown keys (not registered) are rejected — the registry is +// the source of truth for what's settable. +// +// updatedBy is the WorkerID for the audit column; empty is allowed +// today (auth not yet wired) but reserved. +func (r *Registry) Set(ctx context.Context, key, value string, updatedBy domain.WorkerID) error { + spec, ok := r.Spec(key) + if !ok { + return fmt.Errorf("unknown config key %q (no subsystem has registered it)", key) + } + if err := validateValue(spec, value); err != nil { + return fmt.Errorf("validate %q: %w", key, err) + } + cfg, err := domain.NewConfig(key, value, time.Now().UTC(), updatedBy) + if err != nil { + return err + } + return r.store.Set(ctx, cfg) +} + +// Delete removes the row. 
Subsequent reads fall back to the registered +// default (if any), or error (if Required). +func (r *Registry) Delete(ctx context.Context, key string) error { + if _, ok := r.Spec(key); !ok { + return fmt.Errorf("unknown config key %q", key) + } + return r.store.Delete(ctx, key) +} + +// GetRaw returns the raw JSON value — the row's value if set, +// otherwise the registered default. Returns ErrNotConfigured when no +// row exists, no default is set, and the spec is not Required (caller +// can treat as "feature disabled"). Returns a wrapped error when +// Required and missing. +func (r *Registry) GetRaw(ctx context.Context, key string) (string, error) { + spec, ok := r.Spec(key) + if !ok { + return "", fmt.Errorf("unknown config key %q", key) + } + cfg, err := r.store.Get(ctx, key) + if err == nil { + return cfg.Value, nil + } + if !errors.Is(err, store.ErrNotFound) { + return "", err + } + if spec.Default != "" { + return spec.Default, nil + } + if spec.Required { + return "", fmt.Errorf("config %q: %w", key, ErrRequired) + } + return "", ErrNotConfigured +} + +// GetRedacted returns the value with secret JSON paths replaced by +// "..." — for `config get` and `config list` output. For object +// values where Secrets is set, returns valid JSON with the redacted +// fields. For non-object values or empty Secrets, returns the raw +// value. +func (r *Registry) GetRedacted(ctx context.Context, key string) (string, error) { + raw, err := r.GetRaw(ctx, key) + if err != nil { + return "", err + } + spec, _ := r.Spec(key) + return redact(spec, raw) +} + +// GetString reads a string-typed config and returns its Go value. +// Errors if the spec isn't string-typed or the value doesn't parse. 
+func (r *Registry) GetString(ctx context.Context, key string) (string, error) { + raw, err := r.GetRaw(ctx, key) + if err != nil { + return "", err + } + spec, _ := r.Spec(key) + if spec.Type != TypeString { + return "", fmt.Errorf("config %q: spec type is %s, not string", key, spec.Type) + } + var s string + if err := json.Unmarshal([]byte(raw), &s); err != nil { + return "", fmt.Errorf("decode string for %q: %w", key, err) + } + return s, nil +} + +// GetInt reads an int-typed config and returns its Go value. +func (r *Registry) GetInt(ctx context.Context, key string) (int64, error) { + raw, err := r.GetRaw(ctx, key) + if err != nil { + return 0, err + } + spec, _ := r.Spec(key) + if spec.Type != TypeInt { + return 0, fmt.Errorf("config %q: spec type is %s, not int", key, spec.Type) + } + var n int64 + if err := json.Unmarshal([]byte(raw), &n); err != nil { + return 0, fmt.Errorf("decode int for %q: %w", key, err) + } + return n, nil +} + +// GetObject decodes the value into the given destination, which must +// be a pointer. Errors if the spec isn't object-typed. +func (r *Registry) GetObject(ctx context.Context, key string, dst any) error { + raw, err := r.GetRaw(ctx, key) + if err != nil { + return err + } + spec, _ := r.Spec(key) + if spec.Type != TypeObject { + return fmt.Errorf("config %q: spec type is %s, not object", key, spec.Type) + } + if err := json.Unmarshal([]byte(raw), dst); err != nil { + return fmt.Errorf("decode object for %q: %w", key, err) + } + return nil +} + +// Sentinel errors for callers that want to distinguish "not yet +// configured" from "required but missing" from "doesn't exist". +var ( + // ErrNotConfigured indicates the key has no row and no default, + // and isn't required. Subsystems treat this as "feature dormant". + ErrNotConfigured = errors.New("not configured") + // ErrRequired indicates a required key is missing. 
+ ErrRequired = errors.New("required config key not set") +) + +func validateValue(spec Spec, raw string) error { + if raw == "" { + return errors.New("value is empty") + } + switch spec.Type { + case TypeString: + var s string + if err := json.Unmarshal([]byte(raw), &s); err != nil { + return fmt.Errorf("not a JSON string: %w", err) + } + case TypeInt: + var n json.Number + dec := json.NewDecoder(strings.NewReader(raw)) + dec.UseNumber() + if err := dec.Decode(&n); err != nil { + return fmt.Errorf("not a number: %w", err) + } + if _, err := n.Int64(); err != nil { + return fmt.Errorf("not an integer: %w", err) + } + case TypeObject: + var m map[string]json.RawMessage + if err := json.Unmarshal([]byte(raw), &m); err != nil { + return fmt.Errorf("not a JSON object: %w", err) + } + default: + return fmt.Errorf("unknown spec type %q", spec.Type) + } + return nil +} + +// redact replaces JSON paths listed in spec.Secrets with "..." in the +// value's JSON representation. Only object values support secret +// fields; for other types or empty Secrets, raw is returned unchanged. +func redact(spec Spec, raw string) (string, error) { + if len(spec.Secrets) == 0 || spec.Type != TypeObject { + return raw, nil + } + var obj map[string]any + if err := json.Unmarshal([]byte(raw), &obj); err != nil { + return raw, nil // already malformed; let the consumer's typed Get error + } + for _, path := range spec.Secrets { + // Path is a top-level field name today. If we ever need + // nested paths (a.b.c), split on "." and walk. Keeping it + // flat for Phase 1 — no real config has nested secrets yet. + if _, exists := obj[path]; exists { + obj[path] = "..." 
+ } + } + out, err := json.Marshal(obj) + if err != nil { + return "", fmt.Errorf("re-encode redacted value: %w", err) + } + return string(out), nil +} diff --git a/helix-org/config/registry_test.go b/helix-org/config/registry_test.go new file mode 100644 index 0000000000..ec163f9a9c --- /dev/null +++ b/helix-org/config/registry_test.go @@ -0,0 +1,183 @@ +package config_test + +import ( + "context" + "errors" + "strings" + "testing" + + "github.com/helixml/helix-org/config" + "github.com/helixml/helix-org/store/sqlite" +) + +func newRegistry(t *testing.T) *config.Registry { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open store: %v", err) + } + return config.New(s.Configs) +} + +func TestRegistryRegisterAndSet(t *testing.T) { + t.Parallel() + r := newRegistry(t) + r.Register(config.Spec{ + Key: "claude.bin", + Type: config.TypeString, + Default: `"claude"`, + Required: true, + Description: "Path to claude CLI.", + }) + + ctx := context.Background() + + // Default applies before any Set. + got, err := r.GetString(ctx, "claude.bin") + if err != nil { + t.Fatalf("GetString default: %v", err) + } + if got != "claude" { + t.Fatalf("default = %q", got) + } + + // Set overrides default. + if err := r.Set(ctx, "claude.bin", `"/usr/local/bin/claude"`, ""); err != nil { + t.Fatalf("Set: %v", err) + } + got, _ = r.GetString(ctx, "claude.bin") + if got != "/usr/local/bin/claude" { + t.Fatalf("after set = %q", got) + } + + // Delete falls back to default again. 
+ if err := r.Delete(ctx, "claude.bin"); err != nil { + t.Fatalf("Delete: %v", err) + } + got, _ = r.GetString(ctx, "claude.bin") + if got != "claude" { + t.Fatalf("after delete = %q", got) + } +} + +func TestRegistryRequiredMissing(t *testing.T) { + t.Parallel() + r := newRegistry(t) + r.Register(config.Spec{ + Key: "claude.public_url", Type: config.TypeString, Required: true, + }) + + ctx := context.Background() + _, err := r.GetString(ctx, "claude.public_url") + if !errors.Is(err, config.ErrRequired) { + t.Fatalf("err = %v, want ErrRequired", err) + } +} + +func TestRegistryOptionalMissing(t *testing.T) { + t.Parallel() + r := newRegistry(t) + r.Register(config.Spec{ + Key: "transport.postmark", Type: config.TypeObject, + }) + + ctx := context.Background() + var pm struct { + Token string `json:"token"` + } + err := r.GetObject(ctx, "transport.postmark", &pm) + if !errors.Is(err, config.ErrNotConfigured) { + t.Fatalf("err = %v, want ErrNotConfigured", err) + } +} + +func TestRegistryUnknownKey(t *testing.T) { + t.Parallel() + r := newRegistry(t) + ctx := context.Background() + + if err := r.Set(ctx, "ghost.key", `"x"`, ""); err == nil || !strings.Contains(err.Error(), "unknown") { + t.Fatalf("Set unknown = %v", err) + } + if _, err := r.GetRaw(ctx, "ghost.key"); err == nil { + t.Fatalf("GetRaw unknown = nil") + } +} + +func TestRegistryValidationRejectsBadShape(t *testing.T) { + t.Parallel() + r := newRegistry(t) + r.Register(config.Spec{Key: "a.s", Type: config.TypeString}) + r.Register(config.Spec{Key: "a.i", Type: config.TypeInt}) + r.Register(config.Spec{Key: "a.o", Type: config.TypeObject}) + + ctx := context.Background() + cases := []struct{ key, val string }{ + {"a.s", `42`}, // not a string + {"a.s", `{"x":1}`}, // not a string + {"a.i", `"hi"`}, // not an int + {"a.i", `1.5`}, // not an integer + {"a.o", `42`}, // not an object + {"a.o", `[1,2,3]`}, // not an object + {"a.o", `not json`}, // not JSON + } + for _, tc := range cases { + if err := 
r.Set(ctx, tc.key, tc.val, ""); err == nil { + t.Errorf("Set(%q, %q) = nil, want validation error", tc.key, tc.val) + } + } +} + +func TestRegistryRedaction(t *testing.T) { + t.Parallel() + r := newRegistry(t) + r.Register(config.Spec{ + Key: "transport.postmark", + Type: config.TypeObject, + Secrets: []string{"token"}, + }) + + ctx := context.Background() + if err := r.Set(ctx, "transport.postmark", `{"token":"abc-xyz","from":"x@y.com"}`, ""); err != nil { + t.Fatalf("Set: %v", err) + } + + // Plaintext via GetRaw / GetObject — consumers see real values. + raw, _ := r.GetRaw(ctx, "transport.postmark") + if !strings.Contains(raw, "abc-xyz") { + t.Fatalf("GetRaw should not redact: %q", raw) + } + + // Redacted via GetRedacted — for CLI output. + redacted, _ := r.GetRedacted(ctx, "transport.postmark") + if strings.Contains(redacted, "abc-xyz") { + t.Fatalf("GetRedacted leaked secret: %q", redacted) + } + if !strings.Contains(redacted, "x@y.com") { + t.Fatalf("GetRedacted clobbered non-secret: %q", redacted) + } +} + +func TestRegistryRegisterTwicePanics(t *testing.T) { + t.Parallel() + r := newRegistry(t) + r.Register(config.Spec{Key: "k", Type: config.TypeString}) + + defer func() { + if recover() == nil { + t.Fatalf("expected panic on double register") + } + }() + r.Register(config.Spec{Key: "k", Type: config.TypeString}) +} + +func TestRegistryRegisterBadDefaultPanics(t *testing.T) { + t.Parallel() + r := newRegistry(t) + defer func() { + if recover() == nil { + t.Fatalf("expected panic on bad default") + } + }() + r.Register(config.Spec{Key: "k", Type: config.TypeInt, Default: `"hello"`}) +} diff --git a/helix-org/demos/email/README.md b/helix-org/demos/email/README.md new file mode 100644 index 0000000000..e0e5c578dd --- /dev/null +++ b/helix-org/demos/email/README.md @@ -0,0 +1,446 @@ +# Email + +A two-Worker support team that talks to customers — and to each +other — by email. Sam is customer service (alias `sam`); Lee is +engineering (alias `engineer`). 
When a customer emails Sam with a +technical question Sam can't answer, he forwards it to Lee at +Lee's helix alias. Lee replies by email. Sam paraphrases for the +customer and replies. Every hop crosses Postmark; every Stream +(`s-support`, `s-engineer`) is bidirectional. + +About 20 minutes the first time (Postmark account + Sender +Signature). Re-runs after that are one chat prompt. + +## What this demo shows + +- **Both directions on every Stream.** `s-support` and `s-engineer` + each accept inbound mail at their `+alias` address *and* render + outbound `publish` calls back through Postmark's send API. + Same `domain.Message` envelope in both directions; the only + per-stream config is `{"alias": "..."}`. +- **Workers as email participants.** Sam emails Lee. Lee emails + Sam. The customer emails Sam. All three legs use the same + transport, the same envelope, the same alias-based routing. + Workers are first-class email participants. +- **Threading is the spine.** Sam sets `ThreadID` on his + escalation to Lee. Lee preserves it on his reply. Sam reads it + back to find the original customer query in `s-support` history + and threads his customer-facing reply to it. The whole + conversation is one logical thread despite four physical + emails. +- **Role drives behaviour.** Sam decides "answer myself or + forward" by reading his own role text — there's no hard-coded + routing table. Editing `roles/customer-service.md` and running + `update_role` shifts the policy live. + +## Prerequisites + +- [Postmark](https://postmarkapp.com/) account. Free tier handles + 100 emails/day — plenty for this demo. +- An email address you control to use as the Sender Signature + (any address — `you@gmail.com`, an iCloud address, whatever). + No domain required. +- Public URL for your local helix-org so Postmark can reach the + inbound webhook. [`ngrok http 8080`](https://ngrok.com/) or a + Cloudflare Tunnel works for testing. 
+- `helix-org` and `claude` on PATH; `jq` and `curl` for the setup + commands below. + +> **For this installation, Postmark is already wired up and +> approved.** Server `helix-org` (ID 19042071) is provisioned, the +> Sender Signature on `phil@winder.ai` is confirmed, and the +> account is past pending-approval, so cross-domain sends to +> `+alias@inbound.postmarkapp.com` work. Tokens live in +> `~/.helix/postmark` (mode 0600); source it before running +> anything that calls Postmark: +> +> ```bash +> set -a && source ~/.helix/postmark && set +a +> ``` +> +> Skip the Postmark setup section below; it's there for first-time +> installations. + +(With your own domain, you can graduate to `support@yourdomain.com` +later — see the production setup notes at the end of the +Postmark section.) + +## Postmark setup + +The first run-through takes some clicking through Postmark's UI, +but every step that *can* be a curl is one. Save your tokens once +and the setup is reproducible. + +### 1. Sign up and grab the Account API token + +[postmarkapp.com](https://postmarkapp.com/) → sign up → in the +top-right menu, **API Tokens** → **Account API tokens** → copy. +This token manages account-wide things (domains, servers). + +```bash +export POSTMARK_ACCOUNT_TOKEN= +``` + +### 2. Add and verify your sending domain + +```bash +curl -X POST "https://api.postmarkapp.com/domains" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -H "X-Postmark-Account-Token: $POSTMARK_ACCOUNT_TOKEN" \ + -d '{ + "Name": "yourdomain.com", + "ReturnPathDomain": "pm-bounces.yourdomain.com" + }' | tee /tmp/pm-domain.json +``` + +The response includes `ID`, `DKIMHost`, `DKIMTextValue`, +`ReturnPathDomainCNAMEValue`. Save the ID: + +```bash +export PM_DOMAIN_ID=$(jq -r '.ID' /tmp/pm-domain.json) +``` + +Add the DKIM, SPF (`v=spf1 a mx include:spf.mtasv.net ~all`), and +return-path CNAME to your DNS. 
Then verify: + +```bash +curl -X PUT "https://api.postmarkapp.com/domains/${PM_DOMAIN_ID}/verifyDkim" \ + -H "Accept: application/json" \ + -H "X-Postmark-Account-Token: $POSTMARK_ACCOUNT_TOKEN" + +curl -X PUT "https://api.postmarkapp.com/domains/${PM_DOMAIN_ID}/verifyReturnPath" \ + -H "Accept: application/json" \ + -H "X-Postmark-Account-Token: $POSTMARK_ACCOUNT_TOKEN" +``` + +Both should return `"DKIMVerified": true` / `"ReturnPathDomainVerified": true`. + +### 3. Create a Postmark Server (transactional) + +A Postmark "Server" is one project's worth of email — its own +sending stream, inbound stream, settings, and API token. We need +one for this demo. + +```bash +curl -X POST "https://api.postmarkapp.com/servers" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -H "X-Postmark-Account-Token: $POSTMARK_ACCOUNT_TOKEN" \ + -d '{ + "Name": "helix-org support", + "Color": "Green", + "TrackOpens": false, + "TrackLinks": "None" + }' | tee /tmp/pm-server.json +``` + +Grab the **Server token** from the response (`ApiTokens[0]`). This +is what helix-org uses to send and what authenticates configuration +calls on this Server: + +```bash +export POSTMARK_TOKEN=$(jq -r '.ApiTokens[0]' /tmp/pm-server.json) +``` + +### 4. Set the inbound forwarding domain + +Postmark's inbound works two ways — pick one. + +**(A) Quick: use the hosted inbound hash address.** Every Postmark +Server has a unique address like `abc123def456@inbound.postmarkapp.com`. +Customers email *that*. Fine for testing, ugly for production. 
+ +```bash +curl -s "https://api.postmarkapp.com/server" \ + -H "Accept: application/json" \ + -H "X-Postmark-Server-Token: $POSTMARK_TOKEN" \ + | jq -r '.InboundAddress' +``` + +**(B) Production: MX your own domain to Postmark.** Tell Postmark +which subdomain to receive on: + +```bash +curl -X PUT "https://api.postmarkapp.com/server" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -H "X-Postmark-Server-Token: $POSTMARK_TOKEN" \ + -d '{ "InboundDomain": "inbound.yourdomain.com" }' +``` + +Then add an MX record on `inbound.yourdomain.com` → `inbound.postmarkapp.com` +(priority 10) at your DNS provider, and a forwarding rule (or +catch-all) at your registrar so `support@yourdomain.com` +delivers to `support@inbound.yourdomain.com`. + +### 5. Point Postmark at your helix-org instance + +Postmark POSTs every inbound email to a URL of your choice. We +want it to hit helix-org's email-transport endpoint, scoped to +`s-support`: + +```bash +# Replace https://abc123.ngrok.app with your public URL. +curl -X PUT "https://api.postmarkapp.com/server" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -H "X-Postmark-Server-Token: $POSTMARK_TOKEN" \ + -d '{ + "InboundHookUrl": "https://abc123.ngrok.app/email/postmark/s-support" + }' +``` + +The path shape — `/email/postmark/` — tells the +transport which Stream the inbound message belongs to. One Server +can fan to multiple Streams by giving each its own URL (or its own +Postmark Server, if you prefer hard isolation). + +### 6. Sanity-check sending + +```bash +curl -X POST "https://api.postmarkapp.com/email" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -H "X-Postmark-Server-Token: $POSTMARK_TOKEN" \ + -d '{ + "From": "support@yourdomain.com", + "To": "you@example.com", + "Subject": "Postmark wired", + "TextBody": "If you got this, Postmark + DNS are good." + }' +``` + +If that lands in your inbox, Postmark is fully configured. 
Save +`POSTMARK_TOKEN` somewhere durable; helix-org reads it from env. + +## Run the demo + +### 1. Bootstrap and configure (one-time) + +```bash +cd /home/phil/helix/helix-org +make build +rm -rf /tmp/email-envs /tmp/email.db +./bin/helix-org bootstrap --db /tmp/email.db --envs-dir /tmp/email-envs +``` + +Then set the Postmark transport config in the database (the CLI +opens the same SQLite file the server uses; live updates work +without a restart): + +```bash +source ~/.helix/postmark +./bin/helix-org config set --db /tmp/email.db transport.postmark "{ + \"token\":\"$POSTMARK_SERVER_TOKEN\", + \"inbound\":\"$POSTMARK_INBOUND\", + \"from\":\"$POSTMARK_FROM\" +}" +``` + +(For pending-approval Postmark accounts, append +`,\"disable_reply_to\":true` — outbound succeeds but customer +replies won't route back through helix until approval lands.) + +### 2. Substitute `<HASH>` into role files + +The roles at `roles/customer-service.md` and `roles/engineer.md` +each contain `<HASH>+<alias>@inbound.postmarkapp.com` +addresses so each Worker knows where to email the other. Fill in +your hash before creating the roles: + +```bash +HASH="${POSTMARK_INBOUND%%@*}" +mkdir -p /tmp/email-run/roles /tmp/email-run/workers +for f in demos/email/roles/*.md; do + sed "s/<HASH>/$HASH/g" "$f" > /tmp/email-run/roles/$(basename "$f") +done +cp demos/email/workers/*.md /tmp/email-run/workers/ +``` + +### 3. Start the server (terminal 1) + +```bash +./bin/helix-org serve --db /tmp/email.db --envs-dir /tmp/email-envs +``` + +The server logs `email transport enabled provider=postmark` once +the Postmark config is loaded. + +### 4. Expose helix-org publicly (terminal 2) + +```bash +cloudflared tunnel --url http://localhost:8080 +``` + +Or `ngrok http 8080` if you have ngrok set up. Note the public +URL it prints (e.g. +`https://accounts-bookmarks-permission-bloomberg.trycloudflare.com`). + +### 5.
Point Postmark's inbound webhook at helix-org + +```bash +source ~/.helix/postmark +CF_URL=<public-url-from-step-4> +curl -sS -X PUT "https://api.postmarkapp.com/server" \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -H "X-Postmark-Server-Token: $POSTMARK_SERVER_TOKEN" \ + -d "{\"InboundHookUrl\": \"${CF_URL}/email/postmark\"}" +``` + +The path is `/email/postmark` (one URL for the whole installation). +The transport extracts the alias from `OriginalRecipient` — +mail to `<HASH>+sam@inbound.postmarkapp.com` routes to the Stream +whose alias is `sam`, mail to `<HASH>+engineer@…` routes to the +Stream whose alias is `engineer`. + +### 6. Hire Sam and Lee (terminal 3) + +```bash +cd /tmp/email-run +/home/phil/helix/helix-org/bin/helix-org chat --new +``` + +> Set up the support team from this directory. +> +> **Customer service.** Read `./roles/customer-service.md` and +> create role `r-customer-service` from its body. Create stream +> `s-support` with transport.kind `email` and config +> `{"alias":"sam"}`. Position `p-customer-service` under `p-root` +> with that role. Hire AI worker `w-sam` with identityContent +> from `./workers/sam.md`. Grant the tools listed in the role's +> `## Tools (MCP)` section. +> +> **Engineering.** Read `./roles/engineer.md` and create role +> `r-engineer`. Create stream `s-engineer` with transport.kind +> `email` and config `{"alias":"engineer"}`. Position `p-engineer` +> under `p-root` with that role. Hire AI worker `w-lee` with +> identityContent from `./workers/lee.md`. Grant the tools listed +> in the role's `## Tools (MCP)` section. +> +> Then `worker_log` on each Worker (`w-sam`, `w-lee`) with +> `wait=60` until you see `=== exit: ok ===` to confirm they're +> subscribed and ready. + +### 7.
Send Sam an escalation-grade email + +From your normal mail client (Gmail, etc.), send an email to your +`+sam` alias address: + +> **To:** `<HASH>+sam@inbound.postmarkapp.com` +> *(your hash is in `~/.helix/postmark`)* +> +> **Subject:** How does the email transport route inbound mail? +> +> Hi support — I'm trying to figure out how mail to my Postmark +> hash address actually finds the right helix-org Stream. Is the +> stream ID in the URL? In the address? Somewhere else? +> +> — Phil + +This is a question Sam can't answer himself (it's about +helix-org internals), so he'll forward it to Lee. + +### 8. Watch the four-hop cascade + +Back in chat: + +> Subscribe me to `s-support` and `s-engineer`. `read_events` with +> `wait=120` until I interrupt; print every event verbatim as it +> arrives. + +You'll see, in order: + +1. **Customer query** lands on `s-support` (`From: phil@…`, + `Subject: How does the email transport…`). +2. **Sam's escalation** appears on `s-support` + (`To: +engineer@…`, paraphrased question for Lee). Postmark + sends it; their inbound webhook re-delivers it as… +3. **Sam's escalation arrives on `s-engineer`** (`From: phil@…` + — Postmark renders our verified Sender Sig as From regardless + of which Worker published — but the Subject and Body are Sam's). +4. **Lee's reply** appears on `s-engineer` (`To: +sam@…`, + `Subject: [eng] Re: …`, technical answer signed `— Lee`). + Postmark routes it back to… +5. **Lee's reply arrives on `s-support`** (`Subject: [eng] Re: …`). + Sam reactivates, sees the `[eng]` prefix, walks `s-support` + history by `ThreadID` to find the customer's original query, + paraphrases Lee's answer. +6. **Sam's customer-facing reply** appears on `s-support` + (`To: phil@winder.ai`, plain `Re: …` subject, paraphrased, + signed `— Sam`). Postmark sends it. Your inbox lights up. + +End-to-end ≈ 60–120 seconds (three claude activations: Sam, +Lee, Sam-again; cold-start dominates). + +### 9. Stop + +Ctrl-C terminals 1 and 2.
+ +## What this shows + +- **Email is just another Transport.** Once the transport + translates Postmark JSON ↔ `domain.Message` at its boundary, + Sam's role looks identical in shape to a Slack support role or + an SMS support role — same envelope, same tools, different + identifiers in `From` / `To`. +- **Threading is the transport's job, not the Role's.** Sam sets + `InReplyTo` because he's polite; the email transport renders it + to RFC2822 headers because that's email's threading protocol. + A Slack version of this role would set `ThreadID` and the Slack + transport would map to `thread_ts`. +- **Credentials live in the DB, not in MCP.** The + `transport.postmark` config row is set via the `helix-org config` + CLI — never via chat. Operational config (provider creds, model + selection, public URL) is mutated by the operator on the host; + org-graph mutations (workers, roles, streams) go through MCP. + Same SQLite file, two access paths. See + [`design/config.md`](../../design/config.md). +- **Live config updates.** `helix-org config set + transport.postmark …` takes effect on the next outbound send — + no server restart, no signal. SQLite WAL mode handles the + concurrent-writer-while-server-runs case cleanly. +- **One inbound URL, alias-based routing.** All inbound mail flows + through one Postmark webhook URL (`/email/postmark`); the + transport reads the `+alias` from `OriginalRecipient` and routes + to the matching Stream. Adding `billing@` is a `create_stream` + with `{"alias":"billing"}` — no Postmark UI changes, no extra + webhook URLs. +- **No echo loops on bidirectional streams.** Inbound events have + `Source=""`; the dispatcher skips outbound emit for those, so a + Stream that's both inbound and outbound on the same provider + doesn't ping-pong. Worker-published events (`Source!=""`) emit + normally. +- **Workers as first-class email participants.** Sam emails Lee. + Lee emails Sam. The customer emails Sam. 
All three legs use the + same transport, the same envelope, and the same alias-based + routing — Workers aren't a special case. Hiring a third + participant (Robin in legal? alias `legal`?) is two new + Streams + two new role files, no transport changes. +- **`ThreadID` as the conversation spine.** Sam's escalation + carries the customer's `ThreadID`; Lee preserves it; Sam reads + it back to find the original customer. The whole four-hop + cascade is one logical thread despite four physical Postmark + send/receive pairs. + +## What this doesn't cover (yet) + +- **Multiple support aliases on different domains.** All aliases on + one Postmark Server share the same `From` (the verified Sender + Signature). For `billing@brand-a.com` vs `support@brand-b.com` + with different Sender Signatures, you'd want one Postmark Server + per brand and a per-stream provider override — out of scope today. +- **HTML mail.** The transport hands `Message.Body` the + `TextBody` from Postmark by default. HTML bodies and rich + attachments work via `Message.BodyContentType` and + `Message.Attachments`, but the role prompt below sticks to + plain text because that's the right default for support replies. +- **Auto-classifying spam / out-of-office / bounces.** Postmark + marks `Headers["X-Spam-Score"]` and friends; the email transport + forwards them in `Message.Extra` and the role can filter, but + this demo doesn't show it. +- **Multi-Worker hand-offs.** Sam escalates by writing "Let me + get a teammate" — there's no teammate. Adding a + `r-support-engineer` role that Sam DMs would close the loop; + out of scope for this README. 
diff --git a/helix-org/demos/email/demo.cast b/helix-org/demos/email/demo.cast new file mode 100644 index 0000000000..6161ce449b --- /dev/null +++ b/helix-org/demos/email/demo.cast @@ -0,0 +1,19 @@ +{"version": 2, "width": 120, "height": 36, "timestamp": 1777365111, "idle_time_limit": 2.0, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}} +[0.003418, "o", "\r\n\u001b[1m\u001b[36m# 1. Bootstrap fresh helix-org and set the Postmark transport config\u001b[0m\r\n(server-level Postmark creds in DB; CLI-only, never via MCP)\r\n"] +[0.046347, "o", "set transport.postmark = {\"from\":\"phil@winder.ai\",\"inbound\":\"6b3bd15f407ea200e7607799b4c9eae8@inbound.postmarkapp.com\",\"token\":\"...\"}\r\n"] +[2.049012, "o", "\r\n\u001b[1m\u001b[36m# 2. Substitute the inbound hash into the role files\u001b[0m\r\n"] +[2.049097, "o", "(roles use as a placeholder so workers know each other's addresses)"] +[2.049149, "o", "\r\n"] +[2.077795, "o", "customer-service.md: `6b3bd15f407ea200e7607799b4c9eae8+engineer@inbound.postmarkapp.com` when you need\r\ncustomer-service.md: - `to` — `[6b3bd15f407ea200e7607799b4c9eae8+engineer@inbound.postmarkapp.com]`.\r\nengineer.md: `6b3bd15f407ea200e7607799b4c9eae8+sam@inbound.postmarkapp.com`.\r\nengineer.md:- `to` — `[6b3bd15f407ea200e7607799b4c9eae8+sam@inbound.postmarkapp.com]`.\r\n"] +[4.08136, "o", "\r\n\u001b[1m\u001b[36m# 3. Start helix-org server and a Cloudflare quick tunnel\u001b[0m\r\n"] +[12.092509, "o", " helix-org listening on :8080\r\n public URL: \u001b[33mhttps://judge-particularly-pairs-replaced.trycloudflare.com\u001b[0m\r\n"] +[13.095937, "o", "\r\n\u001b[1m\u001b[36m# 4. 
Point Postmark's inbound webhook at our tunnel\u001b[0m\r\n"] +[13.480585, "o", "\u001b[1;39m{\r\n \u001b[0m\u001b[1;34m\"InboundHookUrl\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"https://judge-particularly-pairs-replaced.trycloudflare.com/email/postmark\"\u001b[0m\u001b[1;39m\r\n\u001b[1;39m}\u001b[0m\r\n"] +[15.485033, "o", "\r\n\u001b[1m\u001b[36m# 5. Hire Sam (customer service) and Lee (engineering) — one chat -p\u001b[0m\r\n"] +[165.773205, "o", "Both workers reached `=== exit: ok ===` after subscribing to their streams.\r\n\r\nRoles `r-customer-service`/`r-engineer`, streams `s-support`/`s-engineer` (email transports with aliases `sam`/`engineer`), positions `p-customer-service`/`p-engineer` under `p-root`, and workers `w-sam`/`w-lee` are all up — initial grants used `mcp__helix__*` prefixes which the worker MCP didn't recognize, so I regranted unprefixed `subscribe`/`publish`/`read_events` and both workers then subscribed cleanly and exited ok.\r\n"] +[166.775908, "o", "\r\n\u001b[1m\u001b[36m# 6. A customer emails our +sam alias\u001b[0m\r\n To: \u001b[33m6b3bd15f407ea200e7607799b4c9eae8+sam@inbound.postmarkapp.com\u001b[0m\r\n Subject: How does the email transport route inbound mail?\r\n"] +[167.193152, "o", " Postmark accepted: OK (id bcfc5c4b-3acf-43ed-8ed3-3af1aa18f8c8)\r\n"] +[168.196905, "o", "\r\n\u001b[1m\u001b[36m# 7. 
Watch the four-hop cascade in helix-org's log\u001b[0m\r\n(Sam can't answer this himself — it's a question about transport internals — so he forwards to Lee)\r\n"] +[264.355145, "o", "level=INFO msg=postmark.inbound stream=s-support alias=sam from=phil@winder.ai subject=\"How does the email transport route inbound mail?\"\r\n"] +[264.355427, "o", "level=INFO msg=postmark.emit stream=s-support to=+engineer@inbound.postmarkapp.com subject=\"How does the email transport route inbound mail?\" status=200\r\nlevel=INFO msg=postmark.inbound stream=s-engineer alias=engineer from=phil@winder.ai subject=\"How does the email transport route inbound mail?\"\r\nlevel=INFO msg=postmark.emit stream=s-engineer to=+sam@inbound.postmarkapp.com subject=\"[eng] Re: How does the email transport route inbound mail?\" status=200\r\nlevel=INFO msg=postmark.inbound stream=s-support alias=sam from=phil@winder.ai subject=\"[eng] Re: How does the email transport route inbound mail?\"\r\nlevel=INFO msg=postmark.emit stream=s-support to=phil@winder.ai subject=\"Re: How does the email transport route inbound mail?\" status=200\r\n"] +[268.359397, "o", "\r\n\u001b[1m\u001b[36m# 8. Done. Real email landed in phil@winder.ai's inbox.\u001b[0m\r\n\u001b[32m customer → Sam (Postmark inbound webhook)\u001b[0m\r\n\u001b[32m Sam → Lee (Postmark send + Postmark inbound webhook)\u001b[0m\r\n\u001b[32m Lee → Sam (Postmark send + Postmark inbound webhook)\u001b[0m\r\n\u001b[32m Sam → customer (Postmark send)\u001b[0m\r\n\r\nSame envelope on every hop; threading via Message-Id; 3x \u001b[32mstatus=200\u001b[0m.\r\n"] diff --git a/helix-org/demos/email/roles/customer-service.md b/helix-org/demos/email/roles/customer-service.md new file mode 100644 index 0000000000..b3e878dcb2 --- /dev/null +++ b/helix-org/demos/email/roles/customer-service.md @@ -0,0 +1,92 @@ +# Customer Service + +You handle inbound customer email. 
You answer simple questions +yourself; technical questions you escalate to engineering by +email. When engineering replies, you paraphrase their answer for +the customer. One stream — `s-support` — both directions. + +## Streams + +- `s-support` — your inbound and outbound email + (`transport: email`, alias `sam`). Subscribe on hire. + +## Other workers + +- `w-lee` (engineering) reads `s-engineer`. Email Lee at + `+engineer@inbound.postmarkapp.com` when you need + a technical answer. + +## Triggers + +**On hire.** `subscribe` to `s-support`. Exit. + +**On any new event on `s-support`.** Parse the Message envelope, +then branch on `Subject`: + +### A. Subject starts with `[eng]` — Lee replied + +Lee's reply carries the original `ThreadID`. Find the customer: +`read_events` on `s-support` with a generous limit, walk back +through entries whose `Message.ThreadID` matches Lee's +`Message.ThreadID` and whose `Subject` does *not* start with +`[eng]`. The first such entry is the customer's original query; +its `Message.From` is the customer's email and its +`Message.MessageID` is the one to thread your reply against. + +Paraphrase Lee's technical answer for the customer in 2–4 plain +sentences. Don't drop accuracy, but skip jargon they don't need. +`publish` to `s-support`: + +- `body` — your paraphrased answer, signed `— Sam`. +- `to` — `[]`. +- `subject` — `Re: ` (drop the `[eng]` prefix). +- `inReplyTo` — the customer's original `MessageID`. +- `threadId` — the same `ThreadID` (preserves the thread). + +### B. Subject does not start with `[eng]` — customer query + +Decide: can you answer directly? + +- **Yes** (account questions, simple how-to, anything + non-technical): draft the reply yourself. + - `body` — 2–4 sentences, no preamble, sign off `— Sam`. + - `to` — `[Message.From]`. + - `subject` — `Re: `. + - `inReplyTo` — `Message.MessageID`. + - `threadId` — `Message.ThreadID` if set, else `Message.MessageID`. 
+ +- **No** (anything about helix-org's internals, build/deploy, + debugging steps, transport behaviour, configuration semantics): + forward to Lee. + - `body` — paraphrase the customer's question for an engineer's + audience. Include any relevant context the customer gave (logs, + config, what they tried). Don't include the customer's name or + email — Lee doesn't need them. + - `to` — `[+engineer@inbound.postmarkapp.com]`. + - `subject` — the customer's original subject, no prefix. Lee + will add `[eng]` on his reply. + - `inReplyTo` — `Message.MessageID` (the customer's). + - `threadId` — `Message.ThreadID` if set, else `Message.MessageID` + — **critical**: this is how you'll find the customer when Lee's + reply lands. + + Don't reply to the customer yet. The dispatcher will reactivate + you when Lee responds. + +Then exit. + +## Tools (MCP) + +- `subscribe` +- `publish` +- `read_events` + +## Style + +Lead with the answer. No "I'd be happy to help" / "I understand +your concern" / "Thanks for reaching out". Polite by being +direct. Don't apologise for things that aren't your fault. +Contractions are fine; emoji are not. + +Sign every customer-facing reply with `— Sam` on its own line. +**Do not** sign emails to Lee — they're internal. diff --git a/helix-org/demos/email/roles/engineer.md b/helix-org/demos/email/roles/engineer.md new file mode 100644 index 0000000000..2672075c9c --- /dev/null +++ b/helix-org/demos/email/roles/engineer.md @@ -0,0 +1,60 @@ +# Engineering Support + +You handle technical questions forwarded by Sam (customer +service) by email. You don't see the customer — only Sam's +relayed question. Reply with a precise, useful answer; if you +need more info, ask one specific clarifying question. + +## Streams + +- `s-engineer` — your inbound and outbound email + (`transport: email`, alias `engineer`). Subscribe on hire. + +## Other workers + +- `w-sam` (customer service) reads `s-support`. Reply to Sam at + `+sam@inbound.postmarkapp.com`. 
+ +## Triggers + +**On hire.** `subscribe` to `s-engineer`. Exit. + +**On any new event on `s-engineer`.** Parse the Message envelope +— it's a question relayed from Sam. + +Draft a 3–6 sentence technical answer: + +- If the question is clear, answer it. Name specific tools, flags, + files, or commands wherever you can. +- If you need more info, ask one specific clarifying question and + stop. Don't speculate. +- If the question is outside engineering's scope (legal, billing, + product roadmap), say so and suggest who to ask. + +`publish` to `s-engineer`: + +- `body` — your answer, signed `— Lee`. +- `to` — `[+sam@inbound.postmarkapp.com]`. +- `subject` — `[eng] Re: `. The `[eng]` prefix + tells Sam this is your reply, not a new customer email. + Preserve any existing `Re:` from the subject — just add + `[eng] ` in front. +- `inReplyTo` — `Message.MessageID` (Sam's escalation). +- `threadId` — `Message.ThreadID` — **preserve unchanged**, so + Sam can match your reply back to the original customer + conversation. + +Then exit. + +## Tools (MCP) + +- `subscribe` +- `publish` +- `read_events` + +## Style + +Plain English. Code references in backticks. If you mention an +option or flag, give the exact name. If you don't know, say so — +"I'm not sure; Sam, please route to " is better than a +plausible-looking guess. Sign off `— Lee`. diff --git a/helix-org/demos/email/workers/lee.md b/helix-org/demos/email/workers/lee.md new file mode 100644 index 0000000000..d892059b88 --- /dev/null +++ b/helix-org/demos/email/workers/lee.md @@ -0,0 +1,9 @@ +# Lee + +You are Lee, senior support engineer at helix-org. You like +debugging and dislike vague questions. Plain English with the +specific commands and flags involved. When you don't know, you +say so — making something up to look helpful is worse than +admitting the gap. + +You sign off `— Lee` on its own line. 
diff --git a/helix-org/demos/email/workers/sam.md b/helix-org/demos/email/workers/sam.md new file mode 100644 index 0000000000..9794de769d --- /dev/null +++ b/helix-org/demos/email/workers/sam.md @@ -0,0 +1,13 @@ +# Sam + +You are Sam, customer service for helix-org. Real first name, not +a persona — talk like a person, not a brand voice. Friendly +without being performative. Calm in a complaint. You don't know +everything; you say so when you don't, and you say who can. + +You read what the customer actually asked before you reply. Half +of "support" is repeating their problem back to them clearly so +they know you've got it. + +You sign off `— Sam` on its own line. Not `Best,` not `Cheers,` not +`Warm regards,` — just your name. diff --git a/helix-org/demos/getting-started/README.md b/helix-org/demos/getting-started/README.md new file mode 100644 index 0000000000..cf4dc35921 --- /dev/null +++ b/helix-org/demos/getting-started/README.md @@ -0,0 +1,72 @@ +# Getting Started + +Smallest end-to-end run of helix-org. Bootstrap an Owner, hire an +echo Worker, publish a message, watch it reply, live-edit the Role. +About 90 seconds. + +You drive the org by typing into a `helix-org chat` session — that +exec's `claude` against the owner's MCP endpoint. Same flow a chat +UI on a real server would give you: connect, type, the conversation +persists. + +## Setup + +```bash +cd /home/phil/helix/helix-org +make build +rm -rf /tmp/helix-org-envs /tmp/helix-org-demo.db +``` + +## 1. Start the server (terminal 1) + +```bash +./bin/helix-org serve --db /tmp/helix-org-demo.db --envs-dir /tmp/helix-org-envs +``` + +## 2. Bootstrap and open a chat (terminal 2) + +```bash +./bin/helix-org bootstrap --db /tmp/helix-org-demo.db --envs-dir /tmp/helix-org-envs +./bin/helix-org chat +``` + +You're now in a chat session as `w-owner`. Everything below is +typed into this chat. + +## 3. Hire an echo worker + +> Set up a small echo worker. Make a stream `s-general`. 
Define a +> role `r-echo` whose job is, on hire, to subscribe to `s-general`, +> and on each new event there, publish `echo: `. Create a +> position for that role reporting to me, and hire an AI worker +> `w-echo` for it with grants to subscribe and publish. Then +> `worker_log` on `w-echo` with `wait=30` until you see +> `=== exit: ok ===` so I know the hire activation finished. + +You'll watch the hire activation in chat: `--- session start ---`, +the `subscribe` tool call, then `=== exit: ok ===`. The Worker +exits and will be respawned when an event arrives on `s-general`. + +## 4. Wake the worker + +> Subscribe me to `s-general`. Publish `hello` there. Then +> `read_events` on `s-general` repeatedly with `wait=15` until you +> see both my `hello` and `w-echo`'s `echo: hello` reply (it takes +> the worker ~5–10s to wake and respond). Show me both. + +## 5. Live-edit the role + +> Tweak the `r-echo` role: instead of replying `echo: `, it +> should shout `loud: ` on each event. + +`update_role` rewrites `role.md` in `w-echo`'s Environment in place. +Trigger another publish: + +> publish `hello` on s-general again, then `read_events` with +> `wait=15` until you see w-echo's reply. + +The new behaviour shows up live: `loud: HELLO`. + +## 6. Stop + +Ctrl-C terminal 1. diff --git a/helix-org/demos/github-engineer/README.md b/helix-org/demos/github-engineer/README.md new file mode 100644 index 0000000000..75a0b5d8aa --- /dev/null +++ b/helix-org/demos/github-engineer/README.md @@ -0,0 +1,319 @@ +# GitHub Engineer + +A one-Worker general-purpose software engineer who watches a +GitHub Project v2 Kanban board on a single repo, picks tasks +off the `Todo` column, and drives them through `In Progress` → +`In Review` → a merged PR → `Done`. The owner reviews and +merges the PR; **the engineer never merges**. 
+ +This is the same shape as [`demos/github/`](../github/) — one +role, the github transport, no bespoke MCP tools — but the +engineer here writes code, docs, design, and architecture, not +just docs reviews. The whole engineer is one role file: +[`roles/software-engineer.md`](roles/software-engineer.md). + +About 5 minutes after the github webhook setup. Once the +engineer is hired, everything runs from the helix-org UI and +the GitHub Project board. + +## What this demo shows + +- **A Kanban board as the surface humans use; webhooks as the + trigger the agent reacts to.** The agent doesn't poll the + board. It reacts to standard GitHub webhook events + (`issues.opened`, `pull_request_review.submitted`, + `pull_request.closed`) and drives the Project v2 `Status` + column with `gh project item-edit`. You see a normal Kanban + board; the agent's column moves are indistinguishable from a + human dragging cards. +- **End-to-end task ownership in one role.** Pick up, branch, + implement, run checks, push, open PR, react to review, move + the card to Done after the human merges. All in + [`roles/software-engineer.md`](roles/software-engineer.md). + Edit the role to change behaviour — e.g. require a design + comment before any PR, or forbid touching `*.go` files — + and the next webhook activation reads the new behaviour. +- **The owner gates the merge, the agent gates everything + else.** The role explicitly forbids merging. The owner is + always the last gate before code lands on `main`. The + agent's job is to make the merge button as cheap to press as + possible. +- **No GitHub identity for the agent.** Same as the docs demo: + the agent uses your `gh auth token`. Comments, PRs, branches, + and project board moves are authored as you. Fine for solo + or small-team work; promote to a machine user for shared + repos (see + [`design/github-transport.md`](../../design/github-transport.md)). 
+ +## Prerequisites + +- `gh` authenticated as you with push access to the target repo + (`gh auth status` is green; `gh repo list` shows what you'd + expect). The token also needs the `project` scope so the + engineer can read and move project board items — + `gh auth refresh -s project,read:project` if your token + predates these. +- `helix-org`, `claude`, and `gh` on PATH. +- Port `8080` free on the host, or pick another and pass + `--addr :<port> --public-url http://localhost:<port>` to + `helix-org serve` and tunnel that port instead. +- A public URL pointing at your local helix-org so GitHub can + reach the webhook. + [`cloudflared tunnel --url http://localhost:8080`](https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/install-and-setup/tunnel-guide/local/) + or [`ngrok http 8080`](https://ngrok.com/) work. +- A GitHub repo you own (or have admin on) with a Project v2 + board attached. The board's `Status` field needs **all four** + of these options (case-sensitive): + - `Todo` + - `In Progress` + - `In Review` + - `Done` + + GitHub's default board ships with `Todo`, `In Progress`, + `Done`. Add `In Review` as a status option from the project + settings (Fields → Status → Add option). + + If the repo has no project yet, create and link one: + `gh project create --owner <owner> --title "<title>"` then + `gh project link <number> --owner <owner> --repo <owner>/<repo>`. + Note the project number it prints — you'll paste it into + step 6. + + Recommended: in the project's settings, enable + *Auto-archive items* and on the repo enable + *Automatically delete head branches* after merge — the + engineer leaves card archiving and branch cleanup to GitHub. + +## Setup + +### 1. Build helix-org + +```bash +cd /home/phil/helix/helix-org +make build +``` + +### 2. 
Start the server (terminal 1) + +```bash +cd demos/github-engineer +../../bin/helix-org serve --db /tmp/github-engineer.db --envs-dir /tmp/github-engineer-envs +``` + +If this is the first run against the DB, the server bootstraps +the owner Worker (`w-owner`). + +### 3. Expose helix-org publicly (terminal 2) + +```bash +cloudflared tunnel --url http://localhost:8080 +``` + +Note the public `https://....trycloudflare.com` URL it prints. +You'll paste it into step 5. + +### 4. Open a chat (terminal 3) + +```bash +cd demos/github-engineer +../../bin/helix-org chat --new +``` + +Now you're driving the org through `claude`, acting as +`w-owner`. Every step from here is a chat prompt. + +### 5. Wire up the github transport + +Substitute `<owner>/<repo>` and the tunnel URL from step 3. +Paste this into the chat: + +> Wire up the github transport for repo `<owner>/<repo>` on +> public URL `<public-url>`. +> +> 1. Read the owner's gh token: `gh auth token`. (You're +> running on the same host as the owner; their `gh` is +> already authenticated.) +> 2. Generate a webhook secret: `openssl rand -hex 32`. +> 3. Run `helix-org config set --db /tmp/github-engineer.db +> transport.github` with both values as JSON. +> 4. Register the webhook on the repo: `gh api -X POST +> /repos/<owner>/<repo>/hooks` with `name=web`, `active=true`, +> events `["issues", "issue_comment", "pull_request", +> "pull_request_review", "pull_request_review_comment"]`, +> config `{url: "<public-url>/github/webhook", +> content_type: "json", secret: "<secret>"}`. +> 5. Confirm with `gh api /repos/<owner>/<repo>/hooks` that the +> hook is active. Print the hook ID. + +Nothing about the github token ever appears in your shell +history; it lives only in `transport.github` (operational +config, redacted on read) and on disk in `gh`'s own config. + +### 6. Hire the software engineer + +Substitute `<owner>/<repo>`, your project owner (the user or +org that owns the project), and your project number. Paste: + +> Set up the software engineer for `<owner>/<repo>`. 
Read +> `./roles/software-engineer.md` and create role +> `r-software-engineer` from its body. Create stream +> `s-github` with `transport: github` and config +> `{"repo": "<owner>/<repo>", "events": ["issues", +> "issue_comment", "pull_request", "pull_request_review", +> "pull_request_review_comment"]}`. Create position +> `p-software-engineer` under `p-root` with that role. Hire +> AI worker `w-software-engineer`; identity is: +> +> ``` +> You are the software engineer for <owner>/<repo>. +> +> Project board: +> - owner: <project-owner> +> - number: <project-number> +> +> Your role text describes how to discover the project IDs and +> move cards. Cache them in ./project-config.json on first +> activation. +> ``` +> +> Grant the tools listed in the role's `Tools (MCP)` section. +> Then `worker_log` on `w-software-engineer` with `wait=60` +> until you see `=== exit: ok ===`. The on-hire activation +> runs project-board discovery; you should see some +> `gh project ...` commands in the log. + +When the chat agent reports the on-hire activation finished, +the engineer is live and listening for github events. + +--- + +**From here on the demo is UI-driven.** Open the helix-org UI +at <http://localhost:8080> and your GitHub Project board in +two side-by-side windows. + +## Driving the demo + +### 7. Add a task to the board + +In the GitHub UI: open your Project → `Todo` column → `+ Add +item` → `Create new issue`. Title it like a normal task — for +example, "Add a /version endpoint that returns the current +commit SHA". Write a body that describes what you want. + +GitHub fires `issues.opened`. The webhook hits helix-org. The +engineer wakes up. + +### 8. Watch the engineer in the UI + +In the helix-org UI, click `w-software-engineer`. You should +see, roughly in order: + +1. The card moves from `Todo` → `In Progress` on your project + board. +2. A comment lands on the issue: "Picked up — branching off + `main`." +3. A branch shows up on the repo: `issue-N-<slug>`. +4. A PR opens, body says `Closes #N`. +5. The card moves to `In Review`. 
+ +If the task was genuinely ambiguous, the engineer will instead +post one clarifying question on the issue and stop. Answer in +the issue thread; the resulting `issue_comment.created` event +reactivates them. + +### 9. Review and merge + +Open the PR in the GitHub UI. Read the diff. + +- If you want changes, leave a `request_changes` review with + line comments. The engineer will push fixes and reply with a + one-line summary of what changed. +- If you're happy, approve and merge. + +The engineer **does not merge**. That's your gate. + +### 10. After the merge + +GitHub fires `pull_request.closed` with `merged: true`. The +engineer wakes, finds the issue the PR closed, and moves the +card to `Done`. They comment on the issue: "Merged — closing +the loop." They delete the branch. + +If you closed the PR without merging, the engineer moves the +card back to `Todo` instead and comments "PR closed without +merge — back on the board." + +### 11. Live-edit the role + +Edit `roles/software-engineer.md` to change behaviour — say, +require a one-paragraph design note in every PR body, or block +on touching specific paths. Then in chat: + +> Update `r-software-engineer`: replace its content with the +> current contents of `./roles/software-engineer.md`. + +The next webhook activation reads the new role text and behaves +accordingly. No restarts. + +## Tear it down + +In chat, mirroring step 5: + +> Tear down the github-engineer demo for `<owner>/<repo>`. +> +> 1. List webhooks on the repo: `gh api /repos/<owner>/<repo>/hooks`. +> 2. Find the one whose `config.url` starts with +> `<public-url>/github/webhook` and delete it: +> `gh api -X DELETE /repos/<owner>/<repo>/hooks/<hook-id>`. +> 3. Run `helix-org config delete --db /tmp/github-engineer.db +> transport.github`. +> 4. Confirm both. + +The token isn't deleted because the agent never created one — +it's your existing `gh auth token`, which you go on using. + +Then Ctrl-C terminals 1 and 2. 
+ +## What this shows + +- **Same transport, different role.** The github transport is + unchanged from the docs demo. All the difference is in the + role text. A code reviewer, a release-notes writer, a + security-triage bot — same pattern, different role. +- **Project boards as the human-visible workflow.** The agent + drives `Status` columns directly via `gh project item-edit`. + You watch a normal GitHub Kanban board; the agent's moves + look like any other human dragging cards. +- **PR review is the gate.** The agent does not push to `main`, + does not merge, does not close issues directly. Every change + ships through a PR you review. The role's "On + `pull_request.closed && merged`" branch is the closure of + the loop, not the start of one. + +## What this doesn't cover (yet) + +- **`projects_v2_item` events.** A draft card (one without a + linked issue) added directly to the board doesn't fire + `issues.opened` — only `projects_v2_item.created`. That + event is organization-scoped, not repo-scoped, so the demo's + repo webhook doesn't see it. The engineer wakes only when an + *issue* is created. Promoting the transport to handle + organization webhooks is a config change; the role can be + extended to react to `projects_v2_item` without a code + change. +- **Concurrency on one task.** If two `issues.opened` events + arrive for the same issue (e.g. via a project automation + rule), the engineer might branch twice. Production wants + per-issue locking; the demo doesn't. +- **Long-running tasks.** Each activation is a one-shot claude + invocation. A multi-day implementation that needs ten + sequential thinking sessions has to be expressible as N + short reactivations triggered by review feedback or comments. + For most issue-sized work that's fine. +- **Branch hygiene on dead PRs.** The engineer deletes branches + after merge, but doesn't garbage-collect orphan branches from + PRs that closed without merge. Those need a sweep, not + implemented here. 
+- **Multiple repos.** One Stream per repo, one engineer per + repo. Fan-in to one Stream with the repo in `Extra` is a + role + stream config edit, not a code change. diff --git a/helix-org/demos/github-engineer/roles/software-engineer.md b/helix-org/demos/github-engineer/roles/software-engineer.md new file mode 100644 index 0000000000..223af58f38 --- /dev/null +++ b/helix-org/demos/github-engineer/roles/software-engineer.md @@ -0,0 +1,233 @@ +# Role: Software Engineer + +You are a general-purpose software engineer for one GitHub repo. +You watch a Kanban-style Project v2 board on that repo and pick +up tasks as they land in the `Todo` column. You drive each task +through the Kanban phases — `In Progress`, `In Review` — until +it lands as a merged PR. You handle code, documentation, design, +and architecture work — anything that ships through a pull +request. + +The human owner reviews and merges the PR; **you do not merge**. +Once they merge, you move the card to `Done`. + +## Tools (MCP) + +`subscribe`, `read_events`, `dm`. + +The Environment has `bash`, `gh`, `git`, `curl`, scoped to the +repo configured on `s-github`. **All GitHub actions** — picking +up tasks, branching, committing, pushing, opening PRs, moving +project cards, commenting — go through `gh` and `git`. The MCP +surface stays small; the shell does the work. + +## Streams + +- `s-github` — inbound github events from your repo + (`transport: github`). One Event per webhook delivery. + `Message.Subject` is the upstream title (issue title, PR + title, …). `Message.Body` is the upstream user text. + `Message.From` is the github user who triggered the event. + `Message.Extra` is the webhook body verbatim with `event` at + top-level (matching `X-GitHub-Event`). So `Extra.action`, + `Extra.repository.full_name`, `Extra.issue.number`, + `Extra.pull_request.merged` etc. are all at exactly the JSON + paths GitHub documents. Subscribe on hire. 
+ +## Project board + +The repo has a Kanban-style Project v2 board with these `Status` +columns: + +- `Todo` — new tasks land here +- `In Progress` — you set this when you pick a task up +- `In Review` — you set this when you open the PR +- `Done` — you set this after the human merges the PR + +Your `identity.md` carries the project owner and project number. +Read them on every activation; everything else is discovered. + +### Discovery cache + +On first activation, run discovery and cache IDs in +`./project-config.json` (your env dir) so future activations are +fast: + +```json +{ + "owner": "<project-owner>", + "project_number": <project-number>, + "project_id": "PVT_...", + "status_field_id": "PVTSSF_...", + "status_options": { + "Todo": "<option-id>", + "In Progress": "<option-id>", + "In Review": "<option-id>", + "Done": "<option-id>" + } +} +``` + +Discovery commands: + +- `gh project list --owner <project-owner> --format json` — find the + project; capture `id`. +- `gh project field-list <project-number> --owner <project-owner> --format json` + — find the `Status` single-select field; capture its `id` and + the `id` of each option. + +If `In Progress`, `In Review`, or `Done` is missing as a status +option, **stop**: comment on the most recent issue (or DM the +owner) saying which status options are missing. Do not invent +columns; the owner sets the board up. + +### Moving a card + +``` +gh project item-edit \ + --id <item-id> \ + --field-id <status-field-id> \ + --project-id <project-id> \ + --single-select-option-id <option-id> +``` + +Get the item id for an issue with: + +``` +gh project item-list <project-number> --owner <project-owner> --format json \ + --limit 200 \ + --jq '.items[] | select(.content.number == <issue-number>) | .id' +``` + +If the issue isn't on the board yet, add it first: + +``` +gh project item-add <project-number> --owner <project-owner> --url <issue-url> +``` + +## Triggers + +**On hire.** `subscribe` to `s-github`. Run discovery (above) and +write `./project-config.json`. Exit. + +**On any new event on `s-github`.** Branch on +`Message.Extra.event` and `Message.Extra.action`: + +### A. 
New task — `event=issues, action=opened` + +Read `Message.Subject` (title), `Message.Body` (description), +and `Message.Extra.issue.number` (issue number). + +1. Look up the project item id for this issue. If the issue is + not on the board, add it. +2. Confirm it's in the `Todo` column. If it's in any other + column, ignore — the owner is doing something else with it. +3. Move it to `In Progress`. +4. Comment on the issue: "Picked up — branching off `main`." +5. Plan the work. Read enough of the repo to ground yourself + (`gh repo view`, look at `README.md`, `Makefile`, the + relevant directories). If the task is genuinely ambiguous, + ask **one** specific clarifying question on the issue and + stop. Don't guess at scope. +6. Implement. Branch off `main`: + `git checkout -b issue--`. Make the change. + Run whatever the repo provides for verification — tests, + lint, build (`make check`, `npm test`, `pytest`, whatever's + there). If checks fail, fix them before pushing. +7. Commit with Conventional Commits, referencing the issue: + `feat: ... (closes #)`. Push the branch. +8. Open a PR: + ``` + gh pr create --base main --head \ + --title "" \ + --body "Closes #\n\n" + ``` +9. Move the card to `In Review`. +10. Comment on the issue with the PR link. Exit. + +### B. Review feedback — `event=pull_request_review, action=submitted` + +Look at `Message.Extra.review.state`: + +- `changes_requested` — read the review body and each line + comment. Address every comment in code, push to the same + branch, and reply to the review with a one-line summary of + what you changed. Don't reopen scope — only fix what was + asked. +- `approved` — do nothing. The owner merges when ready. +- `commented` — treat each comment as a question. Reply on the + relevant line for code questions; reply on the PR for + approach questions. + +### C. 
PR merged — `event=pull_request, action=closed` with `Extra.pull_request.merged == true` + +Find the issue this PR closed: + +``` +gh pr view --json closingIssuesReferences \ + --jq '.closingIssuesReferences[].number' +``` + +Move that issue's card to `Done`. Comment on the issue: +"Merged — closing the loop." Delete the local branch +(`git branch -D ...`) and push the deletion +(`git push origin --delete ...`). Exit. + +### D. PR closed without merge — `event=pull_request, action=closed` with `Extra.pull_request.merged == false` + +The owner rejected the change. Move the linked issue's card +back to `Todo`. Comment on the issue: "PR closed without merge — +back on the board." Exit. + +### E. Comment on an in-flight issue — `event=issue_comment, action=created` or `event=pull_request_review_comment, action=created` + +`Message.Body` is the comment text. If it's a scope change or a +question, acknowledge it on the issue/PR and either update the +in-flight branch or post a clarifying question. If it's chatter +or you're not the author of the surrounding work, ignore. + +### F. Other event/action combinations + +Ignore. The board moves are visible in the project UI; new +events will reactivate you when there's something to do. + +## Constraints + +- **You do not merge PRs.** Only the owner merges. If the owner + asks you to merge, say "I don't merge — please merge yourself + when you're satisfied." +- **You do not push to `main`.** Always branch. +- **One PR per task.** Don't bundle unrelated changes into a + single PR. Don't open a second PR for the same task. +- **Stay in lane on the board.** If a task is in any column + other than `Todo` when you first see it, leave it alone — the + owner is handling it some other way. +- **If you can't finish a task, say so.** Move the card back to + `Todo` and comment with what's blocking you (missing context, + external dependency, ambiguous scope, repo doesn't have the + tooling you need). +- One comment per event. 
The next event will reactivate you; + don't pile follow-ups on a single issue. +- Do not modify your own Role. +- **If something at the setup level looks wrong, DM the owner + and stop.** Setup-level means the environment isn't what the + role assumes — `gh` isn't authenticated or is missing + scopes, the project board doesn't exist or has the wrong + status options, the repo configured on `s-github` isn't + reachable, a required shell tool isn't on PATH, project + discovery returns nothing. Don't muddle through, don't guess, + don't open a "setup needs attention" issue (the repo may not + be the right channel). DM `w-owner` with one short message + saying what's wrong and what you need, then exit. The owner + fixes it; the next event reactivates you. + +## Style + +Lead with the action. "Picked up — branching off main." "Pushed +fix for the lint failure on line 42." "PR opened: ." +"Merged — closing the loop." No "Thanks for the issue!", no +"Just to clarify". + +Sign off `— eng` on substantive PR review responses and +substantive issue replies. Skip the sign-off on one-line +acknowledgements. diff --git a/helix-org/demos/github/README.md b/helix-org/demos/github/README.md new file mode 100644 index 0000000000..2953234ea5 --- /dev/null +++ b/helix-org/demos/github/README.md @@ -0,0 +1,263 @@ +# GitHub + +A one-Worker docs engineer who lives in your GitHub repo. They +watch issues and pull requests as they land, label docs issues +on sight, and review PRs that touch documentation or get the +`docs` label. The whole engineer is one role file. + +About 5 minutes (most of it GitHub webhook setup the first time). + +## What this demo shows + +- **GitHub events as a Stream.** Issues, PRs, comments, labels + applied — all canonical `Message`s on `s-github`. The Worker + doesn't know about webhooks; it sees Events. GitHub itself + calls this an "activity feed"; the transport just makes that + feed addressable as a Stream. 
+- **Inbound transport, outbound shell.** `s-github` is + inbound-only. Webhook deliveries become Events; nothing + published *to* `s-github` goes anywhere. Acting on Events — + labelling, reviewing, commenting, opening PRs — is `gh` in the + Worker's Environment. GitHub isn't a messaging protocol with + one outbound shape; it's a structured-action surface, and the + shell already speaks it. +- **Routing by label, not by identity.** This engineer has no + GitHub user account of its own. The Worker watches the firehose + and picks out work via the `docs` label and changed-file paths + — both visible in the standard GitHub UI, both easy to apply by + hand or with `gh`. (Promoting the bot to a real GitHub user so + you can `--assignee` and `--add-reviewer` it natively is a + one-time setup; see "What this doesn't cover".) +- **One curated file.** [`roles/doc-engineer.md`](roles/doc-engineer.md) + is the only thing on disk. The streams, the worker identity, + the grants, the webhook wiring — all generated at hire time + from one chat prompt. +- **Role drives behaviour.** What counts as a docs issue, when + to approve vs request changes, how to phrase a review — all in + the role text. Edit the role, run `update_role`, the next + webhook lands on the new behaviour. + +## Prerequisites + +- `gh` already authenticated as **you** with access to the + repos you want to wire up (`gh auth status` is green; `gh + repo list` shows what you'd expect). The chat session below + reuses this auth — your existing token is what the Worker + uses to act on the repo. Labels, comments, and reviews will + be authored as your user. Fine for solo work; for a shared + repo you'll want a separate machine user (see + [`design/github-transport.md`](../../design/github-transport.md)). +- A public URL for your local helix-org so GitHub can reach the + webhook. `cloudflared tunnel --url http://localhost:8080` or + `ngrok http 8080`. +- `helix-org`, `claude`, and `gh` on PATH. 
+ +## Setup + +```bash +cd /home/phil/helix/helix-org +make build +rm -rf /tmp/github-envs /tmp/github.db +``` + +## 1. Start the server (terminal 1) + +```bash +cd demos/github +../../bin/helix-org serve --db /tmp/github.db --envs-dir /tmp/github-envs +``` + +## 2. Expose helix-org publicly (terminal 2) + +```bash +cloudflared tunnel --url http://localhost:8080 +``` + +Note the public URL it prints. + +## 3. Bootstrap and open a chat (terminal 3) + +```bash +cd demos/github +../../bin/helix-org bootstrap --db /tmp/github.db --envs-dir /tmp/github-envs +../../bin/helix-org chat --new +``` + +## 4. Pick a repo and wire up the transport + +In chat — substitute `/` and the tunnel URL from +step 2: + +> Wire up the github transport for repo `/` on +> public URL ``. +> +> 1. Read the owner's gh token: `gh auth token`. (You're running +> on the same host as the owner; their gh is already +> authenticated.) +> 2. Generate a webhook secret: `openssl rand -hex 32`. +> 3. Run `helix-org config set transport.github` with both +> values as JSON. +> 4. Register the webhook on the repo: `gh api -X POST +> /repos///hooks` with name=web, active=true, +> events `["issues", "issue_comment", "pull_request", +> "pull_request_review", "pull_request_review_comment"]`, +> config `{url: "/github/webhook", content_type: +> "json", secret: ""}`. +> 5. Confirm with `gh api /repos///hooks` that the +> hook is active. Print the hook ID. + +The chat agent does it all with the owner's existing `gh`. +Nothing about the github token ever appears in your shell +history; it lives only in `transport.github` (operational +config, never sent to the LLM after this) and on disk in `gh`'s +own config. + +If you have many repos and want to pick interactively, ask +`gh repo list --limit 50` first and decide before you start +this prompt. + +## 5. Hire the docs engineer + +> Set up the documentation engineer for `/`. Read +> `./roles/doc-engineer.md` and create role `r-doc-engineer` +> from its body. 
Create stream `s-github` with `transport: +> github` and config `{"repo": "/", "events": +> ["issues", "issue_comment", "pull_request", +> "pull_request_review", "pull_request_review_comment"]}`. +> Create stream `s-tick-daily` (`transport: local`). Create +> position `p-doc-engineer` under `p-root` with that role. Hire +> AI worker `w-doc-engineer`; identity is "You are the docs +> engineer for `/`." Grant the tools listed in the +> role's `Tools (MCP)` section. Then `worker_log` on +> `w-doc-engineer` with `wait=60` until you see +> `=== exit: ok ===`. + +## 6. Open an issue and watch the engineer triage it + +```bash +gh issue create \ + --repo / \ + --title "README: setup steps mention an env var that no longer exists" \ + --body "Step 3 references HELIX_FOO; the code reads HELIX_BAR now." +``` + +In chat: + +> Subscribe me to `s-github` and `read_events` with `wait=60` until +> the docs engineer reacts. Show me the events as they land. + +GitHub fires `issues.opened` → the transport posts to `s-github` → +the engineer wakes, reads the issue, decides it's a docs issue, +and runs `gh issue edit --add-label docs`. Refresh the issue +in the GitHub UI: the `docs` label is on it. + +## 7. Pull the engineer into a PR + +Open a PR (any PR) and label it `docs`: + +```bash +gh pr edit --add-label docs +``` + +GitHub fires `pull_request.labeled` with `label.name == "docs"` → +the engineer wakes, runs `gh pr view --json files`, runs +`gh pr diff `, and posts a review with `gh pr review`. If the +prose is clear and the commands run, they approve; otherwise they +request changes with line-specific asks. + +PRs that touch `*.md`, `README*`, or `docs/**` get a review +automatically — no label needed. The label is for "code-only PRs +where I still want docs eyes on the change". + +## 8. Watch the daily sweep + +Wait until 9am, or trigger the tick manually in chat: + +> Publish to `s-tick-daily`: `"sweep"`. 
+ +The engineer pulls every open issue without a `docs` label +(`gh issue list --search "-label:docs"`), classifies each, and +labels what fits. Useful for backfilling on a repo that's been +running without them. + +## 9. Live-edit the role + +Edit `roles/doc-engineer.md` — soften the threshold for the +`docs` label, or tighten it, or add a new event type to handle. +Then in chat: + +> Update the `r-doc-engineer` role: replace its content with +> the current contents of `./roles/doc-engineer.md`. + +The next webhook activation reads the new content; behaviour +shifts immediately. + +## 10. Tear it down + +Cleanup is a chat prompt that mirrors step 4: + +> Tear down the github transport for `/`. +> +> 1. List webhooks on the repo +> (`gh api /repos///hooks`). +> 2. Find the one whose `config.url` starts with +> `/github/webhook`. +> 3. Delete it: `gh api -X DELETE +> /repos///hooks/`. +> 4. Run `helix-org config delete transport.github`. + +The token isn't deleted because we didn't create one — it's +your existing `gh auth token`, which you go on using. + +Then Ctrl-C terminals 1 and 2. + +## What this shows + +- **Inbound is the transport's job; outbound is the shell's.** + GitHub events become Events on a Stream. Acting on those Events + — labelling, reviewing, commenting — is `gh` in the Worker's + Environment. There's no MCP tool per github action and there + doesn't need to be: the role describes the `gh` invocation, and + if the workflow changes, only the role changes. +- **One Stream, many event types.** `s-github` carries every + webhook delivery the stream config opts into. The role branches + on `Message.Extra.event` + `Message.Extra.action` (e.g. + `pull_request` / `labeled`); `Subject` and `Body` are the + upstream title and user text used verbatim. Adding a new event + type is a role edit plus a stream config update, not a code + change. 
+- **Labels are the routing mechanism.** Without a github identity + for the engineer, we can't lean on `--assignee` or + `--add-reviewer`. A label is the next-best primitive: visible + in the standard UI, applicable from the CLI in one flag, and + fires its own webhook event. The role's "is this for me?" check + reduces to "does this PR/issue have the `docs` label, or touch + a docs path?". +- **Review-of-record for docs.** A PR that touches `README.md` or + `docs/**` always gets a real review from someone who reads docs + for a living. Code-only PRs don't, unless you opt in with the + label. + +## What this doesn't cover (yet) + +- **Native GitHub identity.** Promoting the engineer to a real + GitHub user (a "machine user") makes `--assignee`, + `--add-reviewer`, and `@docs-bot` autocomplete work the way you + expect. Five minutes of one-time signup. The transport doesn't + change; the role gains an `actor` matcher. See the design doc + for the full setup. +- **Rate-limiting and backoff.** A burst of webhook deliveries + could fan out to a burst of activations. Production wants a + per-Worker rate limit and a `gh` retry policy on 403s; this + demo doesn't show either. +- **Multiple repos.** One Stream per repo, one Worker per Stream + is fine for now. A docs engineer who covers an org would need + either fan-in to one Stream with the repo in `Extra`, or a + Worker per repo — both are role edits, not code changes. +- **Drafting docs PRs.** This engineer reviews; they don't yet + open PRs of their own to fix typos or stale commands. The role + has `gh` and `git`, so adding a "fix-it" trigger is a role + edit; left out today to keep the demo tight. +- **Anything outside docs.** A code reviewer, a triage bot, a + release-notes writer — all the same pattern, different role + text. 
diff --git a/helix-org/demos/github/demo.cast b/helix-org/demos/github/demo.cast new file mode 100644 index 0000000000..42d4ca079e --- /dev/null +++ b/helix-org/demos/github/demo.cast @@ -0,0 +1,37 @@ +{"version": 2, "width": 120, "height": 36, "timestamp": 1777382809, "idle_time_limit": 2.0, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}} +[0.004082, "o", "\u001b[H\u001b[2J\u001b[3J"] +[0.004729, "o", "══════════════════════════════════════════════════════════════\r\n helix-org · github transport demo\r\n demos/github/README.md · one role file = one docs engineer\r\n══════════════════════════════════════════════════════════════\r\n"] +[0.004791, "o", "\r\n\u001b[1;36m# One Worker watches a real GitHub repo via webhooks. Labels docs\u001b[0m\r\n"] +[0.004886, "o", "\r\n\u001b[1;36m# issues, reviews PRs that touch *.md or get the 'docs' label,\u001b[0m\r\n\r\n\u001b[1;36m# runs a daily sweep. The whole engineer is one role file.\u001b[0m\r\n"] +[1.006912, "o", "\r\n\u001b[1;36m# 1. Start helix-org server (background)\u001b[0m\r\n\u001b[1;33m$ ./bin/helix-org serve --db /tmp/github-demo.db --envs-dir /tmp/github-demo-envs --addr :8181 &\u001b[0m\r\n"] +[1.522726, "o", "\u001b[32m ✓ github transport enabled, listening on :8181\u001b[0m\r\n\r\n\u001b[1;36m# 2. Expose helix-org publicly (cloudflared quick tunnel)\u001b[0m\r\n\u001b[1;33m$ cloudflared tunnel --url http://localhost:8181 &\u001b[0m\r\n"] +[5.565007, "o", "\u001b[32m ✓ tunnel: https://shaved-contained-important-floor.trycloudflare.com\u001b[0m\r\n\r\n\u001b[1;36m# 3. Bootstrap the owner Worker\u001b[0m\r\n\u001b[1;33m$ ./bin/helix-org bootstrap --db /tmp/github-demo.db --envs-dir /tmp/github-demo-envs\u001b[0m\r\n"] +[5.625736, "o", "{\r\n \"environmentPath\": \"/tmp/github-demo-envs/w-owner\",\r\n \"positionId\": \"p-root\",\r\n \"roleId\": \"r-owner\",\r\n \"workerId\": \"w-owner\"\r\n}\r\n"] +[5.62628, "o", "\r\n\u001b[1;36m# 4. 
Pick a repo, wire up the transport — one chat prompt does it all\u001b[0m\r\n"] +[5.626466, "o", "\u001b[90m (reads gh auth token, generates webhook secret, sets transport.github,\u001b[0m\r\n\u001b[90m registers webhook on philwinder/helix-org-github-demo-record via gh api)\u001b[0m\r\n"] +[5.62661, "o", "\u001b[1;33m$ helix-org chat -p 'Wire up github transport for philwinder/helix-org-github-demo-record ...'\u001b[0m\r\n"] +[41.065769, "o", "\u001b[32m ✓ transport.github set; webhook id 613143048 active on philwinder/helix-org-github-demo-record\u001b[0m\r\n\u001b[90m GitHub's ping has already arrived:\u001b[0m\r\n"] +[41.07037, "o", " time=2026-04-28T15:27:26.025+02:00 level=INFO msg=\"github.inbound: no matching streams\" repo=philwinder/helix-org-github-demo-record event=ping delivery=fe4451f8-4305-11f1-891c-42adc8117845\r\n"] +[41.070925, "o", "\r\n\u001b[1;36m# 5. Hire the docs engineer from ./roles/doc-engineer.md\u001b[0m\r\n"] +[41.071242, "o", "\u001b[1;33m$ helix-org chat -p 'create role, streams, position, hire w-doc-engineer'\u001b[0m\r\n"] +[141.477882, "o", "\u001b[32m ✓ w-doc-engineer hired; subscribed to s-github + s-tick-daily; exit ok\u001b[0m\r\n\r\n\u001b[1;36m# 6. Open a docs-flavoured issue on the real repo\u001b[0m\r\n\u001b[1;33m$ gh issue create --repo philwinder/helix-org-github-demo-record --title '... env var that no longer exists'\u001b[0m\r\n"] +[142.633626, "o", "\u001b[32m ✓ https://github.com/philwinder/helix-org-github-demo-record/issues/3\u001b[0m\r\n\r\n\u001b[1;36m# 7. 
GitHub fires the webhook → helix-org dispatches → engineer wakes\u001b[0m\r\n\u001b[90m (waiting for the engineer to label + comment...)\u001b[0m\r\n"] +[191.388224, "o", "\u001b[1;33m$ gh api /repos/philwinder/helix-org-github-demo-record/issues/3 --jq '{labels:[.labels[].name],comments:.comments}'\u001b[0m\r\n"] +[191.741512, "o", "\u001b[1;38m{\u001b[m\r\n \u001b[1;34m\"comments\"\u001b[m\u001b[1;38m:\u001b[m 0\u001b[1;38m,\u001b[m\r\n \u001b[1;34m\"labels\"\u001b[m\u001b[1;38m:\u001b[m \u001b[1;38m[\u001b[m\r\n \u001b[32m\"docs\"\u001b[m\r\n \u001b[1;38m]\u001b[m\r\n\u001b[1;38m}\u001b[m\r\n"] +[191.745401, "o", "\u001b[90m Engineer's comment:\u001b[0m\r\n"] +[192.090417, "o", "\r\n\u001b[1;36m# 8. Open a PR that touches README; engineer auto-reviews docs paths\u001b[0m\r\n"] +[192.090726, "o", "\u001b[1;33m$ git checkout -b docs-update; edit README.md; gh pr create\u001b[0m\r\n"] +[193.660237, "o", "branch 'docs-update' set up to track 'origin/docs-update'.\r\n"] +[197.121188, "o", "\u001b[32m ✓ https://github.com/philwinder/helix-org-github-demo-record/pull/4\u001b[0m\r\n"] +[197.12122, "o", "\u001b[90m (waiting for the engineer to review...)\u001b[0m\r\n"] +[371.790398, "o", "\u001b[1;33m$ gh api /repos/philwinder/helix-org-github-demo-record/pulls/4/reviews\u001b[0m\r\n"] +[372.541605, "o", " {\"body\":\"Docs review (can't formally request changes — same account as PR author).\\n\\nTwo concrete issues in the new Setup section:\\n\\n- Step 2 (`export HELIX_FOO=1`): the code reads `HELIX_BAR` (see #3). Change `HELIX_FOO` → `HELIX_BAR`.\\n- Step 1 (`brew install helix`): no such formula exists. Drop the brew line or replace it with the real install command.\\n\\nStep 3 (`helix start`) is fine. Treat this as request-changes; please don't merge until both are resolved.\\n\\n— docs\"}\r\n"] +[372.545495, "o", "\r\n\u001b[1;36m# 9. 
Trigger the daily sweep manually (publish to s-tick-daily)\u001b[0m\r\n\u001b[1;33m$ helix-org chat -p 'Publish to s-tick-daily: \"sweep\"'\u001b[0m\r\n"] +[402.026269, "o", "\u001b[32m ✓ sweep complete\u001b[0m\r\n\r\n\u001b[1;36m# 10. Tear down — chat-driven cleanup of webhook + config\u001b[0m\r\n\u001b[1;33m$ helix-org chat -p 'Tear down github transport for philwinder/helix-org-github-demo-record'\u001b[0m\r\n"] +[417.585979, "o", "\u001b[1;33m$ gh api /repos/philwinder/helix-org-github-demo-record/hooks --jq 'length'\u001b[0m\r\n"] +[417.900611, "o", "0\r\n"] +[417.90453, "o", "\u001b[1;33m$ ./bin/helix-org config get --db /tmp/github-demo.db transport.github\u001b[0m\r\n"] +[417.926459, "o", "transport.github: (not set; no default)\r\n"] +[417.928448, "o", "\r\n\u001b[1;36m# 11. Stop server + tunnel\u001b[0m\r\n"] +[417.94114, "o", "\u001b[32m ✓ done.\u001b[0m\r\n"] +[417.943582, "o", "\r\n──────────────────────────────────────────────────────────────\r\n Summary\r\n • One role file → one Worker → real GitHub webhook flow\r\n • issues.opened → engineer labeled + commented\r\n • pull_request.* → engineer reviewed (line-numbered)\r\n • s-tick-daily → backfill sweep on unlabeled issues\r\n • teardown → webhook deleted, config cleared\r\n──────────────────────────────────────────────────────────────\r\n"] diff --git a/helix-org/demos/github/roles/doc-engineer.md b/helix-org/demos/github/roles/doc-engineer.md new file mode 100644 index 0000000000..017f67ff81 --- /dev/null +++ b/helix-org/demos/github/roles/doc-engineer.md @@ -0,0 +1,142 @@ +# Role: Documentation Engineer + +You are the documentation engineer for one GitHub repo. You watch +issues and pull requests as they land, label docs-related issues +on sight, and review PRs that touch documentation or get pulled +in by a `docs` label. You do not own the code; you own the docs. + +## Tools (MCP) + +`subscribe`, `read_events`. 
+ +The Environment has `bash`, `gh`, `git`, `curl`, scoped to the +repo configured on `s-github`. **All GitHub actions** — labelling, +reviewing, commenting, opening PRs — go through `gh`. The MCP +surface stays small; the shell does the work. + +## Streams + +- `s-github` — inbound github events from your repo + (`transport: github`). One Event per webhook delivery. + `Message.Subject` is the upstream title (issue title, PR + title, …) used verbatim. `Message.Body` is the upstream user + text (issue body, comment body, review body) used verbatim. + `Message.From` is the github user who triggered the event. + `Message.Extra` is the webhook body verbatim, with one + synthetic top-level key added (`event`, e.g. `"pull_request"`, + matching GitHub's `X-GitHub-Event` header). So `Extra.action`, + `Extra.repository.full_name`, `Extra.label.name`, + `Extra.pull_request.number` etc. are all at exactly the JSON + paths GitHub documents — no helix wrapper, no curation. + Subscribe on hire. +- `s-tick-daily` — 9am tick for the docs-issue sweep. Subscribe on + hire. + +## Triggers + +**On hire.** `subscribe` to `s-github` and `s-tick-daily`. Exit. + +**On any new event on `s-github`.** Branch on +`Message.Extra.event` and `Message.Extra.action`: + +### A. `event=issues, action=opened` + +Read `Message.Subject` (issue title) and `Message.Body` (issue +body). Decide: is this a documentation issue? +Strong signals — mentions of "docs", "README", "guide", +"tutorial", "API reference", "examples"; reports of confusing, +missing, or out-of-date documentation; requests to clarify +behaviour that's already implemented in code. + +If yes: +`gh issue edit <number> --add-label docs`. If you have something +concrete to add — a pointer to the right file, a confirmation +that the docs do say X — leave a one-line comment via +`gh issue comment <number> -b "..."`. If not, the label alone is +enough. + +If no, ignore. + +### B. 
`event=pull_request, action=opened|synchronize` + +`gh pr view <number> --json files,title,body`. If any changed +file matches `*.md`, `README*`, `docs/**`, or another +documentation convention you can see in the repo, you are the +review of record for the docs portion. Go to (D). + +If no docs files are touched, ignore — wait for someone to apply +the `docs` label if they want your eyes. + +### C. `event=pull_request, action=labeled` with the `docs` label + +Read `Message.Extra.label.name`. If it's not `docs`, ignore. +Otherwise: someone has explicitly pulled you into this PR. Even +if it doesn't touch docs paths, you review. Go to (D). + +### D. Reviewing a PR (shared by B and C) + +- Read the diff: `gh pr diff <number>`. +- Approve via `gh pr review <number> --approve -b "..."` if the + prose is clear, the commands run, and it's consistent with the + rest of the docs. +- Otherwise `gh pr review <number> --request-changes -b "..."` + with **specific** asks — typo on line N, command in §X is + stale, this contradicts `docs/foo.md`. Don't request changes + without a concrete reason. + +You do not review the code. If a PR touches both code and docs, +review only the docs portion and say so in your review body. + +### E. `event=issue_comment, action=created` or `event=pull_request_review_comment, action=created` + +`Message.Body` is the comment text. If it asks a docs question +or reacts to your review, respond with `gh issue comment` or +`gh pr comment`. Otherwise ignore. + +Stay in your lane — docs voice, not code voice. If they're +asking about code correctness, say "I'm the docs reviewer; +a code reviewer would know" and stop. + +### Other event/action combinations + +Ignore. Don't comment, don't label, don't react. In particular, +don't react to `event=issues, action=labeled` — the `docs` +label gets added by you in (A) and adding it shouldn't bounce +you back through. + +**On `s-tick-daily`.** Run the sweep: +`gh issue list --state open --search "-label:docs" +--json number,title,body --limit 100`. 
+For each issue, decide as in (A). Add the `docs` label where it +fits. Don't comment unless you label. + +## Maintaining the README + +You are the review of record on changes to `README.md` and +anything under `docs/`. Block on: + +- Commands that don't run as written. +- Claims about behaviour that don't match the current code. +- Drift between the README and the rest of `docs/`. + +You do *not* rewrite contributors' prose for taste. Concrete +errors only. + +## Constraints + +- You comment, label, and review. You do not push to `main`, do + not merge PRs, do not close issues. +- You are the docs voice. Defer code-correctness questions. +- Don't pile comments on one issue or PR — one review, one + comment, then exit. The next event will reactivate you. +- Do not modify your own Role. + +## Style + +Lead with the finding. No "Thanks for the PR!", no "Just a few +small things:". If you're approving, say so in one line. If +you're requesting changes, list the changes — line numbers where +they help. + +Sign off `— docs` on its own line on PR reviews and substantive +issue comments. Skip the sign-off on one-line acknowledgements. diff --git a/helix-org/demos/manufacturing/README.md b/helix-org/demos/manufacturing/README.md new file mode 100644 index 0000000000..f03c67c1cb --- /dev/null +++ b/helix-org/demos/manufacturing/README.md @@ -0,0 +1,464 @@ +# Manufacturing — NCR Triage + +A factory-floor demo. An operator raises a Non-Conformance Report on +a tablet; in 60–90 seconds, three channels light up with a +containment plan, customer apology drafts, and a held supplier +email. The supervisor approves on Slack. The agent confirms back. +The audience watches humans make three decisions instead of chasing +data across seven systems. + +About 8 minutes start-to-finish, including the pitch and outcome +slide. The active demo on stage is ~2 minutes of cascade. + +> **Read this whole README before going on stage.** There is one +> stable script. Stick to it. 
Ad-libbing in front of a crowd is how +> demos die. + +## What the audience sees + +- **Two browser windows tiled side-by-side**: + 1. The mock-channels phone view at `http://localhost:7765/` — + looks like a phone with three tabs: Email, Slack, SMS. + 2. (Optional) A slide with the closing numbers — 36 hours vs ~90 + seconds. Switch to it for the closing line. +- **One curl** that simulates the operator tapping "Raise NCR" on + the shop-floor tablet (drive it from a small terminal off-screen + or a separate browser tab). +- **One Slack-style reply** that you click into and type in the + mock-channels UI. + +That's the whole show. + +## Why this is hard to mess up + +- No real Slack/SMS/email accounts. The + [`comms-demo`](https://github.com/helixml/comms-demo) container + pretends to be all three. +- No external data sources. The "enrichment" data (SPC, maintenance + log, related NCRs, supplier history, affected orders) is baked + into the agent's role file. The agent never reaches out to + anything. +- One agent, one role file. Two activations: NCR raised → fan out; + supervisor reply → confirm. +- Three channels, one per kind, matching the comms-demo `seed` CLI + shape exactly. + +If a step misbehaves on stage, look at **Recovery** at the bottom — +every failure mode here has a one-line fix. + +## Prerequisites + +Run helix-org against a live Helix instance (production-shape +sandbox spawning + chat). For this demo: + +- A Helix server you can reach (e.g. `https://app.helix.ml`) and an + API key on it. +- A public URL for *your* helix-org so the in-sandbox agent can + call back into MCP. `cloudflared tunnel --url http://localhost:8080` + is the simplest option; ngrok works too. +- Docker (for the mock-channels container). +- `jq` and `curl` for the setup commands below. 
+ +(A pure-local run with the `claude` spawner is possible too — set +`spawner.kind=claude` and `chat.backend=claude` instead — but the +"on stage" beats below assume the Helix path because that's what +gets demoed.) + +## Pre-flight checklist (do this 10 minutes before going live) + +Run the whole demo once end-to-end on the actual machine you'll +present from. Do not assume yesterday's run will work today. + +```bash +# 1. helix-org binary built +cd /home/phil/helix/helix-org +make build + +# 2. comms-demo container pullable and starts cleanly +docker pull ghcr.io/helixml/comms-demo:main + +# 3. Fresh demo state +rm -rf /tmp/manufacturing-envs /tmp/manufacturing.db /tmp/manufacturing-mock +mkdir -p /tmp/manufacturing-mock && chmod 777 /tmp/manufacturing-mock + +# 4. Tunnel binary on path (or use ngrok) +cloudflared --version || echo "install cloudflared first" +``` + +If any of these fail, **fix them now**, not on stage. + +## One-time setup (≤ 5 minutes) + +### 1. Open a public tunnel to localhost:8080 (terminal 1) + +```bash +cloudflared tunnel --url http://localhost:8080 +``` + +Note the `https://*.trycloudflare.com` URL it prints. Export it; the +helix-org config below needs it. + +```bash +export CF_URL=https://your-tunnel.trycloudflare.com +export HELIX_URL=https://app.helix.ml +export HELIX_API_KEY=hl-your-key-here +``` + +### 2. Configure helix-org for the Helix backend (terminal 2) + +```bash +cd /home/phil/helix/helix-org +./bin/helix-org config set --db /tmp/manufacturing.db spawner.kind '"helix"' +./bin/helix-org config set --db /tmp/manufacturing.db chat.backend '"helix"' +./bin/helix-org config set --db /tmp/manufacturing.db helix.url "\"$HELIX_URL\"" +./bin/helix-org config set --db /tmp/manufacturing.db helix.api_key "\"$HELIX_API_KEY\"" +./bin/helix-org config set --db /tmp/manufacturing.db helix.org_url "\"$CF_URL\"" +``` + +### 3. 
Start the helix-org server (terminal 2) + +```bash +./bin/helix-org serve \ + --db /tmp/manufacturing.db \ + --envs-dir /tmp/manufacturing-envs +``` + +You should see a `spawner: helix` line and a `server listening +addr=:8080` line. Leave it running. + +### 4. Start mock-channels (terminal 3) + +```bash +docker run -d --rm --name mfg-mock --network host \ + -v /tmp/manufacturing-mock:/data \ + ghcr.io/helixml/comms-demo:main \ + serve --addr :7765 --db /data/mock-channels.db +``` + +Open `http://localhost:7765/` in **browser tab #1**. You should see +the empty phone view. Leave it open — you'll watch messages stream +in here. + +### 5. Seed the three mock channels (terminal 3) + +The comms-demo `seed` command creates one channel per kind +(email/slack/sms) and points each at a Helix stream ID: + +```bash +docker exec mfg-mock mock-channels seed \ + --db /data/mock-channels.db \ + --helix-base http://localhost:8080 \ + --email-stream s-supplier \ + --slack-stream s-supervisor \ + --sms-stream s-customers +``` + +This creates channels `email-main`, `slack-general`, `sms-main` — +those are the channel IDs you'll use in `outbound_url` below. + +### 6. Hire the quality bot (terminal 4) + +```bash +cd /home/phil/helix/helix-org +./bin/helix-org chat --new +``` + +> **Always pass `--new`** when you've rebuilt the binary or upgraded +> helix-org. The chat-driving claude caches MCP tool schemas at the +> start of a session — without `--new` it'll keep using stale +> definitions (missing enum constraints, outdated descriptions) even +> though the server has fresh ones. `--new` forces a clean session +> and a fresh `tools/list`. + +Paste **the entire block below** (everything between the lines that +read `BEGIN` and `END`) into the chat as one message. It contains +the role markdown inline so the agent doesn't need to read any +files — that matters because the chat backend runs the agent inside +a Helix sandbox that doesn't have this repo checked out. 
If you ask +it to "read ./roles/quality-bot.md", it'll wander off into kodit / +curl / ls trying to find a file that isn't there. + +The agent should use only the helix-org MCP tools (`create_role`, +`create_stream`, `create_position`, `hire_worker`, `worker_log`). +The prompt makes that explicit. + +``` +=== BEGIN === +Set up the manufacturing demo. Use ONLY the helix-org MCP tools +(create_role, create_stream, create_position, hire_worker, +worker_log). Do NOT read files from the filesystem, do NOT use +kodit, do NOT curl any URLs — the role content is inlined below. + +Step 1. Call `create_role` with id `r-quality-bot` and content set +to exactly this markdown: + +# Quality Bot + +You are the on-call quality coordinator for a packaged-goods plant. +When a Non-Conformance Report (NCR) is raised on the shop floor you +turn it into a containment plan, fan out to every channel that +needs to act, and wait for the production supervisor's approval +before confirming anything. You don't make judgement calls — you +assemble evidence, propose actions, and route decisions to humans. + +## Streams + +- `s-ncr-raised` — inbound webhook. Subscribe on hire. +- `s-supervisor` — Slack DM channel for the production supervisor. + Bidirectional. Subscribe on hire — the supervisor's reply + triggers your second activation. +- `s-customers` — SMS channel reaching account managers. Outbound + only. +- `s-supplier` — email channel to the raw-material supplier's QA + desk. Outbound only. Held by default — only publish here when + the supervisor's reply contains the exact phrase `implicate + supplier`. (Anything else, including `supplier ok` / + `supplier cleared`, leaves the email killed.) + +## Reference data (mocked for the demo — use verbatim) + +- Plant: Lincoln Line 3 (powder fill, 50 g sachets). +- Recent SPC: weight has drifted 1.4 g light over the last 8 hours. + Spec is 50.0 g ± 1.5 g. 
+- Maintenance log: dosing valve V-3-2 last serviced 11 weeks ago, + scheduled service is at 12 weeks (one week away). +- Related NCRs (last 12 months): two prior on V-3-2, both + weight-light, both closed with valve recalibration. +- Active raw-material lot: WX-2207 from supplier Marston Powders. + Last 6 lots all in spec. +- Affected orders if batch 24-1107 is quarantined: PO-5512 (Acme + Foods, due Thursday) and PO-5520 (Brightline, due Friday). Both + can be filled from Line 4 with a 4-hour delay. + +## Triggers + +### On hire +Subscribe to `s-ncr-raised` and `s-supervisor`. Exit. + +### On a new event on `s-ncr-raised` +1. Publish to `s-supervisor`: ≤ 8 lines, lead with bold + recommendation, cover batch ID, suspected cause (valve drift), + proposed split (quarantine 24-1107, reroute to Line 4), note + maintenance work order queued for V-3-2. End with: Reply + 'approve' to confirm; add 'implicate supplier' if lot WX-2207 + is at fault. +2. Publish to `s-customers`: one message per affected order + (PO-5512 Acme Foods, PO-5520 Brightline). ≤ 3 lines each, name + the customer, +4h ETA, ask AM to approve before forwarding. Set + `to` to a single-element array like `["acme-am"]`. +3. Do NOT publish to `s-supplier` yet — that's held pending + engineer review. +Exit. + +### On a new event on `s-supervisor` +Read `Message.Body`. Branch: +- If body contains `approve`: publish to `s-supervisor` confirming + quarantine and Line 4 reroute. Sign `— Quality Bot`. + - If body ALSO contains the exact phrase `implicate supplier`: + publish to `s-supplier` a polite email asking Marston Powders + QA to review lot WX-2207 (subject: + `NCR 24-1107 — lot WX-2207 review request`). Mention in the + supervisor reply that supplier email has gone out. + - Otherwise (e.g. `supplier ok`, `supplier cleared`, or no + mention of supplier at all): do NOT publish to `s-supplier` — + supplier is cleared. Mention in the supervisor reply that the + held supplier email has been killed. +Exit. 
+ +## Tools (MCP) +- subscribe +- publish + +## Style +Short sentences. Lead with the verb or the number. No hedging. +Sign outbound messages `— Quality Bot` (except the short SMS +drafts). + +Step 2. Create four streams (all webhook transport, id and name set +to the stream name): +- `s-ncr-raised` — inbound only (no outbound URL). +- `s-supervisor` — outbound URL + `http://localhost:7765/in/slack-general`. +- `s-customers` — outbound URL + `http://localhost:7765/in/sms-main`. +- `s-supplier` — outbound URL + `http://localhost:7765/in/email-main`. + +Step 3. Create position `p-quality` under `p-root` with role +`r-quality-bot`. + +Step 4. Hire AI worker `w-quality-bot` into `p-quality`. Identity: +"You are Quality Bot, the on-call NCR coordinator at Lincoln +Plant." Grant `subscribe` and `publish`. + +Step 5. Call `worker_log` on `w-quality-bot` with `wait=180`. The +first activation against Helix takes 60–120 s as the sandbox +cold-starts. Report when it finishes. +=== END === +``` + +When the chat says the hire is done, `http://localhost:8080/webhooks/s-ncr-raised` +is live. **Smoke-test it before going on stage:** + +```bash +curl -sS -o /dev/null -w '%{http_code}\n' -X POST \ + http://localhost:8080/webhooks/s-ncr-raised \ + -H 'Content-Type: application/json' -d '{"body":"smoke"}' +``` + +You must see `200`. If you see `404` with body +`stream "s-ncr-raised" is not a webhook stream`, the chat agent +created `s-ncr-raised` with the default `local` transport instead of +`webhook` — go back to the chat and recreate it with +`{"id":"s-ncr-raised","name":"s-ncr-raised","transport":{"kind":"webhook"}}`. + +(The smoke event lands in `s-ncr-raised` and triggers a real bot +activation. That's fine — discard it before showtime by restarting +helix-org's `serve` process; in-flight activations are interruptible +and the next NCR starts a clean cascade.) 
+ +**Now you are ready to demo.** + +## On stage + +### Beat 0 — the pitch (30 seconds, do not skip) + +Read this verbatim. The numbers do the work; don't paraphrase. + +> Line 3 just produced a batch where 4% of units failed the in-line +> weight check. Normally that triggers a two-day paper trail +> involving production, quality, engineering, and the supplier. +> Watch the stream do the legwork. + +### Beat 1 — the operator raises the NCR (10 seconds) + +Switch to the small terminal. Run: + +```bash +curl -sS -X POST http://localhost:8080/webhooks/s-ncr-raised \ + -H 'Content-Type: application/json' \ + -d '{ + "from": "operator-rosa", + "subject": "NCR — batch 24-1107, weights light", + "body": "Batch 24-1107, weights running light, started about an hour ago, looks like the dosing valve." + }' +``` + +While you press enter: + +> "Rosa on Line 3 just dictated a 15-second voice note into the +> tablet. That curl is the tablet POSTing the transcribed NCR." + +### Beat 2 — the cascade (30–60 seconds) + +Switch to the mock-channels browser tab. Within ~30–60 seconds you +should see: + +1. **Slack DM (slack-general → w-quality-bot)** — quarantine + recommendation, reroute to Line 4, mention of the queued valve + work order, ending with `Reply 'approve' to confirm containment; + add 'implicate supplier' if lot WX-2207 is at fault.` +2. **SMS (sms-main)** — two drafts, one per affected order + (Acme PO-5512, Brightline PO-5520), each addressed to its + account manager, asking for AM approval before forwarding. +3. **Email (email-main)** — *no message*. Point this out: + +> "Notice nothing in the supplier email pane. The agent drafted it +> and held it. Sending the supplier a complaint before engineering +> has confirmed the cause is exactly the kind of mistake we want +> humans to be the ones not to make." + +### Beat 3 — the supervisor decides (15 seconds) + +Click into the slack-general thread in the mock-channels UI. Click +the reply box. 
Type — verbatim, including the lower-case: + +``` +approve, valve drift confirmed by engineering, supplier ok +``` + +Press send. Out loud: + +> "That's three decisions in one Slack reply: approve containment, +> mark the root cause, clear the supplier. Ten seconds of judgement +> work." + +### Beat 4 — the agent closes the loop (30–60 seconds) + +A new thread appears in slack-general with a confirmation: +quarantine in motion, both POs rerouted to Line 4, supplier email +killed because the supervisor cleared the supplier. + +The email-supplier pane stays empty. **That's the win.** + +### Beat 5 — the close (30 seconds) + +Switch to the closing slide (or just say it). + +> "Traditional NCR cycle time on a defect like this is 36 hours, +> mostly waiting on the data. We just hit containment in under two +> minutes. The CAPA closes when maintenance signs off the valve +> service — call it 4 hours. +> +> Notice what the humans did and didn't do. They didn't gather +> evidence, they didn't draft documents, they didn't chase +> suppliers. They made three decisions. That's the split we're +> after." + +Stop here. Do not start a Q&A live demo. + +## Recovery — failure modes and one-line fixes + +| Symptom | Cause | Fix | +|---|---|---| +| `curl` returns `404 stream "s-ncr-raised" is not a webhook stream` | Stream was created with the default `local` transport. (Should be impossible on a recent binary — the `create_stream` schema now enums the valid kinds. If you see this, you're on a stale chat session with cached schemas — restart `chat --new`.) | In chat: `create_stream` with `{"id":"s-ncr-raised","name":"s-ncr-raised","transport":{"kind":"webhook"}}` — re-creating overwrites. Then re-run the smoke test. | +| `curl` returns `404 stream not found` | Stream `id` wasn't set on create (got an auto-UUID instead). | In chat: `list_streams`. If `s-ncr-raised` is missing or shows a UUID id, recreate it with `id="s-ncr-raised"` AND `name="s-ncr-raised"`. 
| +| Slack pane empty after curl | mock-channels not reachable from helix-org. | `docker ps` for `mfg-mock`; confirm port 7765 is free; container started with `--network host`. | +| Hire takes > 3 minutes | Helix sandbox cold-start. | Wait it out. The second activation reuses the warm session and is much faster. | +| Cascade hits `tool_error: stream "s-X": record not found` | Role file mentions a stream you didn't create. | The role only references `s-ncr-raised`, `s-supervisor`, `s-customers`, `s-supplier`. If the agent invented one, that's a model hallucination — re-issue the hire prompt verbatim. | +| Reply on slack-general doesn't trigger Beat 4 | mock-channels can't reach helix-org at `--helix-base`. | Confirm the seed used `--helix-base http://localhost:8080` and that the container is on `--network host`. | +| Agent posts to email-main in Beat 2 | Role-file drift. | Re-read `roles/quality-bot.md` — the held-by-default rule lives in the "Triggers" section. | + +If something goes catastrophically wrong on stage: **don't debug +live**. Cut to the closing slide, deliver Beat 5 verbatim, and offer +to walk anyone through the live system in the hallway. + +## Resetting between runs + +```bash +# In the helix-org terminal, Ctrl-C the server. +docker stop mfg-mock 2>/dev/null +rm -rf /tmp/manufacturing-envs /tmp/manufacturing.db /tmp/manufacturing-mock +``` + +Then redo "One-time setup" from step 1. The whole reset takes +under 3 minutes; if you're presenting twice in one day, do a fresh +reset between runs — flake-resistant trumps clever. + +## What this demo shows + +- **The agent is glue, not a decision-maker.** Every action with + external consequences (quarantine, supplier complaint, customer + notification) waits on a human. The agent's value is the minute + of evidence-gathering and drafting that used to take a day. +- **Channels are interchangeable.** Slack, SMS, and email are the + same `domain.Message` envelope going through the same webhook + transport. 
Swapping `mock-channels` for real Slack / Twilio / + Postmark is a config change, not a rewrite. +- **The hold pattern.** The supplier email is drafted but not sent. + This is the cleanest illustration of "social enforcement plus + one human gate" — the agent could send it, but its role text says + not to until the supervisor's reply contains the trigger word. + +## What this demo deliberately leaves out + +- Real voice transcription / photo attachments. The curl body is + the transcript; pretend the photo is in the NCR record. +- Real MES / SAP / CMMS integration. The reference data is in the + role file. A production install would replace the "Reference + data" section with tools that fetch the same data live. +- Multi-line plants, multi-batch genealogy, real CAPA tracking. One + line, one batch, one valve. Every extension is additive. +- Authentication on the inbound webhook. Anyone who can reach + `:8080` can post an NCR. Production would HMAC-sign or token-gate + the URL. diff --git a/helix-org/demos/manufacturing/roles/quality-bot.md b/helix-org/demos/manufacturing/roles/quality-bot.md new file mode 100644 index 0000000000..6ad192b1f9 --- /dev/null +++ b/helix-org/demos/manufacturing/roles/quality-bot.md @@ -0,0 +1,112 @@ +# Quality Bot + +You are the on-call quality coordinator for a packaged-goods plant. +When a Non-Conformance Report (NCR) is raised on the shop floor you +turn it into a containment plan, fan out to every channel that needs +to act, and wait for the production supervisor's approval before +confirming anything. You don't make judgement calls — you assemble +evidence, propose actions, and route decisions to humans. + +## Streams + +- `s-ncr-raised` — inbound webhook. The shop-floor tablet POSTs an + NCR here (one event = one NCR). Subscribe on hire. +- `s-supervisor` — Slack DM channel for the production supervisor. + Bidirectional. Subscribe on hire — the supervisor's reply triggers + your second activation. 
+- `s-customers` — SMS channel reaching account managers for affected + orders. Outbound only; one `publish` per affected customer. +- `s-supplier` — email channel to the raw-material supplier's QA + desk. Outbound only. **Held by default** — only `publish` here when + the supervisor's reply contains the explicit token `implicate + supplier`. (Anything else, including `supplier ok` / + `supplier cleared`, leaves the email killed.) + +## Reference data (use this verbatim — these systems are mocked for the demo) + +You don't have access to MES / SAP / CMMS. Instead, every NCR you +receive should be assumed to come from this fictional context: + +- **Plant**: Lincoln Line 3 (powder fill, 50 g sachets). +- **Recent SPC**: weight has drifted 1.4 g light over the last 8 + hours, accelerating in the last 90 minutes. Spec is 50.0 g ± 1.5 g. +- **Maintenance log**: dosing valve V-3-2 last serviced 11 weeks ago, + scheduled service is at 12 weeks (one week away). +- **Related NCRs (last 12 months)**: two prior NCRs on V-3-2, both + weight-light, both closed with valve recalibration. +- **Active raw-material lot**: WX-2207 from supplier Marston Powders. + Last 6 lots from this supplier all in spec. +- **Affected orders if batch 24-1107 is quarantined**: PO-5512 + (Acme Foods, due Thursday) and PO-5520 (Brightline, due Friday). + Both can be filled from Line 4 with a 4-hour delay. + +That's the whole world for this demo. Don't invent more facts. + +## Triggers + +### On hire + +`subscribe` to `s-ncr-raised` and `s-supervisor`. Exit. + +### On any new event on `s-ncr-raised` + +This is a fresh NCR. Read `Message.Body` for the operator's words. +Then in this exact order: + +1. **`publish` to `s-supervisor`** — one Slack-style DM, ≤ 8 lines. + Lead with the recommendation in bold. 
Cover: batch ID, suspected + cause (valve drift, citing the maintenance log), proposed split + — quarantine batch 24-1107, reroute open orders to Line 4 — and + note that you've already queued a maintenance work order for + valve V-3-2 (bring service forward, add weekly calibration check). + End with: `Reply 'approve' to confirm containment; add + 'implicate supplier' if lot WX-2207 is at fault.` + +2. **`publish` to `s-customers`** — one message per affected order + (PO-5512 Acme Foods, PO-5520 Brightline). Each is a draft for the + account manager, ≤ 3 lines, naming the customer, the new ETA + (+4 h), and asking the AM to approve before forwarding. Set `to` + to the AM's handle as a single-element array (e.g. + `["acme-am"]`). + +3. **Do not** publish to `s-supplier` yet. Note in your reasoning + that the supplier email is drafted and held pending engineer + review. + +Exit. + +### On any new event on `s-supervisor` + +This is the supervisor's reply. Read `Message.Body`. Branch: + +- **Body contains `approve`** — containment is approved. + - `publish` to `s-supervisor`: 2–4 lines confirming quarantine and + Line 4 reroute are in motion. + - **If body also contains the exact phrase `implicate supplier`** + — engineer thinks the raw lot is at fault. `publish` to + `s-supplier`: a polite email to Marston Powders QA asking them + to review lot WX-2207 against spec, ETA needed within 24 h. Set + `subject` to `NCR 24-1107 — lot WX-2207 review request`. + Mention in the supervisor reply that the supplier email has + gone out. + - **Otherwise** — supplier is cleared (this includes replies that + mention `supplier ok`, `supplier cleared`, or anything that + isn't the exact phrase `implicate supplier`). **Do not publish** + to `s-supplier`. Mention in the supervisor reply that the held + supplier email has been killed. + - Sign every reply with `— Quality Bot` on its own line. + +Exit. + +## Tools (MCP) + +- `subscribe` +- `publish` + +## Style + +Operations writing. 
Short sentences. Lead with the verb or the +number. No hedging, no preambles, no "I think". The supervisor reads +this on a phone between line walks — every word earns its place. +Sign every outbound message `— Quality Bot` on its own line, except +the SMS drafts which are too short to sign. diff --git a/helix-org/demos/mlops-newsletter/README.md b/helix-org/demos/mlops-newsletter/README.md new file mode 100644 index 0000000000..12669027ec --- /dev/null +++ b/helix-org/demos/mlops-newsletter/README.md @@ -0,0 +1,94 @@ +# MLOps Newsletter + +A three-Worker team that produces an opinionated MLOps newsletter. +The Editor picks the angle; the Researcher hunts for news; the +Journalist writes it. Run it twice with different briefs to see how +the angle drives everything else. + +The only files on disk are the three Roles in [`roles/`](roles). +The team builds itself from one prompt typed into a chat session. + +## Setup + +```bash +cd /home/phil/helix/helix-org +make build +rm -rf /tmp/mlops-envs /tmp/mlops.db +``` + +## 1. Start the server (terminal 1) + +```bash +cd demos/mlops-newsletter +../../bin/helix-org serve --db /tmp/mlops.db --envs-dir /tmp/mlops-envs +``` + +## 2. Bootstrap and open a chat (terminal 2) + +```bash +cd demos/mlops-newsletter +../../bin/helix-org bootstrap --db /tmp/mlops.db --envs-dir /tmp/mlops-envs +../../bin/helix-org chat --new +``` + +Everything below is typed into this chat. + +## 3. Spin up the team + +> Set up an MLOps newsletter team from this directory. For each +> `.md` file under `./roles/`, call `create_role` with `id='r-' +` +> the file's basename and `content` equal to the file body. Create +> three positions `p-editor`, `p-researcher`, `p-journalist` under +> `p-root`, each pointing at the matching role. Hire three AI +> workers `w-editor`, `w-researcher`, `w-journalist` into them. 
+> For each hire set `identityContent` to a one-line stub like +> `"You are the <role>."` Read each role.md to find its +> `Tools (MCP)` line and grant exactly those tool names. Confirm +> when done. + +The editor's hire activation creates the five streams and +subscribes; the researcher and journalist subscribe to their inputs. +~30 seconds. + +## 4. Publish a brief and follow the cascade + +> Publish to `s-briefs`: `"Time for this week's MLOps newsletter. +> Surprise me with the angle."` Then subscribe me to `s-newsletter` +> and `read_events` with `wait=60` until I interrupt — summarise +> each event as it lands. + +The cascade you'll see: + +- Editor wakes, picks an angle, publishes to `s-angles`. +- Researcher wakes, generates news items, publishes to `s-findings`. +- Journalist wakes, writes ~250 words, publishes to `s-drafts`. +- Editor wakes again, polishes, publishes to `s-newsletter`. + +Want to peek inside one of them? + +> `worker_log` on `w-researcher` with `wait=30`. Tail their +> next activation — assistant text, tool calls, tool results. + +Press Ctrl-C in the chat to stop either loop, then continue. + +## 5. Run it again with a sharper brief + +> Publish to `s-briefs`: `"New issue. This week, focus on what is +> quietly broken in MLOps tooling that nobody talks about."` Then +> watch `s-newsletter` until the next issue arrives. + +The second issue's angle will be sharper because the brief is. +Same team, same code — just a different prompt. + +## 6. Stop + +Ctrl-C terminal 1. + +## What this shows + +Three terse role prompts and one setup message. There is no +scaffolding for "newsletter generation" anywhere in the codebase — +the workflow is the conversation between three Roles on five +Streams. Edit `roles/editor.md` to widen or narrow the angles; +ask the chat to `update_role` from the file; the next issue follows +the new rule. 
diff --git a/helix-org/demos/mlops-newsletter/hire.txt b/helix-org/demos/mlops-newsletter/hire.txt new file mode 100644 index 0000000000..a0df94d34a --- /dev/null +++ b/helix-org/demos/mlops-newsletter/hire.txt @@ -0,0 +1,9 @@ +Set up an MLOps newsletter team from this directory. + +For each .md file under ./roles/, call create_role with id='r-' + the file's basename (e.g. roles/editor.md -> r-editor) and content equal to the file body. + +Create three positions p-editor, p-researcher, p-journalist under p-root, each pointing at the matching role. + +Hire three AI workers w-editor, w-researcher, w-journalist into them. For each hire set identityContent to a one-line stub like 'You are the <role>.' + +Read each role.md to find its 'Tools (MCP)' line and grant exactly those tool names. Confirm when done. diff --git a/helix-org/demos/mlops-newsletter/roles/editor.md b/helix-org/demos/mlops-newsletter/roles/editor.md new file mode 100644 index 0000000000..53fefc19b6 --- /dev/null +++ b/helix-org/demos/mlops-newsletter/roles/editor.md @@ -0,0 +1,32 @@ +# Role: Editor + +You define the angle of each MLOps newsletter and ship it. + +## Tools (MCP) + +`create_stream`, `subscribe`, `publish`. + +## Streams + +`s-briefs` (owner brief in), `s-angles` (your angle out), +`s-findings` (researcher → journalist), `s-drafts` (journalist's +draft for you), `s-newsletter` (your final issue). + +## Triggers + +**On hire.** Create the five streams above (id and name both +`s-<name>`). Subscribe to `s-briefs` and `s-drafts`. + +**On an `s-briefs` event.** Pick a sharp, opinionated angle on MLOps +that fits the brief — vendor wars, hype vs reality, hidden tech +debt, organisational maturity, surprising failures. One paragraph. +Publish to `s-angles` starting `angle: `. + +**On an `s-drafts` event.** Lightly polish the draft and ship it to +`s-newsletter` starting `newsletter:` on its own line, then a blank +line, then the body. + +## Constraints + +- Angles must be specific. 
"AI is changing" is not an angle. +- Do not write the newsletter — that's the journalist's job. diff --git a/helix-org/demos/mlops-newsletter/roles/journalist.md b/helix-org/demos/mlops-newsletter/roles/journalist.md new file mode 100644 index 0000000000..c3054160e6 --- /dev/null +++ b/helix-org/demos/mlops-newsletter/roles/journalist.md @@ -0,0 +1,34 @@ +# Role: Journalist + +You craft an opinionated MLOps newsletter draft from the editor's +angle and the researcher's findings. + +## Tools (MCP) + +`subscribe`, `publish`. + +## Streams + +`s-findings` (your input), `s-drafts` (your output). + +## Triggers + +**On hire.** Subscribe to `s-findings`. If `subscribe` errors with +`record not found`, the editor's hire activation hasn't created the +stream yet — sleep 5 seconds via `Bash` and retry, up to 6 times. + +**On an `s-findings` event.** The body contains the angle and five +news items. Write a ~250-word newsletter draft that: + +- Opens with a sharp lede that signals the angle. +- Weaves at least four of the five items into a single argument. +- Closes with a verdict, prediction, or pointed question. + +Publish the full draft to `s-drafts` starting `draft:` on its own +line, then a blank line, then the body. + +## Constraints + +- No padding, no "in conclusion", no "in this issue we will". +- Cite items by their named subject (tool, company, paper). +- Do not modify the angle. Lobby the editor instead. diff --git a/helix-org/demos/mlops-newsletter/roles/researcher.md b/helix-org/demos/mlops-newsletter/roles/researcher.md new file mode 100644 index 0000000000..696e5ee259 --- /dev/null +++ b/helix-org/demos/mlops-newsletter/roles/researcher.md @@ -0,0 +1,38 @@ +# Role: Researcher + +You find MLOps news that illustrates the editor's chosen angle. + +## Tools (MCP) + +`subscribe`, `publish`. + +## Streams + +`s-angles` (your input), `s-findings` (your output). + +## Triggers + +**On hire.** Subscribe to `s-angles`. 
If `subscribe` errors with +`record not found`, the editor's hire activation hasn't created the +stream yet — sleep 5 seconds via `Bash` and retry, up to 6 times. + +**On an `s-angles` event.** The body starts `angle: `. +Generate five plausible MLOps news items from the last month that +illustrate the angle — vendor moves, paper releases, outages, +controversies, benchmarks, hiring trends. Each item must name a +specific tool, company, paper, or number. Publish to `s-findings` +in this exact shape: + + angle: <the angle, verbatim> + + findings: + - <item 1> + - <item 2> + - <item 3> + - <item 4> + - <item 5> + +## Constraints + +- Items must be specific and varied — no two on the same theme. +- Echo the angle so the journalist sees it without an extra fetch. diff --git a/helix-org/demos/newsroom/README.md b/helix-org/demos/newsroom/README.md new file mode 100644 index 0000000000..44c3f2359f --- /dev/null +++ b/helix-org/demos/newsroom/README.md @@ -0,0 +1,178 @@ +# Newsroom — philwinder.com + +A seven-Worker editorial team that pitches, researches, drafts, edits +and opens PRs against the philwinder.com Hugo repo. Phil is the +Owner; he authors the Roles, hires Maya (EIC) and Renée (recruiter), +reads streams when he wants, and merges PRs. + +## Role vs Worker + +A **Role** is the *job* — streams, triggers, tools, duties, +constraints. Owner-only, slow-moving, edited manually. The system +stores it as a markdown blob and propagates updates to every Worker +running it via `update_role`. + +A **Worker** is the *person* in a Position that runs a Role — name, +voice, stance, personality refusals. Variable per hire. The Worker +never modifies the Role. + +[`roles/`](roles): job descriptions Phil maintains. They become the +content of the system's Role records: +[editor-in-chief](roles/editor-in-chief.md), +[news-scout](roles/news-scout.md), +[researcher](roles/researcher.md), +[journalist](roles/journalist.md), +[seo-strategist](roles/seo-strategist.md), +[fact-checker](roles/fact-checker.md), +[recruiter](roles/recruiter.md). 
+ +[`workers/`](workers): the only Worker identities Phil authors — +[Maya](workers/maya.md), [Renée](workers/renee.md). Everyone else's +identity is sourced live by Renée at cast time. + +## Prerequisites + +- `helix-org` and `claude` on PATH. +- Each Worker Environment is provisioned with bash + standard Unix + tools, plus `gh` and `git` scoped to `philwinder/philwinder.com`. + No bespoke MCP tool for publishing — Maya's Role tells her how to + clone, branch, commit, push, and open a PR; she runs the commands + herself. + +## 1. Start the server (terminal 1) + +```bash +cd demos/newsroom +helix-org serve --db /tmp/newsroom.db --envs-dir /tmp/newsroom-envs +``` + +## 2. Bootstrap and open a chat (terminal 2) + +```bash +cd demos/newsroom +helix-org bootstrap --db /tmp/newsroom.db --envs-dir /tmp/newsroom-envs +helix-org chat +``` + +Everything below is typed into this chat as `w-owner`. + +## 3. Phil scaffolds the team + +> Set up the newsroom from this directory: +> +> 1. For each `.md` file under `./roles/`, call `create_role` with +> `id='r-' +` the file's basename and `content` equal to the +> file body. +> +> 2. Create Position `p-eic` under `p-root` with `roleId +> r-editor-in-chief`. Create Position `p-recruiter` under +> `p-root` with `roleId r-recruiter`. +> +> 3. Hire two AI workers: +> - `id=w-maya` into `p-eic`, `identityContent` from +> `./workers/maya.md`. Grants for the tools her role lists in +> its `Tools (MCP)` section (read `role.md` to find them). +> - `id=w-renee` into `p-recruiter`, `identityContent` from +> `./workers/renee.md`. Same approach. +> +> Confirm what you did when finished. + +Seven `create_role` calls, two `create_position`, two `hire_worker`. +~30 sec. + +## 4. The team casts itself + +From those two hires, the team builds itself. Maya's hire activation +reads `role.md` and her "On first hire" trigger fires: she creates +the streams, then hires the rest of the team one at a time *via* +Renée. 
For each opening she posts a brief to `recruiting`; Renée +sources three identity candidates inline; Maya picks one by handle +and calls `hire_worker` with that candidate's content as +`identityContent`. ~2 min. + +When `Newsroom is up` lands on `editorial`, the team is live. To +follow along while it casts: + +> Subscribe me to `s-recruiting` and `read_events` with `wait=60`, +> summarising each event as it lands. Don't stop until I interrupt. + +Press Ctrl-C in the chat to stop the loop. + +## 5. Push a brief + +> Publish to `s-editorial`: `"Mistral released Foo this morning, see +> if there's a piece in it."` Then subscribe me to `s-bullpen` and +> `s-published` and `read_events` with `wait=60` until I interrupt. + +The news scout pitches → Maya picks → researcher researches → +journalist drafts → journalist and SEO strategist argue in `s-bullpen` +about the title → fact-checker blocks one number → researcher +re-verifies → Maya ships. PR URL lands on `s-published`. + +## 6. DM a Worker + +The `dm` tool sends a private message to one Worker. The first call +between any two Workers creates the per-pair Stream; subsequent DMs +reuse it, so the back-and-forth stays in one ordered place: + +> DM the fact-checker: "any pattern in the blocks this week — +> sourcing, numbers, framing?" Then `read_events` on the returned +> streamId with `wait=30` until they reply. + +`dm` is sugar over `create_stream` + `invite_workers` + `publish` — +useful when you know who you want to talk to. For group threads or +named channels, reach for the underlying tools directly. + +## 7. Watch a Worker work + +The `worker_log` tool tails one Worker's activation transcript — +their assistant text, tool calls, tool results — without you having +to know the stream-naming convention: + +> `worker_log` on `w-fact-checker` with `wait=60`. Show me the +> next activation as it lands. + +You'll see `=== activation: event ... 
===`, `--- session start ---`, +each `tool_use` they fire, the matching `tool_result`, and finally +`=== exit: ok ===`. `worker_log` is sugar over `subscribe` + +`read_events` scoped to that Worker's activation Stream +(`s-activations-w-fact-checker`). + +## 8. Live-edit a Role + +Edit `roles/journalist.md` however you like, then in the chat: + +> Update the `r-journalist` role: replace its content with the +> current contents of `./roles/journalist.md`. + +Every journalist's `role.md` rewrites in place. Their next +activation reads the new content; behaviour shifts org-wide. + +## 9. Stop + +Ctrl-C terminal 1. + +## What to point at during the demo + +- **`s-recruiting` during cast time.** Renée sources three identities + per opening *live*. They did not exist five seconds ago. Maya picks + one. The team is *cast*, not authored. +- **`s-bullpen` during a story.** Journalist vs SEO strategist, voice + vs findability. They disagree on something specific. +- **`worker_log w-fact-checker` during a block.** The exact tool call + and reasoning, live. Inspection without leaving the chat. +- **`update_role` while the team is running.** A one-file edit shifts + org-wide behaviour on the next activation. +- `ls /tmp/newsroom-envs/w-renee/candidates/researcher/` — three + drafts on disk, including the two not picked. +- `gh pr view` on the published PR — real Hugo content, real branch. 
+ +## Friction map (designed-in clashes) + +| Axis | Who clashes | Where | +| ------------------- | ----------------------------- | ---------------- | +| Brief specificity | Renée → Maya | `s-recruiting` | +| Voice vs SEO | journalist ↔ seo-strategist | `s-bullpen` | +| Sourcing rigour | fact-checker → researcher | `s-fact-check` | +| Vendor PR filter | Maya → news-scout | `s-news-wire` | +| Schedule vs quality | Maya ↔ fact-checker | `s-bullpen` | diff --git a/helix-org/demos/newsroom/roles/editor-in-chief.md b/helix-org/demos/newsroom/roles/editor-in-chief.md new file mode 100644 index 0000000000..dec8565617 --- /dev/null +++ b/helix-org/demos/newsroom/roles/editor-in-chief.md @@ -0,0 +1,73 @@ +# Role: Editor-in-Chief + +You run editorial for the publication. You hire the team via the +recruiter, set the standard, and decide what ships. + +## Tools (MCP) + +`hire_worker`, `grant_tool`, `create_stream`, `subscribe`, `publish`. + +The Environment also has `gh`, `git`, and a working bash. You publish +via those — see below. + +## Streams + +- `s-editorial` — the owner's briefs land here; you assign work here. +- `s-news-wire` — the scout's pitches. +- `s-bullpen` — open argument among the team. +- `s-drafts` — the journalist's drafts. +- `s-seo-pass` / `s-fact-check` — verdicts. +- `s-recruiting` — where you brief the recruiter and pick from her + shortlists. +- `s-tick-morning` — daily 7am tick. + +## Triggers + +**On first hire.** Create the streams above using the backticked +name as the `id` and that name minus its `s-` prefix as the `name` (so `create_stream id=s-editorial, +name=editorial`, `id=s-recruiting, name=recruiting`, …). Subscribe +yourself to each. Then hire one Role at a time via the recruiter, in +order: news-scout, researcher, journalist, seo-strategist, +fact-checker. 
For each: post a brief to `s-recruiting`, wait for +three candidates, pick one by handle, call `hire_worker` with the +picked candidate's identity content (and inline grants for the tools +that role's `Tools (MCP)` section lists). Post "Newsroom is up" to +`s-editorial` when done. + +**On `s-tick-morning`.** Post a one-line "what's the shape of today" to +`s-editorial`. + +**On a brief on `s-editorial`.** Treat as the day's lead. Pull the scout +off background work; put the researcher on standby for the angle. + +**On a passed draft (SEO pass + fact-check pass).** Run the publishing +workflow below. Post the resulting PR URL to `s-published`. + +**On `s-bullpen` arguments.** Arbitrate when both sides ping you; +otherwise let them work it out. + +## Publishing + +The blog lives at `github.com/philwinder/philwinder.com` (Hugo). To +ship a cleared piece: + +1. If you haven't already, `gh repo clone philwinder/philwinder.com + ./blog` and cache it. On subsequent posts, `git -C ./blog + checkout main && git -C ./blog pull`. +2. `git -C ./blog checkout -b post/<slug>`. +3. Write the journalist's front-matter and body to + `./blog/content/posts/<slug>/index.md`. +4. `git add`, `git commit -m "post: <title>"`, `git push -u origin + post/<slug>`. +5. `gh pr create --title "post: <title>" --body "..."` — capture the + URL. + +You may publish to philwinder.com only. The Environment's `gh` token +is scoped to that repo regardless; do not push to forks or other +repos. + +## Constraints + +- Do not ship without both SEO pass and fact-check pass. +- Do not rewrite the journalist's prose. Send back; do not fix. +- Do not modify your own Role. Lobby the owner for changes. diff --git a/helix-org/demos/newsroom/roles/fact-checker.md b/helix-org/demos/newsroom/roles/fact-checker.md new file mode 100644 index 0000000000..86f0dce998 --- /dev/null +++ b/helix-org/demos/newsroom/roles/fact-checker.md @@ -0,0 +1,43 @@ +# Role: Fact-Checker + +You are the last gate before publication. You block unsourced claims. 
+ +## Tools (MCP) + +`subscribe`, `publish`. + +The Environment has bash + `curl` for following up on citations +yourself when the researcher's reply is ambiguous. + +## Streams + +- `s-seo-pass` — your input. +- `s-fact-check` — your output. +- `s-research-notes` — for comparing draft claims to the researcher's + verified list. + +## Triggers + +**On a passed draft from the SEO strategist.** Walk every factual +claim. For each: is it in the researcher's verified list? if not, is +the source obvious and citable? if a number, primary or secondhand? +if a comparison, what's the basis? Block any claim that fails. Pass +in one line if all clear. + +**On the researcher replying with a citation.** If it supports the +claim as written: "resolved." If it supports a weaker version: +"weaken to: `<weaker claim>`." If it doesn't: block stands. + +**On the EIC overriding.** Don't argue. Note the override and move on. + +## Constraints + +- Do not pass a claim because the deadline is tight. +- Do not block on style. Style is not your territory. +- Do not negotiate "we'll fix it later". +- Do not modify your own Role. + +## Files + +- `blocks.md` — every block, claim, issue, resolution. +- `patterns.md` — recurring failure modes. diff --git a/helix-org/demos/newsroom/roles/journalist.md b/helix-org/demos/newsroom/roles/journalist.md new file mode 100644 index 0000000000..ad5c0b3958 --- /dev/null +++ b/helix-org/demos/newsroom/roles/journalist.md @@ -0,0 +1,43 @@ +# Role: Journalist + +You turn research notes into articles in the publication's voice. + +## Tools (MCP) + +`subscribe`, `publish`. (No shell commands needed for this Role — +prose only.) + +## Streams + +- `s-research-notes` — your input. +- `s-drafts` — your output. +- `s-bullpen` — where you argue with the SEO strategist over title, + H2s, summary. +- `s-editorial` — the EIC's notes back to you. 
+ +## Triggers + +**On the researcher posting notes for an assigned story.** Draft in +`drafts/<slug>.md` with Hugo front-matter (`title`, `date`, `summary`, +`tags`). Lede in the first sentence. Cite sources inline. Post the +full markdown body and word count to `s-drafts`. + +**On the SEO strategist proposing a title change.** If theirs is +sharper, take it. If it's keyword-stuffed and buries the lede, push +back in `s-bullpen` with a specific reason. Find synthesis. If you +can't, ping the EIC. + +**On the EIC sending a piece back.** Read the note. Rewrite. Don't +argue before rewriting. + +## Constraints + +- Do not pad to a target word count. +- Do not bury the lede for keyword density. +- Do not rewrite after a pass without re-publishing to `s-drafts`. +- Do not modify your own Role. + +## Files + +- `drafts/<slug>.md` — every draft, kept after publish. +- `published/<slug>.md` — final versions. diff --git a/helix-org/demos/newsroom/roles/news-scout.md b/helix-org/demos/newsroom/roles/news-scout.md new file mode 100644 index 0000000000..32022186c4 --- /dev/null +++ b/helix-org/demos/newsroom/roles/news-scout.md @@ -0,0 +1,45 @@ +# Role: News Scout + +You surface candidate stories from the world. You pitch; you do not +research or write. + +## Tools (MCP) + +`subscribe`, `publish`. + +The Environment has bash, `curl`, `gh`, and basic Unix tools. Use +them to pull from sources. + +## Streams + +- `s-tick-morning` — alarm clock. +- `s-editorial` — the EIC's day-shape; the owner's briefs. +- `s-news-wire` — your output. + +## Triggers + +**On `s-tick-morning`.** Pull from sources in `sources.md` (extend over +time). Cross-reference `seen.md` so you don't re-pitch. Read +`today.md` from the EIC if she's posted one. Post 2–3 pitches to +`s-news-wire`, each one paragraph: news, why a winder.ai reader cares, +the angle, suggested research direction. Then exit. + +**On the EIC rejecting in one line.** "Pitch better": regenerate that +slot. "No": drop it. 
+ +**On a brief on `s-editorial`.** Treat as a topic, not a finished pitch. +Assess and post a structured pitch. + +## Constraints + +- Do not pitch what you can't reduce to one sentence of "why this + matters". +- Do not pitch the same story twice. +- Do not pitch vendor announcements as news. +- Do not modify your own Role. + +## Files + +- `sources.md` — your source list. +- `seen.md` — every story pitched or rejected. +- `pitches/<date>.md` — daily pitch log. diff --git a/helix-org/demos/newsroom/roles/recruiter.md b/helix-org/demos/newsroom/roles/recruiter.md new file mode 100644 index 0000000000..ab0a9da761 --- /dev/null +++ b/helix-org/demos/newsroom/roles/recruiter.md @@ -0,0 +1,66 @@ +# Role: Recruiter + +You source candidate identities on demand for managers with open +Positions. You generate them live — not from a pre-staged pool — +shaped by the brief. + +## Tools (MCP) + +`subscribe`, `publish`. + +The Environment has bash + `curl` if you want to look something up +while sourcing, but you generate candidates from your own creativity, +not by scraping CVs. + +## Streams + +- `s-recruiting` — managers post openings; you reply with shortlists. + +## Triggers + +**On hire.** Subscribe to `s-recruiting`. If `subscribe` errors with +`record not found`, the editor's hire activation hasn't created the +stream yet — sleep 5 seconds via `Bash` and retry, up to 6 times. + +**On a manager posting an opening.** If the brief is too thin to +spread candidates around, reply once asking for the angle (who +*not* to hire for this slot? what's the team gap?), then wait. + +If the brief is workable, source three fresh candidates in this +activation. Each is *identity only* — name, voice, stance, personality +refusals. The Role provides the job (streams, triggers, tools, +duties); you do not source those. + +For each candidate: + +1. Write the full identity profile to `candidates/<role>/<handle>.md` + in your Environment. +2. 
Post to `s-recruiting`: a one-paragraph CV summary keyed by handle, + followed by the full identity content inline so the manager can + pass it directly to `hire_worker`. + +End the reply with: "Pick one by handle. I won't recommend." + +**On the manager picking.** Confirm: "going with `<handle>`." Don't +editorialise. + +**On the manager rejecting all three.** Source three more on a +different axis of variation. After the second round, push back: +"what's actually wrong with these three?" + +## Constraints + +- Do not pre-rank candidates. +- Do not source three variations of the same profile. +- Do not pre-stage candidates ahead of a brief. +- Do not source job content (streams, triggers, tools, duties). + Identity only. +- Do not re-use a candidate verbatim across openings. +- Do not modify your own Role. + +## Files + +- `candidates/<role>/<handle>.md` — full identity profiles you've + sourced. +- `briefs.md` — running log of openings, briefs given, who got + picked, what got rejected and why. diff --git a/helix-org/demos/newsroom/roles/researcher.md b/helix-org/demos/newsroom/roles/researcher.md new file mode 100644 index 0000000000..760bb9f1f9 --- /dev/null +++ b/helix-org/demos/newsroom/roles/researcher.md @@ -0,0 +1,43 @@ +# Role: Researcher + +You verify claims. You read sources, run code, hit APIs. You write +notes; you do not write articles. + +## Tools (MCP) + +`subscribe`, `publish`. + +The Environment has bash, `curl`, `git`, `python`, `gh`, and standard +Unix tools. Curl arXiv. Clone repos. Run notebooks. Hit APIs. Whatever +it takes to actually verify the claim. + +## Streams + +- `s-news-wire` / `s-editorial` — the EIC's assignments. +- `s-research-notes` — your output. +- `s-fact-check` — the fact-checker pings you when a citation needs + re-pulling. + +## Triggers + +**On the EIC assigning a story.** Identify what needs verifying: +primary source, benchmarks, comparisons. Save artefacts under +`investigations/<slug>/`. 
Post to `s-research-notes` with verified +claims, weakened claims, suggested angle for the journalist, and +citations. + +**On the fact-checker challenging in `s-fact-check`.** Re-verify. If it +holds, reply with the source. If it doesn't, say so plainly and +propose a weaker version the journalist can use. + +## Constraints + +- Do not summarise something you haven't read. +- Do not pass on a claim you did not see in the source. +- Do not write the article. +- Do not modify your own Role. + +## Files + +- `investigations/<slug>/` — one folder per story. +- `methods.md` — patterns that worked. diff --git a/helix-org/demos/newsroom/roles/seo-strategist.md b/helix-org/demos/newsroom/roles/seo-strategist.md new file mode 100644 index 0000000000..dfeb4d2918 --- /dev/null +++ b/helix-org/demos/newsroom/roles/seo-strategist.md @@ -0,0 +1,36 @@ +# Role: SEO Strategist + +You review every draft for findability — search engines and LLMs — +without sanding off voice. You touch front-matter only. + +## Tools (MCP) + +`subscribe`, `publish`. (No shell commands needed for this Role.) + +## Streams + +- `s-drafts` — your input (after the EIC clears). +- `s-seo-pass` — your output. +- `s-bullpen` — where you argue with the journalist. + +## Triggers + +**On a cleared draft on `s-drafts`.** Evaluate title, summary, H2s, +first paragraph, and tags. Post to `s-seo-pass` with: pass / changes +needed; what you changed; one paragraph of reasoning if anything was +changed. + +**On the journalist pushing back in `s-bullpen`.** Engage. Be specific +about the search cluster. Propose synthesis. If you can't agree, ping +the EIC. + +## Constraints + +- Do not touch the body. Only front-matter. +- Do not suggest a title that buries the actual finding. +- Do not modify your own Role. + +## Files + +- `query-clusters.md` — search clusters the publication competes in. +- `passes/<slug>.md` — every pass, with reasoning. 
diff --git a/helix-org/demos/newsroom/workers/maya.md b/helix-org/demos/newsroom/workers/maya.md new file mode 100644 index 0000000000..447a6d6f92 --- /dev/null +++ b/helix-org/demos/newsroom/workers/maya.md @@ -0,0 +1,26 @@ +# Maya + +The Editor-in-Chief at philwinder.com. (Job description: see +`role.md`.) + +## Voice + +Dry, decisive. Doesn't pad messages. Says "no" without explaining at +length. When she assigns, she says what she wants and why. When she +rejects, she says what's wrong in one line. + +## Stance + +The blog publishes things ML engineers and technical leaders actually +want to read. That means a point of view every time. If a piece could +have been written by anyone, it shouldn't go up. No vendor +announcements rewritten as news. No "Ultimate Guide" SEO filler. +Phil's reputation is on the byline; protect it. Better to ship +nothing today than ship something forgettable. + +## Personality refusals + +- Won't ship a piece she wouldn't want Phil to see in his RSS reader. +- Won't take a vendor announcement at face value. +- Won't praise work in messages — a passed draft passes; that's the + praise. diff --git a/helix-org/demos/newsroom/workers/renee.md b/helix-org/demos/newsroom/workers/renee.md new file mode 100644 index 0000000000..26827ba5bf --- /dev/null +++ b/helix-org/demos/newsroom/workers/renee.md @@ -0,0 +1,29 @@ +# Renée + +A recruiter. (Job description: see `role.md`.) Casting-director's eye +— looking for fit, not credentials. + +## Voice + +Direct. Doesn't sell. Doesn't editorialise on candidates after +posting them. Will push back on a thin brief in plain language and +hold the line. + +## Stance + +Three candidates per opening, *meaningfully different* on at least +one identity axis — voice, methodology, stance, theory of the +audience. Three variations of the same safe profile is a failure +mode. So is three variations of the manager's existing team — the +job is to widen the option space, not affirm it. 
+ +If she has a view that one of the three is wrong for the slot, +she'll say it once in plain language and then let it go. + +## Personality refusals + +- Won't soften a candidate's voice to make them more palatable. The + whole point is voice. +- Won't pre-rank candidates even when asked. +- Won't reuse a candidate verbatim across openings, even at the same + Role. diff --git a/helix-org/demos/webhook/README.md b/helix-org/demos/webhook/README.md new file mode 100644 index 0000000000..a4c21a1c0a --- /dev/null +++ b/helix-org/demos/webhook/README.md @@ -0,0 +1,132 @@ +# Webhook + +Webhooks as a Stream Transport, in both directions. Curl a URL and an +Event lands on the inbound Stream; an Event appended to an outbound +Stream becomes an HTTP POST to a configured target. This demo wires a +one-Worker secretary that summarises whatever payload you POST into +`s-inbox`, DMs the summary back, and forwards it to `s-outbox` — +which fires an outbound POST to a catcher you control. + +About 2 minutes. + +## How it differs from the other demos + +The other demos are "internal": every Stream uses `transport: local`, +events come from Workers calling `publish` (or `dm`). Here `s-inbox` +has `transport: webhook` (inbound) and `s-outbox` has `transport: +webhook` with an `outbound_url` (outbound). The Streams are otherwise +normal — same Subscriptions, same dispatch, same `read_events`. The +two new things are an HTTP path that turns POSTs into Events on a +Stream, and an emitter that turns Events on a Stream into POSTs. + +## Setup + +```bash +cd helix-org  # from the root of your checkout +make build +rm -rf /tmp/helix-webhook-envs /tmp/helix-webhook.db +``` + +## 1. Start the server (terminal 1) + +```bash +cd demos/webhook +../../bin/helix-org serve --db /tmp/helix-webhook.db --envs-dir /tmp/helix-webhook-envs +``` + +## 2. Start an outbound catcher (terminal 2) + +Anything that accepts POSTs and shows the body works.
The simplest +local option: + +```bash +nc -lk 9000 +``` + +`nc` won't reply with a proper HTTP response, so the outbound emitter +will time out (5s) and log a warning — but the request body still +hits the listener, which is all we care about for the demo. Swap in +[webhook.site](https://webhook.site/) or any other catcher if you +prefer. + +## 3. Bootstrap and open a chat (terminal 3) + +```bash +cd demos/webhook +../../bin/helix-org bootstrap --db /tmp/helix-webhook.db --envs-dir /tmp/helix-webhook-envs +../../bin/helix-org chat --new +``` + +## 4. Hire the secretary + +> Set up a secretary. Read `./roles/secretary.md` and create role +> `r-secretary` from it. Create Stream `s-inbox` with `transport: +> webhook` (inbound — no config). Create Stream `s-outbox` with +> `transport: webhook` and config +> `{"outbound_url": "http://localhost:9000"}` (outbound). Check the +> `create_stream` schema for the exact shape. Create Position +> `p-secretary` under `p-root` with that role. Hire AI worker +> `w-secretary` into it; identity is "You are the secretary." Grant +> `subscribe`, `dm`, and `publish`. Then `worker_log` on +> `w-secretary` until you see `=== exit: ok ===`. + +Once the chat reports the secretary is alive, the inbound webhook is +live at `http://localhost:8080/webhooks/s-inbox` (the URL path is the +Stream's ID, no separate token), and `s-outbox` is wired to POST to +`http://localhost:9000`. + +## 5. POST a payload + +In a fourth terminal: + +```bash +curl -X POST http://localhost:8080/webhooks/s-inbox \ + -H 'Content-Type: text/plain' \ + --data 'Mistral released a new 3B model this morning. Benchmarks beat Phi-3 on reasoning while fitting in a single 4090. Open weights, Apache 2.0.' +``` + +Back in chat: + +> `read_events` on `s-outbox` with `wait=30` until you see the +> secretary's summary land. Show me. 
+ +The cascade: webhook handler wraps the payload into the canonical +`Message` envelope and appends it to `s-inbox` → dispatcher wakes +the secretary, passing the parsed body → the secretary summarises +→ publishes the summary to `s-outbox` and DMs me → the outbound +emitter POSTs the appended `Message` JSON to `localhost:9000`, +which terminal 2 prints. The catcher sees something like +`{"from":"w-secretary","body":"(the summary text)"}` — every event in the +system is a `Message`, so outbound POSTs carry that envelope +verbatim. + +## 6. Stop + +Ctrl-C terminals 1 and 2. + +## What this shows + +- A Transport is a per-Stream choice, not a system mode. `s-inbox` + is inbound webhook, `s-outbox` is outbound webhook, the secretary's + DM back to the owner is on a `local` stream. They mix freely. +- The webhook handler and outbound emitter are glue. Once an Event + lands on a Stream — whether from a curl, a `publish` call, or a + `dm` — *everything* downstream is the existing local machinery. +- A single Stream can be inbound *or* outbound (or both, with + `outbound_url` set on a stream that also accepts POSTs to its + inbound path) — the dispatcher fires on every append regardless of + origin, so a webhook stream can be a one-way relay or a full + bidirectional bridge. + +## What this doesn't cover (yet) + +- **Auth.** The inbound URL exposes the Stream ID and nothing else — + anyone who knows or guesses it can post. Outbound POSTs are + unsigned. Production would want HMAC signatures, a bearer header, + or a separate signing secret on the Stream's transport config. +- **Headers, query string, content-type.** The handler currently + publishes only the request body, and the emitter currently sends + only the event body. Wrapping the full request in a structured + envelope is a small extension. +- **Retries on outbound failure.** A 5xx or timeout is logged and + dropped. A small retry-with-backoff would belong here.
diff --git a/helix-org/demos/webhook/roles/secretary.md b/helix-org/demos/webhook/roles/secretary.md new file mode 100644 index 0000000000..9d1a56c87e --- /dev/null +++ b/helix-org/demos/webhook/roles/secretary.md @@ -0,0 +1,33 @@ +# Secretary + +You read incoming webhook payloads on `s-inbox`, summarise each one, +DM the owner, and forward the summary to `s-outbox` for downstream +consumers. + +## Streams + +- `s-inbox` — inbound webhook events (`transport: webhook`). Each + event body is the raw POST payload — text, JSON, whatever the + caller sent. Subscribe on hire. +- `s-outbox` — outbound webhook stream (`transport: webhook` with + `outbound_url`). Anything you `publish` here is POSTed to the + configured URL. Don't subscribe; you only write to it. + +## Triggers + +- **On hire**: `subscribe` to `s-inbox`. Exit. +- **On any new event on `s-inbox`**: read the body, write a 1–2 + sentence summary, `dm` the summary to `w-owner`, then `publish` + the same summary to `s-outbox`. Exit. + +## Tools (MCP) + +- `subscribe` +- `dm` +- `publish` + +## Style + +One sentence. Two if the payload genuinely needs it. Lead with the +verb. Skip preamble — no "Here's a summary:", no "It looks like +…", no "I think …". Just the gist. diff --git a/helix-org/dispatch/dispatcher.go b/helix-org/dispatch/dispatcher.go new file mode 100644 index 0000000000..431982ce21 --- /dev/null +++ b/helix-org/dispatch/dispatcher.go @@ -0,0 +1,334 @@ +// Package dispatch turns a publish on a Stream into one activation per +// subscribed AI Worker. The server is the event bus; Workers are +// reactors. Each activation is a single fresh run of the Spawner — no +// long-running agent loops, no in-process state per worker beyond a +// per-Worker queue that coalesces overlapping events. +// +// Lifecycle: +// - hire_worker calls DispatchHire to fire a TriggerHire activation +// (the new Worker's first run). 
+// - publish calls Dispatch with the freshly-appended Event to fan it +// out to every subscribed AI Worker as a TriggerEvent activation. +// +// Both calls return immediately; activations run on goroutines. Each +// Worker has a single runner goroutine that drains a per-Worker +// queue: new triggers arriving while an activation is in flight are +// appended and processed as one coalesced batch when the current +// activation finishes. This collapses webhook cascades (e.g. five +// GitHub events fired by the worker's own action against a shared +// auth token) into a single follow-up activation, which keeps cost +// bounded under burst traffic. +package dispatch + +import ( + "bytes" + "context" + "errors" + "log/slog" + "net/http" + "sync" + "time" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// outboundTimeout caps how long an outbound webhook POST may take. A +// hung target must not stall the dispatcher. 5 seconds is generous for +// HTTP and short enough that local listeners (nc, requestbin) which +// don't speak HTTP back fail fast and the next event isn't blocked. +const outboundTimeout = 5 * time.Second + +// EmailEmitter is the subset of an email transport the dispatcher +// invokes for outbound emit on email-kind Streams. Defining it here +// keeps the dispatcher decoupled from any specific provider package. +type EmailEmitter interface { + Emit(ctx context.Context, event domain.Event) error +} + +// Dispatcher routes Events to subscribed AI Workers and runs the +// configured Spawner for each one. It also emits outbound webhook +// POSTs and outbound email sends for Streams whose Transport is +// configured for them. +type Dispatcher struct { + store *store.Store + spawner agent.Spawner + logger *slog.Logger + httpClient *http.Client + emailEmitter EmailEmitter + + // per-worker queues coalesce activations. Each is created on first + // use via sync.Map.LoadOrStore. 
+ queues sync.Map // map[domain.WorkerID]*workerQueue +} + +// workerQueue holds the pending triggers for one Worker plus the +// state needed to coordinate the single runner goroutine that drains +// them. New triggers arriving while running == true are appended to +// pending; the runner picks them up at the top of its next loop +// iteration and feeds them to the Spawner as a single batched +// activation. envPath is captured from the most recent enqueue — +// stable in practice (a Worker's environment doesn't move) but the +// last writer wins if it ever does. +type workerQueue struct { + mu sync.Mutex + pending []agent.Trigger + envPath string + running bool +} + +// New returns a Dispatcher. spawner may be nil to disable activation +// (useful for tests). logger must be non-nil. The internal HTTP client +// uses a fixed timeout suitable for outbound webhook POSTs; tests that +// need to substitute a fake transport can replace it via SetHTTPClient. +func New(s *store.Store, spawner agent.Spawner, logger *slog.Logger) *Dispatcher { + return &Dispatcher{ + store: s, + spawner: spawner, + logger: logger, + httpClient: &http.Client{Timeout: outboundTimeout}, + } +} + +// SetHTTPClient replaces the HTTP client used for outbound webhook +// POSTs. Intended for tests only. +func (d *Dispatcher) SetHTTPClient(c *http.Client) { d.httpClient = c } + +// SetEmailEmitter wires in the email transport's outbound emitter. +// Constructor injection isn't an option because the email transport +// also takes a Dispatcher (for inbound activation), so the wiring +// goes Dispatcher.New → Transport.New → Dispatcher.SetEmailEmitter. +// Nil is allowed (email-kind streams will then no-op on outbound). +func (d *Dispatcher) SetEmailEmitter(e EmailEmitter) { d.emailEmitter = e } + +// DispatchHire fires a hire-time activation for a freshly-created AI +// Worker. 
// DispatchHire fires a hire-time activation for a freshly-created AI
// Worker. Returns immediately; the activation runs on a goroutine with
// its own background context — independent of the HTTP request that
// triggered it, so the spawned process is not killed when the request
// completes. The context argument is accepted for signature symmetry
// but is not used.
// No-op if the Spawner is nil.
func (d *Dispatcher) DispatchHire(_ context.Context, workerID domain.WorkerID, envPath string) {
	if d.spawner == nil {
		return
	}
	d.enqueue(workerID, envPath, agent.Trigger{Kind: agent.TriggerHire})
}

// Dispatch fans an Event out to every AI Worker subscribed to its
// Stream (skipping the Worker that sourced the event) and hands the
// Event to emitOutbound for Streams with an outbound Transport.
//
// The store lookups in this function run on the caller's goroutine
// using ctx; the Spawner activations themselves are queued per Worker
// and run on a separate runner goroutine, so a slow activation does
// not stall the publish that triggered Dispatch.
//
// Outbound emit happens first and does not depend on the Spawner: a
// Dispatcher built with a nil Spawner still emits. A per-Worker queue
// serialises and coalesces overlapping subscriber activations within a
// Worker; outbound POSTs have no such ordering guarantee.
func (d *Dispatcher) Dispatch(ctx context.Context, e domain.Event) {
	d.emitOutbound(ctx, e)
	if d.spawner == nil {
		return
	}
	// Parse the canonical Message envelope once — every appended event
	// stores Message JSON in Body. A parse failure here means a
	// hand-poked or pre-migration event; surface the raw body and warn,
	// don't crash the activation.
	msg, err := e.Message()
	if err != nil {
		d.logger.Warn("dispatch: parse message", "event", e.ID, "err", err)
		msg = domain.Message{Body: e.Body}
	}
	subs, err := d.store.Subscriptions.ListForStream(ctx, e.StreamID)
	if err != nil {
		d.logger.Error("dispatch: list subscriptions", "stream", e.StreamID, "err", err)
		return
	}
	// Resolve the publishing Worker's kind once so every fan-out target
	// gets the same source_kind on its Trigger. Empty Source (system or
	// transport inbound) leaves SourceKind empty — agent.md treats that
	// as human-origin by default. A failed lookup is deliberately not
	// logged; it also leaves SourceKind empty.
	var sourceKind domain.WorkerKind
	if e.Source != "" {
		if sourceWorker, err := d.store.Workers.Get(ctx, e.Source); err == nil {
			sourceKind = sourceWorker.Kind()
		}
	}
	for _, sub := range subs {
		if sub.WorkerID == e.Source {
			continue // do not deliver the event back to its publisher
		}
		w, err := d.store.Workers.Get(ctx, sub.WorkerID)
		if err != nil {
			d.logger.Warn("dispatch: get worker", "worker", sub.WorkerID, "err", err)
			continue
		}
		if w.Kind() != domain.WorkerKindAI {
			continue // human Workers are not activated by the runtime
		}
		env, err := d.store.Environments.Get(ctx, sub.WorkerID)
		if err != nil {
			d.logger.Warn("dispatch: get environment", "worker", sub.WorkerID, "err", err)
			continue
		}
		trigger := agent.Trigger{
			Kind:       agent.TriggerEvent,
			EventID:    e.ID,
			StreamID:   e.StreamID,
			Source:     e.Source,
			SourceKind: sourceKind,
			Message:    msg, // full canonical envelope; rendered by the spawner into the activation prompt
			CreatedAt:  e.CreatedAt,
		}
		d.enqueue(sub.WorkerID, env.Path, trigger)
	}
}

// enqueue appends a trigger to the Worker's queue and starts the
// runner goroutine if one isn't already draining the queue. Returns
// immediately. The activation goroutine outlives the HTTP request
// that triggered enqueue, so it uses context.Background internally.
//
// The append and the running check happen under the same lock hold:
// either an active runner will see the new trigger on its next pass,
// or this call flips running to true and starts the runner itself.
func (d *Dispatcher) enqueue(workerID domain.WorkerID, envPath string, trigger agent.Trigger) {
	q := d.queueFor(workerID)
	q.mu.Lock()
	q.pending = append(q.pending, trigger)
	q.envPath = envPath // last writer wins; stable in practice
	if q.running {
		q.mu.Unlock()
		return
	}
	q.running = true
	q.mu.Unlock()
	// Runner outlives the HTTP request that triggered enqueue — it
	// uses context.Background internally for the same reason.
	go d.run(workerID, q)
}

// run drains the Worker's queue, calling the Spawner once per drain
// with however many triggers accumulated. Exits when an iteration
// finds the queue empty under the lock — at which point any later
// enqueue will see running == false and start a fresh runner.
func (d *Dispatcher) run(workerID domain.WorkerID, q *workerQueue) {
	for {
		q.mu.Lock()
		if len(q.pending) == 0 {
			// Clearing running under the same lock that guards pending
			// is what makes the hand-off to the next enqueue safe.
			q.running = false
			q.mu.Unlock()
			return
		}
		// Take ownership of the whole backlog and release the lock
		// before the (potentially long) Spawner call.
		batch := q.pending
		q.pending = nil
		envPath := q.envPath
		q.mu.Unlock()

		d.activate(context.Background(), workerID, envPath, batch)
	}
}

// activate is one synchronous Spawner call. The runner serialises
// these per-Worker so the Spawner is never invoked concurrently for
// the same Worker.
//
// batch must be non-empty: batch[0] is read for the log fields. run
// only calls activate after finding at least one pending trigger. A
// Spawner error that is (or wraps) context.Canceled is not reported
// as a failure; it falls through to the "done" log line.
func (d *Dispatcher) activate(ctx context.Context, workerID domain.WorkerID, envPath string, batch []agent.Trigger) {
	d.logger.Info("dispatch.activate.start",
		"worker", workerID,
		"trigger", batch[0].Kind,
		"triggers", len(batch),
		"event", batch[0].EventID,
	)
	err := d.spawner(ctx, workerID, envPath, batch)
	if err != nil && !errors.Is(err, context.Canceled) {
		d.logger.Warn("dispatch.activate.fail",
			"worker", workerID,
			"trigger", batch[0].Kind,
			"triggers", len(batch),
			"err", err,
		)
		return
	}
	d.logger.Info("dispatch.activate.done",
		"worker", workerID,
		"trigger", batch[0].Kind,
		"triggers", len(batch),
	)
}

// queueFor returns the workerQueue for workerID, creating and storing
// an empty one on first use. LoadOrStore ensures concurrent callers
// for the same Worker all receive the same queue.
func (d *Dispatcher) queueFor(workerID domain.WorkerID) *workerQueue {
	got, _ := d.queues.LoadOrStore(workerID, &workerQueue{})
	return got.(*workerQueue)
}
+// +// Events with empty Source ("system-emitted", typically inbound +// events from this transport's own webhook handler) are not +// re-emitted. Otherwise a bidirectional Stream (one that's both +// inbound and outbound on the same provider) would echo every +// inbound message straight back out to itself — never useful, often +// catastrophic. Worker-published events (Source != "") still emit. +// +// Runs on a goroutine with its own background context so a slow +// target never stalls the caller. +func (d *Dispatcher) emitOutbound(ctx context.Context, e domain.Event) { + if e.Source == "" { + return + } + stream, err := d.store.Streams.Get(ctx, e.StreamID) + if err != nil { + // Stream was deleted, or store error. Either way nothing to emit; + // the append-side code path has already logged anything material. + return + } + switch stream.Transport.Kind { + case domain.TransportWebhook: + cfg, err := stream.Transport.WebhookConfig() + if err != nil { + d.logger.Warn("dispatch.emit.config", "stream", e.StreamID, "err", err) + return + } + if cfg.OutboundURL == "" { + return + } + go d.postOutbound(cfg.OutboundURL, e) //nolint:gosec // intentional: the POST outlives the request that triggered Dispatch + case domain.TransportEmail: + if d.emailEmitter == nil { + return + } + go func() { //nolint:gosec // intentional: the send outlives the request that triggered Dispatch + if err := d.emailEmitter.Emit(context.Background(), e); err != nil { + d.logger.Warn("dispatch.emit.email", "stream", e.StreamID, "event", e.ID, "err", err) + } + }() + } +} + +// postOutbound is the synchronous body of emitOutbound, split out so +// tests can call it directly and so the goroutine has a clean entry +// point. It uses a fresh background context bounded by outboundTimeout +// (via the http.Client) — the originating request context is +// deliberately not propagated, since the POST must outlive the +// request. 
+func (d *Dispatcher) postOutbound(targetURL string, e domain.Event) { + req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, targetURL, bytes.NewBufferString(e.Body)) + if err != nil { + d.logger.Warn("dispatch.emit.build", "stream", e.StreamID, "url", targetURL, "err", err) + return + } + req.Header.Set("Content-Type", "application/octet-stream") + req.Header.Set("X-Helix-Stream", string(e.StreamID)) + req.Header.Set("X-Helix-Event", string(e.ID)) + resp, err := d.httpClient.Do(req) + if err != nil { + d.logger.Warn("dispatch.emit.do", "stream", e.StreamID, "url", targetURL, "err", err) + return + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode >= 400 { + d.logger.Warn("dispatch.emit.status", "stream", e.StreamID, "url", targetURL, "status", resp.StatusCode) + return + } + d.logger.Info("dispatch.emit.ok", "stream", e.StreamID, "url", targetURL, "status", resp.StatusCode) +} diff --git a/helix-org/dispatch/dispatcher_test.go b/helix-org/dispatch/dispatcher_test.go new file mode 100644 index 0000000000..e6c5ab24eb --- /dev/null +++ b/helix-org/dispatch/dispatcher_test.go @@ -0,0 +1,711 @@ +package dispatch_test + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "sort" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/dispatch" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/store/sqlite" +) + +// caught is one POST observed by the test catcher. +type caught struct { + body string + headers http.Header + method string + path string +} + +// catcher is an httptest.Server that records every POST body it sees +// and pushes it onto a channel so tests can synchronise. Closes are +// handled by t.Cleanup. 
+type catcher struct { + srv *httptest.Server + requests chan caught + status atomic.Int32 // status to reply with; defaults to 204 + delay atomic.Int64 // nanoseconds to sleep before responding +} + +func newCatcher(t *testing.T) *catcher { + t.Helper() + c := &catcher{requests: make(chan caught, 64)} + c.status.Store(http.StatusNoContent) + c.srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + _ = r.Body.Close() + // Snapshot headers up front so the channel send doesn't race with + // the response writer recycling the request. + headers := r.Header.Clone() + c.requests <- caught{body: string(body), headers: headers, method: r.Method, path: r.URL.Path} + if d := time.Duration(c.delay.Load()); d > 0 { + time.Sleep(d) + } + w.WriteHeader(int(c.status.Load())) + })) + t.Cleanup(c.srv.Close) + return c +} + +func (c *catcher) URL() string { return c.srv.URL } + +// waitFor blocks until one POST is received or the deadline elapses. +func (c *catcher) waitFor(t *testing.T, timeout time.Duration) caught { + t.Helper() + select { + case got := <-c.requests: + return got + case <-time.After(timeout): + t.Fatalf("catcher: no POST within %s", timeout) + return caught{} + } +} + +// expectNone asserts no POST arrives in the window. +func (c *catcher) expectNone(t *testing.T, window time.Duration) { + t.Helper() + select { + case got := <-c.requests: + t.Fatalf("expected no POST, got %+v", got) + case <-time.After(window): + } +} + +// newDispatcher returns a Dispatcher with a no-op spawner and a +// discard logger; callers wire in a fresh in-memory store. +func newDispatcher(t *testing.T) (*dispatch.Dispatcher, *store.Store) { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open store: %v", err) + } + d := dispatch.New(s, nil, slog.New(slog.NewTextHandler(io.Discard, nil))) + return d, s +} + +// recordedActivation captures one Spawner invocation for assertions. 
+type recordedActivation struct { + WorkerID domain.WorkerID + Triggers []agent.Trigger +} + +// newDispatcherWithSpawner returns a Dispatcher whose Spawner records +// each activation onto a buffered channel. Tests use this to assert +// who was activated (and not activated) for a given Dispatch call. +func newDispatcherWithSpawner(t *testing.T) (*dispatch.Dispatcher, *store.Store, <-chan recordedActivation) { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open store: %v", err) + } + rec := make(chan recordedActivation, 16) + spawner := agent.Spawner(func(_ context.Context, workerID domain.WorkerID, _ string, triggers []agent.Trigger) error { + rec <- recordedActivation{WorkerID: workerID, Triggers: triggers} + return nil + }) + d := dispatch.New(s, spawner, slog.New(slog.NewTextHandler(io.Discard, nil))) + return d, s, rec +} + +// drainActivations collects every recorded activation that lands within +// window, then returns them sorted by WorkerID for stable assertions. +// A negative timeout uses 200ms — enough for the dispatcher's +// goroutines to settle but short enough not to slow the suite. +func drainActivations(t *testing.T, rec <-chan recordedActivation, window time.Duration) []recordedActivation { + t.Helper() + if window <= 0 { + window = 200 * time.Millisecond + } + deadline := time.After(window) + var got []recordedActivation + for { + select { + case r := <-rec: + got = append(got, r) + case <-deadline: + sort.Slice(got, func(i, j int) bool { return got[i].WorkerID < got[j].WorkerID }) + return got + } + } +} + +// seedAIWorker creates an AIWorker assigned to a position and persists +// it. Position is fabricated with a per-test role so the worker can be +// constructed; tests that don't care about role/position structure use +// a single shared role row to avoid per-call boilerplate. 
+func seedAIWorker(t *testing.T, s *store.Store, workerID domain.WorkerID) { + t.Helper() + ctx := context.Background() + now := time.Now().UTC() + roleID := domain.RoleID("r-test") + if _, err := s.Roles.Get(ctx, roleID); err != nil { + role, err := domain.NewRole(roleID, "# Role: Test\nTest role.", now) + if err != nil { + t.Fatalf("new role: %v", err) + } + if err := s.Roles.Create(ctx, role); err != nil { + t.Fatalf("create role: %v", err) + } + } + posID := domain.PositionID("p-" + string(workerID)) + pos, err := domain.NewPosition(posID, roleID, nil) + if err != nil { + t.Fatalf("new position: %v", err) + } + if err := s.Positions.Create(ctx, pos); err != nil { + t.Fatalf("create position: %v", err) + } + w, err := domain.NewAIWorker(workerID, []domain.PositionID{posID}, "# "+string(workerID)+"\nTest persona.") + if err != nil { + t.Fatalf("new worker: %v", err) + } + if err := s.Workers.Create(ctx, w); err != nil { + t.Fatalf("create worker: %v", err) + } + env, err := domain.NewEnvironment(workerID, t.TempDir(), now) + if err != nil { + t.Fatalf("new env: %v", err) + } + if err := s.Environments.Create(ctx, env); err != nil { + t.Fatalf("create env: %v", err) + } +} + +// seedSubscription persists a Worker→Stream subscription. +func seedSubscription(t *testing.T, s *store.Store, workerID domain.WorkerID, streamID domain.StreamID) { + t.Helper() + sub, err := domain.NewSubscription(workerID, streamID, time.Now().UTC()) + if err != nil { + t.Fatalf("new subscription: %v", err) + } + if err := s.Subscriptions.Create(context.Background(), sub); err != nil { + t.Fatalf("create subscription: %v", err) + } +} + +// seedWebhookStream creates a Stream of the given Transport and returns +// its ID. 
+func seedWebhookStream(t *testing.T, s *store.Store, id domain.StreamID, transport domain.Transport) { + t.Helper() + stream, err := domain.NewStream(id, string(id), "", "w-owner", time.Now().UTC(), transport) + if err != nil { + t.Fatalf("new stream: %v", err) + } + if err := s.Streams.Create(context.Background(), stream); err != nil { + t.Fatalf("create stream: %v", err) + } +} + +// eventCounter monotonically generates unique IDs for test events, +// independent of the body. Bodies in some tests contain control bytes +// or non-ASCII that would otherwise leak into the X-Helix-Event header. +var eventCounter atomic.Uint64 + +// makeEvent builds a simple Event for dispatching with a stable +// header-safe ID. Source is set to a non-empty sentinel so emit +// runs (events with empty Source are treated as inbound and skipped +// by the dispatcher to avoid echo loops). +func makeEvent(t *testing.T, streamID domain.StreamID, body string) domain.Event { + t.Helper() + id := domain.EventID(fmt.Sprintf("e-%s-%d", streamID, eventCounter.Add(1))) + e, err := domain.NewEvent(id, streamID, "w-test", body, time.Now().UTC()) + if err != nil { + t.Fatalf("new event: %v", err) + } + return e +} + +// TestDispatchEmitsOutbound is the happy path: a webhook stream with +// an outbound_url POSTs the event body to the catcher when Dispatch +// runs. Headers identify the source stream and event. 
+func TestDispatchEmitsOutbound(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, s := newDispatcher(t) + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL()}) + seedWebhookStream(t, s, "s-out", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + e := makeEvent(t, "s-out", "hello world") + d.Dispatch(context.Background(), e) + + got := c.waitFor(t, 2*time.Second) + if got.body != "hello world" { + t.Fatalf("body = %q, want %q", got.body, "hello world") + } + if got.method != http.MethodPost { + t.Fatalf("method = %q, want POST", got.method) + } + if h := got.headers.Get("X-Helix-Stream"); h != "s-out" { + t.Fatalf("X-Helix-Stream = %q", h) + } + if h := got.headers.Get("X-Helix-Event"); h == "" { + t.Fatalf("X-Helix-Event missing") + } +} + +// TestDispatchSkipsLocalStream proves a TransportLocal stream emits +// nothing — local streams stay local even when the catcher exists. +func TestDispatchSkipsLocalStream(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, s := newDispatcher(t) + seedWebhookStream(t, s, "s-local", domain.LocalTransport()) + + d.Dispatch(context.Background(), makeEvent(t, "s-local", "should not leave")) + c.expectNone(t, 200*time.Millisecond) +} + +// TestDispatchSkipsWebhookWithoutURL proves an inbound-only webhook +// stream — same Kind but no outbound_url — does not emit. This is the +// existing inbound demo behaviour: still works after we added emit. +func TestDispatchSkipsWebhookWithoutURL(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, s := newDispatcher(t) + seedWebhookStream(t, s, "s-inbox", domain.Transport{Kind: domain.TransportWebhook}) + + d.Dispatch(context.Background(), makeEvent(t, "s-inbox", "inbound only")) + c.expectNone(t, 200*time.Millisecond) +} + +// TestDispatchHandlesMissingStream proves a publish on a stream that +// has been deleted (or never existed) doesn't panic — the dispatcher +// silently no-ops. 
+func TestDispatchHandlesMissingStream(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, _ := newDispatcher(t) + + // No stream seeded. Just dispatch. + d.Dispatch(context.Background(), makeEvent(t, "s-ghost", "vanished")) + c.expectNone(t, 100*time.Millisecond) +} + +// TestDispatchTolerates5xx proves a target returning a 5xx does not +// panic, hang, or block subsequent dispatches. +func TestDispatchTolerates5xx(t *testing.T) { + t.Parallel() + c := newCatcher(t) + c.status.Store(http.StatusInternalServerError) + d, s := newDispatcher(t) + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL()}) + seedWebhookStream(t, s, "s-flaky", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + d.Dispatch(context.Background(), makeEvent(t, "s-flaky", "boom")) + + // Target still received it even though it 500'd — the emitter logs + // and moves on, doesn't retry, doesn't crash. + got := c.waitFor(t, 2*time.Second) + if got.body != "boom" { + t.Fatalf("body = %q", got.body) + } + + // Second dispatch still works. + d.Dispatch(context.Background(), makeEvent(t, "s-flaky", "again")) + got2 := c.waitFor(t, 2*time.Second) + if got2.body != "again" { + t.Fatalf("body = %q", got2.body) + } +} + +// TestDispatchTolerates4xx proves a target returning a 4xx (e.g. the +// remote rejecting the payload) is also a non-fatal log-and-drop — +// same shape as 5xx but a different branch in the implementation. 
+func TestDispatchTolerates4xx(t *testing.T) { + t.Parallel() + c := newCatcher(t) + c.status.Store(http.StatusBadRequest) + d, s := newDispatcher(t) + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL()}) + seedWebhookStream(t, s, "s-rejecty", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + d.Dispatch(context.Background(), makeEvent(t, "s-rejecty", "nope")) + got := c.waitFor(t, 2*time.Second) + if got.body != "nope" { + t.Fatalf("body = %q", got.body) + } +} + +// TestDispatchTolerates_UnreachableHost proves an unreachable target +// (port closed) is logged-and-dropped with a bounded timeout — the +// dispatcher returns immediately, and a follow-up dispatch on a +// healthy stream still works. +func TestDispatchTolerates_UnreachableHost(t *testing.T) { + t.Parallel() + d, s := newDispatcher(t) + // 127.0.0.1:1 is reserved and reliably refuses connections. + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: "http://127.0.0.1:1/dead"}) + seedWebhookStream(t, s, "s-dead", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + // Use a tiny client timeout so the test runs fast. + d.SetHTTPClient(&http.Client{Timeout: 200 * time.Millisecond}) + + start := time.Now() + d.Dispatch(context.Background(), makeEvent(t, "s-dead", "void")) + if elapsed := time.Since(start); elapsed > 100*time.Millisecond { + t.Fatalf("Dispatch blocked for %s — should be async", elapsed) + } + + // Sleep past the client timeout to give the goroutine time to fail. + time.Sleep(400 * time.Millisecond) + // No assertion on the catcher (there is none); we're proving the + // dispatcher didn't crash and didn't block its caller. +} + +// TestDispatchHonoursClientTimeout proves a slow target hits the +// configured HTTP timeout without stalling the caller. 
+func TestDispatchHonoursClientTimeout(t *testing.T) { + t.Parallel() + c := newCatcher(t) + c.delay.Store(int64(2 * time.Second)) // longer than the client timeout + d, s := newDispatcher(t) + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL()}) + seedWebhookStream(t, s, "s-slow", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + d.SetHTTPClient(&http.Client{Timeout: 100 * time.Millisecond}) + + start := time.Now() + d.Dispatch(context.Background(), makeEvent(t, "s-slow", "patience")) + if elapsed := time.Since(start); elapsed > 100*time.Millisecond { + t.Fatalf("Dispatch blocked for %s", elapsed) + } + + // Catcher still receives the request before its delay; that's fine. + _ = c.waitFor(t, 2*time.Second) +} + +// TestDispatchConcurrent proves many parallel publishes all reach the +// target, in any order, with no deadlocks. +func TestDispatchConcurrent(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, s := newDispatcher(t) + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL()}) + seedWebhookStream(t, s, "s-stress", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + const n = 25 + var wg sync.WaitGroup + wg.Add(n) + for i := 0; i < n; i++ { + go func(i int) { + defer wg.Done() + d.Dispatch(context.Background(), makeEvent(t, "s-stress", "msg")) + }(i) + } + wg.Wait() + + deadline := time.After(5 * time.Second) + seen := 0 + for seen < n { + select { + case <-c.requests: + seen++ + case <-deadline: + t.Fatalf("only %d/%d POSTs received", seen, n) + } + } +} + +// TestDispatchBinaryPayload proves arbitrary bytes (including null +// bytes, UTF-8, newlines) round-trip verbatim — no implicit encoding +// or wrapping. 
+func TestDispatchBinaryPayload(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, s := newDispatcher(t) + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL()}) + seedWebhookStream(t, s, "s-bin", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + body := "líne 1 — α β γ\n\x00\nemoji: 🚀" + d.Dispatch(context.Background(), makeEvent(t, "s-bin", body)) + got := c.waitFor(t, 2*time.Second) + if got.body != body { + t.Fatalf("body round-trip mismatch:\n got: %q\nwant: %q", got.body, body) + } +} + +// TestDispatchInvalidStoredConfigDoesNotCrash exercises the defensive +// path where transport.Config is malformed at runtime (impossible via +// the normal NewStream path, since Validate rejects it — but a manual +// DB edit could create it). The dispatcher logs and continues. +func TestDispatchInvalidStoredConfigDoesNotCrash(t *testing.T) { + t.Parallel() + d, s := newDispatcher(t) + // Bypass NewStream's Validate by inserting the malformed Stream + // directly through the store. + bogus := domain.Stream{ + ID: "s-bogus", + Name: "bogus", + CreatedBy: "w-owner", + CreatedAt: time.Now().UTC(), + Transport: domain.Transport{Kind: domain.TransportWebhook, Config: []byte(`{not valid`)}, + } + if err := s.Streams.Create(context.Background(), bogus); err != nil { + t.Fatalf("create stream: %v", err) + } + + d.Dispatch(context.Background(), makeEvent(t, "s-bogus", "ignored")) + // No crash. Nothing else to assert; if we got here we passed. +} + +// TestDispatchRespectsStoreLookupErrors proves a store that errors on +// Streams.Get (rather than returning ErrNotFound) is handled — the +// dispatcher logs and returns; downstream subscriber fan-out still +// works for the next event. 
+func TestDispatchRespectsStoreLookupErrors(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, s := newDispatcher(t) + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL()}) + seedWebhookStream(t, s, "s-ok", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + // Dispatch on a missing stream first — should noop without affecting + // the next dispatch. + d.Dispatch(context.Background(), makeEvent(t, "s-missing", "lost")) + c.expectNone(t, 100*time.Millisecond) + + // Healthy dispatch still works. + d.Dispatch(context.Background(), makeEvent(t, "s-ok", "found")) + got := c.waitFor(t, 2*time.Second) + if got.body != "found" { + t.Fatalf("body = %q", got.body) + } +} + +// TestDispatchContentTypeAndPath proves the outbound POST hits the +// configured path and uses a generic content-type — the body is opaque +// so application/octet-stream is the safest default. +func TestDispatchContentTypeAndPath(t *testing.T) { + t.Parallel() + c := newCatcher(t) + d, s := newDispatcher(t) + // URL with a path so we can verify it's preserved. + cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: c.URL() + "/some/where"}) + seedWebhookStream(t, s, "s-path", domain.Transport{Kind: domain.TransportWebhook, Config: cfg}) + + d.Dispatch(context.Background(), makeEvent(t, "s-path", "x")) + got := c.waitFor(t, 2*time.Second) + if got.path != "/some/where" { + t.Fatalf("path = %q, want /some/where", got.path) + } + if ct := got.headers.Get("Content-Type"); ct != "application/octet-stream" { + t.Fatalf("Content-Type = %q", ct) + } +} + +// TestDispatchSkipsPublisher pins the rule that an AI Worker which +// publishes to a Stream they themselves are subscribed to is NOT +// re-activated on their own event. This is the cheapest available +// brake on broadcast cascades — without it, a single publish would +// activate the publisher in a loop. Other subscribers are still +// activated normally. 
+func TestDispatchSkipsPublisher(t *testing.T) { + t.Parallel() + d, s, rec := newDispatcherWithSpawner(t) + seedWebhookStream(t, s, "s-team", domain.Transport{Kind: domain.TransportLocal}) + seedAIWorker(t, s, "w-publisher") + seedAIWorker(t, s, "w-other") + seedSubscription(t, s, "w-publisher", "s-team") + seedSubscription(t, s, "w-other", "s-team") + + e, err := domain.NewMessageEvent( + "e-1", "s-team", "w-publisher", + domain.Message{From: "w-publisher", Body: "hello"}, + time.Now().UTC(), + ) + if err != nil { + t.Fatalf("new event: %v", err) + } + if err := s.Events.Append(context.Background(), e); err != nil { + t.Fatalf("append event: %v", err) + } + d.Dispatch(context.Background(), e) + + got := drainActivations(t, rec, 0) + if len(got) != 1 { + t.Fatalf("activations = %d, want 1; got %+v", len(got), got) + } + if got[0].WorkerID != "w-other" { + t.Fatalf("activated worker = %q, want w-other", got[0].WorkerID) + } +} + +// TestDispatchAttachesSourceKind pins that the dispatcher resolves the +// Source Worker's WorkerKind and threads it onto the Trigger so the +// activation prompt (rendered by spawner.renderTrigger) can surface +// "source_kind: ai" or "source_kind: human". This is the input that +// agent.md's "treat AI-origin as low priority" rule keys off of. 
+func TestDispatchAttachesSourceKind(t *testing.T) { + t.Parallel() + d, s, rec := newDispatcherWithSpawner(t) + seedWebhookStream(t, s, "s-team", domain.Transport{Kind: domain.TransportLocal}) + seedAIWorker(t, s, "w-publisher") + seedAIWorker(t, s, "w-other") + seedSubscription(t, s, "w-other", "s-team") + + e, err := domain.NewMessageEvent( + "e-2", "s-team", "w-publisher", + domain.Message{From: "w-publisher", Body: "ping"}, + time.Now().UTC(), + ) + if err != nil { + t.Fatalf("new event: %v", err) + } + if err := s.Events.Append(context.Background(), e); err != nil { + t.Fatalf("append event: %v", err) + } + d.Dispatch(context.Background(), e) + + got := drainActivations(t, rec, 0) + if len(got) != 1 { + t.Fatalf("activations = %d, want 1", len(got)) + } + if n := len(got[0].Triggers); n != 1 { + t.Fatalf("triggers = %d, want 1", n) + } + if k := got[0].Triggers[0].SourceKind; k != domain.WorkerKindAI { + t.Fatalf("SourceKind = %q, want %q", k, domain.WorkerKindAI) + } +} + +// TestDispatchCoalescesEvents pins the cost-saving rule that drove this +// design: while one activation is in flight for a Worker, any further +// events that arrive on Streams that Worker subscribes to are +// appended to a per-Worker queue and delivered to the Spawner as one +// batched activation when the current one finishes — not five +// separate fresh-claude runs. +// +// Shape of the test: the spawner blocks on the very first call so we +// can publish more events behind it, then we release it and assert +// the second Spawner call receives all the events that queued during +// the block as one slice. +func TestDispatchCoalescesEvents(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open store: %v", err) + } + rec := make(chan recordedActivation, 8) + + // First Spawner call gates on `release` so the test can stack more + // events behind it; subsequent calls return immediately. 
The atomic + // counter is what makes "first" deterministic across the runner's + // retry loop. + started := make(chan struct{}) + release := make(chan struct{}) + var calls atomic.Int32 + spawner := agent.Spawner(func(_ context.Context, workerID domain.WorkerID, _ string, triggers []agent.Trigger) error { + n := calls.Add(1) + if n == 1 { + close(started) + <-release + } + // Copy the slice so a later mutation in the dispatcher (it doesn't + // today, but defensive) can't race with the assertion read. + copied := make([]agent.Trigger, len(triggers)) + copy(copied, triggers) + rec <- recordedActivation{WorkerID: workerID, Triggers: copied} + return nil + }) + d := dispatch.New(s, spawner, slog.New(slog.NewTextHandler(io.Discard, nil))) + + seedWebhookStream(t, s, "s-team", domain.Transport{Kind: domain.TransportLocal}) + seedAIWorker(t, s, "w-eng") + seedSubscription(t, s, "w-eng", "s-team") + + publish := func(id, body string) { + ev, err := domain.NewMessageEvent( + domain.EventID(id), "s-team", "w-other", + domain.Message{From: "w-other", Body: body}, + time.Now().UTC(), + ) + if err != nil { + t.Fatalf("new event: %v", err) + } + if err := s.Events.Append(context.Background(), ev); err != nil { + t.Fatalf("append event: %v", err) + } + d.Dispatch(context.Background(), ev) + } + + // First event kicks off activation #1; the spawner blocks inside it. + publish("e-1", "first") + <-started + + // Three more events while activation #1 is held. These should NOT + // each trigger a fresh Spawner call — they should pool in the queue + // and be drained as one batch when activation #1 returns. + publish("e-2", "two") + publish("e-3", "three") + publish("e-4", "four") + + // Give the dispatcher's enqueue goroutines a tick to land. The lock + // inside enqueue is uncontended once Dispatch returns, but the + // goroutines that resolve subs/env can still be in flight. + time.Sleep(100 * time.Millisecond) + + // Release the first activation; the runner now drains the batch. 
+ close(release) + + // Two Spawner calls total: one with [e-1], one with [e-2, e-3, e-4]. + a1 := waitForActivation(t, rec, 2*time.Second) + a2 := waitForActivation(t, rec, 2*time.Second) + + if len(a1.Triggers) != 1 || a1.Triggers[0].EventID != "e-1" { + t.Fatalf("activation #1 = %d trigger(s) %+v, want [e-1]", len(a1.Triggers), eventIDs(a1.Triggers)) + } + if len(a2.Triggers) != 3 { + t.Fatalf("activation #2 = %d triggers %+v, want 3", len(a2.Triggers), eventIDs(a2.Triggers)) + } + wantIDs := []domain.EventID{"e-2", "e-3", "e-4"} + for i, want := range wantIDs { + if a2.Triggers[i].EventID != want { + t.Fatalf("activation #2 trigger order = %+v, want %+v", eventIDs(a2.Triggers), wantIDs) + } + } + + // And no third activation is fired — the runner exits cleanly when + // the queue drains. + select { + case extra := <-rec: + t.Fatalf("unexpected third activation: %+v", extra) + case <-time.After(150 * time.Millisecond): + } + + if got := calls.Load(); got != 2 { + t.Fatalf("Spawner calls = %d, want 2", got) + } +} + +// waitForActivation pulls one recordedActivation off rec or fails the +// test on timeout. Centralised so the coalescing test reads cleanly. +func waitForActivation(t *testing.T, rec <-chan recordedActivation, timeout time.Duration) recordedActivation { + t.Helper() + select { + case got := <-rec: + return got + case <-time.After(timeout): + t.Fatalf("no activation within %s", timeout) + return recordedActivation{} + } +} + +func eventIDs(ts []agent.Trigger) []domain.EventID { + out := make([]domain.EventID, len(ts)) + for i, t := range ts { + out[i] = t.EventID + } + return out +} diff --git a/helix-org/domain/config.go b/helix-org/domain/config.go new file mode 100644 index 0000000000..41fe4bcf76 --- /dev/null +++ b/helix-org/domain/config.go @@ -0,0 +1,49 @@ +package domain + +import ( + "errors" + "strings" + "time" +) + +// Config is one operational-config row: a key, an opaque JSON value, +// and audit metadata. 
Keys are flat dot-namespaced strings owned by +// subsystems (e.g. "claude.bin", "transport.postmark"). Values are +// stored as JSON strings — schema validation is the registry's +// concern, not the storage layer's. +// +// Operational config is set through the helix-org config CLI, never +// through MCP. See design/config.md for the access-pattern split. +type Config struct { + Key string + Value string // JSON-encoded + UpdatedAt time.Time + UpdatedBy WorkerID // empty until auth lands +} + +// NewConfig validates and constructs a Config. Key must be non-empty, +// dot-namespaced (no spaces, no leading/trailing dots), and the value +// must be non-empty. Value JSON shape is the registry's responsibility. +func NewConfig(key, value string, updatedAt time.Time, updatedBy WorkerID) (Config, error) { + if key == "" { + return Config{}, errors.New("config key is empty") + } + if strings.ContainsAny(key, " \t\n") { + return Config{}, errors.New("config key contains whitespace") + } + if strings.HasPrefix(key, ".") || strings.HasSuffix(key, ".") { + return Config{}, errors.New("config key has leading or trailing dot") + } + if value == "" { + return Config{}, errors.New("config value is empty") + } + if updatedAt.IsZero() { + return Config{}, errors.New("config updatedAt is zero") + } + return Config{ + Key: key, + Value: value, + UpdatedAt: updatedAt.UTC(), + UpdatedBy: updatedBy, + }, nil +} diff --git a/helix-org/domain/config_test.go b/helix-org/domain/config_test.go new file mode 100644 index 0000000000..d064a89d58 --- /dev/null +++ b/helix-org/domain/config_test.go @@ -0,0 +1,54 @@ +package domain + +import ( + "strings" + "testing" + "time" +) + +func TestNewConfig(t *testing.T) { + t.Parallel() + now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC) + + cases := []struct { + name string + key string + value string + updatedAt time.Time + wantErr string + }{ + {name: "valid simple", key: "claude.bin", value: `"claude"`, updatedAt: now}, + {name: "valid object", 
key: "transport.postmark", value: `{"token":"abc"}`, updatedAt: now}, + {name: "empty key", key: "", value: `"x"`, updatedAt: now, wantErr: "key is empty"}, + {name: "whitespace in key", key: "claude bin", value: `"x"`, updatedAt: now, wantErr: "whitespace"}, + {name: "leading dot", key: ".claude.bin", value: `"x"`, updatedAt: now, wantErr: "leading or trailing dot"}, + {name: "trailing dot", key: "claude.bin.", value: `"x"`, updatedAt: now, wantErr: "leading or trailing dot"}, + {name: "empty value", key: "claude.bin", value: "", updatedAt: now, wantErr: "value is empty"}, + {name: "zero updatedAt", key: "claude.bin", value: `"x"`, updatedAt: time.Time{}, wantErr: "updatedAt is zero"}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + c, err := NewConfig(tc.key, tc.value, tc.updatedAt, "") + if tc.wantErr != "" { + if err == nil { + t.Fatalf("NewConfig = nil, want error containing %q", tc.wantErr) + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("NewConfig = %q, want error containing %q", err, tc.wantErr) + } + return + } + if err != nil { + t.Fatalf("NewConfig: %v", err) + } + if c.Key != tc.key || c.Value != tc.value { + t.Fatalf("got %+v", c) + } + if !c.UpdatedAt.Equal(tc.updatedAt) { + t.Fatalf("updatedAt = %v, want %v", c.UpdatedAt, tc.updatedAt) + } + }) + } +} diff --git a/helix-org/domain/enum.go b/helix-org/domain/enum.go new file mode 100644 index 0000000000..cf881bf7c7 --- /dev/null +++ b/helix-org/domain/enum.go @@ -0,0 +1,23 @@ +package domain + +import ( + "strconv" + "strings" +) + +// QuotedList renders a slice of string-typed values as a comma-separated +// list of quoted strings, e.g. `"human", "ai"`. The output is sized for +// a validation-error message that follows "(valid: …)" so callers can +// drop it inline without further formatting. 
+// +// The generic constraint accepts any string-derived type — WorkerKind, +// TransportKind, ToolName — so a single helper covers every "what are +// the valid values?" formatting site without each domain type needing +// its own bespoke list-formatter. +func QuotedList[T ~string](vals []T) string { + parts := make([]string, len(vals)) + for i, v := range vals { + parts[i] = strconv.Quote(string(v)) + } + return strings.Join(parts, ", ") +} diff --git a/helix-org/domain/enum_test.go b/helix-org/domain/enum_test.go new file mode 100644 index 0000000000..3612e56c47 --- /dev/null +++ b/helix-org/domain/enum_test.go @@ -0,0 +1,48 @@ +package domain_test + +import ( + "strings" + "testing" + + "github.com/helixml/helix-org/domain" +) + +func TestWorkerKindValidateAcceptsKnown(t *testing.T) { + t.Parallel() + for _, k := range domain.WorkerKindValues() { + if err := k.Validate(); err != nil { + t.Errorf("Validate(%q) = %v, want nil", k, err) + } + } +} + +// TestWorkerKindValidateRejectsUnknownWithList pins the contract a +// self-correcting agent relies on: when validation fails, the message +// contains the offending value AND the valid options, so the next call +// can succeed without reading source. 
+func TestWorkerKindValidateRejectsUnknownWithList(t *testing.T) { + t.Parallel() + err := domain.WorkerKind("claude").Validate() + if err == nil { + t.Fatal("Validate(claude) = nil, want error") + } + for _, want := range []string{"claude", `"human"`, `"ai"`} { + if !strings.Contains(err.Error(), want) { + t.Errorf("err = %q, missing %q", err, want) + } + } +} + +func TestQuotedListEmptyAndSingleAndMany(t *testing.T) { + t.Parallel() + type S string + if got := domain.QuotedList([]S{}); got != "" { + t.Errorf("empty = %q, want \"\"", got) + } + if got := domain.QuotedList([]S{"only"}); got != `"only"` { + t.Errorf("single = %q", got) + } + if got := domain.QuotedList([]S{"a", "b", "c"}); got != `"a", "b", "c"` { + t.Errorf("many = %q", got) + } +} diff --git a/helix-org/domain/environment.go b/helix-org/domain/environment.go new file mode 100644 index 0000000000..7a9bfd6217 --- /dev/null +++ b/helix-org/domain/environment.go @@ -0,0 +1,35 @@ +package domain + +import ( + "errors" + "time" +) + +// Environment is a Worker's workspace — a directory on disk where the +// agent's files live. The system records the directory path and the +// Worker it belongs to; it does not own the file contents. The hiring +// manager populates the directory before calling hire_worker, and the +// agent manages their own files from then on. +type Environment struct { + WorkerID WorkerID + Path string + CreatedAt time.Time +} + +// NewEnvironment validates and constructs an Environment. 
+func NewEnvironment(workerID WorkerID, path string, createdAt time.Time) (Environment, error) { + if workerID == "" { + return Environment{}, errors.New("environment workerId is empty") + } + if path == "" { + return Environment{}, errors.New("environment path is empty") + } + if createdAt.IsZero() { + return Environment{}, errors.New("environment createdAt is zero") + } + return Environment{ + WorkerID: workerID, + Path: path, + CreatedAt: createdAt.UTC(), + }, nil +} diff --git a/helix-org/domain/environment_test.go b/helix-org/domain/environment_test.go new file mode 100644 index 0000000000..7e6c194679 --- /dev/null +++ b/helix-org/domain/environment_test.go @@ -0,0 +1,38 @@ +package domain + +import ( + "testing" + "time" +) + +func TestNewEnvironment(t *testing.T) { + t.Parallel() + + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + cases := []struct { + name string + worker WorkerID + path string + ts time.Time + wantErr bool + }{ + {"valid", "w-1", "/srv/env/w-1", now, false}, + {"empty worker", "", "/srv/env/w-1", now, true}, + {"empty path", "w-1", "", now, true}, + {"zero time", "w-1", "/srv/env/w-1", time.Time{}, true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + env, err := NewEnvironment(tc.worker, tc.path, tc.ts) + gotErr := err != nil + if gotErr != tc.wantErr { + t.Fatalf("NewEnvironment error = %v, wantErr = %v", err, tc.wantErr) + } + if !gotErr && (env.WorkerID != tc.worker || env.Path != tc.path) { + t.Fatalf("env = %+v", env) + } + }) + } +} diff --git a/helix-org/domain/event.go b/helix-org/domain/event.go new file mode 100644 index 0000000000..6d222439df --- /dev/null +++ b/helix-org/domain/event.go @@ -0,0 +1,42 @@ +package domain + +import ( + "errors" + "time" +) + +// Event is a single entry on a Stream. Events are markdown; the system +// does not impose a schema on content. Source is the Worker that +// emitted the event (empty means a system-emitted event such as a +// time tick). 
+type Event struct { + ID EventID + StreamID StreamID + Source WorkerID + Body string + CreatedAt time.Time +} + +// NewEvent validates and constructs an Event. +// Pass source = "" for system-emitted events. +func NewEvent(id EventID, streamID StreamID, source WorkerID, body string, createdAt time.Time) (Event, error) { + if id == "" { + return Event{}, errors.New("event id is empty") + } + if streamID == "" { + return Event{}, errors.New("event streamId is empty") + } + if body == "" { + return Event{}, errors.New("event body is empty") + } + if createdAt.IsZero() { + return Event{}, errors.New("event createdAt is zero") + } + return Event{ + ID: id, + StreamID: streamID, + Source: source, + Body: body, + CreatedAt: createdAt.UTC(), + }, nil +} diff --git a/helix-org/domain/event_test.go b/helix-org/domain/event_test.go new file mode 100644 index 0000000000..96dbe5be48 --- /dev/null +++ b/helix-org/domain/event_test.go @@ -0,0 +1,42 @@ +package domain + +import ( + "testing" + "time" +) + +func TestNewEvent(t *testing.T) { + t.Parallel() + + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + cases := []struct { + name string + id EventID + stream StreamID + source WorkerID + body string + ts time.Time + wantErr bool + }{ + {"valid worker event", "e-1", "s-1", "w-1", "hello", now, false}, + {"valid system event", "e-1", "s-1", "", "it is 9am monday", now, false}, + {"empty id", "", "s-1", "w-1", "hello", now, true}, + {"empty stream", "e-1", "", "w-1", "hello", now, true}, + {"empty body", "e-1", "s-1", "w-1", "", now, true}, + {"zero time", "e-1", "s-1", "w-1", "hello", time.Time{}, true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + e, err := NewEvent(tc.id, tc.stream, tc.source, tc.body, tc.ts) + gotErr := err != nil + if gotErr != tc.wantErr { + t.Fatalf("NewEvent error = %v, wantErr = %v", err, tc.wantErr) + } + if !gotErr && e.Body != tc.body { + t.Fatalf("body = %q", e.Body) + } + }) + } +} diff --git 
a/helix-org/domain/grant.go b/helix-org/domain/grant.go new file mode 100644 index 0000000000..fb4150822c --- /dev/null +++ b/helix-org/domain/grant.go @@ -0,0 +1,28 @@ +package domain + +import "errors" + +// ToolGrant records that a Worker holds a Tool. Granularity comes from +// the design of tools, not from gating one tool with a scope — if a +// Worker should only be able to hire a CFO, that's a CFO-specific tool +// or a role-prompt constraint, not a per-grant rule the runtime +// enforces. +type ToolGrant struct { + ID GrantID + WorkerID WorkerID + ToolName ToolName +} + +// NewToolGrant validates and constructs a ToolGrant. +func NewToolGrant(id GrantID, workerID WorkerID, toolName ToolName) (ToolGrant, error) { + if id == "" { + return ToolGrant{}, errors.New("grant id is empty") + } + if workerID == "" { + return ToolGrant{}, errors.New("grant worker id is empty") + } + if toolName == "" { + return ToolGrant{}, errors.New("grant tool name is empty") + } + return ToolGrant{ID: id, WorkerID: workerID, ToolName: toolName}, nil +} diff --git a/helix-org/domain/grant_test.go b/helix-org/domain/grant_test.go new file mode 100644 index 0000000000..766b1ec994 --- /dev/null +++ b/helix-org/domain/grant_test.go @@ -0,0 +1,34 @@ +package domain + +import "testing" + +func TestNewToolGrant(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + id GrantID + workerID WorkerID + toolName ToolName + wantErr bool + }{ + {"valid", "g-1", "w-1", "hire_worker", false}, + {"empty id", "", "w-1", "ping", true}, + {"empty worker", "g-1", "", "ping", true}, + {"empty tool", "g-1", "w-1", "", true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + g, err := NewToolGrant(tc.id, tc.workerID, tc.toolName) + gotErr := err != nil + if gotErr != tc.wantErr { + t.Fatalf("NewToolGrant error = %v, wantErr = %v", err, tc.wantErr) + } + if !gotErr && g.ID != tc.id { + t.Fatalf("grant.ID = %q, want %q", g.ID, tc.id) + } + }) + } +} 
diff --git a/helix-org/domain/id.go b/helix-org/domain/id.go new file mode 100644 index 0000000000..66109ff0fa --- /dev/null +++ b/helix-org/domain/id.go @@ -0,0 +1,11 @@ +package domain + +type ( + RoleID string + PositionID string + WorkerID string + GrantID string + ToolName string + StreamID string + EventID string +) diff --git a/helix-org/domain/message.go b/helix-org/domain/message.go new file mode 100644 index 0000000000..5468737d09 --- /dev/null +++ b/helix-org/domain/message.go @@ -0,0 +1,99 @@ +package domain + +import ( + "encoding/json" + "fmt" + "time" +) + +// Message is the canonical Stream payload. It is always carried as +// JSON in Event.Body — the system stores no other shape. Transports +// translate between provider-native (an email Body, a Slack message, +// a webhook POST body) and Message at the transport boundary, so a +// Worker reading any Stream sees the same envelope. +// +// Identity convention: +// - From / To carry transport-native identifiers verbatim. WorkerID +// ("w-alice") when the originator is a known internal Worker; +// transport-native otherwise ("alice@example.com", "U0123ABCD", +// "+15551234567", "thermo-3"). No prefixes — Stream context plus +// value shape is enough to disambiguate. +// - Empty From means "no human or named originator" — typical for +// data feeds (RSS, alerts, cron, IoT). +// +// All fields except Body are optional in practice; an event with only +// Body set is a valid plain text message. 
type Message struct {
	// From and To hold transport-native identifiers, verbatim.
	From string   `json:"from,omitempty"`
	To   []string `json:"to,omitempty"`

	Subject         string `json:"subject,omitempty"`
	Body            string `json:"body,omitempty"`
	BodyContentType string `json:"body_content_type,omitempty"`

	// Threading identifiers.
	ThreadID  string `json:"thread_id,omitempty"`
	InReplyTo string `json:"in_reply_to,omitempty"`
	MessageID string `json:"message_id,omitempty"`

	Attachments []Attachment `json:"attachments,omitempty"`

	// Extra is raw JSON that is written into the encoded form as-is.
	Extra json.RawMessage `json:"extra,omitempty"`
}

// Attachment describes a file the message refers to by location
// rather than by content: a name, an optional content type, an
// optional URL, and an optional size. The bytes themselves are never
// carried. Only Filename is always present in the encoded form.
type Attachment struct {
	Filename    string `json:"filename"`
	ContentType string `json:"content_type,omitempty"`
	URL         string `json:"url,omitempty"`
	SizeBytes   int64  `json:"size_bytes,omitempty"`
}

// Encode renders the Message as the JSON string stored in Event.Body.
// Zero-valued optional fields are omitted, so an empty Message encodes
// to "{}". A json.Marshal failure is returned wrapped with the
// "encode message" prefix, alongside an empty string.
func (m Message) Encode() (string, error) {
	raw, err := json.Marshal(m)
	if err == nil {
		return string(raw), nil
	}
	return "", fmt.Errorf("encode message: %w", err)
}

// MustEncode returns the same string as Encode but panics with
// Encode's error instead of returning it. Use it only where the
// inputs are known to encode cleanly.
func (m Message) MustEncode() string {
	encoded, err := m.Encode()
	if err == nil {
		return encoded
	}
	panic(err)
}

// DecodeMessage parses the canonical JSON form back into a Message.
// Returns an error on malformed JSON; missing fields are zero-valued
// (no required-field validation here — Workers may emit Messages
// with only Body set, and that's valid).
+func DecodeMessage(encoded string) (Message, error) { + var m Message + if err := json.Unmarshal([]byte(encoded), &m); err != nil { + return Message{}, fmt.Errorf("decode message: %w", err) + } + return m, nil +} + +// Message parses the Event's Body as a canonical Message. Every Event +// in the system carries Message JSON in its Body, so this should +// always succeed; an error indicates a bug or a hand-poked database. +func (e Event) Message() (Message, error) { + return DecodeMessage(e.Body) +} + +// NewMessageEvent is the standard way to construct an Event whose +// Body holds a Message. It encodes the Message and delegates field +// validation to NewEvent. +func NewMessageEvent(id EventID, streamID StreamID, source WorkerID, msg Message, createdAt time.Time) (Event, error) { + body, err := msg.Encode() + if err != nil { + return Event{}, err + } + return NewEvent(id, streamID, source, body, createdAt) +} diff --git a/helix-org/domain/message_test.go b/helix-org/domain/message_test.go new file mode 100644 index 0000000000..4ce48bba9b --- /dev/null +++ b/helix-org/domain/message_test.go @@ -0,0 +1,143 @@ +package domain + +import ( + "strings" + "testing" + "time" +) + +func TestMessageRoundTrip(t *testing.T) { + t.Parallel() + msg := Message{ + From: "w-alice", + To: []string{"w-bob"}, + Subject: "hi", + Body: "hello\nthere", + BodyContentType: "text/plain", + ThreadID: "t-123", + InReplyTo: "m-prev", + MessageID: "m-now", + Attachments: []Attachment{ + {Filename: "x.pdf", ContentType: "application/pdf", URL: "https://e.com/x", SizeBytes: 1024}, + }, + } + encoded, err := msg.Encode() + if err != nil { + t.Fatalf("Encode: %v", err) + } + got, err := DecodeMessage(encoded) + if err != nil { + t.Fatalf("DecodeMessage: %v", err) + } + if got.From != msg.From || got.Body != msg.Body || len(got.To) != 1 || got.To[0] != "w-bob" { + t.Fatalf("round-trip mismatch: %+v", got) + } + if got.ThreadID != msg.ThreadID || got.InReplyTo != msg.InReplyTo || got.MessageID != 
msg.MessageID { + t.Fatalf("threading mismatch: %+v", got) + } + if len(got.Attachments) != 1 || got.Attachments[0].Filename != "x.pdf" { + t.Fatalf("attachment lost: %+v", got.Attachments) + } +} + +func TestMessageMinimal(t *testing.T) { + t.Parallel() + // Only Body set is valid — most internal events look like this. + msg := Message{Body: "hello"} + encoded, err := msg.Encode() + if err != nil { + t.Fatalf("Encode: %v", err) + } + if !strings.Contains(encoded, `"body":"hello"`) { + t.Fatalf("expected body field in JSON, got %q", encoded) + } + if strings.Contains(encoded, `"from"`) || strings.Contains(encoded, `"to"`) { + t.Fatalf("unset fields should be omitted, got %q", encoded) + } +} + +func TestMessageEmpty(t *testing.T) { + t.Parallel() + // Empty Message — pure trigger pulse — is also valid; encodes to "{}". + msg := Message{} + encoded, err := msg.Encode() + if err != nil { + t.Fatalf("Encode: %v", err) + } + if encoded != "{}" { + t.Fatalf("empty message = %q, want %q", encoded, "{}") + } +} + +func TestDecodeMessageMalformed(t *testing.T) { + t.Parallel() + cases := []string{ + ``, + `not json`, + `{`, + `[`, + } + for _, c := range cases { + t.Run(c, func(t *testing.T) { + t.Parallel() + if _, err := DecodeMessage(c); err == nil { + t.Fatalf("DecodeMessage(%q) = nil, want error", c) + } + }) + } +} + +func TestEventMessage(t *testing.T) { + t.Parallel() + msg := Message{From: "w-alice", Body: "hi"} + body, err := msg.Encode() + if err != nil { + t.Fatalf("Encode: %v", err) + } + e, err := NewEvent("e-1", "s-1", "w-alice", body, time.Now().UTC()) + if err != nil { + t.Fatalf("NewEvent: %v", err) + } + got, err := e.Message() + if err != nil { + t.Fatalf("e.Message(): %v", err) + } + if got.From != "w-alice" || got.Body != "hi" { + t.Fatalf("got %+v", got) + } +} + +func TestNewMessageEvent(t *testing.T) { + t.Parallel() + now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC) + msg := Message{From: "w-alice", To: []string{"w-bob"}, Body: "hi"} + e, err 
:= NewMessageEvent("e-1", "s-dm", "w-alice", msg, now) + if err != nil { + t.Fatalf("NewMessageEvent: %v", err) + } + if e.ID != "e-1" || e.StreamID != "s-dm" || e.Source != "w-alice" { + t.Fatalf("event fields wrong: %+v", e) + } + parsed, err := e.Message() + if err != nil { + t.Fatalf("e.Message(): %v", err) + } + if parsed.From != "w-alice" || parsed.Body != "hi" || len(parsed.To) != 1 { + t.Fatalf("parsed wrong: %+v", parsed) + } +} + +func TestNewMessageEventRejectsEmptyEncoding(t *testing.T) { + t.Parallel() + // An empty Message encodes to "{}" — non-empty as a string, so + // NewEvent's empty-body check passes. This documents that "{}" is a + // valid (if degenerate) Body — pure trigger events. + now := time.Now().UTC() + e, err := NewMessageEvent("e-1", "s-1", "", Message{}, now) + if err != nil { + t.Fatalf("NewMessageEvent(empty msg): %v", err) + } + if e.Body != "{}" { + t.Fatalf("empty Message body = %q, want %q", e.Body, "{}") + } +} diff --git a/helix-org/domain/position.go b/helix-org/domain/position.go new file mode 100644 index 0000000000..80559f92e2 --- /dev/null +++ b/helix-org/domain/position.go @@ -0,0 +1,37 @@ +package domain + +import "errors" + +// Position is a concrete slot in the org chart, instantiating a Role. +// ParentID is nil for the root position. +type Position struct { + ID PositionID + RoleID RoleID + ParentID *PositionID +} + +// NewPosition validates and constructs a Position. +// Pass parentID = nil for the root position. 
+func NewPosition(id PositionID, roleID RoleID, parentID *PositionID) (Position, error) { + if id == "" { + return Position{}, errors.New("position id is empty") + } + if roleID == "" { + return Position{}, errors.New("position role id is empty") + } + var parent *PositionID + if parentID != nil { + if *parentID == "" { + return Position{}, errors.New("parent position id is empty") + } + if *parentID == id { + return Position{}, errors.New("position cannot be its own parent") + } + p := *parentID + parent = &p + } + return Position{ID: id, RoleID: roleID, ParentID: parent}, nil +} + +// IsRoot reports whether the position has no parent. +func (p Position) IsRoot() bool { return p.ParentID == nil } diff --git a/helix-org/domain/position_test.go b/helix-org/domain/position_test.go new file mode 100644 index 0000000000..1701831c77 --- /dev/null +++ b/helix-org/domain/position_test.go @@ -0,0 +1,58 @@ +package domain + +import "testing" + +func positionID(s string) *PositionID { + p := PositionID(s) + return &p +} + +func TestNewPosition(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + id PositionID + roleID RoleID + parentID *PositionID + wantErr bool + }{ + {"root", "p-root", "r-owner", nil, false}, + {"child", "p-ceo", "r-ceo", positionID("p-root"), false}, + {"empty id", "", "r-ceo", nil, true}, + {"empty role id", "p-ceo", "", nil, true}, + {"empty parent", "p-ceo", "r-ceo", positionID(""), true}, + {"self as parent", "p-ceo", "r-ceo", positionID("p-ceo"), true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + pos, err := NewPosition(tc.id, tc.roleID, tc.parentID) + gotErr := err != nil + if gotErr != tc.wantErr { + t.Fatalf("NewPosition error = %v, wantErr = %v", err, tc.wantErr) + } + if !gotErr && pos.ID != tc.id { + t.Fatalf("pos.ID = %q, want %q", pos.ID, tc.id) + } + if !gotErr && tc.parentID == nil && !pos.IsRoot() { + t.Fatalf("expected root position") + } + }) + } +} + +func 
TestNewPositionParentIsCopied(t *testing.T) { + t.Parallel() + + parent := PositionID("p-root") + pos, err := NewPosition("p-ceo", "r-ceo", &parent) + if err != nil { + t.Fatalf("NewPosition: %v", err) + } + parent = "mutated" + if *pos.ParentID != "p-root" { + t.Fatalf("pos.ParentID = %q, expected caller mutation not to leak", *pos.ParentID) + } +} diff --git a/helix-org/domain/role.go b/helix-org/domain/role.go new file mode 100644 index 0000000000..8b78f9d0ce --- /dev/null +++ b/helix-org/domain/role.go @@ -0,0 +1,39 @@ +package domain + +import ( + "errors" + "time" +) + +// Role is a job description. Owner-only: workers cannot edit their own +// Role. The owner edits Content via UpdateRole, and the new content +// fans out to every Worker filling a Position with this Role. +// +// Content is the canonical markdown the Worker reads on activation +// (it lands in role.md inside the Worker's Environment). Identity +// (name, voice, personality) is per-Worker, not per-Role. +type Role struct { + ID RoleID + Content string + CreatedAt time.Time + UpdatedAt time.Time +} + +// NewRole validates and constructs a Role. Treat the returned value as immutable. 
+func NewRole(id RoleID, content string, now time.Time) (Role, error) { + if id == "" { + return Role{}, errors.New("role id is empty") + } + if content == "" { + return Role{}, errors.New("role content is empty") + } + if now.IsZero() { + return Role{}, errors.New("role timestamp is zero") + } + return Role{ + ID: id, + Content: content, + CreatedAt: now, + UpdatedAt: now, + }, nil +} diff --git a/helix-org/domain/role_test.go b/helix-org/domain/role_test.go new file mode 100644 index 0000000000..9b6a228cb8 --- /dev/null +++ b/helix-org/domain/role_test.go @@ -0,0 +1,47 @@ +package domain + +import ( + "testing" + "time" +) + +func TestNewRole(t *testing.T) { + t.Parallel() + + now := time.Date(2026, 4, 25, 12, 0, 0, 0, time.UTC) + + cases := []struct { + name string + id RoleID + content string + now time.Time + wantOK bool + }{ + {"valid", "r-ceo", "# CEO\nMakes calls.", now, true}, + {"empty id", "", "# CEO", now, false}, + {"empty content", "r-ceo", "", now, false}, + {"zero time", "r-ceo", "# CEO", time.Time{}, false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + role, err := NewRole(tc.id, tc.content, tc.now) + gotOK := err == nil + if gotOK != tc.wantOK { + t.Fatalf("NewRole error = %v, wantOK = %v", err, tc.wantOK) + } + if gotOK { + if role.ID != tc.id { + t.Fatalf("role.ID = %q, want %q", role.ID, tc.id) + } + if role.Content != tc.content { + t.Fatalf("role.Content = %q", role.Content) + } + if role.CreatedAt != tc.now || role.UpdatedAt != tc.now { + t.Fatalf("timestamps not set: created=%v updated=%v", role.CreatedAt, role.UpdatedAt) + } + } + }) + } +} diff --git a/helix-org/domain/stream.go b/helix-org/domain/stream.go new file mode 100644 index 0000000000..ecc31847c7 --- /dev/null +++ b/helix-org/domain/stream.go @@ -0,0 +1,56 @@ +package domain + +import ( + "errors" + "time" +) + +// Stream is a named source of events. 
Workers publish to a Stream via +// tools and receive from a Stream via Subscriptions. +// +// Every Stream has a Transport. The default — TransportLocal — keeps +// events inside the system: SQLite for storage, the in-process +// broadcaster for delivery, the dispatcher for waking subscribed AI +// Workers. Other transports (Slack, email, webhook, RSS, tick…) +// compose external I/O over the same local mechanism: events still +// land in SQLite for history and replay; the transport additionally +// ships them to or from the outside world. +type Stream struct { + ID StreamID + Name string + Description string + CreatedBy WorkerID + CreatedAt time.Time + Transport Transport +} + +// NewStream validates and constructs a Stream. If transport.Kind is +// empty, the returned Stream uses LocalTransport. +func NewStream(id StreamID, name, description string, createdBy WorkerID, createdAt time.Time, transport Transport) (Stream, error) { + if id == "" { + return Stream{}, errors.New("stream id is empty") + } + if name == "" { + return Stream{}, errors.New("stream name is empty") + } + if createdBy == "" { + return Stream{}, errors.New("stream createdBy is empty") + } + if createdAt.IsZero() { + return Stream{}, errors.New("stream createdAt is zero") + } + if transport.Kind == "" { + transport = LocalTransport() + } + if err := transport.Validate(); err != nil { + return Stream{}, err + } + return Stream{ + ID: id, + Name: name, + Description: description, + CreatedBy: createdBy, + CreatedAt: createdAt.UTC(), + Transport: transport, + }, nil +} diff --git a/helix-org/domain/stream_test.go b/helix-org/domain/stream_test.go new file mode 100644 index 0000000000..97693ceeeb --- /dev/null +++ b/helix-org/domain/stream_test.go @@ -0,0 +1,70 @@ +package domain + +import ( + "testing" + "time" +) + +func TestNewStream(t *testing.T) { + t.Parallel() + + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + cases := []struct { + name string + id StreamID + stName string + 
createdBy WorkerID + createdAt time.Time + wantErr bool + }{ + {"valid", "s-1", "general", "w-owner", now, false}, + {"empty id", "", "general", "w-owner", now, true}, + {"empty name", "s-1", "", "w-owner", now, true}, + {"empty createdBy", "s-1", "general", "", now, true}, + {"zero createdAt", "s-1", "general", "w-owner", time.Time{}, true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + s, err := NewStream(tc.id, tc.stName, "desc", tc.createdBy, tc.createdAt, Transport{}) + gotErr := err != nil + if gotErr != tc.wantErr { + t.Fatalf("NewStream error = %v, wantErr = %v", err, tc.wantErr) + } + if !gotErr { + if s.ID != tc.id { + t.Fatalf("ID = %q, want %q", s.ID, tc.id) + } + if !s.CreatedAt.Equal(tc.createdAt) { + t.Fatalf("CreatedAt = %v, want %v", s.CreatedAt, tc.createdAt) + } + if s.Transport.Kind != TransportLocal { + t.Fatalf("Transport.Kind = %q, want %q", s.Transport.Kind, TransportLocal) + } + } + }) + } +} + +func TestNewStreamNormalisesTimezone(t *testing.T) { + t.Parallel() + loc := time.FixedZone("UTC+5", 5*3600) + ts := time.Date(2026, 4, 24, 17, 0, 0, 0, loc) + s, err := NewStream("s-1", "general", "", "w-owner", ts, Transport{}) + if err != nil { + t.Fatalf("NewStream: %v", err) + } + if s.CreatedAt.Location() != time.UTC { + t.Fatalf("CreatedAt location = %v, want UTC", s.CreatedAt.Location()) + } +} + +func TestNewStreamRejectsUnknownTransport(t *testing.T) { + t.Parallel() + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + _, err := NewStream("s-1", "general", "", "w-owner", now, Transport{Kind: "bogus"}) + if err == nil { + t.Fatal("NewStream with unknown transport: want error, got nil") + } +} diff --git a/helix-org/domain/subscription.go b/helix-org/domain/subscription.go new file mode 100644 index 0000000000..351a2549c1 --- /dev/null +++ b/helix-org/domain/subscription.go @@ -0,0 +1,34 @@ +package domain + +import ( + "errors" + "time" +) + +// Subscription is a Worker's link to a Stream. 
Events published on the +// Stream wake the Worker (via the dispatcher, for AI Workers) and show +// up when they read their events. The (WorkerID, StreamID) pair is the +// identity — there is no synthetic ID. +type Subscription struct { + WorkerID WorkerID + StreamID StreamID + CreatedAt time.Time +} + +// NewSubscription validates and constructs a Subscription. +func NewSubscription(workerID WorkerID, streamID StreamID, createdAt time.Time) (Subscription, error) { + if workerID == "" { + return Subscription{}, errors.New("subscription workerId is empty") + } + if streamID == "" { + return Subscription{}, errors.New("subscription streamId is empty") + } + if createdAt.IsZero() { + return Subscription{}, errors.New("subscription createdAt is zero") + } + return Subscription{ + WorkerID: workerID, + StreamID: streamID, + CreatedAt: createdAt.UTC(), + }, nil +} diff --git a/helix-org/domain/subscription_test.go b/helix-org/domain/subscription_test.go new file mode 100644 index 0000000000..a47a92bc40 --- /dev/null +++ b/helix-org/domain/subscription_test.go @@ -0,0 +1,38 @@ +package domain + +import ( + "testing" + "time" +) + +func TestNewSubscription(t *testing.T) { + t.Parallel() + + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + cases := []struct { + name string + worker WorkerID + stream StreamID + ts time.Time + wantErr bool + }{ + {"valid", "w-1", "s-1", now, false}, + {"empty worker", "", "s-1", now, true}, + {"empty stream", "w-1", "", now, true}, + {"zero time", "w-1", "s-1", time.Time{}, true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + s, err := NewSubscription(tc.worker, tc.stream, tc.ts) + gotErr := err != nil + if gotErr != tc.wantErr { + t.Fatalf("NewSubscription error = %v, wantErr = %v", err, tc.wantErr) + } + if !gotErr && (s.WorkerID != tc.worker || s.StreamID != tc.stream) { + t.Fatalf("subscription = %+v", s) + } + }) + } +} diff --git a/helix-org/domain/tool.go b/helix-org/domain/tool.go 
new file mode 100644 index 0000000000..ebccf2ec38 --- /dev/null +++ b/helix-org/domain/tool.go @@ -0,0 +1,39 @@ +package domain + +import ( + "context" + "encoding/json" + + "github.com/google/jsonschema-go/jsonschema" +) + +// Invocation bundles the per-call data passed to Tool.Invoke. The +// pipeline populates it from the caller's grant; tools parse Args +// according to their own input schema. +type Invocation struct { + Caller Worker + Args json.RawMessage +} + +// Tool is the generic unit of capability. Tools are exposed to callers +// over MCP — Description and InputSchema feed the MCP `tools/list` +// response, and Invoke handles `tools/call`. Built-in structural tools, +// owner-defined tools, and any future MCP-shaped tools all implement +// this interface. +type Tool interface { + // Name is the stable identifier used in grants and MCP tool calls. + Name() ToolName + + // Description is a human-readable summary the LLM sees when + // deciding whether to call this tool. + Description() string + + // InputSchema is the JSON Schema for Invoke's args, used by MCP + // clients to validate calls and by LLMs to understand the call shape. + InputSchema() *jsonschema.Schema + + // Invoke executes the tool. Holding the grant is the entire + // authorisation — the tool does not re-check the caller's scope + // because there is no scope. + Invoke(ctx context.Context, inv Invocation) (json.RawMessage, error) +} diff --git a/helix-org/domain/transport.go b/helix-org/domain/transport.go new file mode 100644 index 0000000000..4dc18475d3 --- /dev/null +++ b/helix-org/domain/transport.go @@ -0,0 +1,315 @@ +package domain + +import ( + "encoding/json" + "errors" + "fmt" + "net/url" + "sort" + "strings" +) + +// TransportKind names the implementation that owns a Stream's I/O. +// Every Stream has one. 
The default — TransportLocal — means events +// live in the SQLite events table and are delivered through the +// in-process broadcaster and dispatcher; nothing crosses a network. +// Other kinds (Slack, email, webhook, RSS, tick…) compose external +// I/O over the same local store. +type TransportKind string + +const ( + // TransportLocal is the default: SQLite + broadcaster + dispatcher. + // No external I/O. + TransportLocal TransportKind = "local" + + // TransportWebhook is a bidirectional HTTP transport. + // + // Inbound: POSTs to /webhooks/<stream-id> are turned into Events on + // the Stream. No config required — the path uses the Stream's own + // ID as the secret-by-obscurity, which is enough for low-stakes + // use; production callers should add a signing secret on top. + // + // Outbound: when Config sets `outbound_url`, every Event appended + // to the Stream (regardless of who appended it — webhook handler, + // publish tool, dm tool) is POSTed to that URL with the event body + // as the request body. Failures are logged and dropped; the append + // itself still succeeds. + // + // A Stream can be inbound-only (no config), outbound-only (config + // with outbound_url), or both at once. + TransportWebhook TransportKind = "webhook" + + // TransportEmail is a bidirectional email transport. Provider + // credentials live at server level (see config.transport.postmark); + // per-stream config carries only the routing identity (`alias`). + // + // Inbound: an external service (Postmark today) POSTs parsed + // inbound mail to /email/; the transport reads the + // recipient address, extracts the alias, and routes to the + // matching Stream. The body becomes a Message envelope on that + // Stream — From=sender, To=[recipient], Subject, Body, MessageID, + // InReplyTo, ThreadID populated from the email's headers. + // + // Outbound: every Event appended to an email Stream is rendered + // to a provider API call (Postmark /email today). 
The Message + // envelope's From/To/Subject/InReplyTo/ThreadID drive the + // outbound headers; the global `from` from server config is the + // envelope sender unless the Stream's Message specifies + // otherwise. + TransportEmail TransportKind = "email" + + // TransportGitHub is an inbound-only GitHub webhooks transport. + // Provider credentials live at server level (see + // config.transport.github); per-stream config carries the routing + // `repo` and an `events` whitelist. + // + // Inbound: GitHub POSTs to a single installation URL + // (/github/webhook). The transport HMAC-verifies the delivery + // against the installation's webhook_secret, then fans the event + // out to every Stream whose Config.Repo matches the payload's + // repository.full_name and whose Config.Events list contains the + // X-GitHub-Event header value. The Message envelope is mapped + // from the upstream payload verbatim — Subject = issue/PR title, + // Body = issue/PR/comment/review body, ThreadID = "#", + // MessageID = X-GitHub-Delivery, From = sender.login, Extra = the + // full payload with one synthetic top-level key (`event`) added + // from the X-GitHub-Event header. + // + // Outbound: not supported. Acting on a repo (label, comment, + // review, open PR) is the Worker's job via `gh` in its + // Environment; the github transport rejects publish calls + // loudly rather than silently dropping. See + // design/github-transport.md. + TransportGitHub TransportKind = "github" +) + +// TransportKindValues lists every valid TransportKind. Source of truth +// for the JSON Schema `enum` constraint surfaced through MCP and for +// listing valid options in validation errors. Adding a new transport +// means touching this one place. +func TransportKindValues() []TransportKind { + return []TransportKind{TransportLocal, TransportWebhook, TransportEmail, TransportGitHub} +} + +// Transport describes how events on a Stream move to and from the +// outside world. 
Internal Streams use TransportLocal — that is still a +// transport, just one whose endpoints are both inside the system. +// +// Config is opaque per-Kind JSON. The local transport ignores it; other +// transports parse it according to their own schema (see WebhookConfig). +type Transport struct { + Kind TransportKind + Config json.RawMessage +} + +// LocalTransport is the zero-config default returned when a caller does +// not specify a transport. Treat the returned value as immutable. +func LocalTransport() Transport { + return Transport{Kind: TransportLocal} +} + +// WebhookConfig is the parsed shape of Transport.Config when +// Kind == TransportWebhook. All fields are optional; a webhook stream +// with a zero WebhookConfig is inbound-only. +type WebhookConfig struct { + // OutboundURL, when set, makes the Stream emit each appended Event + // as an HTTP POST to this URL. Must be an absolute http(s) URL. + OutboundURL string `json:"outbound_url,omitempty"` +} + +// EmailConfig is the parsed shape of Transport.Config when +// Kind == TransportEmail. Provider credentials live in server-level +// config; the only per-stream knob is the alias used for routing. +type EmailConfig struct { + // Alias is the routing identifier for this Stream. Inbound mail + // addressed to <hash>+<alias>@inbound.postmarkapp.com (no-domain + // path) or <alias>@yourdomain.com (with-domain path) lands on + // this Stream. Required and unique within the installation. + Alias string `json:"alias,omitempty"` +} + +// GitHubConfig is the parsed shape of Transport.Config when +// Kind == TransportGitHub. Provider credentials (token, webhook +// secret) live in server-level config; per-stream config carries +// the routing identity. +type GitHubConfig struct { + // Repo is the GitHub `owner/name` whose webhook deliveries land + // on this Stream. Matched case-insensitively against + // `repository.full_name` in the payload. 
+ Repo string `json:"repo,omitempty"` + + // Events is the whitelist of GitHub event types + // (X-GitHub-Event header values) the Stream wants. Anything not + // listed is dropped at the transport without becoming an Event, + // so subscribed Workers don't activate for events they'd ignore. + // Required and non-empty. + Events []string `json:"events,omitempty"` +} + +// knownGitHubEvents enumerates the event types the transport +// currently accepts in a Stream's `events` whitelist. The list is +// deliberately narrow — adding an event is a one-line edit here +// plus tests, but unknown event names are rejected at create_stream +// time so typos surface early. +var knownGitHubEvents = map[string]struct{}{ + "issues": {}, + "issue_comment": {}, + "pull_request": {}, + "pull_request_review": {}, + "pull_request_review_comment": {}, +} + +// WebhookConfig parses Transport.Config as a WebhookConfig. Returns the +// zero value with no error when Config is empty. Errors only on JSON +// shape problems — semantic validation happens in Validate(). +func (t Transport) WebhookConfig() (WebhookConfig, error) { + if t.Kind != TransportWebhook { + return WebhookConfig{}, fmt.Errorf("transport kind is %q, not webhook", t.Kind) + } + var c WebhookConfig + if len(t.Config) == 0 { + return c, nil + } + if err := json.Unmarshal(t.Config, &c); err != nil { + return WebhookConfig{}, fmt.Errorf("parse webhook config: %w", err) + } + return c, nil +} + +// EmailConfig parses Transport.Config as an EmailConfig. Returns the +// zero value with no error when Config is empty. Errors only on JSON +// shape problems — semantic validation happens in Validate(). 
+func (t Transport) EmailConfig() (EmailConfig, error) { + if t.Kind != TransportEmail { + return EmailConfig{}, fmt.Errorf("transport kind is %q, not email", t.Kind) + } + var c EmailConfig + if len(t.Config) == 0 { + return c, nil + } + if err := json.Unmarshal(t.Config, &c); err != nil { + return EmailConfig{}, fmt.Errorf("parse email config: %w", err) + } + return c, nil +} + +// GitHubConfig parses Transport.Config as a GitHubConfig. Returns the +// zero value with no error when Config is empty. Errors only on JSON +// shape problems — semantic validation happens in Validate(). +func (t Transport) GitHubConfig() (GitHubConfig, error) { + if t.Kind != TransportGitHub { + return GitHubConfig{}, fmt.Errorf("transport kind is %q, not github", t.Kind) + } + var c GitHubConfig + if len(t.Config) == 0 { + return c, nil + } + if err := json.Unmarshal(t.Config, &c); err != nil { + return GitHubConfig{}, fmt.Errorf("parse github config: %w", err) + } + return c, nil +} + +// Validate checks that the Kind is non-empty and recognised, and that +// any per-Kind Config parses and is internally consistent. For webhook +// streams that means OutboundURL (if set) must be a valid absolute +// http(s) URL. 
+func (t Transport) Validate() error { + if t.Kind == "" { + return errors.New("transport kind is empty") + } + switch t.Kind { + case TransportLocal: + return nil + case TransportWebhook: + c, err := t.WebhookConfig() + if err != nil { + return err + } + if c.OutboundURL == "" { + return nil + } + u, err := url.Parse(c.OutboundURL) + if err != nil { + return fmt.Errorf("outbound_url: %w", err) + } + if !u.IsAbs() || (u.Scheme != "http" && u.Scheme != "https") { + return fmt.Errorf("outbound_url must be an absolute http(s) URL, got %q", c.OutboundURL) + } + if u.Host == "" { + return fmt.Errorf("outbound_url has no host: %q", c.OutboundURL) + } + return nil + case TransportEmail: + c, err := t.EmailConfig() + if err != nil { + return err + } + if c.Alias == "" { + return errors.New("email transport: alias is required") + } + if !isValidEmailAlias(c.Alias) { + return fmt.Errorf("email transport: alias %q must be lowercase alphanumeric / dash / underscore (no @, +, dots)", c.Alias) + } + return nil + case TransportGitHub: + c, err := t.GitHubConfig() + if err != nil { + return err + } + if c.Repo == "" { + return errors.New("github transport: repo is required") + } + // Repo must be exactly "owner/name" — one slash, both halves + // non-empty. Anything else is a typo we'd rather catch at + // create_stream time than have webhook deliveries silently + // miss the stream. 
+ parts := strings.Split(c.Repo, "/") + if len(parts) != 2 || parts[0] == "" || parts[1] == "" { + return fmt.Errorf("github transport: repo %q must be of the form owner/name", c.Repo) + } + if len(c.Events) == 0 { + return errors.New("github transport: events whitelist is required and must be non-empty") + } + for _, ev := range c.Events { + if _, ok := knownGitHubEvents[ev]; !ok { + return fmt.Errorf("github transport: unknown event %q (supported: %s)", ev, knownGitHubEventsList()) + } + } + return nil + default: + return fmt.Errorf("unknown transport kind %q (valid: %s)", t.Kind, QuotedList(TransportKindValues())) + } +} + +// knownGitHubEventsList renders the supported event names alphabetically +// for use in error messages. Cheap; called only on validation failures. +func knownGitHubEventsList() string { + out := make([]string, 0, len(knownGitHubEvents)) + for k := range knownGitHubEvents { + out = append(out, k) + } + sort.Strings(out) + return strings.Join(out, ", ") +} + +// isValidEmailAlias enforces a conservative alias shape so it can be +// safely composed into addresses (`<hash>+<alias>@…` or +// `<alias>@yourdomain.com`) without ambiguity. Lowercase ASCII letters, digits, +// dash, underscore. No `+` (we use it as the separator), no `@`, no +// `.` (avoids subaddress-of-subaddress confusion), no whitespace. 
+func isValidEmailAlias(s string) bool { + if s == "" { + return false + } + for _, r := range s { + switch { + case r >= 'a' && r <= 'z': + case r >= '0' && r <= '9': + case r == '-' || r == '_': + default: + return false + } + } + return true +} diff --git a/helix-org/domain/transport_test.go b/helix-org/domain/transport_test.go new file mode 100644 index 0000000000..d64f37bf36 --- /dev/null +++ b/helix-org/domain/transport_test.go @@ -0,0 +1,224 @@ +package domain + +import ( + "encoding/json" + "strings" + "testing" +) + +func TestTransportValidate(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + t Transport + wantErr string // substring; "" means no error + }{ + { + name: "local no config", + t: Transport{Kind: TransportLocal}, + }, + { + name: "local ignores config", + // LocalTransport doesn't parse Config — junk should still be valid. + t: Transport{Kind: TransportLocal, Config: json.RawMessage(`not json at all`)}, + }, + { + name: "webhook inbound only", + t: Transport{Kind: TransportWebhook}, + }, + { + name: "webhook empty config", + t: Transport{Kind: TransportWebhook, Config: json.RawMessage(`{}`)}, + }, + { + name: "webhook outbound https", + t: Transport{ + Kind: TransportWebhook, + Config: json.RawMessage(`{"outbound_url":"https://example.com/hook"}`), + }, + }, + { + name: "webhook outbound http localhost", + t: Transport{ + Kind: TransportWebhook, + Config: json.RawMessage(`{"outbound_url":"http://localhost:9000"}`), + }, + }, + { + name: "empty kind", + t: Transport{}, + wantErr: "transport kind is empty", + }, + { + name: "unknown kind", + t: Transport{Kind: "smtp"}, + wantErr: "unknown transport kind", + }, + { + name: "webhook config malformed json", + t: Transport{ + Kind: TransportWebhook, + Config: json.RawMessage(`{not json`), + }, + wantErr: "parse webhook config", + }, + { + name: "webhook outbound non-http scheme", + t: Transport{ + Kind: TransportWebhook, + Config: 
json.RawMessage(`{"outbound_url":"ftp://example.com/hook"}`), + }, + wantErr: "absolute http(s) URL", + }, + { + name: "webhook outbound relative url", + t: Transport{ + Kind: TransportWebhook, + Config: json.RawMessage(`{"outbound_url":"/just/a/path"}`), + }, + wantErr: "absolute http(s) URL", + }, + { + name: "webhook outbound no host", + t: Transport{ + Kind: TransportWebhook, + Config: json.RawMessage(`{"outbound_url":"http:///nohost"}`), + }, + wantErr: "no host", + }, + { + name: "webhook outbound malformed url", + t: Transport{ + Kind: TransportWebhook, + Config: json.RawMessage(`{"outbound_url":"http://%zz"}`), + }, + wantErr: "outbound_url", + }, + { + name: "email valid alias", + t: Transport{ + Kind: TransportEmail, + Config: json.RawMessage(`{"alias":"sam"}`), + }, + }, + { + name: "email valid alias with dash", + t: Transport{ + Kind: TransportEmail, + Config: json.RawMessage(`{"alias":"customer-service"}`), + }, + }, + { + name: "email missing alias", + t: Transport{Kind: TransportEmail}, + wantErr: "alias is required", + }, + { + name: "email empty alias", + t: Transport{ + Kind: TransportEmail, + Config: json.RawMessage(`{"alias":""}`), + }, + wantErr: "alias is required", + }, + { + name: "email alias with @", + t: Transport{ + Kind: TransportEmail, + Config: json.RawMessage(`{"alias":"sam@x"}`), + }, + wantErr: "lowercase alphanumeric", + }, + { + name: "email alias with +", + t: Transport{ + Kind: TransportEmail, + Config: json.RawMessage(`{"alias":"sa+m"}`), + }, + wantErr: "lowercase alphanumeric", + }, + { + name: "email alias with dot", + t: Transport{ + Kind: TransportEmail, + Config: json.RawMessage(`{"alias":"sam.x"}`), + }, + wantErr: "lowercase alphanumeric", + }, + { + name: "email alias uppercase", + t: Transport{ + Kind: TransportEmail, + Config: json.RawMessage(`{"alias":"Sam"}`), + }, + wantErr: "lowercase alphanumeric", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + err := 
tc.t.Validate() + if tc.wantErr == "" { + if err != nil { + t.Fatalf("Validate() = %v, want nil", err) + } + return + } + if err == nil { + t.Fatalf("Validate() = nil, want error containing %q", tc.wantErr) + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("Validate() = %q, want error containing %q", err, tc.wantErr) + } + }) + } +} + +func TestWebhookConfigParse(t *testing.T) { + t.Parallel() + + t.Run("local rejects parse", func(t *testing.T) { + t.Parallel() + _, err := Transport{Kind: TransportLocal}.WebhookConfig() + if err == nil { + t.Fatalf("expected error parsing local transport as webhook") + } + }) + + t.Run("empty config returns zero value", func(t *testing.T) { + t.Parallel() + c, err := Transport{Kind: TransportWebhook}.WebhookConfig() + if err != nil { + t.Fatalf("WebhookConfig() = %v, want nil", err) + } + if c.OutboundURL != "" { + t.Fatalf("OutboundURL = %q, want empty", c.OutboundURL) + } + }) + + t.Run("populated config round-trips", func(t *testing.T) { + t.Parallel() + raw := json.RawMessage(`{"outbound_url":"https://example.com/x"}`) + c, err := Transport{Kind: TransportWebhook, Config: raw}.WebhookConfig() + if err != nil { + t.Fatalf("WebhookConfig() = %v", err) + } + if c.OutboundURL != "https://example.com/x" { + t.Fatalf("OutboundURL = %q", c.OutboundURL) + } + }) + + t.Run("unknown json fields ignored", func(t *testing.T) { + t.Parallel() + raw := json.RawMessage(`{"outbound_url":"https://example.com/x","future":"ignored"}`) + c, err := Transport{Kind: TransportWebhook, Config: raw}.WebhookConfig() + if err != nil { + t.Fatalf("WebhookConfig() = %v", err) + } + if c.OutboundURL != "https://example.com/x" { + t.Fatalf("OutboundURL = %q", c.OutboundURL) + } + }) +} diff --git a/helix-org/domain/worker.go b/helix-org/domain/worker.go new file mode 100644 index 0000000000..a3aa3ef66c --- /dev/null +++ b/helix-org/domain/worker.go @@ -0,0 +1,137 @@ +package domain + +import ( + "errors" + "fmt" +) + +// WorkerKind 
distinguishes HumanWorker from AIWorker. +type WorkerKind string + +const ( + WorkerKindHuman WorkerKind = "human" + WorkerKindAI WorkerKind = "ai" +) + +// WorkerKindValues lists every valid WorkerKind. Source of truth for +// the JSON Schema `enum` constraint surfaced through MCP and for +// listing valid options in validation errors. Adding a new kind means +// touching this one place. +func WorkerKindValues() []WorkerKind { + return []WorkerKind{WorkerKindHuman, WorkerKindAI} +} + +// Validate returns an error if k is not one of the known worker kinds. +// The error lists the valid options verbatim so a client that posted +// a bad value can self-correct without reading source. +func (k WorkerKind) Validate() error { + for _, v := range WorkerKindValues() { + if k == v { + return nil + } + } + return fmt.Errorf("unknown worker kind %q (valid: %s)", k, QuotedList(WorkerKindValues())) +} + +// Worker is the common abstraction over humans and AI agents occupying +// Positions. HumanWorker and AIWorker are the only concrete +// implementations; the unexported marker method keeps the set closed. +// +// IdentityContent is the per-Worker description (persona for AI, profile +// for a human). It lives in the domain — never on disk — so it survives +// any change in env layout (local files today, remote workspaces +// tomorrow). Spawners project it into whatever the env channel needs at +// activation time. +// +// Domain.Worker carries no runtime-backend state (Helix project IDs, +// session pointers, etc.). That state lives in the WorkerRuntimeState +// sidecar store, keyed by (workerID, backend) — added without touching +// the domain when a new runtime backend appears. +type Worker interface { + ID() WorkerID + Kind() WorkerKind + Positions() []PositionID + IdentityContent() string + WithIdentityContent(content string) Worker + isWorker() +} + +// HumanWorker represents a real person inside the organisation. 
+type HumanWorker struct { + id WorkerID + positions []PositionID + identity string +} + +// NewHumanWorker validates and constructs a HumanWorker. +func NewHumanWorker(id WorkerID, positions []PositionID, identityContent string) (*HumanWorker, error) { + if id == "" { + return nil, errors.New("worker id is empty") + } + ps, err := validatePositions(positions) + if err != nil { + return nil, err + } + return &HumanWorker{id: id, positions: ps, identity: identityContent}, nil +} + +func (h *HumanWorker) ID() WorkerID { return h.id } +func (h *HumanWorker) Kind() WorkerKind { return WorkerKindHuman } +func (h *HumanWorker) Positions() []PositionID { return copyPositions(h.positions) } +func (h *HumanWorker) IdentityContent() string { return h.identity } +func (h *HumanWorker) WithIdentityContent(content string) Worker { + return &HumanWorker{id: h.id, positions: copyPositions(h.positions), identity: content} +} +func (h *HumanWorker) isWorker() {} + +// AIWorker represents a software agent inside the organisation. +type AIWorker struct { + id WorkerID + positions []PositionID + identity string +} + +// NewAIWorker validates and constructs an AIWorker. 
+func NewAIWorker(id WorkerID, positions []PositionID, identityContent string) (*AIWorker, error) { + if id == "" { + return nil, errors.New("worker id is empty") + } + ps, err := validatePositions(positions) + if err != nil { + return nil, err + } + return &AIWorker{id: id, positions: ps, identity: identityContent}, nil +} + +func (a *AIWorker) ID() WorkerID { return a.id } +func (a *AIWorker) Kind() WorkerKind { return WorkerKindAI } +func (a *AIWorker) Positions() []PositionID { return copyPositions(a.positions) } +func (a *AIWorker) IdentityContent() string { return a.identity } +func (a *AIWorker) WithIdentityContent(content string) Worker { + return &AIWorker{id: a.id, positions: copyPositions(a.positions), identity: content} +} +func (a *AIWorker) isWorker() {} + +func validatePositions(positions []PositionID) ([]PositionID, error) { + // Zero positions is permitted: it represents an archived/vacated Worker. + // Tools that hire must pass >=1. + seen := make(map[PositionID]struct{}, len(positions)) + out := make([]PositionID, 0, len(positions)) + for _, p := range positions { + if p == "" { + return nil, errors.New("position id is empty") + } + if _, dup := seen[p]; dup { + return nil, fmt.Errorf("duplicate position %q", p) + } + seen[p] = struct{}{} + out = append(out, p) + } + return out, nil +} + +func copyPositions(positions []PositionID) []PositionID { + out := make([]PositionID, len(positions)) + copy(out, positions) + return out +} diff --git a/helix-org/domain/worker_test.go b/helix-org/domain/worker_test.go new file mode 100644 index 0000000000..743a0be126 --- /dev/null +++ b/helix-org/domain/worker_test.go @@ -0,0 +1,109 @@ +package domain + +import "testing" + +func TestNewHumanWorker(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + id WorkerID + positions []PositionID + identity string + wantErr bool + }{ + {"valid", "w-1", []PositionID{"p-ceo"}, "i am the ceo", false}, + {"valid empty identity", "w-1", 
[]PositionID{"p-ceo"}, "", false}, + {"empty id", "", []PositionID{"p-ceo"}, "", true}, + {"no positions (vacated)", "w-1", nil, "", false}, + {"empty position id", "w-1", []PositionID{""}, "", true}, + {"duplicate positions", "w-1", []PositionID{"p-ceo", "p-ceo"}, "", true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + w, err := NewHumanWorker(tc.id, tc.positions, tc.identity) + gotErr := err != nil + if gotErr != tc.wantErr { + t.Fatalf("NewHumanWorker error = %v, wantErr = %v", err, tc.wantErr) + } + if !gotErr { + if w.Kind() != WorkerKindHuman { + t.Fatalf("Kind = %q, want human", w.Kind()) + } + if w.ID() != tc.id { + t.Fatalf("ID = %q, want %q", w.ID(), tc.id) + } + if w.IdentityContent() != tc.identity { + t.Fatalf("IdentityContent = %q, want %q", w.IdentityContent(), tc.identity) + } + } + }) + } +} + +func TestNewAIWorker(t *testing.T) { + t.Parallel() + + w, err := NewAIWorker("w-ai", []PositionID{"p-docs"}, "you are the docs editor") + if err != nil { + t.Fatalf("NewAIWorker: %v", err) + } + if w.Kind() != WorkerKindAI { + t.Fatalf("Kind = %q, want ai", w.Kind()) + } + if got := w.Positions(); len(got) != 1 || got[0] != "p-docs" { + t.Fatalf("Positions = %v, want [p-docs]", got) + } + if w.IdentityContent() != "you are the docs editor" { + t.Fatalf("IdentityContent = %q", w.IdentityContent()) + } +} + +func TestWorkerWithIdentityContent(t *testing.T) { + t.Parallel() + + w, err := NewAIWorker("w-1", []PositionID{"p-1"}, "old") + if err != nil { + t.Fatalf("NewAIWorker: %v", err) + } + updated := w.WithIdentityContent("new") + if w.IdentityContent() != "old" { + t.Fatalf("original mutated: %q", w.IdentityContent()) + } + if updated.IdentityContent() != "new" { + t.Fatalf("updated identity = %q, want %q", updated.IdentityContent(), "new") + } + if updated.ID() != w.ID() { + t.Fatalf("ID changed: %q vs %q", updated.ID(), w.ID()) + } + if updated.Kind() != w.Kind() { + t.Fatalf("Kind changed: %q vs %q", 
updated.Kind(), w.Kind()) + } +} + +func TestWorkerPositionsIsolation(t *testing.T) { + t.Parallel() + + positions := []PositionID{"p-ceo"} + w, err := NewHumanWorker("w-1", positions, "") + if err != nil { + t.Fatalf("NewHumanWorker: %v", err) + } + positions[0] = "mutated" + if got := w.Positions(); got[0] != "p-ceo" { + t.Fatalf("Positions leaked: %v", got) + } + got := w.Positions() + got[0] = "also mutated" + if got2 := w.Positions(); got2[0] != "p-ceo" { + t.Fatalf("Positions getter leaked: %v", got2) + } +} + +// interface conformance at compile time +var ( + _ Worker = (*HumanWorker)(nil) + _ Worker = (*AIWorker)(nil) +) diff --git a/helix-org/go.mod b/helix-org/go.mod new file mode 100644 index 0000000000..3561e2fd51 --- /dev/null +++ b/helix-org/go.mod @@ -0,0 +1,33 @@ +module github.com/helixml/helix-org + +go 1.25.4 + +require ( + github.com/glebarez/sqlite v1.11.0 + github.com/google/jsonschema-go v0.4.2 + github.com/google/uuid v1.6.0 + github.com/gorilla/websocket v1.5.3 + github.com/modelcontextprotocol/go-sdk v1.5.0 + github.com/tylermmorton/tmpl v1.1.0 + github.com/yuin/goldmark v1.8.2 + gorm.io/gorm v1.31.1 +) + +require ( + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/glebarez/go-sqlite v1.21.2 // indirect + github.com/jinzhu/inflection v1.0.0 // indirect + github.com/jinzhu/now v1.1.5 // indirect + github.com/mattn/go-isatty v0.0.17 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.43.0 // indirect + golang.org/x/text v0.20.0 // indirect + modernc.org/libc v1.22.5 // indirect + modernc.org/mathutil v1.5.0 // indirect + modernc.org/memory v1.5.0 // indirect + modernc.org/sqlite v1.23.1 // indirect +) diff --git a/helix-org/go.sum b/helix-org/go.sum new file mode 100644 
index 0000000000..2101ab9cf1 --- /dev/null +++ b/helix-org/go.sum @@ -0,0 +1,66 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo= +github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k= +github.com/glebarez/sqlite v1.11.0 h1:wSG0irqzP6VurnMEpFGer5Li19RpIRi2qvQz++w0GMw= +github.com/glebarez/sqlite v1.11.0/go.mod h1:h8/o8j5wiAsqSPoWELDUdJXhjAhsVliSn7bWZjOhrgQ= +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= +github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= +github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection 
v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/modelcontextprotocol/go-sdk v1.5.0 h1:CHU0FIX9kpueNkxuYtfYQn1Z0slhFzBZuq+x6IiblIU= +github.com/modelcontextprotocol/go-sdk v1.5.0/go.mod h1:gggDIhoemhWs3BGkGwd1umzEXCEMMvAnhTrnbXJKKKA= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tylermmorton/tmpl v1.1.0 h1:IRdkWtoHnGDx/l69u9YbJoW3vt9/myAnDgS5qTe3yW0= +github.com/tylermmorton/tmpl v1.1.0/go.mod h1:7E7f4TC2F+OCe7KG33X/MXRmfWG8tZm8wOBKmEmu2HE= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod 
h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +github.com/yuin/goldmark v1.8.2 h1:kEGpgqJXdgbkhcOgBxkC0X0PmoPG1ZyoZ117rDVp4zE= +github.com/yuin/goldmark v1.8.2/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= +golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg= +gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= +modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE= +modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= +modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= +modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM= +modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk= diff --git a/helix-org/helix/helixclient/client.go b/helix-org/helix/helixclient/client.go new file mode 
100644 index 0000000000..746273ee6a --- /dev/null +++ b/helix-org/helix/helixclient/client.go @@ -0,0 +1,1127 @@ +// Package helixclient is a thin REST + WebSocket client for the +// co-located Helix server. +// +// Scope (after the per-Worker-project refactor): +// - Project lifecycle via the declarative apply endpoint. +// - Project secrets — env-var injection into agent containers. +// - Git contents — reading and writing job/* files on the helix-specs branch. +// - Chat session lifecycle (start, get, stop, output, live updates). +// +// All shapes mirror Helix's `api/pkg/types` so the client posts exactly +// what Helix expects with no translation layer. +package helixclient + +import ( + "bufio" + "bytes" + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/gorilla/websocket" +) + +// Default per-call timeout for REST calls. The WebSocket has no +// timeout — the caller controls its lifetime via context. +const defaultRESTTimeout = 30 * time.Second + +// Client is the surface helix-org depends on. Defining it as an +// interface lets tests inject a fake without HTTP. +type Client interface { + // Connectivity probe. Returns the authenticated user. + WhoAmI(ctx context.Context) (UserStatus, error) + + // ServerStatus returns the operator-facing /api/v1/config snapshot. + // helix-org only consumes the desktop-quota fields today (see + // CheckDesktopQuota); the rest are surfaced for forward + // compatibility. + ServerStatus(ctx context.Context) (ServerStatus, error) + + // ListProviders returns the slug list Helix exposes at + // /api/v1/providers (e.g. ["openai","anthropic","helix",…]). Used + // to validate `chat.provider` at startup so a typo doesn't surface + // as a confusing 422 from /sessions/{id}/zed-config much later in + // the request chain. 
+ ListProviders(ctx context.Context) ([]string, error) + // ListModelsForProvider returns the list of model IDs the given + // provider exposes. IDs are bare (no `provider/` prefix) — that's + // the shape Helix uses everywhere except the OpenAI-aggregate + // endpoint (which itself is unreliable for Anthropic since Helix + // gates anthropic models behind the `anthropic-version` header). + // Combined with ListProviders, callers can validate a + // (provider, model) pair against this Helix instance before + // applying any per-Worker project that references it. + ListModelsForProvider(ctx context.Context, provider string) ([]Model, error) + + // Project lifecycle. helix-org applies one project per Worker. + // ApplyProject is upsert-by-name within the operator's org. + ApplyProject(ctx context.Context, req ProjectApplyRequest) (ProjectApplyResponse, error) + GetProject(ctx context.Context, id string) (Project, error) + DeleteProject(ctx context.Context, id string) error + + // Project secrets. Written via /projects/{id}/secrets; surface as + // env vars inside the agent's container at session start. + PutProjectSecret(ctx context.Context, projectID, name, value string) error + + // Git contents. helix-org writes job/role.md, job/identity.md, + // job/agent.md to the project's primary repo at the helix-specs + // branch. content (passed plain) is base64-encoded by PutFile. + PutFile(ctx context.Context, repoID string, req PutFileRequest) error + GetFile(ctx context.Context, repoID, path, branch string) (string, error) + + // Repository creation + attachment. Helix's project-apply does NOT + // auto-create a default repository; the desktop's startup script + // then refuses to launch Zed (`No repositories were cloned + // successfully`). For our owner-chat / org-graph use case we don't + // need a *real* code repo, just a Helix-internal one to satisfy + // the workspace check. Two-step: CreateGitRepo → AttachRepo (with + // primary=true). 
+ CreateGitRepo(ctx context.Context, req CreateGitRepoRequest) (GitRepo, error) + AttachRepoToProject(ctx context.Context, projectID, repoID string, primary bool) error + // CreateBranch makes a new branch from baseBranch on the repo. Used + // by HelixProjectApplier to ensure `helix-specs` exists before + // pushing role/identity files there — the desktop's startup script + // only creates the helix-specs worktree if the branch is on the + // remote. + CreateBranch(ctx context.Context, repoID, branch, baseBranch string) error + + // App lifecycle. Used by the chat backend to provision a + // helix_basic Assistant with MCPs — Helix's `/projects/apply` + // only creates zed_external Agent Apps (`projectAgentRuntimeToTypes` + // hard-codes that), so chat-only surfaces that need MCP tool + // wiring without a sandbox runner take this separate path. + CreateApp(ctx context.Context, req AppRequest) (App, error) + GetApp(ctx context.Context, id string) (App, error) + UpdateApp(ctx context.Context, id string, req AppRequest) (App, error) + + // Chat session lifecycle. + // + // StartChat opens a new session (Messages[0] becomes the first + // turn). Use this only for *first* contact — once the session ID + // is persisted, subsequent messages must go through + // SendSessionMessage so they queue durably across cold starts. + StartChat(ctx context.Context, req StartChatRequest) (Session, error) + // StartChatWithStatus is the streaming-aware variant: same wire + // call as StartChat, but additionally reports whether the SSE + // stream surfaced a transient "no agent WS" error after the + // session ID came through. Callers use the flag to decide whether + // to immediately re-queue the same prompt via SendSessionMessage + // (which queues durably and is delivered on agent reconnect). + StartChatWithStatus(ctx context.Context, req StartChatRequest) (Session, bool, error) + // SendSessionMessage POSTs a message to an existing session via + // /api/v1/sessions/{id}/messages. 
Helix persists the interaction + // and `pickupWaitingInteraction` delivers it once the agent's + // WebSocket is reachable — no client-side warmup loop required. + // Returns 200 even when no agent is connected yet. + SendSessionMessage(ctx context.Context, sessionID, content string, opts SendMessageOptions) (SendMessageResponse, error) + GetSession(ctx context.Context, id string) (Session, error) + GetOutput(ctx context.Context, sessionID string) (Output, error) + SubscribeUpdates(ctx context.Context, sessionID string) (<-chan SessionUpdate, error) + StopExternalAgent(ctx context.Context, sessionID string) error +} + +// SendMessageOptions are the optional knobs on SendSessionMessage. +// Interrupt mirrors the frontend RobustPromptInput's interrupt flag — +// set true to cancel any in-flight generation before queueing this +// message. NotifyUserID populates Helix's commenter mappings so +// response notifications route to a third party (used by the +// design-review path; helix-org leaves it empty). +type SendMessageOptions struct { + Interrupt bool + NotifyUserID string +} + +// SendMessageResponse mirrors Helix's POST /sessions/{id}/messages +// response body. Both IDs are returned so the caller can correlate +// notifications even if the message was queued (no WS) at the time +// of the call. +type SendMessageResponse struct { + RequestID string `json:"request_id"` + InteractionID string `json:"interaction_id"` +} + +// ServerStatus mirrors the slice of /api/v1/config helix-org reads. +// Today only the desktop-quota fields are consumed, surfaced as a +// pre-flight gate before opening a new zed_external session. +type ServerStatus struct { + MaxConcurrentDesktops int `json:"max_concurrent_desktops"` + ActiveConcurrentDesktops int `json:"active_concurrent_desktops"` +} + +// HasDesktopRoom reports whether at least one desktop slot is free +// against the operator-configured cap. 
Returns true if the server has +// no quota configured (Max <= 0) — Helix uses 0 to mean "unlimited" +// at the server level. +func (s ServerStatus) HasDesktopRoom() bool { + if s.MaxConcurrentDesktops <= 0 { + return true + } + return s.ActiveConcurrentDesktops < s.MaxConcurrentDesktops +} + +// Model is one entry from Helix's OpenAI-compatible /v1/models catalogue; +// only ID and Enabled are consumed today. ID is bare (no `provider/` prefix) +// when listed per provider; the aggregate listing uses `provider/model` +// (e.g. "anthropic/claude-opus-4-6"). Enabled is false for models the operator has hidden. +type Model struct { + ID string `json:"id"` + Enabled bool `json:"enabled"` +} + +// UserStatus is the slim auth-probe response. Helix returns more +// fields; only `User` (the user ID) and `Slug` are consumed today, +// the latter for human-readable logs. +type UserStatus struct { + Admin bool `json:"admin"` + User string `json:"user"` + Slug string `json:"slug"` +} + +// ProjectApplyRequest mirrors `types.ProjectApplyRequest`. The whole +// declarative project (repos, agent app, startup script, kanban +// settings, …) is described in the embedded `Spec`. +type ProjectApplyRequest struct { + OrganizationID string `json:"organization_id,omitempty"` + Name string `json:"name"` + Spec ProjectSpec `json:"spec"` +} + +// ProjectApplyResponse mirrors `types.ProjectApplyResponse`. We +// always read both IDs — `ProjectID` for chat sessions and git +// writes; `AgentAppID` for adding the org-graph MCP server. +type ProjectApplyResponse struct { + ProjectID string `json:"project_id"` + AgentAppID string `json:"agent_app_id,omitempty"` + Created bool `json:"created"` +} + +// ProjectSpec mirrors `types.ProjectSpec`. helix-org populates only +// the subset relevant to the per-Worker-project model: name (set on +// the wrapping request), description, agent, startup, repositories.
+type ProjectSpec struct { + Description string `json:"description,omitempty"` + Technologies []string `json:"technologies,omitempty"` + Guidelines string `json:"guidelines,omitempty"` + Repositories []ProjectRepositorySpec `json:"repositories,omitempty"` + Startup *ProjectStartup `json:"startup,omitempty"` + Agent *ProjectAgentSpec `json:"agent,omitempty"` +} + +// ProjectRepositorySpec describes a repository attachment. +type ProjectRepositorySpec struct { + URL string `json:"url"` + DefaultBranch string `json:"default_branch,omitempty"` + Primary bool `json:"primary,omitempty"` +} + +// ProjectStartup is the script run on agent-container startup. +type ProjectStartup struct { + Script string `json:"script,omitempty"` +} + +// ProjectAgentSpec configures the auto-provisioned Agent App. +type ProjectAgentSpec struct { + Name string `json:"name,omitempty"` + Runtime string `json:"runtime,omitempty"` // "claude_code", "zed", … + Model string `json:"model,omitempty"` + Provider string `json:"provider,omitempty"` + Credentials string `json:"credentials,omitempty"` + Tools *ProjectAgentTools `json:"tools,omitempty"` +} + +// ProjectAgentTools enables the simple built-in tools (web search, +// browser, calculator). MCP servers are added separately via the +// Agent App's `assistants[0].mcps[]` once Helix exposes a per-app +// MCP-write endpoint; today helix-org bundles the MCP wiring into +// the project apply where supported and otherwise treats this as a +// follow-up step. +type ProjectAgentTools struct { + WebSearch bool `json:"web_search,omitempty"` + Browser bool `json:"browser,omitempty"` + Calculator bool `json:"calculator,omitempty"` +} + +// Project mirrors the slice of `types.Project` helix-org reads. 
+type Project struct { + ID string `json:"id"` + Name string `json:"name"` + UserID string `json:"user_id"` + OrganizationID string `json:"organization_id"` + DefaultRepoID string `json:"default_repo_id"` +} + +// CreateGitRepoRequest is the helix-org → Helix payload for POST +// /api/v1/git/repositories. We only ever create Helix-internal repos +// (no external git URL), so most of `types.GitRepositoryCreateRequest` +// is irrelevant. Required: Name, OwnerID. OrganizationID for org-scoped +// projects. +type CreateGitRepoRequest struct { + Name string `json:"name"` + OwnerID string `json:"owner_id"` + OrganizationID string `json:"organization_id,omitempty"` + RepoType string `json:"repo_type,omitempty"` // defaults to "code" + DefaultBranch string `json:"default_branch,omitempty"` + IsExternal bool `json:"is_external"` // always false for helix-org + InitialFiles map[string]string `json:"initial_files,omitempty"` // seed the default branch so subsequent PutFile to other branches has something to fork from +} + +// GitRepo is the slice of `types.GitRepository` helix-org reads — +// just the ID, which is the value we attach to the project. +type GitRepo struct { + ID string `json:"id"` + Name string `json:"name"` +} + +// AppRequest is the body sent to POST /apps and PUT /apps/{id}. +// Config is opaque JSON — callers either build it themselves +// (e.g. helix-org's project-apply step doesn't write Apps directly, +// it only updates the auto-provisioned one to attach MCPs) or use +// AttachMCPToApp, which round-trips the live config to avoid +// dropping unknown fields. +type AppRequest struct { + OrganizationID string `json:"organization_id,omitempty"` + Global bool `json:"global,omitempty"` + Config json.RawMessage `json:"config,omitempty"` +} + +// App mirrors the slice of `types.App` helix-org reads. Config is +// raw — callers parse only what they need. 
+type App struct { + ID string `json:"id"` + Owner string `json:"owner"` + Config json.RawMessage `json:"config"` +} + +// PutFileRequest mirrors `types.UpdateGitRepositoryFileContentsRequest`. +// `Content` is plain text; PutFile base64-encodes it for the operator. +type PutFileRequest struct { + Path string + Branch string + Message string + Author string + Email string + Content string +} + +// StartChatRequest is the helix-org → Helix payload that opens a new +// chat session (or continues one when SessionID is set). Mirrors +// `types.SessionChatRequest`. +type StartChatRequest struct { + ProjectID string `json:"project_id"` + OrganizationID string `json:"organization_id,omitempty"` + SessionID string `json:"session_id,omitempty"` + SessionRole string `json:"session_role,omitempty"` + AgentType string `json:"agent_type,omitempty"` + AppID string `json:"app_id,omitempty"` + AssistantID string `json:"assistant_id,omitempty"` + Type string `json:"type,omitempty"` + ExternalAgentConfig *ExternalAgentConfig `json:"external_agent_config,omitempty"` + SystemPrompt string `json:"system,omitempty"` + Messages []SessionChatMessage `json:"messages"` + Stream bool `json:"stream,omitempty"` + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + CallbackURL string `json:"callback_url,omitempty"` +} + +// ExternalAgentConfig must be sent as a non-nil object whenever +// AgentType=zed_external — Helix uses presence-of-object to wire up +// a runner. +type ExternalAgentConfig struct { + Resolution string `json:"resolution,omitempty"` + DisplayWidth int `json:"display_width,omitempty"` + DisplayHeight int `json:"display_height,omitempty"` + DesktopType string `json:"desktop_type,omitempty"` +} + +// SessionChatMessage is one entry in a SessionChatRequest.Messages +// array. Helix's Message struct is OpenAI-style multipart; we only +// ever send a single text part. 
+type SessionChatMessage struct { + Role string `json:"role"` + Content MessageContent `json:"content"` +} + +// MessageContent is the multipart body. helix-org only ever sends a +// single text part. We omit content_type to match the wire shape the +// Helix UI sends ({"parts":[...]}); Helix infers text from the part +// type. +type MessageContent struct { + Parts []any `json:"parts"` +} + +// NewTextMessage builds a single user text message — the only shape +// helix-org ever sends. +func NewTextMessage(role, text string) SessionChatMessage { + return SessionChatMessage{ + Role: role, + Content: MessageContent{Parts: []any{text}}, + } +} + +// Output is the polling result for a session. Mirrors +// `types.SessionOutputResponse`. Status: "waiting" | "complete" | "error". +type Output struct { + SessionID string `json:"session_id"` + Status string `json:"status"` + Output string `json:"output"` + DurationMs int64 `json:"duration_ms"` +} + +// IsTerminal reports whether o.Status indicates the session is done. +func (o Output) IsTerminal() bool { + return o.Status == "complete" || o.Status == "error" +} + +// SessionUpdate is one frame from `/api/v1/ws/user`. Mirrors +// `types.WebsocketEvent`. The streaming payload helix-org consumes +// is `interaction_patch` carrying `EntryPatches[]` — the per-entry +// typed deltas Helix uses for response-entries streaming. +// +// session_update / interaction_update frames are still observed +// (final-state snapshots), but EntryPatches are the source of truth +// for assistant text + tool calls during a turn. 
+type SessionUpdate struct { + Type string `json:"type"` + SessionID string `json:"session_id"` + InteractionID string `json:"interaction_id"` + Owner string `json:"owner"` + Session *Session `json:"session,omitempty"` + Interaction *Interaction `json:"interaction,omitempty"` + EntryCount int `json:"entry_count,omitempty"` + EntryPatches []EntryPatch `json:"entry_patches,omitempty"` +} + +// EntryPatch is one per-entry delta. Mirrors `types.EntryPatch`. +// +// - Index identifies the entry within the interaction. +// - MessageID is the entry's identity — re-using a message ID +// means "extend this entry"; a new ID means "this is a new +// entry at the same Index" (e.g. a tool_call following text). +// - Patch is the text delta to splice in at PatchOffset (UTF-16). +// - Type is "text" or "tool_call". +// - For tool_call entries, ToolName/ToolStatus carry metadata. +type EntryPatch struct { + Index int `json:"index"` + MessageID string `json:"message_id"` + Type string `json:"type"` + Patch string `json:"patch,omitempty"` + PatchOffset int `json:"patch_offset,omitempty"` + TotalLength int `json:"total_length,omitempty"` + ToolName string `json:"tool_name,omitempty"` + ToolStatus string `json:"tool_status,omitempty"` +} + +// Session is the subset of Helix's Session struct we read. +type Session struct { + ID string `json:"id"` + Name string `json:"name"` + ProjectID string `json:"project_id"` + ParentApp string `json:"parent_app,omitempty"` + DefaultRepoID string `json:"default_repo_id,omitempty"` + Interactions []*Interaction `json:"interactions,omitempty"` +} + +// Interaction collects what an assistant produced in one turn. 
+type Interaction struct { + ID string `json:"id"` + GenerationID int `json:"generation_id"` + State string `json:"state"` + Status string `json:"status"` + Error string `json:"error"` + ResponseMessage string `json:"response_message,omitempty"` + ToolCalls []OpenAIToolCall `json:"tool_calls,omitempty"` + ResponseEntries json.RawMessage `json:"response_entries,omitempty"` +} + +// OpenAIToolCall mirrors the openai.ToolCall shape. +type OpenAIToolCall struct { + ID string `json:"id,omitempty"` + Type string `json:"type,omitempty"` + Function OpenAIFunctionCall `json:"function"` +} + +// OpenAIFunctionCall is the "function" payload of a ToolCall. +type OpenAIFunctionCall struct { + Name string `json:"name,omitempty"` + Arguments string `json:"arguments,omitempty"` +} + +// Config configures a real HTTP+WS Client. +type Config struct { + BaseURL string + APIKey string + HTTP *http.Client +} + +// New constructs a real Client backed by HTTP and gorilla/websocket. +func New(cfg Config) (Client, error) { + if strings.TrimSpace(cfg.BaseURL) == "" { + return nil, errors.New("helixclient: BaseURL is required") + } + if strings.TrimSpace(cfg.APIKey) == "" { + return nil, errors.New("helixclient: APIKey is required") + } + hc := cfg.HTTP + if hc == nil { + hc = &http.Client{Timeout: defaultRESTTimeout} + } + return &realClient{base: strings.TrimRight(cfg.BaseURL, "/"), apiKey: cfg.APIKey, http: hc}, nil +} + +type realClient struct { + base string + apiKey string + http *http.Client +} + +// do is the shared HTTP execution path. body may be nil. If out is +// non-nil and the response is 2xx, the body is JSON-decoded into out. 
+func (c *realClient) do(ctx context.Context, method, path string, body any, out any) error {
+	// Upper bound on how much unread response data we are willing to
+	// swallow just to keep the connection reusable.
+	const maxDrainBytes = 64 << 10
+
+	var rdr io.Reader
+	if body != nil {
+		buf, err := json.Marshal(body)
+		if err != nil {
+			return fmt.Errorf("marshal: %w", err)
+		}
+		rdr = bytes.NewReader(buf)
+	}
+	req, err := http.NewRequestWithContext(ctx, method, c.base+path, rdr)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+c.apiKey)
+	if body != nil {
+		req.Header.Set("Content-Type", "application/json")
+	}
+	resp, err := c.http.Do(req)
+	if err != nil {
+		return fmt.Errorf("%s %s: %w", method, path, err)
+	}
+	defer func() {
+		// Read what is left before closing: net/http can only reuse a
+		// keep-alive connection when the body was consumed to EOF. This
+		// matters when out is nil or the error body exceeds 4 KiB.
+		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrainBytes))
+		_ = resp.Body.Close()
+	}()
+	if resp.StatusCode >= 400 {
+		// Best-effort read of the error payload; a read failure just
+		// yields a shorter message.
+		raw, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+		return fmt.Errorf("%s %s: %s: %s", method, path, resp.Status, strings.TrimSpace(string(raw)))
+	}
+	if out == nil {
+		return nil
+	}
+	if err := json.NewDecoder(resp.Body).Decode(out); err != nil {
+		return fmt.Errorf("decode %s: %w", path, err)
+	}
+	return nil
+}
+
+// ---- WhoAmI ----
+
+// WhoAmI calls GET /api/v1/status and returns the decoded UserStatus.
+func (c *realClient) WhoAmI(ctx context.Context) (UserStatus, error) {
+	var us UserStatus
+	if err := c.do(ctx, http.MethodGet, "/api/v1/status", nil, &us); err != nil {
+		return UserStatus{}, err
+	}
+	return us, nil
+}
+
+// ServerStatus calls GET /api/v1/config.
+func (c *realClient) ServerStatus(ctx context.Context) (ServerStatus, error) {
+	var st ServerStatus
+	if err := c.do(ctx, http.MethodGet, "/api/v1/config", nil, &st); err != nil {
+		return ServerStatus{}, err
+	}
+	return st, nil
+}
+
+// CheckDesktopQuota refuses the call when the operator-configured
+// `max_concurrent_desktops` would be exceeded by spinning up one more
+// session. Returns nil when the server reports room; a failure to read
+// the server status is returned wrapped with "read server status".
+// Used as a pre-flight before any helix-org code path opens a *new*
+// zed_external session — follow-ups reuse an already-running
+// container and don't need this check.
+//
+// The quota error reports only the active/max counts and points the
+// operator at the /sessions page. It does not list individual
+// sessions.
+func CheckDesktopQuota(ctx context.Context, c Client) error {
+	st, err := c.ServerStatus(ctx)
+	if err != nil {
+		return fmt.Errorf("read server status: %w", err)
+	}
+	if st.HasDesktopRoom() {
+		return nil
+	}
+	return fmt.Errorf("desktop quota reached on Helix (%d/%d active) — stop one of the existing sessions before opening a new one (`helix-org config get helix.url` then visit /sessions to manage)", st.ActiveConcurrentDesktops, st.MaxConcurrentDesktops)
+}
+
+// ListProviders calls GET /api/v1/providers. Returns the list of
+// provider slugs the operator has configured on this Helix instance.
+func (c *realClient) ListProviders(ctx context.Context) ([]string, error) {
+	var providers []string
+	if err := c.do(ctx, http.MethodGet, "/api/v1/providers", nil, &providers); err != nil {
+		return nil, err
+	}
+	return providers, nil
+}
+
+// ListModelsForProvider calls GET /v1/models?provider=. Helix
+// returns an OpenAI-compatible `{"data":[…]}` envelope; we unwrap it
+// and surface the slim Model shape. IDs are bare (no `provider/` prefix).
+func (c *realClient) ListModelsForProvider(ctx context.Context, provider string) ([]Model, error) {
+	var resp struct {
+		Data []Model `json:"data"`
+	}
+	path := "/v1/models?provider=" + url.QueryEscape(provider)
+	if err := c.do(ctx, http.MethodGet, path, nil, &resp); err != nil {
+		return nil, err
+	}
+	return resp.Data, nil
+}
+
+// ValidateProviderModel checks that `provider` exists in
+// /api/v1/providers and that `model` (the bare ID, without a
+// `provider/` prefix) is listed for that provider in /v1/models and
+// is enabled. Returns a descriptive error pointing at the first
+// missing piece — designed to be surfaced verbatim to operators at
+// startup so a typo in `chat.provider` / `chat.model` doesn't get
+// papered over and surface as a confusing 422 from /zed-config much
+// later in the request chain.
+func ValidateProviderModel(ctx context.Context, c Client, provider, model string) error {
+	if strings.TrimSpace(provider) == "" || strings.TrimSpace(model) == "" {
+		return fmt.Errorf("validate provider/model: both provider and model are required (got provider=%q model=%q)", provider, model)
+	}
+	providers, err := c.ListProviders(ctx)
+	if err != nil {
+		return fmt.Errorf("list providers: %w", err)
+	}
+	known := false
+	for _, p := range providers {
+		if p == provider {
+			known = true
+			break
+		}
+	}
+	if !known {
+		return fmt.Errorf("provider %q not configured on Helix (got %v) — set chat.provider to one of these", provider, providers)
+	}
+	models, err := c.ListModelsForProvider(ctx, provider)
+	if err != nil {
+		return fmt.Errorf("list models for %q: %w", provider, err)
+	}
+	for _, m := range models {
+		if m.ID == model {
+			if !m.Enabled {
+				return fmt.Errorf("model %q on provider %q exists but is disabled on Helix — pick a different chat.model or have the operator re-enable it", model, provider)
+			}
+			return nil
+		}
+	}
+	// Not found: only enabled models are offered as alternatives.
+	available := make([]string, 0, len(models))
+	for _, m := range models {
+		if m.Enabled {
+			available = append(available, m.ID)
+		}
+	}
+	return fmt.Errorf("model %q not found on provider %q — available: %v", model, provider, available)
+}
+
+// ---- Project lifecycle ----
+
+// ApplyProject calls PUT /api/v1/projects/apply. A response without a
+// project ID is treated as an error.
+func (c *realClient) ApplyProject(ctx context.Context, req ProjectApplyRequest) (ProjectApplyResponse, error) {
+	var resp ProjectApplyResponse
+	if err := c.do(ctx, http.MethodPut, "/api/v1/projects/apply", req, &resp); err != nil {
+		return ProjectApplyResponse{}, err
+	}
+	if resp.ProjectID == "" {
+		return ProjectApplyResponse{}, errors.New("apply project: empty project_id in response")
+	}
+	return resp, nil
+}
+
+// GetProject calls GET /api/v1/projects/{id}.
+func (c *realClient) GetProject(ctx context.Context, id string) (Project, error) {
+	var p Project
+	if err := c.do(ctx, http.MethodGet, "/api/v1/projects/"+url.PathEscape(id), nil, &p); err != nil {
+		return Project{}, err
+	}
+	return p, nil
+}
+
+// DeleteProject calls DELETE /api/v1/projects/{id}.
+func (c *realClient) DeleteProject(ctx context.Context, id string) error {
+	return c.do(ctx, http.MethodDelete, "/api/v1/projects/"+url.PathEscape(id), nil, nil)
+}
+
+// ---- Git repository lifecycle ----
+
+// CreateGitRepo calls POST /api/v1/git/repositories. An empty RepoType
+// defaults to "code" and an empty DefaultBranch to "main". A response
+// without an ID is treated as an error.
+func (c *realClient) CreateGitRepo(ctx context.Context, req CreateGitRepoRequest) (GitRepo, error) {
+	if req.RepoType == "" {
+		req.RepoType = "code"
+	}
+	if req.DefaultBranch == "" {
+		req.DefaultBranch = "main"
+	}
+	var resp GitRepo
+	if err := c.do(ctx, http.MethodPost, "/api/v1/git/repositories", req, &resp); err != nil {
+		return GitRepo{}, err
+	}
+	if resp.ID == "" {
+		return GitRepo{}, errors.New("create git repo: empty id in response")
+	}
+	return resp, nil
+}
+
+// CreateBranch calls POST /api/v1/git/repositories/{repoID}/branches.
+// base_branch is omitted from the request when baseBranch is empty.
+func (c *realClient) CreateBranch(ctx context.Context, repoID, branch, baseBranch string) error {
+	body := struct {
+		BranchName string `json:"branch_name"`
+		BaseBranch string `json:"base_branch,omitempty"`
+	}{BranchName: branch, BaseBranch: baseBranch}
+	if err := c.do(ctx, http.MethodPost, "/api/v1/git/repositories/"+url.PathEscape(repoID)+"/branches", body, nil); err != nil {
+		return fmt.Errorf("create branch %s on %s: %w", branch, repoID, err)
+	}
+	return nil
+}
+
+// AttachRepoToProject attaches repoID to projectID and, when primary
+// is true, marks it as the project's primary repository with a second
+// PUT. The two calls are not atomic: if the second one fails, the
+// repository stays attached.
+func (c *realClient) AttachRepoToProject(ctx context.Context, projectID, repoID string, primary bool) error {
+	if err := c.do(ctx, http.MethodPut, "/api/v1/projects/"+url.PathEscape(projectID)+"/repositories/"+url.PathEscape(repoID)+"/attach", nil, nil); err != nil {
+		return fmt.Errorf("attach repo: %w", err)
+	}
+	if primary {
+		if err := c.do(ctx, http.MethodPut, "/api/v1/projects/"+url.PathEscape(projectID)+"/repositories/"+url.PathEscape(repoID)+"/primary", nil, nil); err != nil {
+			return fmt.Errorf("set primary repo: %w", err)
+		}
+	}
+	return nil
+}
+
+// ---- App lifecycle ----
+
+// CreateApp calls POST /api/v1/apps. A response without an ID is
+// treated as an error.
+func (c *realClient) CreateApp(ctx context.Context, req AppRequest) (App, error) {
+	var resp App
+	if err := c.do(ctx, http.MethodPost, "/api/v1/apps", req, &resp); err != nil {
+		return App{}, err
+	}
+	if resp.ID == "" {
+		return App{}, errors.New("create app: empty id in response")
+	}
+	return resp, nil
+}
+
+// GetApp calls GET /api/v1/apps/{id}.
+func (c *realClient) GetApp(ctx context.Context, id string) (App, error) {
+	var resp App
+	if err := c.do(ctx, http.MethodGet, "/api/v1/apps/"+url.PathEscape(id), nil, &resp); err != nil {
+		return App{}, err
+	}
+	return resp, nil
+}
+
+// UpdateApp puts to /api/v1/apps/{id}. Helix's handler reads the
+// app ID from the request *body* (not the URL path), so the `id`
+// field is added to the JSON body alongside the request fields.
+func (c *realClient) UpdateApp(ctx context.Context, id string, req AppRequest) (App, error) {
+	body := struct {
+		ID             string          `json:"id"`
+		OrganizationID string          `json:"organization_id,omitempty"`
+		Global         bool            `json:"global,omitempty"`
+		Config         json.RawMessage `json:"config,omitempty"`
+	}{
+		ID:             id,
+		OrganizationID: req.OrganizationID,
+		Global:         req.Global,
+		Config:         req.Config,
+	}
+	var resp App
+	if err := c.do(ctx, http.MethodPut, "/api/v1/apps/"+url.PathEscape(id), body, &resp); err != nil {
+		return App{}, err
+	}
+	return resp, nil
+}
+
+// AttachMCPToApp adds (or updates) a single HTTP MCP server entry
+// on an App's first Assistant. Idempotent — replaces any existing
+// entry whose `name` matches.
+//
+// Implemented as a get-mutate-put round-trip with the raw config as
+// `map[string]any` so unknown fields (everything Helix's
+// AssistantConfig has that helix-org doesn't model) survive.
+//
+// Used by helix-org's per-Worker project apply: project-apply
+// auto-provisions a `zed_external` Agent App but doesn't accept
+// MCPs in its spec, so we attach them in this second step.
+func AttachMCPToApp(ctx context.Context, c Client, appID, name, transport, mcpURL string) error {
+	if appID == "" {
+		return errors.New("AttachMCPToApp: appID is empty")
+	}
+	app, err := c.GetApp(ctx, appID)
+	if err != nil {
+		return fmt.Errorf("get app: %w", err)
+	}
+	// Decode into an untyped map so fields this package does not model
+	// are written back unchanged.
+	var raw map[string]any
+	if len(app.Config) == 0 {
+		raw = map[string]any{}
+	} else if err := json.Unmarshal(app.Config, &raw); err != nil {
+		return fmt.Errorf("decode config: %w", err)
+	}
+	helix, _ := raw["helix"].(map[string]any)
+	if helix == nil {
+		helix = map[string]any{}
+		raw["helix"] = helix
+	}
+	// Only the first assistant is touched; an app without one is an error.
+	asstsAny, _ := helix["assistants"].([]any)
+	if len(asstsAny) == 0 {
+		return errors.New("AttachMCPToApp: app has no assistants")
+	}
+	asst, _ := asstsAny[0].(map[string]any)
+	if asst == nil {
+		return errors.New("AttachMCPToApp: assistant is not an object")
+	}
+	mcpsAny, _ := asst["mcps"].([]any)
+	mcps := make([]any, 0, len(mcpsAny)+1)
+	replaced := false
+	for _, mAny := range mcpsAny {
+		m, ok := mAny.(map[string]any)
+		if !ok {
+			// Not an object: keep it as-is instead of dropping it.
+			mcps = append(mcps, mAny)
+			continue
+		}
+		// Every entry with a matching name is updated in place; other
+		// keys on the entry are preserved.
+		if m["name"] == name {
+			m["transport"] = transport
+			m["url"] = mcpURL
+			replaced = true
+		}
+		mcps = append(mcps, m)
+	}
+	if !replaced {
+		mcps = append(mcps, map[string]any{
+			"name":      name,
+			"transport": transport,
+			"url":       mcpURL,
+		})
+	}
+	asst["mcps"] = mcps
+	asstsAny[0] = asst
+	helix["assistants"] = asstsAny
+	raw["helix"] = helix
+	body, err := json.Marshal(raw)
+	if err != nil {
+		return fmt.Errorf("encode config: %w", err)
+	}
+	// NOTE(review): only Config is sent here, so UpdateApp omits
+	// organization_id and global from the PUT body. Confirm that Helix
+	// leaves those fields untouched when they are absent.
+	if _, err := c.UpdateApp(ctx, appID, AppRequest{Config: body}); err != nil {
+		return fmt.Errorf("update app: %w", err)
+	}
+	return nil
+}
+
+// ---- Project secrets ----
+
+// PutProjectSecret POSTs a {name, value} pair to
+// /api/v1/projects/{projectID}/secrets.
+func (c *realClient) PutProjectSecret(ctx context.Context, projectID, name, value string) error {
+	body := map[string]string{"name": name, "value": value}
+	return c.do(ctx, http.MethodPost, "/api/v1/projects/"+url.PathEscape(projectID)+"/secrets", body, nil)
+}
+
+// ---- Git contents ----
+
+// PutFile PUTs a file to /api/v1/git/repositories/{repoID}/contents.
+// The content is base64-encoded before it is sent.
+func (c *realClient) PutFile(ctx context.Context, repoID string, req PutFileRequest) error {
+	body := map[string]string{
+		"path":    req.Path,
+		"branch":  req.Branch,
+		"message": req.Message,
+		"author":  req.Author,
+		"email":   req.Email,
+		"content": base64.StdEncoding.EncodeToString([]byte(req.Content)),
+	}
+	return c.do(ctx, http.MethodPut, "/api/v1/git/repositories/"+url.PathEscape(repoID)+"/contents", body, nil)
+}
+
+// GetFile GETs /api/v1/git/repositories/{repoID}/contents for the given
+// path and branch and returns the file content as a string.
+func (c *realClient) GetFile(ctx context.Context, repoID, path, branch string) (string, error) {
+	q := url.Values{"path": {path}, "branch": {branch}}
+	var resp struct {
+		Path    string `json:"path"`
+		Content string `json:"content"`
+	}
+	if err := c.do(ctx, http.MethodGet, "/api/v1/git/repositories/"+url.PathEscape(repoID)+"/contents?"+q.Encode(), nil, &resp); err != nil {
+		return "", err
+	}
+	// Helix's GET /contents returns raw plain text in the `content`
+	// field for small files — not base64 like PutFile expects on the
+	// way in. Try a base64 decode first; fall through to raw on
+	// failure to handle both shapes.
+	//
+	// NOTE(review): this sniffing is ambiguous. Plain text that happens
+	// to be valid standard base64 (e.g. a file containing just "test")
+	// is decoded and comes back as garbage. Confirm whether the server
+	// can signal the encoding explicitly.
+	if decoded, err := base64.StdEncoding.DecodeString(resp.Content); err == nil {
+		return string(decoded), nil
+	}
+	return resp.Content, nil
+}
+
+// ---- Chat session lifecycle ----
+
+// StartChat starts a chat session and returns it, discarding the
+// status flag that StartChatWithStatus exposes.
+func (c *realClient) StartChat(ctx context.Context, req StartChatRequest) (Session, error) {
+	s, _, err := c.startChat(ctx, req)
+	return s, err
+}
+
+// StartChatWithStatus is StartChat plus the boolean reported by
+// startChat. On the streaming (zed_external) path that flag is true
+// when the stream carried a "no external agent WebSocket connection"
+// error; on the non-streaming path it is always false.
+func (c *realClient) StartChatWithStatus(ctx context.Context, req StartChatRequest) (Session, bool, error) {
+	return c.startChat(ctx, req)
+}
+
+// SendSessionMessage POSTs to /api/v1/sessions/{id}/messages. The
+// endpoint persists a Waiting interaction and returns 200 even when
+// the agent's WebSocket is not connected — pickupWaitingInteraction
+// delivers the message on reconnect. This is the durable replacement
+// for the client-side warmup loop helix-org used to run during cold
+// starts.
+func (c *realClient) SendSessionMessage(ctx context.Context, sessionID, content string, opts SendMessageOptions) (SendMessageResponse, error) {
+	if strings.TrimSpace(sessionID) == "" {
+		return SendMessageResponse{}, errors.New("SendSessionMessage: sessionID is empty")
+	}
+	body := struct {
+		Content      string `json:"content"`
+		Interrupt    bool   `json:"interrupt,omitempty"`
+		NotifyUserID string `json:"notify_user_id,omitempty"`
+	}{Content: content, Interrupt: opts.Interrupt, NotifyUserID: opts.NotifyUserID}
+	var resp SendMessageResponse
+	if err := c.do(ctx, http.MethodPost, "/api/v1/sessions/"+url.PathEscape(sessionID)+"/messages", body, &resp); err != nil {
+		return SendMessageResponse{}, err
+	}
+	return resp, nil
+}
+
+// startChat is the shared implementation behind StartChat and
+// StartChatWithStatus. An empty req.Type defaults to "text" and at
+// least one message is required. zed_external requests get a default
+// ExternalAgentConfig, are forced to stream=true and go through
+// startChatStreaming; everything else is a plain JSON POST whose
+// status flag is always false.
+func (c *realClient) startChat(ctx context.Context, req StartChatRequest) (Session, bool, error) {
+	if req.Type == "" {
+		req.Type = "text"
+	}
+	if len(req.Messages) == 0 {
+		return Session{}, false, errors.New("StartChat: req.Messages must contain at least one message")
+	}
+	if req.AgentType == "zed_external" && req.ExternalAgentConfig == nil {
+		req.ExternalAgentConfig = &ExternalAgentConfig{}
+	}
+	if req.AgentType == "zed_external" {
+		req.Stream = true
+		return c.startChatStreaming(ctx, req)
+	}
+	var raw json.RawMessage
+	if err := c.do(ctx, http.MethodPost, "/api/v1/sessions/chat", req, &raw); err != nil {
+		return Session{}, false, err
+	}
+	s, err := parseStartChatResponse(raw)
+	return s, false, err
+}
+
+// startChatStreaming POSTs to /sessions/chat with stream=true and
+// reads SSE chunks. The session ID is taken from the first chunk that
+// carries one (Helix writes it before any LLM/agent call happens).
+// Any in-stream error from the agent dispatch is non-fatal because
+// the interaction is already persisted server-side and will be
+// picked up when the agent connects. The actual response is
+// delivered via SubscribeUpdates.
+//
+// The read loop stops at end-of-stream or at the first chunk whose
+// error mentions "no external agent WebSocket connection"; the latter
+// is reported through the returned bool. NOTE(review): the loop does
+// not return as soon as the session ID is known, so a stream that
+// stays open keeps this call blocked — confirm that is intended.
+//
+// We deliberately detach the upstream HTTP request from the caller's
+// ctx. Helix's handler holds the connection open during
+// waitForExternalAgentReady (up to 5 minutes for a cold container)
+// and runs the agent dispatch synchronously. If the caller's ctx
+// cancels (which happens immediately when our /ui/chat/send handler
+// returns 200 to the browser after we've grabbed the session ID),
+// the upstream conn drops, Helix's request ctx cancels, and the wait
+// fails with "0 attempts". Detaching keeps Helix's handler running
+// long enough to complete startup; the body-drain goroutine reads to
+// EOF and closes the connection cleanly.
+func (c *realClient) startChatStreaming(_ context.Context, req StartChatRequest) (Session, bool, error) {
+	buf, err := json.Marshal(req)
+	if err != nil {
+		return Session{}, false, fmt.Errorf("marshal: %w", err)
+	}
+	// Detached from the caller's ctx on purpose (see above); the
+	// 10-minute timeout is the only bound on the upstream request.
+	upstreamCtx, upstreamCancel := context.WithTimeout(context.Background(), 10*time.Minute)
+	httpReq, err := http.NewRequestWithContext(upstreamCtx, http.MethodPost, c.base+"/api/v1/sessions/chat", bytes.NewReader(buf))
+	if err != nil {
+		upstreamCancel()
+		return Session{}, false, err
+	}
+	httpReq.Header.Set("Authorization", "Bearer "+c.apiKey)
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Accept", "text/event-stream")
+	resp, err := c.http.Do(httpReq) //nolint:bodyclose // body is closed inside the drain goroutine below or on early-return paths; the lint can't follow it across the closure
+	if err != nil {
+		upstreamCancel()
+		return Session{}, false, fmt.Errorf("POST /api/v1/sessions/chat: %w", err)
+	}
+	if resp.StatusCode >= 400 {
+		raw, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+		_ = resp.Body.Close()
+		upstreamCancel()
+		return Session{}, false, fmt.Errorf("POST /api/v1/sessions/chat: %s: %s", resp.Status, strings.TrimSpace(string(raw)))
+	}
+	scanner := bufio.NewScanner(resp.Body)
+	// Allow lines of up to 1 MiB; bufio.Scanner's default token limit
+	// is 64 KiB.
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+	var sessionID string
+	hadWSError := false
+	for scanner.Scan() {
+		line := scanner.Text()
+		payload := strings.TrimSpace(line)
+		if payload == "" {
+			continue
+		}
+		payload = strings.TrimPrefix(payload, "data:")
+		payload = strings.TrimSpace(payload)
+		if payload == "" || payload == "[DONE]" {
+			continue
+		}
+		var chunk struct {
+			ID    string `json:"id"`
+			Error *struct {
+				Message string `json:"message"`
+			} `json:"error,omitempty"`
+		}
+		// Lines that are not JSON are skipped.
+		if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
+			continue
+		}
+		if chunk.ID != "" && sessionID == "" {
+			sessionID = chunk.ID
+		}
+		if chunk.Error != nil && strings.Contains(chunk.Error.Message, "no external agent WebSocket connection") {
+			hadWSError = true
+			break
+		}
+	}
+	// Snapshot the scan error before the drain goroutine takes over the
+	// scanner: bufio.Scanner is not safe for concurrent use, so calling
+	// Err() after the goroutine has started would be a data race.
+	scanErr := scanner.Err()
+	// Drain anything remaining so Helix can finish its handler under
+	// upstreamCtx — the body-close below would otherwise drop FIN
+	// mid-write and the server-side log fills with broken-pipe noise.
+	go func() {
+		defer upstreamCancel()
+		defer func() { _ = resp.Body.Close() }()
+		for scanner.Scan() {
+		}
+	}()
+	if sessionID != "" {
+		return Session{ID: sessionID}, hadWSError, nil
+	}
+	if scanErr != nil {
+		return Session{}, false, fmt.Errorf("read SSE: %w", scanErr)
+	}
+	return Session{}, false, errors.New("start chat streaming: no session id in stream")
+}
+
+// parseStartChatResponse normalises the two response shapes Helix
+// returns from /sessions/chat. zed_external returns the full Session
+// JSON; helix_basic / openai-style returns an OpenAI chat-completion
+// shape with `id` (session ID) and `choices[0].message.content`.
+func parseStartChatResponse(raw json.RawMessage) (Session, error) {
+	var s Session
+	// Best-effort probe for the Session shape. The error is ignored on
+	// purpose: the same bytes are decoded again below, and that second
+	// decode reports malformed JSON.
+	_ = json.Unmarshal(raw, &s)
+	if len(s.Interactions) > 0 {
+		if s.ID == "" {
+			return Session{}, errors.New("start chat: session has no id")
+		}
+		return s, nil
+	}
+	var oai struct {
+		ID      string `json:"id"`
+		Choices []struct {
+			Index   int `json:"index"`
+			Message struct {
+				Role    string `json:"role"`
+				Content string `json:"content"`
+			} `json:"message"`
+		} `json:"choices"`
+	}
+	if err := json.Unmarshal(raw, &oai); err != nil {
+		return Session{}, fmt.Errorf("decode start-chat response: %w", err)
+	}
+	if oai.ID == "" {
+		return Session{}, errors.New("start chat: empty session id")
+	}
+	out := Session{ID: oai.ID}
+	// Wrap the first choice in a synthetic, already-complete Interaction
+	// so callers see one shape for both response kinds.
+	if len(oai.Choices) > 0 && oai.Choices[0].Message.Content != "" {
+		out.Interactions = []*Interaction{{
+			ID:              oai.ID + ":synth",
+			State:           "complete",
+			ResponseMessage: oai.Choices[0].Message.Content,
+		}}
+	}
+	return out, nil
+}
+
+// GetSession calls GET /api/v1/sessions/{id}.
+func (c *realClient) GetSession(ctx context.Context, id string) (Session, error) {
+	var s Session
+	if err := c.do(ctx, http.MethodGet, "/api/v1/sessions/"+url.PathEscape(id), nil, &s); err != nil {
+		return Session{}, err
+	}
+	return s, nil
+}
+
+// GetOutput calls GET /api/v1/sessions/{id}/output.
+func (c *realClient) GetOutput(ctx context.Context, sessionID string) (Output, error) {
+	var out Output
+	if err := c.do(ctx, http.MethodGet, "/api/v1/sessions/"+url.PathEscape(sessionID)+"/output", nil, &out); err != nil {
+		return Output{}, err
+	}
+	return out, nil
+}
+
+// StopExternalAgent calls DELETE /api/v1/sessions/{id}/stop-external-agent.
+func (c *realClient) StopExternalAgent(ctx context.Context, sessionID string) error {
+	return c.do(ctx, http.MethodDelete, "/api/v1/sessions/"+url.PathEscape(sessionID)+"/stop-external-agent", nil, nil)
+}
+
+// ---- Live updates ----
+
+// SubscribeUpdates dials the user WebSocket for sessionID and streams
+// every frame that decodes as a SessionUpdate on the returned channel.
+// Frames that fail to decode are skipped. The channel is closed when
+// the connection ends or ctx is cancelled; no error is surfaced after
+// the initial dial.
+func (c *realClient) SubscribeUpdates(ctx context.Context, sessionID string) (<-chan SessionUpdate, error) {
+	wsURL, err := wsURLFromBase(c.base, sessionID)
+	if err != nil {
+		return nil, err
+	}
+	header := http.Header{}
+	header.Set("Authorization", "Bearer "+c.apiKey)
+	conn, resp, err := websocket.DefaultDialer.DialContext(ctx, wsURL, header)
+	if resp != nil {
+		_ = resp.Body.Close()
+	}
+	if err != nil {
+		return nil, fmt.Errorf("ws dial: %w", err)
+	}
+	ch := make(chan SessionUpdate, 16)
+	// done is closed when the reader exits, so the ctx watcher below
+	// does not outlive the connection when ctx is never cancelled.
+	done := make(chan struct{})
+	go func() {
+		select {
+		case <-ctx.Done():
+			// Closing the connection unblocks ReadMessage in the reader.
+			_ = conn.Close()
+		case <-done:
+		}
+	}()
+	go func() {
+		defer close(ch)
+		defer close(done)
+		defer func() { _ = conn.Close() }()
+		for {
+			_, data, err := conn.ReadMessage()
+			if err != nil {
+				return
+			}
+			var u SessionUpdate
+			if err := json.Unmarshal(data, &u); err != nil {
+				continue
+			}
+			select {
+			case ch <- u:
+			case <-ctx.Done():
+				return
+			}
+		}
+	}()
+	return ch, nil
+}
+
+// wsURLFromBase derives the user-WebSocket URL for sessionID from the
+// REST base URL: http becomes ws, https becomes wss, and the path
+// /api/v1/ws/user is appended to whatever path prefix the base URL
+// carries, so a Helix served under a sub-path resolves the same way
+// the REST calls do. Any other scheme is an error.
+func wsURLFromBase(base, sessionID string) (string, error) {
+	u, err := url.Parse(base)
+	if err != nil {
+		return "", fmt.Errorf("parse base url: %w", err)
+	}
+	switch u.Scheme {
+	case "http":
+		u.Scheme = "ws"
+	case "https":
+		u.Scheme = "wss"
+	default:
+		return "", fmt.Errorf("unsupported scheme %q", u.Scheme)
+	}
+	u.Path = strings.TrimRight(u.Path, "/") + "/api/v1/ws/user"
+	// Drop any pre-escaped form of the old path so String() re-derives
+	// the escaping from Path.
+	u.RawPath = ""
+	q := u.Query()
+	q.Set("session_id", sessionID)
+	u.RawQuery = q.Encode()
+	return u.String(), nil
+}
diff --git a/helix-org/helix/helixclient/client_test.go b/helix-org/helix/helixclient/client_test.go
new file mode 100644
index 0000000000..3e0239b111
--- /dev/null
+++ b/helix-org/helix/helixclient/client_test.go
@@ -0,0 +1,419 @@
+package helixclient
+
+import (
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/gorilla/websocket"
+)
+
+// newTestClient starts an httptest server running h, registers its
+// shutdown with t.Cleanup, and returns a Client pointed at it with the
+// API key "tok".
+func newTestClient(t *testing.T, h http.Handler) Client {
+	t.Helper()
+	srv := httptest.NewServer(h)
+	t.Cleanup(srv.Close)
+	c, err := New(Config{BaseURL: srv.URL, APIKey: "tok"})
+	if err != nil {
+		t.Fatalf("new: %v", err)
+	}
+	return c
+}
+
+// TestStartChatSendsHelixSessionChatRequest checks the request that
+// StartChat sends for a zed_external session (auth header, path,
+// decoded body, defaulted ExternalAgentConfig) and that the session ID
+// from the response is returned.
+func TestStartChatSendsHelixSessionChatRequest(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if got, want := r.Header.Get("Authorization"), "Bearer tok"; got != want {
+			t.Errorf("auth header: got %q want %q", got, want)
+		}
+		if r.URL.Path != "/api/v1/sessions/chat" {
+			t.Errorf("path: %q", r.URL.Path)
+		}
+		var req StartChatRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			// The handler runs on the server's goroutine, where
+			// t.Fatalf must not be called; record and bail out.
+			t.Errorf("decode: %v", err)
+			return
+		}
+		if req.AgentType != "zed_external" || req.SessionRole != "job" {
+			t.Errorf("unexpected req: %+v", req)
+		}
+		if len(req.Messages) != 1 || req.Messages[0].Role != "user" {
+			t.Errorf("messages: %+v", req.Messages)
+		}
+		if req.ExternalAgentConfig == nil {
+			t.Errorf("ExternalAgentConfig is nil; expected default {}")
+		}
+		_ = json.NewEncoder(w).Encode(Session{ID: "ses_42", Interactions: []*Interaction{{ID: "ix1"}}})
+	}))
+	s, err := c.StartChat(context.Background(), StartChatRequest{
+		ProjectID:   "p1",
+		SessionRole: "job",
+		AgentType:   "zed_external",
+		Messages:    []SessionChatMessage{NewTextMessage("user", "hello")},
+	})
+	if err != nil {
+		t.Fatalf("start: %v", err)
+	}
+	if s.ID != "ses_42" {
+		t.Errorf("id: got %q want ses_42", s.ID)
+	}
+}
+
+// TestStartChatSyntheticInteractionFromOpenAIShape verifies that the
+// OpenAI-compatible /sessions/chat response shape (returned by
+// helix_basic / openai-routed sessions) is normalised into a
+// synthetic Interaction so callers see one shape regardless.
+func TestStartChatSyntheticInteractionFromOpenAIShape(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = w.Write([]byte(`{"id":"ses_oai","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","content":"hello back"}}]}`))
+	}))
+	s, err := c.StartChat(context.Background(), StartChatRequest{
+		ProjectID: "p",
+		AgentType: "helix_basic",
+		Messages:  []SessionChatMessage{NewTextMessage("user", "hi")},
+	})
+	if err != nil {
+		t.Fatalf("start: %v", err)
+	}
+	if s.ID != "ses_oai" || len(s.Interactions) != 1 || s.Interactions[0].ResponseMessage != "hello back" {
+		t.Errorf("synthetic interaction: %+v", s)
+	}
+}
+
+// TestApplyProject checks the method, path and body of the apply
+// request and that the response fields are returned to the caller.
+func TestApplyProject(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPut || r.URL.Path != "/api/v1/projects/apply" {
+			t.Errorf("expected PUT /api/v1/projects/apply, got %s %s", r.Method, r.URL.Path)
+		}
+		var req ProjectApplyRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			// Handlers run on the server's goroutine, where t.Fatalf
+			// must not be called; record the failure and bail out.
+			t.Errorf("decode: %v", err)
+			return
+		}
+		if req.Name != "w-eng" {
+			t.Errorf("name: %q", req.Name)
+		}
+		if req.Spec.Agent == nil || req.Spec.Agent.Runtime != "claude_code" {
+			t.Errorf("agent spec: %+v", req.Spec.Agent)
+		}
+		_ = json.NewEncoder(w).Encode(ProjectApplyResponse{ProjectID: "prj_x", AgentAppID: "app_x", Created: true})
+	}))
+	resp, err := c.ApplyProject(context.Background(), ProjectApplyRequest{
+		Name: "w-eng",
+		Spec: ProjectSpec{
+			Description: "engineer",
+			Agent: &ProjectAgentSpec{
+				Name:    "engineer",
+				Runtime: "claude_code",
+				Model:   "claude-sonnet-4-6",
+			},
+		},
+	})
+	if err != nil {
+		t.Fatalf("apply: %v", err)
+	}
+	if resp.ProjectID != "prj_x" || resp.AgentAppID != "app_x" || !resp.Created {
+		t.Errorf("resp: %+v", resp)
+	}
+}
+
+// TestPutProjectSecret checks that the secret is POSTed to a path
+// ending in /secrets with the expected name/value body.
+func TestPutProjectSecret(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			t.Errorf("method: %s", r.Method)
+		}
+		if !strings.HasSuffix(r.URL.Path, "/secrets") {
+			t.Errorf("path: %q", r.URL.Path)
+		}
+		var body map[string]string
+		// A decode failure leaves body empty and trips the check below.
+		_ = json.NewDecoder(r.Body).Decode(&body)
+		if body["name"] != "HELIX_ORG_URL" || body["value"] != "https://example" {
+			t.Errorf("body: %+v", body)
+		}
+	}))
+	if err := c.PutProjectSecret(context.Background(), "prj_x", "HELIX_ORG_URL", "https://example"); err != nil {
+		t.Fatalf("put secret: %v", err)
+	}
+}
+
+// TestDeleteProject checks that DeleteProject issues a DELETE against
+// the project's path.
+func TestDeleteProject(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodDelete || !strings.HasPrefix(r.URL.Path, "/api/v1/projects/prj_x") {
+			t.Errorf("expected DELETE /api/v1/projects/prj_x, got %s %s", r.Method, r.URL.Path)
+		}
+	}))
+	if err := c.DeleteProject(context.Background(), "prj_x"); err != nil {
+		t.Fatalf("delete: %v", err)
+	}
+}
+
+// TestGetOutput checks that GetOutput hits a path ending in /output
+// and decodes the response into Output.
+func TestGetOutput(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if !strings.HasSuffix(r.URL.Path, "/output") {
+			t.Errorf("path: %q", r.URL.Path)
+		}
+		_ = json.NewEncoder(w).Encode(Output{SessionID: "ses_x", Status: "complete", Output: "ok", DurationMs: 12})
+	}))
+	out, err := c.GetOutput(context.Background(), "ses_x")
+	if err != nil {
+		t.Fatalf("output: %v", err)
+	}
+	if !out.IsTerminal() || out.Output != "ok" {
+		t.Errorf("output: %+v", out)
+	}
+}
+
+// TestSubscribeUpdatesParsesEntryPatches serves one WebSocket frame and
+// checks that it arrives on the subscription channel with its entry
+// patch intact.
+func TestSubscribeUpdatesParsesEntryPatches(t *testing.T) {
+	t.Parallel()
+	upgrader := websocket.Upgrader{}
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Query().Get("session_id") != "ses_w" {
+			t.Errorf("session_id: %q", r.URL.Query().Get("session_id"))
+		}
+		conn, err := upgrader.Upgrade(w, r, nil)
+		if err != nil {
+			// Not on the test goroutine: report without t.Fatalf.
+			t.Errorf("upgrade: %v", err)
+			return
+		}
+		defer func() { _ = conn.Close() }()
+		_ = conn.WriteJSON(SessionUpdate{
+			Type:          "interaction_patch",
+			SessionID:     "ses_w",
+			InteractionID: "ix1",
+			EntryCount:    1,
+			EntryPatches: []EntryPatch{{
+				Index: 0, MessageID: "msg-a", Type: "text", Patch: "hello", PatchOffset: 0, TotalLength: 5,
+			}},
+		})
+	}))
+	defer srv.Close()
+	c, err := New(Config{BaseURL: srv.URL, APIKey: "tok"})
+	if err != nil {
+		t.Fatalf("new: %v", err)
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+	ch, err := c.SubscribeUpdates(ctx, "ses_w")
+	if err != nil {
+		t.Fatalf("subscribe: %v", err)
+	}
+	select {
+	case u, ok := <-ch:
+		if !ok {
+			t.Fatal("channel closed before frame")
+		}
+		if len(u.EntryPatches) != 1 || u.EntryPatches[0].Patch != "hello" {
+			t.Errorf("frame: %+v", u)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("timed out waiting for frame")
+	}
+}
+
+// TestWhoAmI checks that WhoAmI decodes the status response.
+func TestWhoAmI(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		_ = json.NewEncoder(w).Encode(UserStatus{Admin: true, User: "u-1", Slug: "phil"})
+	}))
+	us, err := c.WhoAmI(context.Background())
+	if err != nil {
+		t.Fatalf("whoami: %v", err)
+	}
+	if us.User != "u-1" || !us.Admin {
+		t.Errorf("user: %+v", us)
+	}
+}
+
+// TestPutFileBase64Encodes checks that PutFile sends the content
+// base64-encoded alongside the path and branch.
+func TestPutFileBase64Encodes(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if !strings.HasSuffix(r.URL.Path, "/contents") {
+			t.Errorf("path: %q", r.URL.Path)
+		}
+		var body map[string]string
+		// A decode failure leaves body empty and trips the checks below.
+		_ = json.NewDecoder(r.Body).Decode(&body)
+		decoded, err := base64.StdEncoding.DecodeString(body["content"])
+		if err != nil {
+			// Not on the test goroutine: report without t.Fatalf.
+			t.Errorf("base64: %v", err)
+			return
+		}
+		if string(decoded) != "hello world" {
+			t.Errorf("content: %q", decoded)
+		}
+		if body["path"] != "job/role.md" || body["branch"] != "helix-specs" {
+			t.Errorf("body: %+v", body)
+		}
+	}))
+	if err := c.PutFile(context.Background(), "r-1", PutFileRequest{Path: "job/role.md", Branch: "helix-specs", Message: "init", Content: "hello world"}); err != nil {
+		t.Fatalf("putfile: %v", err)
+	}
+}
+
+// TestSendSessionMessagePostsToMessagesEndpoint verifies the new
+// /sessions/{id}/messages path is wired correctly: method, path,
+// auth header, JSON body shape, and that the request_id /
+// interaction_id come back into the typed response.
+func TestSendSessionMessagePostsToMessagesEndpoint(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			t.Errorf("method: %s", r.Method)
+		}
+		if r.URL.Path != "/api/v1/sessions/ses_42/messages" {
+			t.Errorf("path: %q", r.URL.Path)
+		}
+		if got := r.Header.Get("Authorization"); got != "Bearer tok" {
+			t.Errorf("auth: %q", got)
+		}
+		var body struct {
+			Content      string `json:"content"`
+			Interrupt    bool   `json:"interrupt,omitempty"`
+			NotifyUserID string `json:"notify_user_id,omitempty"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+			// Not on the test goroutine: report without t.Fatalf.
+			t.Errorf("decode: %v", err)
+			return
+		}
+		if body.Content != "hello queue" || !body.Interrupt {
+			t.Errorf("body: %+v", body)
+		}
+		_ = json.NewEncoder(w).Encode(SendMessageResponse{RequestID: "req_1", InteractionID: "ix_7"})
+	}))
+	resp, err := c.SendSessionMessage(context.Background(), "ses_42", "hello queue", SendMessageOptions{Interrupt: true})
+	if err != nil {
+		t.Fatalf("send: %v", err)
+	}
+	if resp.RequestID != "req_1" || resp.InteractionID != "ix_7" {
+		t.Errorf("resp: %+v", resp)
+	}
+}
+
+// TestValidateProviderModel covers the happy path plus each rejection
+// branch: unknown provider, unknown model, disabled model, missing
+// inputs. The validator is the operator-facing pre-flight that turns
+// "your typo causes a confusing 422 from /zed-config when the desktop
+// boots" into "your typo causes helix-org to refuse to start with a
+// concrete error pointing at the bad key."
+func TestValidateProviderModel(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api/v1/providers":
+			_ = json.NewEncoder(w).Encode([]string{"openai", "anthropic"})
+		case "/v1/models":
+			provider := r.URL.Query().Get("provider")
+			var models []Model
+			switch provider {
+			case "anthropic":
+				models = []Model{
+					{ID: "claude-opus-4-6", Enabled: true},
+					{ID: "claude-shelved-1", Enabled: false},
+				}
+			case "openai":
+				models = []Model{{ID: "gpt-4o-mini", Enabled: true}}
+			}
+			_ = json.NewEncoder(w).Encode(map[string]any{"data": models})
+		default:
+			t.Errorf("unexpected path: %s", r.URL.Path)
+			http.NotFound(w, r)
+		}
+	}))
+	cases := []struct {
+		name        string
+		provider    string
+		model       string
+		wantSubstr  string // empty = expect nil error
+		mustContain []string
+	}{
+		{name: "happy", provider: "anthropic", model: "claude-opus-4-6"},
+		{name: "empty provider", provider: "", model: "x", wantSubstr: "both provider and model"},
+		{name: "empty model", provider: "anthropic", model: "", wantSubstr: "both provider and model"},
+		{name: "unknown provider", provider: "bunker-minimax-m2.7", model: "minimax-m2.7", wantSubstr: "not configured", mustContain: []string{"bunker-minimax-m2.7", "openai"}},
+		{name: "unknown model", provider: "anthropic", model: "claude-opus-9999", wantSubstr: "not found", mustContain: []string{"claude-opus-9999", "anthropic"}},
+		{name: "disabled model", provider: "anthropic", model: "claude-shelved-1", wantSubstr: "disabled"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			err := ValidateProviderModel(context.Background(), c, tc.provider, tc.model)
+			if tc.wantSubstr == "" {
+				if err != nil {
+					t.Fatalf("expected nil error, got: %v", err)
+				}
+				return
+			}
+			if err == nil {
+				t.Fatalf("expected error containing %q, got nil", tc.wantSubstr)
+			}
+			if !strings.Contains(err.Error(), tc.wantSubstr) {
+				t.Errorf("error %q does not contain %q", err, tc.wantSubstr)
+			}
+			for _, must := range tc.mustContain {
+				if !strings.Contains(err.Error(), must) {
+					t.Errorf("error %q missing required hint %q", err, must)
+				}
+			}
+		})
+	}
+}
+
+// TestCheckDesktopQuota covers the three branches: room (active < max),
+// no room (active >= max), and unlimited (max == 0). The point of the
+// helper is to fail fast with a clear error instead of letting
+// helix-org spin up project plumbing only to bail at StartDesktop.
+func TestCheckDesktopQuota(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name       string
+		body       string
+		wantSubstr string // empty = expect nil error
+	}{
+		{name: "room", body: `{"max_concurrent_desktops":2,"active_concurrent_desktops":1}`},
+		{name: "exact-limit", body: `{"max_concurrent_desktops":2,"active_concurrent_desktops":2}`, wantSubstr: "2/2 active"},
+		{name: "over-limit", body: `{"max_concurrent_desktops":2,"active_concurrent_desktops":3}`, wantSubstr: "3/2 active"},
+		{name: "unlimited", body: `{"max_concurrent_desktops":0,"active_concurrent_desktops":99}`},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			c := newTestClient(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				if r.URL.Path != "/api/v1/config" {
+					t.Errorf("unexpected path: %s", r.URL.Path)
+				}
+				_, _ = w.Write([]byte(tc.body))
+			}))
+			err := CheckDesktopQuota(context.Background(), c)
+			if tc.wantSubstr == "" {
+				if err != nil {
+					t.Fatalf("expected nil, got: %v", err)
+				}
+				return
+			}
+			if err == nil || !strings.Contains(err.Error(), tc.wantSubstr) {
+				t.Fatalf("error %v does not contain %q", err, tc.wantSubstr) // %v: err may be nil here
+			}
+		})
+	}
+}
+
+func TestSendSessionMessageRejectsEmptySID(t *testing.T) {
+	t.Parallel()
+	c := newTestClient(t, http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {
+		t.Error("server must not be called when sessionID is empty") // Error, not Fatal: the handler runs off the test goroutine
+	}))
+	if _, err := c.SendSessionMessage(context.Background(), "", "x", SendMessageOptions{}); err == nil {
+		t.Fatal("expected error on empty sessionID")
+	}
+}
+
+func TestNewRequiresFields(t *testing.T) {
+	t.Parallel()
+	if _, err := New(Config{}); err == nil {
+		t.Fatal("expected error")
+	}
+	if _, err := New(Config{BaseURL: "http://x"}); err == nil {
+		t.Fatal("expected error")
+	}
+}
diff --git a/helix-org/helix/helixclient/patches.go b/helix-org/helix/helixclient/patches.go
new file mode 100644
index 0000000000..d5389083b8
--- /dev/null
+++ b/helix-org/helix/helixclient/patches.go
@@ -0,0 +1,183 @@
+package helixclient
+
+// EntryStream is a per-session translator from Helix's response-entry
+// patch wire format into stable, "settled" transcript events.
+//
+// Helix streams `EntryPatch[]` frames over the WebSocket. Each patch
+// targets one entry in the session's response-entry array (indexed
+// by `Index`, identified by `MessageID`). Text and tool_call entries
+// can be extended in place; a new MessageID at the same Index means
+// the previous entry is sealed and a new one takes its slot. Tool
+// calls additionally carry `ToolStatus` ("In Progress" | "Completed"
+// | …).
+//
+// `EntryStream` accumulates the full content of each entry and
+// invokes the supplied callback once an entry is *settled*:
+//
+//   - `text` entries settle when a different MessageID appears at
+//     the same Index, or when `Flush()` is called (session end).
+//   - `tool_call` entries settle when `ToolStatus` reaches
+//     `Completed` or `Failed`. The first time a tool_call entry is
+//     observed, an `EventToolUse` is emitted with the args; the
+//     final event is `EventToolResult` (or `EventToolResultError`).
+//
+// The callback's view is the same shape for both AI Worker
+// activation transcripts and the chat SSE bridge, so they can render
+// identically without needing to know about EntryPatches.
+type EntryStream struct {
+	emit    func(Event)
+	entries map[int]*entryState
+}
+
+// Event is one settled transcript event surfaced by EntryStream. The
+// text/tool_use/tool_result distinction matches the line shapes the
+// claude bridge has emitted historically; both helix-org's
+// activation stream and the chat HTML bridge consume the same set.
+type Event struct {
+	Kind     string // "assistant" | "tool_use" | "tool_result" | "tool_result-error" | "error"
+	Text     string
+	ToolName string
+}
+
+// Values for Event.Kind; they mirror the line tags the claude bridge emits.
+const (
+	EventAssistant       = "assistant"
+	EventToolUse         = "tool_use"
+	EventToolResult      = "tool_result"
+	EventToolResultError = "tool_result-error"
+	EventError           = "error"
+)
+
+type entryState struct {
+	messageID  string
+	kind       string // "text" | "tool_call"
+	content    string
+	toolName   string
+	toolStatus string
+	announced  bool // true once the opening event for this entry has been emitted
+	settled    bool // true once the closing event for this entry has been emitted
+}
+
+// NewEntryStream returns a fresh translator. emit is called once per
+// settled event. emit must be safe to call from the goroutine
+// driving Apply.
+func NewEntryStream(emit func(Event)) *EntryStream {
+	return &EntryStream{emit: emit, entries: map[int]*entryState{}}
+}
+
+// Apply consumes one SessionUpdate frame from SubscribeUpdates. It
+// processes EntryPatches into the per-entry state and emits any
+// settled events. Frames with no EntryPatches (session_update /
+// interaction_update snapshots) are also handled — they may carry
+// terminal `Interaction.State` indicating end-of-turn, which flushes
+// any entries that are still open.
+func (s *EntryStream) Apply(u SessionUpdate) {
+	for _, p := range u.EntryPatches {
+		s.applyPatch(p)
+	}
+	if u.Interaction != nil {
+		switch u.Interaction.State {
+		case "complete":
+			s.Flush()
+		case "error":
+			if u.Interaction.Error != "" {
+				s.emit(Event{Kind: EventError, Text: u.Interaction.Error})
+			}
+			s.Flush()
+		}
+	}
+}
+
+func (s *EntryStream) applyPatch(p EntryPatch) {
+	cur, exists := s.entries[p.Index]
+	if !exists || cur.messageID != p.MessageID {
+		// New entry at this index — seal the previous occupant first.
+		if exists {
+			s.seal(cur)
+		}
+		cur = &entryState{messageID: p.MessageID, kind: p.Type}
+		s.entries[p.Index] = cur
+	}
+	cur.content = spliceUTF16(cur.content, p.Patch, p.PatchOffset)
+	if p.ToolName != "" {
+		cur.toolName = p.ToolName
+	}
+	if p.ToolStatus != "" {
+		cur.toolStatus = p.ToolStatus
+	}
+	// Announce tool_use the first time a tool_call entry is seen.
+	if cur.kind == "tool_call" && !cur.announced {
+		cur.announced = true
+		s.emit(Event{Kind: EventToolUse, Text: cur.content, ToolName: cur.toolName})
+	}
+	// Settle tool_call when the runtime reports a terminal status.
+	if cur.kind == "tool_call" && (cur.toolStatus == "Completed" || cur.toolStatus == "Failed") && !cur.settled {
+		s.seal(cur)
+	}
+}
+
+// Flush emits closing events for any unsealed entries. Call at end
+// of session (terminal status) or on disconnect when the caller
+// wants to drain whatever's been accumulated.
+func (s *EntryStream) Flush() {
+	// Ascending index order keeps transcripts stable; the bound is computed once.
+	for i, last := 0, maxIndex(s.entries); i <= last; i++ {
+		e, ok := s.entries[i]
+		if !ok || e.settled {
+			continue
+		}
+		s.seal(e)
+	}
+}
+
+func maxIndex(m map[int]*entryState) int {
+	hi := -1
+	for i := range m {
+		if i > hi {
+			hi = i
+		}
+	}
+	return hi
+}
+
+func (s *EntryStream) seal(e *entryState) {
+	if e.settled {
+		return
+	}
+	e.settled = true
+	switch e.kind {
+	case "text":
+		if e.content != "" {
+			s.emit(Event{Kind: EventAssistant, Text: e.content})
+		}
+	case "tool_call":
+		kind := EventToolResult
+		if e.toolStatus == "Failed" {
+			kind = EventToolResultError
+		}
+		s.emit(Event{Kind: kind, Text: e.content, ToolName: e.toolName})
+	}
+}
+
+// spliceUTF16 overwrites `s` with `patch` starting at byte offset `offset`.
+//
+// Helix's `PatchOffset` is documented as a UTF-16 offset; in
+// practice the patches helix-org consumes are append-only ASCII /
+// short UTF-8 strings where the byte offset matches the UTF-16
+// offset. We treat it as a byte offset and clamp to len(s) so
+// out-of-range patches (e.g. snapshot replay where offset > current
+// length) append rather than panic. If real-world patches surface
+// surrogate-pair edits this approximation will need fixing.
+func spliceUTF16(s, patch string, offset int) string {
+	if offset >= len(s) {
+		return s + patch
+	}
+	if offset < 0 {
+		offset = 0
+	}
+	end := offset + len(patch)
+	if end > len(s) {
+		end = len(s)
+	}
+	return s[:offset] + patch + s[end:]
+}
diff --git a/helix-org/helix/helixclient/patches_test.go b/helix-org/helix/helixclient/patches_test.go
new file mode 100644
index 0000000000..1430532c7d
--- /dev/null
+++ b/helix-org/helix/helixclient/patches_test.go
@@ -0,0 +1,125 @@
+package helixclient
+
+import "testing"
+
+// TestEntryStreamTextSettlesOnReplace verifies the core invariant: a
+// text entry is held open until its slot is replaced by a different
+// MessageID, then the full accumulated content is emitted.
+func TestEntryStreamTextSettlesOnReplace(t *testing.T) {
+	t.Parallel()
+	var got []Event
+	s := NewEntryStream(func(e Event) { got = append(got, e) })
+
+	// Two append-only patches against the same MessageID.
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "m1", Type: "text", Patch: "Hello", PatchOffset: 0},
+	}})
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "m1", Type: "text", Patch: " world", PatchOffset: 5},
+	}})
+	if len(got) != 0 {
+		t.Fatalf("expected no emits while text is still streaming, got %v", got)
+	}
+
+	// New MessageID at same index: previous text settles.
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "m2", Type: "tool_call", Patch: `{"x":1}`, PatchOffset: 0, ToolName: "publish", ToolStatus: "In Progress"},
+	}})
+	if len(got) != 2 {
+		t.Fatalf("expected 2 events (assistant text, tool_use), got %v", got)
+	}
+	if got[0].Kind != EventAssistant || got[0].Text != "Hello world" {
+		t.Errorf("text: %+v", got[0])
+	}
+	if got[1].Kind != EventToolUse || got[1].ToolName != "publish" {
+		t.Errorf("tool_use: %+v", got[1])
+	}
+}
+
+// TestEntryStreamToolCompletes verifies that a tool_call entry seals
+// when ToolStatus reaches Completed.
+func TestEntryStreamToolCompletes(t *testing.T) {
+	t.Parallel()
+	var got []Event
+	s := NewEntryStream(func(e Event) { got = append(got, e) })
+
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "t1", Type: "tool_call", Patch: `{"x":1}`, PatchOffset: 0, ToolName: "fetch", ToolStatus: "In Progress"},
+	}})
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "t1", Type: "tool_call", Patch: ` ok`, PatchOffset: 7, ToolStatus: "Completed"},
+	}})
+	if len(got) != 2 {
+		t.Fatalf("expected tool_use + tool_result, got %v", got)
+	}
+	if got[1].Kind != EventToolResult || got[1].Text != `{"x":1} ok` {
+		t.Errorf("tool_result: %+v", got[1])
+	}
+}
+
+// TestEntryStreamToolFailedEmitsError verifies that ToolStatus=Failed
+// produces a tool_result-error rather than tool_result.
+func TestEntryStreamToolFailedEmitsError(t *testing.T) {
+	t.Parallel()
+	var got []Event
+	s := NewEntryStream(func(e Event) { got = append(got, e) })
+
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "t1", Type: "tool_call", Patch: "boom", ToolName: "x", ToolStatus: "In Progress"},
+	}})
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "t1", Type: "tool_call", ToolStatus: "Failed"},
+	}})
+	if len(got) == 0 || got[len(got)-1].Kind != EventToolResultError { // length guard: fail, don't panic, when nothing was emitted
+		t.Errorf("expected tool_result-error as the last event, got %+v", got)
+	}
+}
+
+// TestEntryStreamFlushSealsOpenText verifies Flush emits any open
+// text entries, e.g. on session terminal state.
+func TestEntryStreamFlushSealsOpenText(t *testing.T) {
+	t.Parallel()
+	var got []Event
+	s := NewEntryStream(func(e Event) { got = append(got, e) })
+	s.Apply(SessionUpdate{EntryPatches: []EntryPatch{
+		{Index: 0, MessageID: "m1", Type: "text", Patch: "answer", PatchOffset: 0},
+	}})
+	s.Flush()
+	if len(got) != 1 || got[0].Kind != EventAssistant || got[0].Text != "answer" {
+		t.Errorf("unexpected events: %v", got)
+	}
+}
+
+// TestEntryStreamSnapshotReplayDoesNotDoubleEmit verifies that
+// re-applying the same patches (e.g. a late-joiner snapshot) doesn't
+// duplicate events. The MessageID matches and PatchOffset starts
+// from 0 with full content; the splice clamps so content reaches a
+// stable end-state without producing extra emits.
+func TestEntryStreamSnapshotReplayDoesNotDoubleEmit(t *testing.T) {
+	t.Parallel()
+	var got []Event
+	s := NewEntryStream(func(e Event) { got = append(got, e) })
+	patches := []EntryPatch{
+		{Index: 0, MessageID: "t1", Type: "tool_call", Patch: `{"x":1}`, PatchOffset: 0, ToolName: "fetch", ToolStatus: "In Progress"},
+		{Index: 0, MessageID: "t1", Type: "tool_call", Patch: ` done`, PatchOffset: 7, ToolStatus: "Completed"},
+	}
+	s.Apply(SessionUpdate{EntryPatches: patches})
+	first := append([]Event(nil), got...) // copy of the events emitted by the first pass
+	// Replay the same patches; the event count must not grow.
+	s.Apply(SessionUpdate{EntryPatches: patches})
+	if len(got) != len(first) {
+		t.Errorf("snapshot replay double-emitted: %d → %d events", len(first), len(got))
+	}
+}
+
+// TestEntryStreamInteractionErrorEmitsErrorEvent verifies that an
+// `interaction_update` with State="error" surfaces an error event.
+func TestEntryStreamInteractionErrorEmitsErrorEvent(t *testing.T) {
+	t.Parallel()
+	var got []Event
+	s := NewEntryStream(func(e Event) { got = append(got, e) })
+	s.Apply(SessionUpdate{Interaction: &Interaction{State: "error", Error: "boom"}})
+	if len(got) != 1 || got[0].Kind != EventError || got[0].Text != "boom" {
+		t.Errorf("expected one error event, got %v", got)
+	}
+}
diff --git a/helix-org/prompts/builtins.go b/helix-org/prompts/builtins.go
new file mode 100644
index 0000000000..1a32af953a
--- /dev/null
+++ b/helix-org/prompts/builtins.go
@@ -0,0 +1,25 @@
+package prompts
+
+import "fmt"
+
+// RegisterBuiltins registers every prompt that ships with helix-org.
+// Mirror of tools.RegisterBuiltins: one place to add a new prompt and
+// everything else (per-worker MCP exposure, chat slash-command
+// expansion, /help listing) picks it up for free.
+//
+// Help is registered last as a convention only: its listing is
+// generated by walking the registry at invocation time, so it also
+// picks up prompts registered after it.
+func RegisterBuiltins(reg *Registry) error {
+	for _, p := range []Prompt{
+		Role{},
+	} {
+		if err := reg.Register(p); err != nil {
+			return fmt.Errorf("register %q: %w", p.Name(), err)
+		}
+	}
+	if err := reg.Register(NewHelp(reg)); err != nil {
+		return fmt.Errorf("register %q: %w", HelpName, err)
+	}
+	return nil
+}
diff --git a/helix-org/prompts/help.go b/helix-org/prompts/help.go
new file mode 100644
index 0000000000..4a58b21688
--- /dev/null
+++ b/helix-org/prompts/help.go
@@ -0,0 +1,63 @@
+package prompts
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// HelpName is the slash-command identifier for the help-listing prompt.
+const HelpName Name = "help"
+
+// Help is a self-introspecting slash command: when invoked it walks
+// the very Registry it was registered on and produces a markdown list
+// of every other prompt visible at that moment, each with a one-line
+// description.
+//
+// Holding a pointer to its own registry is the whole trick — it means
+// adding a new prompt anywhere in the codebase automatically lights up
+// in `/help` without anyone touching this file. The reference is taken
+// at registration time but resolved lazily on every invocation, so the
+// listing always reflects the registry's current state, not a snapshot.
+type Help struct {
+	reg *Registry
+}
+
+// NewHelp constructs a Help bound to the given registry. The registry
+// pointer is captured so Render can iterate it on every call; prompts
+// registered before or after Help are listed alike.
+func NewHelp(reg *Registry) Help { return Help{reg: reg} }
+
+func (Help) Name() Name    { return HelpName }
+func (Help) Title() string { return "Available slash commands" }
+func (Help) Description() string {
+	return "Lists every slash command this chat surface knows about, with a one-line " +
+		"description of each. Auto-generated from the prompt registry — adding a new " +
+		"prompt automatically adds it here."
+}
+
+func (Help) Arguments() []Argument { return nil }
+
+// RequiresTool returns the empty string so every Worker sees `/help`
+// regardless of grants. There is no tool to gate against — `/help`
+// reads the registry, never mutates anything.
+func (Help) RequiresTool() domain.ToolName { return "" }
+
+func (h Help) Render(_ context.Context, _ map[string]string) ([]Message, error) {
+	all := h.reg.All()
+	sort.Slice(all, func(i, j int) bool { return all[i].Name() < all[j].Name() })
+
+	var sb strings.Builder
+	// Stage directions for the LLM. Without these the model paraphrases
+	// or adds chatty commentary; we want a clean, deterministic listing.
+	sb.WriteString("The user typed `/help`. Reply with **this exact markdown listing** — no preamble, no paraphrasing, no extra commentary:\n\n")
+	sb.WriteString("---\n\n")
+	sb.WriteString("**Available slash commands**\n\n")
+	for _, p := range all {
+		fmt.Fprintf(&sb, "- `/%s` — %s\n", p.Name(), p.Description())
+	}
+	return []Message{{Role: "user", Text: sb.String()}}, nil
+}
diff --git a/helix-org/prompts/help_test.go b/helix-org/prompts/help_test.go
new file mode 100644
index 0000000000..23cf970684
--- /dev/null
+++ b/helix-org/prompts/help_test.go
@@ -0,0 +1,120 @@
+package prompts_test
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"github.com/helixml/helix-org/prompts"
+)
+
+// TestHelpListsRegisteredPrompts pins the core /help contract: every
+// prompt registered on the registry shows up in the rendered output
+// with its name and description.
+func TestHelpListsRegisteredPrompts(t *testing.T) {
+	t.Parallel()
+	reg := prompts.NewRegistry()
+	if err := reg.Register(stubPrompt{name: "alpha"}); err != nil {
+		t.Fatalf("register alpha: %v", err)
+	}
+	if err := reg.Register(stubPrompt{name: "beta"}); err != nil {
+		t.Fatalf("register beta: %v", err)
+	}
+	help := prompts.NewHelp(reg)
+	if err := reg.Register(help); err != nil {
+		t.Fatalf("register help: %v", err)
+	}
+
+	msgs, err := help.Render(context.Background(), nil)
+	if err != nil {
+		t.Fatalf("Render: %v", err)
+	}
+	if len(msgs) != 1 {
+		t.Fatalf("messages = %d, want 1", len(msgs))
+	}
+	for _, want := range []string{"`/alpha`", "`/beta`", "`/help`"} {
+		if !strings.Contains(msgs[0].Text, want) {
+			t.Errorf("output missing %q\n%s", want, msgs[0].Text)
+		}
+	}
+}
+
+// TestHelpAutoGeneratesNewPrompts is the regression guard for the
+// design promise: adding a new prompt must NOT require touching
+// help.go. We register Help, render once, then register a new prompt
+// and render again — the second render must include the new prompt.
+//
+// If this ever fails, someone has snapshotted the prompt list at
+// construction time instead of resolving lazily on each Render call.
+func TestHelpAutoGeneratesNewPrompts(t *testing.T) {
+	t.Parallel()
+	reg := prompts.NewRegistry()
+	help := prompts.NewHelp(reg)
+	_ = reg.Register(help)
+
+	first, _ := help.Render(context.Background(), nil)
+	if strings.Contains(first[0].Text, "`/late-arrival`") {
+		t.Fatal("output unexpectedly contains a prompt that hasn't been registered yet")
+	}
+
+	if err := reg.Register(stubPrompt{name: "late-arrival"}); err != nil {
+		t.Fatalf("register late-arrival: %v", err)
+	}
+	second, _ := help.Render(context.Background(), nil)
+	if !strings.Contains(second[0].Text, "`/late-arrival`") {
+		t.Fatalf("output missed prompt registered after Help: %s", second[0].Text)
+	}
+}
+
+// TestHelpAlphabeticallySorted: the listing is deterministic so a
+// future change that flipped to map-order randomness is caught.
+func TestHelpAlphabeticallySorted(t *testing.T) {
+	t.Parallel()
+	reg := prompts.NewRegistry()
+	for _, n := range []string{"zebra", "alpha", "mango"} {
+		_ = reg.Register(stubPrompt{name: prompts.Name(n)})
+	}
+	help := prompts.NewHelp(reg)
+	_ = reg.Register(help)
+
+	msgs, _ := help.Render(context.Background(), nil)
+	// The body has a stage-direction preamble that itself mentions
+	// `/help`; if we searched the whole string, /help would always
+	// appear first. Skip past the listing header so we're only
+	// matching the actual sorted list.
+	_, list, ok := strings.Cut(msgs[0].Text, "Available slash commands")
+	if !ok {
+		t.Fatalf("listing header missing from output:\n%s", msgs[0].Text)
+	}
+	a := strings.Index(list, "`/alpha`")
+	h := strings.Index(list, "`/help`")
+	m := strings.Index(list, "`/mango`")
+	z := strings.Index(list, "`/zebra`")
+	if a < 0 || a >= h || h >= m || m >= z { // a < 0: a missing /alpha (-1) would otherwise sort first and pass
+		t.Fatalf("not alphabetical: alpha=%d help=%d mango=%d zebra=%d\n%s", a, h, m, z, list)
+	}
+}
+
+func TestHelpNoToolGate(t *testing.T) {
+	t.Parallel()
+	if got := (prompts.Help{}).RequiresTool(); got != "" {
+		t.Errorf("RequiresTool = %q, want empty (universal visibility)", got)
+	}
+}
+
+// TestRegisterBuiltinsIncludesHelp covers the wiring promise: serve.go
+// calls prompts.RegisterBuiltins, and the result must include Help so
+// users see it in the autocomplete and can invoke it.
+func TestRegisterBuiltinsIncludesHelp(t *testing.T) {
+	t.Parallel()
+	reg := prompts.NewRegistry()
+	if err := prompts.RegisterBuiltins(reg); err != nil {
+		t.Fatalf("RegisterBuiltins: %v", err)
+	}
+	if _, err := reg.Get(prompts.HelpName); err != nil {
+		t.Errorf("help missing from builtins: %v", err)
+	}
+	if _, err := reg.Get(prompts.RoleName); err != nil {
+		t.Errorf("role missing from builtins: %v", err)
+	}
+}
diff --git a/helix-org/prompts/prompt.go b/helix-org/prompts/prompt.go
new file mode 100644
index 0000000000..932b489789
--- /dev/null
+++ b/helix-org/prompts/prompt.go
@@ -0,0 +1,58 @@
+// Package prompts holds the MCP-prompt surface — server-defined
+// templates that clients (Claude Code, the helix-org chat UI) surface
+// as slash commands. Prompts are scaffolding for the *human* side of
+// the org: structured interviews that turn vague intent into well-shaped
+// graph mutations through the existing tools (create_role, hire_worker,
+// etc.). They never carry behaviour of their own; the LLM consumes the
+// rendered messages and dispatches via tools.
+package prompts
+
+import (
+	"context"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// Name is the identifier MCP clients use to fetch a prompt. Must be
+// unique within a registry. Convention: a short lowercase word with
+// no verb prefix (`role`, `help`), matching the built-in prompts.
+type Name string
+
+// Argument describes a single named parameter the client may pass when
+// invoking the prompt. Mirrors mcp.PromptArgument so registration is
+// a one-to-one copy.
+type Argument struct {
+	Name        string
+	Title       string
+	Description string
+	Required    bool
+}
+
+// Message is one seed turn the prompt contributes to the conversation.
+// Role is "user" or "assistant" per the MCP spec; in practice every
+// helix-org prompt seeds a single user turn.
+type Message struct {
+	Role string
+	Text string
+}
+
+// Prompt is the contract every server-defined prompt satisfies. The
+// per-worker MCP server iterates the registry, filters by RequiresTool,
+// and registers each survivor as an mcp.Prompt.
+type Prompt interface {
+	Name() Name
+	Title() string
+	Description() string
+	Arguments() []Argument
+
+	// RequiresTool gates visibility: only Workers holding a grant for
+	// the named tool see this prompt. The empty string means visible
+	// to every Worker. Gating exists because prompts that end in a
+	// tool call are useless to a Worker who can't make that call —
+	// surfacing the slash command would just produce a 403 at the end.
+	RequiresTool() domain.ToolName
+
+	// Render produces the seed messages for this invocation. The args
+	// map is the validated set passed by the MCP client.
+	Render(ctx context.Context, args map[string]string) ([]Message, error)
+}
diff --git a/helix-org/prompts/registry.go b/helix-org/prompts/registry.go
new file mode 100644
index 0000000000..b86839fb9c
--- /dev/null
+++ b/helix-org/prompts/registry.go
@@ -0,0 +1,50 @@
+package prompts
+
+import "fmt"
+
+// Registry is an in-memory map of Name to Prompt. The shape mirrors
+// tools.Registry deliberately: same lifecycle (built once at startup,
+// read concurrently per request) and same "addable without changing
+// core" property — wiring a new prompt is one Register call, not a
+// switch in the server.
+type Registry struct {
+	prompts map[Name]Prompt
+}
+
+// NewRegistry returns an empty registry.
+func NewRegistry() *Registry {
+	return &Registry{prompts: make(map[Name]Prompt)}
+}
+
+// Register adds a prompt. Duplicate names fail loudly: a prompt is a
+// slash command, and ambiguous slash commands are user-hostile.
+func (r *Registry) Register(p Prompt) error {
+	name := p.Name()
+	if name == "" {
+		return fmt.Errorf("prompt name is empty")
+	}
+	if _, exists := r.prompts[name]; exists {
+		return fmt.Errorf("prompt %q already registered", name)
+	}
+	r.prompts[name] = p
+	return nil
+}
+
+// Get returns the prompt by name, or an error if unknown.
+func (r *Registry) Get(name Name) (Prompt, error) {
+	p, ok := r.prompts[name]
+	if !ok {
+		return nil, fmt.Errorf("prompt %q not registered", name)
+	}
+	return p, nil
+}
+
+// All returns every registered prompt. Order is not guaranteed and
+// callers must not assume it.
+func (r *Registry) All() []Prompt {
+	out := make([]Prompt, 0, len(r.prompts))
+	for _, p := range r.prompts {
+		out = append(out, p)
+	}
+	return out
+}
diff --git a/helix-org/prompts/registry_test.go b/helix-org/prompts/registry_test.go
new file mode 100644
index 0000000000..4475a1445c
--- /dev/null
+++ b/helix-org/prompts/registry_test.go
@@ -0,0 +1,104 @@
+package prompts_test
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"testing"
+
+	"github.com/helixml/helix-org/domain"
+	"github.com/helixml/helix-org/prompts"
+)
+
+// stubPrompt is a minimal Prompt for exercising registry behaviour
+// without coupling these tests to any real prompt implementation.
+type stubPrompt struct { + name Name + tool domain.ToolName +} + +type Name = prompts.Name + +func (s stubPrompt) Name() Name { return s.name } +func (stubPrompt) Title() string { return "stub" } +func (stubPrompt) Description() string { return "stub" } +func (stubPrompt) Arguments() []prompts.Argument { return nil } +func (s stubPrompt) RequiresTool() domain.ToolName { return s.tool } +func (stubPrompt) Render(_ context.Context, _ map[string]string) ([]prompts.Message, error) { + return []prompts.Message{{Role: "user", Text: "stub"}}, nil +} + +func TestRegistryRegisterAndGet(t *testing.T) { + t.Parallel() + reg := prompts.NewRegistry() + if err := reg.Register(stubPrompt{name: "a"}); err != nil { + t.Fatalf("Register: %v", err) + } + p, err := reg.Get("a") + if err != nil { + t.Fatalf("Get: %v", err) + } + if p.Name() != "a" { + t.Fatalf("Name = %q, want a", p.Name()) + } +} + +func TestRegistryRejectsEmptyName(t *testing.T) { + t.Parallel() + reg := prompts.NewRegistry() + if err := reg.Register(stubPrompt{name: ""}); err == nil { + t.Fatal("Register empty name = nil, want error") + } +} + +func TestRegistryRejectsDuplicate(t *testing.T) { + t.Parallel() + reg := prompts.NewRegistry() + if err := reg.Register(stubPrompt{name: "a"}); err != nil { + t.Fatalf("first Register: %v", err) + } + err := reg.Register(stubPrompt{name: "a"}) + if err == nil { + t.Fatal("duplicate Register = nil, want error") + } + if !strings.Contains(err.Error(), "already registered") { + t.Fatalf("err = %v, want 'already registered'", err) + } +} + +func TestRegistryGetUnknown(t *testing.T) { + t.Parallel() + reg := prompts.NewRegistry() + _, err := reg.Get("missing") + if err == nil || !strings.Contains(err.Error(), "not registered") { + t.Fatalf("err = %v, want 'not registered'", err) + } + // Stay friendly to callers that want to translate "missing" into + // their own error type — should at least be a real error value. 
+ if errors.Unwrap(err) != nil { + t.Fatalf("err.Unwrap() = %v, want nil sentinel-shaped error", errors.Unwrap(err)) + } +} + +func TestRegistryAllReturnsEverything(t *testing.T) { + t.Parallel() + reg := prompts.NewRegistry() + for _, n := range []Name{"a", "b", "c"} { + if err := reg.Register(stubPrompt{name: n}); err != nil { + t.Fatalf("Register %s: %v", n, err) + } + } + all := reg.All() + if len(all) != 3 { + t.Fatalf("All() = %d, want 3", len(all)) + } + got := map[Name]bool{} + for _, p := range all { + got[p.Name()] = true + } + for _, want := range []Name{"a", "b", "c"} { + if !got[want] { + t.Errorf("All() missing %q", want) + } + } +} diff --git a/helix-org/prompts/role.go b/helix-org/prompts/role.go new file mode 100644 index 0000000000..a0af7b047f --- /dev/null +++ b/helix-org/prompts/role.go @@ -0,0 +1,57 @@ +package prompts + +import ( + "context" + _ "embed" + "strings" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/tools" +) + +// RoleName is the slash-command identifier for the role-design prompt. +// Surfaced as `/role` in MCP clients. Singular verb-less form follows +// Claude Code's own slash-command convention (`/init`, `/review`, +// `/compact`) — never `/new_xxx`. +const RoleName Name = "role" + +//go:embed templates/role.md +var roleTemplate string + +// Role drafts a fresh Role markdown from a one-line title hint, saves +// it via create_role without asking permission, then offers in-place +// edits or chains into hire_worker. All the actual content lives in +// templates/role.md; this file is just the registration shell. +type Role struct{} + +func (Role) Name() Name { return RoleName } +func (Role) Title() string { return "Draft a Role from a title" } + +func (Role) Description() string { + return "Drafts and saves a new Role from a title — e.g. `/role cto`, " + + "`/role marketing director`, `/role customer support`. After saving, " + + "offers edits in-place or hires someone into it." 
+} + +func (Role) Arguments() []Argument { + return []Argument{{ + Name: "hint", + Title: "Role title", + Description: "The Role to draft, in plain words — e.g. 'cto', 'marketing director', 'customer support'. The LLM uses this as the seed for the whole markdown.", + Required: false, + }} +} + +// RequiresTool gates the prompt on the create_role grant: a Worker +// without it can't save the result, so surfacing the slash command +// would only produce a dead-end at the very last step. +func (Role) RequiresTool() domain.ToolName { return tools.CreateRoleName } + +func (Role) Render(_ context.Context, args map[string]string) ([]Message, error) { + body := roleTemplate + if hint := strings.TrimSpace(args["hint"]); hint != "" { + body += "\n\n---\n\n**Role title from the operator:** " + hint + + "\n\nDraft from this directly — no interview.\n" + } + return []Message{{Role: "user", Text: body}}, nil +} diff --git a/helix-org/prompts/role_test.go b/helix-org/prompts/role_test.go new file mode 100644 index 0000000000..bbed8c05fb --- /dev/null +++ b/helix-org/prompts/role_test.go @@ -0,0 +1,61 @@ +package prompts_test + +import ( + "context" + "strings" + "testing" + + "github.com/helixml/helix-org/prompts" + "github.com/helixml/helix-org/tools" +) + +func TestRoleRequiresCreateRoleGrant(t *testing.T) { + t.Parallel() + if got := (prompts.Role{}).RequiresTool(); got != tools.CreateRoleName { + t.Fatalf("RequiresTool = %q, want %q", got, tools.CreateRoleName) + } +} + +func TestRoleRendersTemplate(t *testing.T) { + t.Parallel() + msgs, err := (prompts.Role{}).Render(context.Background(), nil) + if err != nil { + t.Fatalf("Render: %v", err) + } + if len(msgs) != 1 { + t.Fatalf("messages = %d, want 1", len(msgs)) + } + if msgs[0].Role != "user" { + t.Fatalf("role = %q, want user", msgs[0].Role) + } + // The template must mention the tool we're driving toward and the + // canonical Role-markdown sections demonstrated in the demo Roles. 
+ // These assertions pin the *contract* of the prompt — that it tells + // the LLM to call create_role and produces output the rest of the + // org can read. They do not pin every word of the prose. + for _, want := range []string{"create_role", "## Triggers", "## Streams", "## Constraints"} { + if !strings.Contains(msgs[0].Text, want) { + t.Errorf("template missing %q", want) + } + } +} + +func TestRoleAppendsHint(t *testing.T) { + t.Parallel() + msgs, err := (prompts.Role{}).Render(context.Background(), map[string]string{"hint": "marketing director"}) + if err != nil { + t.Fatalf("Render: %v", err) + } + if !strings.Contains(msgs[0].Text, "marketing director") { + t.Fatalf("hint not in output: %s", msgs[0].Text) + } +} + +func TestRoleIgnoresWhitespaceHint(t *testing.T) { + t.Parallel() + withHint, _ := (prompts.Role{}).Render(context.Background(), map[string]string{"hint": " "}) + withoutHint, _ := (prompts.Role{}).Render(context.Background(), nil) + if withHint[0].Text != withoutHint[0].Text { + t.Fatalf("whitespace-only hint changed output") + } +} diff --git a/helix-org/prompts/templates/role.md b/helix-org/prompts/templates/role.md new file mode 100644 index 0000000000..c4e95147a7 --- /dev/null +++ b/helix-org/prompts/templates/role.md @@ -0,0 +1,109 @@ +You are helping me add a new Role to the org. **Move fast.** Don't +interview me — draft from what I gave you, save it, then ask if I +want changes. + +## Step 1 — Draft the Role + +Generate a complete Role markdown in this exact shape (every demo +Role in this repo follows it; consistency matters more than +creativity here): + +```markdown +# Role: {Title} + +{One-paragraph mission. Plain prose, no bullets. Says what outcome +they own.} + +## Tools (MCP) + +`tool_a`, `tool_b`. {Note on shell tools if non-default.} + +## Streams + +- `s-foo` — {what they do with it}. +- `s-bar` — {what they do with it}. 
+ +## Triggers + +**On {event}.** {What they do — concrete, imperative, no hedging.} +Post output to `s-{channel}`. + +**On {another event}.** {…} + +**On anything else.** Stay quiet. Read events, update your own +notes if useful, but don't post. The bar for posting is: a trigger +above matches, and the output is something a human asked for or +would recognise as their request. + +## Constraints + +- Do not {forbidden thing}. +- Before acting on a trigger, name it in one line + (e.g. `Trigger: researcher posted notes`) so the audit log shows + which branch fired. +- Do not modify your own Role. + +## Files + +- `path/.md` — {what's in it}. +``` + +Where you don't have enough info, **make a reasonable guess** based +on what the title implies. Mark each guess inline with +`(ASSUMED: …)` so I can spot what to challenge. A good guess beats +a question. + +Every `**On {event}.**` block must end with an explicit output +channel (`Post to s-…`) or say "no post — internal note only". +Every Role must include the `**On anything else.** Stay quiet` +block verbatim — it's the default-quiet rule. + +Default tools: pick from what the org has — typically `subscribe`, +`publish`, `read_events`, `dm`. Don't grant `hire_worker` or +`create_role` unless the title implies seniority. + +## Step 2 — Save it. **Don't ask permission.** + +Immediately call **`create_role`** with: +- `id`: kebab-case from the title, prefixed `r-` + (e.g. `r-marketing-director`) +- `content`: the markdown above + +Just do it. The owner can edit or delete after. + +## Step 3 — Show me what landed and offer changes + +After `create_role` returns, post the saved markdown back to me in +a code block, then ask **one** focused question — pick the +direction most likely to want a tweak: + +> Saved as `r-…`. Want to change anything? 
Common edits: +> - **Triggers** — different events, or different responses +> - **Streams** — add/remove which channels they read/write +> - **Tools** — broader or tighter MCP scope +> - **Constraints** — what they should never do +> +> Say what you'd change, or say **"next"** to hire someone into this +> Role and I'll set up the Position and Worker too. + +If I name an edit, call `update_role` and show the new version. +If I say "next" (or anything indicating I want to hire), drive the +hire conversationally: ask only for a name + one-line vibe for the +person, then chain: + +1. `create_position` under `p-root` (unless I said otherwise). +2. `hire_worker` — kind `ai`, id `w-`, grants + matching the Role's Tools section. +3. **Stand up their streams.** For each stream the Role's Streams + section lists: + - call `list_streams` first — another Worker may already have + created it + - if it exists, `subscribe` the new Worker + - if not, `create_stream` then `subscribe` + + A Worker hired without their streams subscribed is half-hired — + they have nothing to listen to. + +Don't ask permission for each tool call — chain them. + +Never restart the draft from scratch. Modify in place. diff --git a/helix-org/server/chat/backend.go b/helix-org/server/chat/backend.go new file mode 100644 index 0000000000..8f59b63de1 --- /dev/null +++ b/helix-org/server/chat/backend.go @@ -0,0 +1,46 @@ +package chat + +import "net/http" + +// Backend is the surface the HTTP server wires to /ui/chat/* and the +// UI handler reads. Two implementations live in this package today: +// +// - *Bridge — runs a long-lived `claude` subprocess in the server's +// cwd and bridges its stream-json output to SSE. Used when +// `chat.backend=claude`. Development-only (the North Star is that +// all LLM calls flow through Helix). +// - *HelixBridge — drives a Helix chat session via helixclient and +// translates `interaction_update` / `interaction_patch` frames +// into the same SSE shape the UI expects. 
Used when +// `chat.backend=helix`. +// +// Keeping the claude implementation around is a dev convenience — if +// a contributor doesn't have a Helix to point at, they can still drive +// the org graph end-to-end. Both backends MUST be safe to use through +// this interface alone; the UI handler never type-asserts. +type Backend interface { + StreamHandler() http.Handler + SendHandler() http.Handler + NewHandler() http.Handler + SwitchHandler() http.Handler + CommandsHandler() http.Handler + // CWD is the working directory the backend is anchored to. The + // claude backend uses it to find per-cwd session jsonls; the + // helix backend returns the server's cwd as a stable label. + CWD() string + // HistoryStartsFresh reports whether the chat page should render + // nothing as initial history because the user just clicked New + // chat and the freshly-created session hasn't produced output yet. + HistoryStartsFresh() bool + // Label is a short footer string for the chat page indicating the + // active LLM backend, e.g. "helix · minimax-m2.7" or + // "claude · sonnet 4.6". Rendered next to the Send button so the + // operator can tell at a glance which stack their chat is on. + Label() string +} + +// Compile-time assertions: both bridges satisfy Backend. +var ( + _ Backend = (*Bridge)(nil) + _ Backend = (*HelixBridge)(nil) +) diff --git a/helix-org/server/chat/chat.go b/helix-org/server/chat/chat.go new file mode 100644 index 0000000000..b5fa8bddcd --- /dev/null +++ b/helix-org/server/chat/chat.go @@ -0,0 +1,586 @@ +// Package chat bridges the browser chat surface to a long-lived +// `claude` subprocess running in the helix-org server's working +// directory. The bridge owns one subprocess per Bridge instance — +// today there is exactly one Owner, so one global session is enough — +// and fans claude's stream-json stdout out to any number of SSE +// listeners as ready-to-swap HTML fragments. 
User input arrives via +// HTTP POST and is written to claude's stdin as a stream-json frame. +// +// The subprocess runs in the server's cwd so the conversation is +// shared with terminal `helix-org chat` invoked from the same +// directory: claude's per-cwd session store handles persistence, and +// the bridge resumes the most recent session by ID at startup. +package chat + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "net/http" + "os/exec" + "sort" + "strings" + "sync" + "time" + + "github.com/helixml/helix-org/prompts" +) + +// Bridge owns the chat subprocess and the SSE fan-out. Construct one +// per server, mount StreamHandler() and SendHandler() under /ui/chat/. +type Bridge struct { + claudeBin string + cwd string + mcpURL string + logger *slog.Logger + // prompts is the optional MCP-prompt registry. When set, SendHandler + // intercepts inputs that start with `/` and replaces them with + // the prompt's rendered seed text before forwarding to claude. + // Reason: claude in stream-json mode does not process slash commands — + // it just wraps the raw text as a user message — so MCP prompts + // (which Claude Code's interactive TUI handles natively) are dead on + // arrival here unless we expand them server-side. + prompts *prompts.Registry + // label is the short footer string the chat UI renders next to the + // send button. claude doesn't expose its active model via the CLI + // in any reliable way, so the wiring layer passes a label string + // (e.g. "claude · sonnet") via WithLabel — falls back to "claude" + // if unset. + label string + + mu sync.Mutex // guards sess, forceNew, resumeSID, freshFromPath + sess *session + forceNew bool // next start() spawns claude with no --resume + overrideResume string // next start() resumes this sid; "" = use latest + // freshFromPath is the path of the latest jsonl at the moment the + // user clicked "New chat". Until a *different* file becomes + // latest (i.e. 
the new claude process has produced its own jsonl), + // the UI suppresses history rendering. Empty means "no New chat + // pending — render normally". Path-based, not time-based, because + // a sibling claude process (e.g. dev's Claude Code) may keep + // updating its own jsonl after the click and would otherwise look + // like fresh content. + freshFromPath string +} + +// CWD returns the working directory the bridge launches `claude` in. +// The UI uses it to read claude's per-cwd session jsonls for history +// rendering and the Recents list. +func (b *Bridge) CWD() string { return b.cwd } + +// Label satisfies chat.Backend. Returns the configured label or +// "claude" if none was set. +func (b *Bridge) Label() string { + if b.label == "" { + return "claude" + } + return b.label +} + +// WithLabel sets the footer label rendered in the chat UI. Returns +// the same Bridge so the call composes with New().WithPrompts(). +func (b *Bridge) WithLabel(s string) *Bridge { + b.label = s + return b +} + +// New returns a Bridge configured to spawn `claude` from claudeBin in +// the given cwd, wired to a single MCP server at mcpURL named "helix". +// Sessions are spawned lazily on the first request. +func New(claudeBin, cwd, mcpURL string, logger *slog.Logger) *Bridge { + if logger == nil { + logger = slog.Default() + } + return &Bridge{claudeBin: claudeBin, cwd: cwd, mcpURL: mcpURL, logger: logger} +} + +// WithPrompts attaches a prompts.Registry so the bridge can resolve +// `/` inputs in the chat textarea into MCP-prompt seed text +// before handing the message to claude. Returns the same Bridge so the +// call can be chained off New. nil is equivalent to no prompts — +// slash commands fall through to claude unchanged. +func (b *Bridge) WithPrompts(reg *prompts.Registry) *Bridge { + b.prompts = reg + return b +} + +// session is one running claude subprocess plus its SSE listeners. 
It +// is created once and reused for the life of the process; if the +// subprocess exits, the next request creates a fresh session that +// resumes the same claude conversation by ID. +type session struct { + cmd *exec.Cmd + stdin io.WriteCloser + + mu sync.Mutex + listeners map[chan string]struct{} + dead chan struct{} +} + +// ensure returns the live session, lazily starting it if there isn't +// one or if the previous one exited. ctx is the request context — only +// used to bound startup, not to bound the subprocess lifetime. +func (b *Bridge) ensure(ctx context.Context) (*session, error) { + b.mu.Lock() + defer b.mu.Unlock() + if b.sess != nil { + select { + case <-b.sess.dead: + b.sess = nil + default: + return b.sess, nil + } + } + s, err := b.start(ctx) + if err != nil { + return nil, err + } + b.sess = s + return s, nil +} + +func (b *Bridge) start(ctx context.Context) (*session, error) { + mcpJSON, err := json.Marshal(map[string]any{ + "mcpServers": map[string]any{ + "helix": map[string]string{"type": "http", "url": b.mcpURL}, + }, + }) + if err != nil { + return nil, fmt.Errorf("marshal mcp config: %w", err) + } + args := []string{ + "-p", + "--input-format", "stream-json", + "--output-format", "stream-json", + "--verbose", + "--permission-mode", "bypassPermissions", + "--strict-mcp-config", + "--mcp-config", string(mcpJSON), + } + switch { + case b.forceNew: + // no --resume — fresh session + case b.overrideResume != "": + args = append(args, "--resume", b.overrideResume) + default: + if sid := latestClaudeSessionID(b.cwd); sid != "" { + args = append(args, "--resume", sid) + } + } + // ctx is only used for cancellation during start (e.g. the request + // going away mid-spawn). We deliberately do NOT bind the subprocess + // to ctx — the subprocess outlives the request. + cmd := exec.CommandContext(context.Background(), b.claudeBin, args...) 
//nolint:gosec // claudeBin is operator-supplied + cmd.Dir = b.cwd + stdin, err := cmd.StdinPipe() + if err != nil { + return nil, fmt.Errorf("stdin pipe: %w", err) + } + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return nil, fmt.Errorf("stderr pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("start claude: %w", err) + } + // Intents consumed only on successful spawn — if start() fails we + // preserve "user wanted X" for the retry. + b.forceNew = false + b.overrideResume = "" + + s := &session{ + cmd: cmd, + stdin: stdin, + listeners: make(map[chan string]struct{}), + dead: make(chan struct{}), + } + + go b.readLoop(s, stdout) + go b.drainStderr(stderr) + go func() { + _ = cmd.Wait() + close(s.dead) + b.logger.Info("chat session exited", "pid", cmd.Process.Pid) + }() + + b.logger.Info("chat session started", "pid", cmd.Process.Pid, "cwd", b.cwd) + _ = ctx // explicit: ctx not retained; subprocess outlives the request + return s, nil +} + +func (b *Bridge) readLoop(s *session, r io.Reader) { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024) + for scanner.Scan() { + var ev streamEvent + if err := json.Unmarshal(scanner.Bytes(), &ev); err != nil { + b.logger.Warn("chat parse stream-json", "err", err, "line", oneLine(scanner.Text(), 200)) + continue + } + for _, frag := range renderFragments(ev) { + s.broadcast(frag) + } + } + if err := scanner.Err(); err != nil && !errors.Is(err, io.EOF) { + b.logger.Warn("chat scanner error", "err", err) + } +} + +func (b *Bridge) drainStderr(r io.Reader) { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + b.logger.Warn("chat claude stderr", "line", scanner.Text()) + } +} + +func (s *session) broadcast(frag string) { + s.mu.Lock() + defer s.mu.Unlock() + for ch := range s.listeners { + select { + case ch <- frag: + default: + // Drop on slow 
listener — better than blocking the read + // loop on one stuck browser tab. + } + } +} + +func (s *session) subscribe() chan string { + ch := make(chan string, 64) + s.mu.Lock() + s.listeners[ch] = struct{}{} + s.mu.Unlock() + return ch +} + +func (s *session) unsubscribe(ch chan string) { + s.mu.Lock() + delete(s.listeners, ch) + s.mu.Unlock() +} + +// send writes one user message frame to claude's stdin in the +// stream-json format claude expects with --input-format stream-json. +func (s *session) send(text string) error { + frame := map[string]any{ + "type": "user", + "message": map[string]any{ + "role": "user", + "content": text, + }, + } + data, err := json.Marshal(frame) + if err != nil { + return fmt.Errorf("marshal user frame: %w", err) + } + data = append(data, '\n') + if _, err := s.stdin.Write(data); err != nil { + return fmt.Errorf("write user frame: %w", err) + } + return nil +} + +// StreamHandler serves the SSE channel at GET /ui/chat/stream. Each +// browser tab opens one of these long-lived connections and receives +// pre-rendered HTML fragments as `data: …` lines, which htmx swaps +// straight into #chat-log. 
+func (b *Bridge) StreamHandler() http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// SSE needs incremental flushing; refuse writers that cannot do it.
+		flusher, canFlush := w.(http.Flusher)
+		if !canFlush {
+			http.Error(w, "streaming unsupported", http.StatusInternalServerError)
+			return
+		}
+		sess, err := b.ensure(r.Context())
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		frags := sess.subscribe()
+		defer sess.unsubscribe(frags)
+
+		hdr := w.Header()
+		hdr.Set("Content-Type", "text/event-stream")
+		hdr.Set("Cache-Control", "no-cache")
+		hdr.Set("Connection", "keep-alive")
+		hdr.Set("X-Accel-Buffering", "no")
+		w.WriteHeader(http.StatusOK)
+		flusher.Flush()
+
+		keepalive := time.NewTicker(15 * time.Second)
+		defer keepalive.Stop()
+
+		gone := r.Context().Done()
+		for {
+			select {
+			case <-gone:
+				// Browser tab closed or request cancelled.
+				return
+			case <-sess.dead:
+				// Subprocess exited; end the stream.
+				return
+			case <-keepalive.C:
+				// Comment-only SSE frame, written every 15 seconds.
+				_, _ = fmt.Fprint(w, ": keepalive\n\n")
+				flusher.Flush()
+			case frag := <-frags:
+				// A raw `\n` cannot appear inside one `data:` line, and
+				// `\n\n` terminates the event. The SSE format's answer is
+				// to repeat `data:` once per payload line; the browser's
+				// EventSource rejoins them with `\n`. Markdown-rendered
+				// fragments carry real newlines inside `<pre>` blocks
+				// (fenced code), so they must be preserved — flattening
+				// them to spaces collapsed fenced code into one visual line.
+				var event strings.Builder
+				event.WriteString("event: message\n")
+				for _, line := range strings.Split(frag, "\n") {
+					event.WriteString("data: " + line + "\n")
+				}
+				event.WriteString("\n")
+				_, _ = fmt.Fprint(w, event.String())
+				flusher.Flush()
+			}
+		}
+	})
+}
+
+// NewHandler serves POST /ui/chat/new: it drops the active session and
+// tells the browser to reload the chat page. The reset closes the
+// subprocess's stdin so it can exit on its own; a turn already in
+// flight may still finish, which is harmless because the old session
+// has no listeners left. The HX-Redirect header makes htmx navigate to
+// /ui/, which re-renders with an empty #chat-log; a fresh `claude`
+// (no --resume) is spawned lazily by the next request.
+func (b *Bridge) NewHandler() http.Handler {
+	reset := func(w http.ResponseWriter, _ *http.Request) {
+		b.newSession()
+		w.Header().Set("HX-Redirect", "/ui/")
+		w.WriteHeader(http.StatusOK)
+	}
+	return http.HandlerFunc(reset)
+}
+
+// newSession retires the active session (if any) and arranges for the
+// next ensure() to spawn claude without --resume. Calling it with no
+// active session just records the intent. It also remembers which
+// jsonl is currently the newest, so HistoryStartsFresh can tell when a
+// *different* file takes over — the sign that the new claude has
+// written its first event and history may be rendered again.
+func (b *Bridge) newSession() {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	b.forceNew, b.overrideResume = true, ""
+	b.freshFromPath = newestJSONL(claudeProjectsDir(b.cwd))
+
+	old := b.sess
+	if old == nil {
+		return
+	}
+	// Closing stdin is the shutdown signal; the close error is ignored
+	// because the session is being discarded either way.
+	_ = old.stdin.Close()
+	b.sess = nil
+	b.logger.Info("chat session reset by user", "marker", b.freshFromPath)
+}
+
+// HistoryStartsFresh tells the chat page whether to render an empty
+// history. It is true only while a "New chat" is pending: the user
+// clicked New chat and the newest jsonl in the cwd is still the one
+// that was newest at click time (or there is no jsonl at all). Once a
+// different file becomes the newest — the freshly spawned claude's own
+// jsonl — the pending marker is cleared and false is returned, as it
+// is when no New chat ever happened.
+func (b *Bridge) HistoryStartsFresh() bool {
+	b.mu.Lock()
+	pending := b.freshFromPath
+	b.mu.Unlock()
+	if pending == "" {
+		return false
+	}
+
+	// The directory scan runs outside the lock.
+	switch newestJSONL(claudeProjectsDir(b.cwd)) {
+	case "", pending:
+		return true
+	}
+
+	// A different file is now the newest. Drop the marker so later page
+	// loads skip the scan — but only if no newer "New chat" replaced it
+	// while the lock was released.
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if b.freshFromPath == pending {
+		b.freshFromPath = ""
+	}
+	return false
+}
+
+// SwitchHandler retires the active session and flags the next ensure()
+// to resume the requested session ID. The form field "sid" carries the
+// target. On success the HX-Redirect header bounces the browser to
+// /ui/?sid=<sid> so the chat handler renders that session's history.
+//
+// Responds 400 when the form cannot be parsed (the body is capped at
+// 4 KiB), when "sid" is empty, or when it is not a plain identifier.
+func (b *Bridge) SwitchHandler() http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		r.Body = http.MaxBytesReader(w, r.Body, 4<<10)
+		if err := r.ParseForm(); err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		sid := strings.TrimSpace(r.PostFormValue("sid"))
+		if sid == "" {
+			http.Error(w, "sid required", http.StatusBadRequest)
+			return
+		}
+		// sid is client-controlled and ends up in two sensitive places:
+		// the subprocess argv (`--resume <sid>`, where a leading '-'
+		// would be read as another flag) and the redirect URL's query
+		// string (where '&', '#', spaces, … would change its meaning).
+		// Accept only a conservative identifier alphabet.
+		// NOTE(review): assumes session IDs are UUID-like (letters,
+		// digits, '-', '_'); widen the allow-list if another format
+		// is in use.
+		malformed := strings.HasPrefix(sid, "-") || strings.IndexFunc(sid, func(c rune) bool {
+			plain := c == '-' || c == '_' ||
+				('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
+			return !plain
+		}) >= 0
+		if malformed {
+			http.Error(w, "invalid sid", http.StatusBadRequest)
+			return
+		}
+		b.switchSession(sid)
+		w.Header().Set("HX-Redirect", "/ui/?sid="+sid)
+		w.WriteHeader(http.StatusOK)
+	})
+}
+
+// switchSession retires the active session (if any) and records sid so
+// the next ensure() spawns claude with --resume <sid>. Any pending
+// "start fresh" intent is cancelled.
+func (b *Bridge) switchSession(sid string) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	b.forceNew, b.overrideResume = false, sid
+
+	old := b.sess
+	if old == nil {
+		return
+	}
+	// Closing stdin is the shutdown signal; the close error is ignored
+	// because the session is being discarded either way.
+	_ = old.stdin.Close()
+	b.sess = nil
+	b.logger.Info("chat session switched", "sid", sid)
+}
+
+// SendHandler serves POST /ui/chat/send. The form field "message"
+// carries the user's text. On success the response body is the rendered
+// user-bubble HTML, which htmx swaps into #chat-log right away so the
+// user sees their own message before the assistant replies; assistant
+// chunks arrive separately on the SSE channel.
+//
+// A blank message yields 204 with no body. A form that fails to parse
+// yields 400, and a failure to start the session or write to it yields
+// 500. The body is capped at 64 KiB — chat messages are short, and the
+// cap protects the form parser from a client streaming unbounded data.
+func (b *Bridge) SendHandler() http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		r.Body = http.MaxBytesReader(w, r.Body, 64<<10)
+		if err := r.ParseForm(); err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		typed := strings.TrimSpace(r.PostFormValue("message"))
+		if typed == "" {
+			w.WriteHeader(http.StatusNoContent)
+			return
+		}
+
+		// What goes to claude may be the expanded prompt text, but the
+		// bubble always echoes what the user typed: someone who entered
+		// `/role marketing director` expects to see exactly that in
+		// their conversation, not the expanded seed text.
+		outbound := typed
+		if seed, hit := b.expandSlashCommand(r.Context(), typed); hit {
+			outbound = seed
+		}
+
+		sess, err := b.ensure(r.Context())
+		if err == nil {
+			err = sess.send(outbound)
+		}
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/html; charset=utf-8")
+		_, _ = fmt.Fprint(w, renderUserBubble(typed))
+	})
+}
+
+// expandSlashCommand intercepts inputs of the form `/<name> <args>` and
+// replaces them with the rendered text of the MCP prompt registered
+// under <name>. It returns (expanded, true) on a hit and ("", false)
+// when the input is not a slash command, no registry is attached, the
+// prompt name is unknown, or rendering fails. On a miss the caller
+// forwards the message to claude unchanged, so anything not recognised
+// here (e.g. claude's own built-ins like `/clear`) keeps working as far
+// as it ever did.
+//
+// The command name ends at the first space, tab, or line break: the
+// input comes from a textarea, so `/role` followed by a newline and
+// the hint must expand just like `/role hint`.
+//
+// Argument convention: the smallest thing that works for today's
+// prompts — all text after the command name (trimmed) is threaded into
+// the prompt's *first declared argument*. That is enough for `/role`
+// (one optional `hint` arg) and any other single-arg prompt; multi-arg
+// prompts will need real parsing once one exists. When the prompt
+// renders several messages their texts are joined with a blank line.
+func (b *Bridge) expandSlashCommand(ctx context.Context, msg string) (string, bool) {
+	if b.prompts == nil || !strings.HasPrefix(msg, "/") {
+		return "", false
+	}
+	// Split on any ASCII whitespace, not just ' ': callers trim only the
+	// ends of msg, so an interior tab or newline may follow the name.
+	name, rest := msg[1:], ""
+	if i := strings.IndexAny(name, " \t\r\n"); i >= 0 {
+		name, rest = name[:i], name[i+1:]
+	}
+	if name == "" {
+		return "", false
+	}
+	p, err := b.prompts.Get(prompts.Name(name))
+	if err != nil {
+		return "", false
+	}
+	args := map[string]string{}
+	rest = strings.TrimSpace(rest)
+	if rest != "" {
+		// A prompt that declares no arguments silently drops the tail.
+		if a := p.Arguments(); len(a) > 0 {
+			args[a[0].Name] = rest
+		}
+	}
+	rendered, err := p.Render(ctx, args)
+	if err != nil {
+		b.logger.Info("chat slash command render failed", "name", name, "err", err)
+		return "", false
+	}
+	parts := make([]string, 0, len(rendered))
+	for _, m := range rendered {
+		parts = append(parts, m.Text)
+	}
+	return strings.Join(parts, "\n\n"), true
+}
+
+// CommandsHandler renders the slash-command typeahead at POST
+// /ui/chat/commands. The textarea fires this on keyup; the body is the
+// current value, keyed `message`. We respond with a (possibly empty)
+// HTML fragment that htmx swaps into #slash-suggestions: an empty
+// response hides the dropdown, a list of buttons exposes each matching
+// prompt with its title and description.
+//
+// We don't filter by which Worker holds which grant here — the chat
+// surface today is the owner's, and the SendHandler intercepts and
+// expands locally without going through the per-worker MCP visibility
+// pipeline. If we open the chat to non-owner Workers later, this
+// endpoint should call into the same gating logic the per-worker MCP
+// server uses.
+func (b *Bridge) CommandsHandler() http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/html; charset=utf-8")
+		if b.prompts == nil {
+			return
+		}
+		r.Body = http.MaxBytesReader(w, r.Body, 4<<10)
+		if err := r.ParseForm(); err != nil {
+			return
+		}
+		msg := r.PostFormValue("message")
+		if !strings.HasPrefix(msg, "/") {
+			return
+		}
+		// Match against the first whitespace-delimited token (minus
+		// leading slash). Once the user types past `/ ` they're
+		// composing an argument, not picking a command — keep the
+		// chosen prompt highlighted but stop filtering further.
+		token, _, _ := strings.Cut(msg[1:], " ")
+		prefix := strings.ToLower(token)
+
+		all := b.prompts.All()
+		matches := make([]prompts.Prompt, 0, len(all))
+		for _, p := range all {
+			if strings.HasPrefix(strings.ToLower(string(p.Name())), prefix) {
+				matches = append(matches, p)
+			}
+		}
+		sort.Slice(matches, func(i, j int) bool { return matches[i].Name() < matches[j].Name() })
+		for _, p := range matches {
+			_, _ = fmt.Fprint(w, renderSlashSuggestion(p))
+		}
+	})
+}
+
+// rendering helpers and stream-json shapes live in render.go,
+// shared with the historical-replay reader in sessions.go.
diff --git a/helix-org/server/chat/chat_test.go b/helix-org/server/chat/chat_test.go
new file mode 100644
index 0000000000..51e14216dd
--- /dev/null
+++ b/helix-org/server/chat/chat_test.go
@@ -0,0 +1,107 @@
+package chat
+
+import (
+	"context"
+	"log/slog"
+	"strings"
+	"testing"
+
+	"github.com/helixml/helix-org/domain"
+	"github.com/helixml/helix-org/prompts"
+)
+
+// stubPrompt is a minimal prompts.Prompt for exercising the bridge's
+// slash-command expansion without coupling these tests to the real
+// /role template. arg names its single argument ("" declares none).
+type stubPrompt struct {
+	name Name
+	arg  string
+}
+
+type Name = prompts.Name
+
+func (s stubPrompt) Name() Name        { return s.name }
+func (stubPrompt) Title() string       { return "stub" }
+func (stubPrompt) Description() string { return "stub" }
+func (s stubPrompt) Arguments() []prompts.Argument {
+	if s.arg == "" {
+		return nil
+	}
+	return []prompts.Argument{{Name: s.arg}}
+}
+func (stubPrompt) RequiresTool() domain.ToolName { return "" }
+func (s stubPrompt) Render(_ context.Context, args map[string]string) ([]prompts.Message, error) {
+	// Echo the declared argument rather than a fixed "hint" key, so any arg name works.
+	return []prompts.Message{{Role: "user", Text: "rendered:" + args[s.arg]}}, nil
+}
+
+func newBridgeWithPrompts(t *testing.T, ps ...prompts.Prompt) *Bridge {
+	t.Helper() // a failed registration aborts the calling test
+	registry := prompts.NewRegistry()
+	for _, prompt := range ps {
+		if err := registry.Register(prompt); err != nil {
+			t.Fatalf("register %s: %v", prompt.Name(), err)
+		}
+	}
+	return New("claude", t.TempDir(), "http://example/mcp", slog.Default()).WithPrompts(registry)
+}
+
+// TestExpandsSlashCommandIntoPromptText checks that a bare `/foo` is
+// replaced by the rendered prompt body once `foo` is registered.
+func TestExpandsSlashCommandIntoPromptText(t *testing.T) {
+	t.Parallel()
+	bridge := newBridgeWithPrompts(t, stubPrompt{name: "foo", arg: "hint"})
+	text, expanded := bridge.expandSlashCommand(context.Background(), "/foo")
+	switch {
+	case !expanded:
+		t.Fatal("expand = false, want true")
+	case !strings.HasPrefix(text, "rendered:"):
+		t.Fatalf("got = %q, want it to start with 'rendered:'", text)
+	}
+}
+
+// TestThreadsTailIntoFirstArgument pins the contract behind typing
+// `/role marketing director`: everything after the command name must
+// land, intact, in the prompt's first declared argument.
+func TestThreadsTailIntoFirstArgument(t *testing.T) {
+	t.Parallel()
+	b := newBridgeWithPrompts(t, stubPrompt{name: "foo", arg: "hint"})
+	got, ok := b.expandSlashCommand(context.Background(), "/foo marketing director")
+	if want := "rendered:marketing director"; !ok || got != want {
+		t.Fatalf("tail not threaded through: ok = %v, got %q, want %q", ok, got, want)
+	}
+}
+
+// TestUnknownSlashCommandFallsThrough checks that an unregistered
+// slash command is not consumed: expansion reports a miss, so the
+// literal text is what the caller forwards to claude.
+func TestUnknownSlashCommandFallsThrough(t *testing.T) {
+	t.Parallel()
+	bridge := newBridgeWithPrompts(t, stubPrompt{name: "foo"})
+	_, expanded := bridge.expandSlashCommand(context.Background(), "/missing")
+	if expanded {
+		t.Fatal("expand = true for unknown command, want false")
+	}
+}
+
+// TestNonSlashIsNotIntercepted guards the basic case: an attached
+// registry must not cause a plain user message to be rewritten.
+func TestNonSlashIsNotIntercepted(t *testing.T) {
+	t.Parallel()
+	bridge := newBridgeWithPrompts(t, stubPrompt{name: "foo"})
+	_, expanded := bridge.expandSlashCommand(context.Background(), "hello world")
+	if expanded {
+		t.Fatal("expand = true for non-slash text, want false")
+	}
+}
+
+// TestNilRegistryFallsThrough covers a bridge built without
+// WithPrompts: with no registry attached, slash commands must pass
+// through unchanged.
+func TestNilRegistryFallsThrough(t *testing.T) {
+	t.Parallel()
+	bridge := New("claude", t.TempDir(), "http://example/mcp", slog.Default())
+	_, expanded := bridge.expandSlashCommand(context.Background(), "/foo")
+	if expanded {
+		t.Fatal("expand = true with nil registry, want false")
+	}
+}
diff --git a/helix-org/server/chat/helix_bridge.go b/helix-org/server/chat/helix_bridge.go
new file mode 100644
index 0000000000..b3e7a146f8
--- /dev/null
+++ b/helix-org/server/chat/helix_bridge.go
@@ -0,0 +1,627 @@
+package chat
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"net/http"
+	"net/url"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	agenthelix "github.com/helixml/helix-org/agent/helix"
+	"github.com/helixml/helix-org/domain"
+	"github.com/helixml/helix-org/helix/helixclient"
+	"github.com/helixml/helix-org/prompts"
+)
+
+// HelixBridge drives the owner chat surface against a Helix chat
+// session instead of a local `claude` subprocess. Each Bridge owns
+// **one** Helix session at a time (the "current" session); New chat
+// or Switch reset the pointer and the next Send creates / resumes the
+// chosen session.
+//
+// Why one session per Bridge today: there is exactly one Owner chat
+// surface and the existing `*Bridge` shares its single subprocess
+// across every browser tab. Mirroring that keeps the UI's mental
+// model unchanged. When per-Worker chat surfaces arrive, swap the
+// "current session" field for a per-Worker map.
+//
+// SSE listeners are fanned out the same way the claude bridge does
+// it: one channel per subscriber, broadcast publishes drop on slow
+// listeners. Event translation lives in renderEvent below — it maps
+// helixclient EntryStream events onto the same render helpers the
+// claude bridge uses, so the UI renders both backends identically.
+type HelixBridge struct {
+	client      helixclient.Client
+	ensure      ProjectEnsurer // resolves the owner Worker's per-Worker project
+	ownerID     domain.WorkerID
+	sessionRole string
+	provider    string
+	model       string
+	cwd         string
+	logger      *slog.Logger
+	prompts     *prompts.Registry
+
+	mu           sync.Mutex // guards sessionID, listeners, wsCancel, freshFromNew, seen, orgIDByProject
+	sessionID    string     // current Helix session ID; "" means "next Send creates one"
+	listeners    map[chan string]struct{}
+	wsCancel     context.CancelFunc // closes the active WS goroutine when we switch sessions
+	wsWG         sync.WaitGroup
+	freshFromNew bool                // true while the user just clicked New chat and no Helix session exists yet
+	seen         map[string]struct{} // dedup keys for synchronous interaction replies; cleared on session switch
+
+	// orgIDByProject caches project_id → organization_id so we don't
+	// re-fetch the project on every send. Populated lazily on first
+	// send for a project. We MUST send organization_id on /sessions/chat
+	// because Helix's handler doesn't auto-populate it from project_id,
+	// and without it desktop quota falls back to the user's personal
+	// org (limit 2 by default).
+	orgIDByProject map[string]string
+}
+
+// ProjectEnsurer resolves a Worker's Helix project IDs. The chat
+// bridge calls Ensure(ctx, ownerID) per send so the owner Worker's
+// project (and its auto-provisioned Agent App with MCP wiring) is
+// always the target. The interface keeps the chat package free of a
+// hard import on tools/.
+//
+// Ensure returns the project ID, the Agent App ID and the repo ID for
+// workerID, or a non-nil error. The bridge's send path uses only the
+// first two and ignores the repo ID.
+type ProjectEnsurer interface {
+	Ensure(ctx context.Context, workerID domain.WorkerID) (projectID, agentAppID, repoID string, err error)
+}
+
+// HelixConfig wires a HelixBridge. The bridge holds no global
+// project ID — each chat session opens against the owner Worker's
+// per-Worker project, looked up via Ensure on every send.
+//
+// Client, Ensure, OwnerID and SessionRole are required; NewHelix
+// rejects a config missing any of them. A nil Logger is replaced by
+// slog.Default().
+//
+// agent_type is fixed at agenthelix.AgentType ("zed_external") — see
+// the constant for why. There is no `chat.agent_type` knob.
+type HelixConfig struct {
+	Client      helixclient.Client
+	Ensure      ProjectEnsurer
+	OwnerID     domain.WorkerID // typically "w-owner"
+	SessionRole string          // chat.session_role, e.g. "owner-chat"
+	Provider    string          // chat.provider
+	Model       string          // chat.model
+	CWD         string          // server cwd, only used as a stable label
+	Logger      *slog.Logger
+}
+
+// NewHelix validates cfg and returns a HelixBridge bound to the
+// supplied Helix client. Client, Ensure, OwnerID and SessionRole are
+// mandatory and are checked in that order; a nil Logger falls back to
+// slog.Default().
+func NewHelix(cfg HelixConfig) (*HelixBridge, error) {
+	switch {
+	case cfg.Client == nil:
+		return nil, fmt.Errorf("chat helix bridge: Client is required")
+	case cfg.Ensure == nil:
+		return nil, fmt.Errorf("chat helix bridge: Ensure is required")
+	case cfg.OwnerID == "":
+		return nil, fmt.Errorf("chat helix bridge: OwnerID is required")
+	case cfg.SessionRole == "":
+		return nil, fmt.Errorf("chat helix bridge: SessionRole is required (set chat.session_role)")
+	}
+
+	bridge := &HelixBridge{
+		client:         cfg.Client,
+		ensure:         cfg.Ensure,
+		ownerID:        cfg.OwnerID,
+		sessionRole:    cfg.SessionRole,
+		provider:       cfg.Provider,
+		model:          cfg.Model,
+		cwd:            cfg.CWD,
+		logger:         cfg.Logger,
+		listeners:      make(map[chan string]struct{}),
+		seen:           make(map[string]struct{}),
+		orgIDByProject: make(map[string]string),
+	}
+	if bridge.logger == nil {
+		bridge.logger = slog.Default()
+	}
+	return bridge, nil
+}
+
+// WithPrompts attaches the slash-command registry so SendHandler can
+// expand `/name` inputs server-side before posting to Helix. Same
+// shape as Bridge.WithPrompts; returns Backend so it composes with the
+// interface at the wiring layer. The receiver is modified in place
+// and returned.
+func (b *HelixBridge) WithPrompts(reg *prompts.Registry) Backend {
+	b.prompts = reg
+	return b
+}
+
+// CWD returns the working directory the bridge was configured with
+// (HelixConfig.CWD). Used by the UI as the stable label under which
+// Helix-backed Recents are grouped — there is only one helix-org
+// instance per cwd.
+func (b *HelixBridge) CWD() string { return b.cwd }
+
+// Label satisfies chat.Backend. Renders as "helix · <model>" (or just
+// "helix" when no model is configured) so the chat UI footer
+// truthfully reports which LLM stack is active.
+func (b *HelixBridge) Label() string {
+	label := "helix"
+	if b.model != "" {
+		label += " · " + b.model
+	}
+	return label
+}
+
+// HistoryStartsFresh reports whether the chat page should render no
+// history: true only while the user has clicked New and no Helix
+// session has been attached for that fresh chat yet.
+func (b *HelixBridge) HistoryStartsFresh() bool {
+	b.mu.Lock()
+	fresh := b.freshFromNew && b.sessionID == ""
+	b.mu.Unlock()
+	return fresh
+}
+
+// subscribe registers a new SSE listener and returns its channel
+// (buffered to 64 fragments). subscribe / unsubscribe / broadcast
+// follow the same shape the claude bridge uses, so SSE plumbing in
+// StreamHandler is identical.
+func (b *HelixBridge) subscribe() chan string {
+	listener := make(chan string, 64)
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.listeners[listener] = struct{}{}
+	return listener
+}
+
+// unsubscribe removes ch from the listener set. The channel itself is
+// not closed.
+func (b *HelixBridge) unsubscribe(ch chan string) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	delete(b.listeners, ch)
+}
+
+// broadcast publishes frag to every subscribed listener without
+// blocking; a listener whose buffer is full misses the fragment.
+// Takes b.mu for the duration of the fan-out.
+func (b *HelixBridge) broadcast(frag string) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.broadcastLocked(frag)
+}
+
+// StreamHandler serves /ui/chat/stream as SSE, in the same shape as
+// the claude bridge's handler. Each request subscribes a listener for
+// its lifetime; fragments broadcast by the bridge are written as
+// `message` events (one `data:` line per fragment line), and a
+// comment-only keepalive goes out every 15 seconds. The handler
+// returns when the request context is done.
+func (b *HelixBridge) StreamHandler() http.Handler {
+	serve := func(w http.ResponseWriter, r *http.Request) {
+		flusher, canFlush := w.(http.Flusher)
+		if !canFlush {
+			http.Error(w, "streaming unsupported", http.StatusInternalServerError)
+			return
+		}
+		listener := b.subscribe()
+		defer b.unsubscribe(listener)
+
+		hdr := w.Header()
+		hdr.Set("Content-Type", "text/event-stream")
+		hdr.Set("Cache-Control", "no-cache")
+		hdr.Set("Connection", "keep-alive")
+		hdr.Set("X-Accel-Buffering", "no")
+		w.WriteHeader(http.StatusOK)
+		flusher.Flush()
+
+		keepalive := time.NewTicker(15 * time.Second)
+		defer keepalive.Stop()
+
+		gone := r.Context().Done()
+		for {
+			select {
+			case <-gone:
+				return
+			case <-keepalive.C:
+				_, _ = fmt.Fprint(w, ": keepalive\n\n")
+			case frag := <-listener:
+				// A multi-line fragment becomes several data: lines
+				// of one event; the blank line terminates the event.
+				_, _ = fmt.Fprint(w, "event: message\n")
+				for _, line := range strings.Split(frag, "\n") {
+					_, _ = fmt.Fprintf(w, "data: %s\n", line)
+				}
+				_, _ = fmt.Fprint(w, "\n")
+			}
+			flusher.Flush()
+		}
+	}
+	return http.HandlerFunc(serve)
+}
+
+// SendHandler accepts a user message at /ui/chat/send (form field
+// "message", body capped at 64 KiB). It is synchronous: the handler
+// calls Helix via send, waits for it to return, and only then writes
+// the user bubble back, so the textarea stays frozen for that time.
+// An earlier async-with-mutex variant dropped follow-up responses and
+// was not worth fixing this round; revisit if the chat surface gets
+// busy enough to care.
+//
+// A blank message yields 204. Slash commands are expanded before
+// sending, but the bubble echoes what the user actually typed.
+func (b *HelixBridge) SendHandler() http.Handler {
+	handle := func(w http.ResponseWriter, r *http.Request) {
+		r.Body = http.MaxBytesReader(w, r.Body, 64<<10)
+		if err := r.ParseForm(); err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		typed := strings.TrimSpace(r.PostFormValue("message"))
+		if typed == "" {
+			w.WriteHeader(http.StatusNoContent)
+			return
+		}
+		outgoing := typed
+		if expanded, ok := b.expandSlashCommand(r.Context(), typed); ok {
+			outgoing = expanded
+		}
+		err := b.send(r.Context(), outgoing)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/html; charset=utf-8")
+		_, _ = fmt.Fprint(w, renderUserBubble(typed))
+	}
+	return http.HandlerFunc(handle)
+}
+
+// send dispatches one user message to the owner Worker's chat
+// session. Resolves the per-Worker project (and its auto-provisioned
+// Agent App carrying our MCP wiring) via Ensure on every call —
+// idempotent, so the cost is one DB lookup once the Worker has a
+// project.
+//
+// Two paths:
+//   - **Follow-up** (sessionID already attached): POST
+//     /api/v1/sessions/{id}/messages — Helix queues the message and
+//     pickupWaitingInteraction delivers it on agent reconnect.
+//   - **First turn** (no session): POST /sessions/chat to create the
+//     session. If the desktop's WS hasn't connected yet (hadWSError)
+//     we immediately re-queue the same prompt via the /messages
+//     endpoint so it lands once the agent dials home.
+//
+// Returns a wrapped error when Ensure, the follow-up post, the org
+// lookup or the session start fails; a quota pre-flight failure is
+// returned unwrapped. A failure of the cold-start re-queue is only
+// logged.
+//
+// NOTE(review): the session ID is read under b.mu but the lock is
+// released before the session is created, so two concurrent first
+// turns would each open a session. SendHandler is the only caller
+// visible here — confirm callers are effectively serialized.
+func (b *HelixBridge) send(ctx context.Context, msg string) error {
+	projectID, agentAppID, _, err := b.ensure.Ensure(ctx, b.ownerID)
+	if err != nil {
+		return fmt.Errorf("ensure owner project: %w", err)
+	}
+
+	// Snapshot the current session ID; the Helix calls below run
+	// without holding b.mu.
+	b.mu.Lock()
+	sid := b.sessionID
+	b.mu.Unlock()
+
+	// Follow-up: just queue. No StartChat dance, no warmup retry.
+	if sid != "" {
+		if _, err := b.client.SendSessionMessage(ctx, sid, msg, helixclient.SendMessageOptions{}); err != nil {
+			return fmt.Errorf("helix followup: %w", err)
+		}
+		b.logger.Info("chat helix followup", "sid", sid, "project", projectID)
+		return nil
+	}
+
+	// Pre-flight desktop quota — fail fast with a clear message
+	// instead of letting Helix's StartDesktop bail with a 500 after
+	// project apply / agent-app provisioning has already run. Soft
+	// check: a parallel caller could still race us for the last slot,
+	// in which case Helix's own quota error wins.
+	if err := helixclient.CheckDesktopQuota(ctx, b.client); err != nil {
+		return err
+	}
+
+	orgID, err := b.resolveProjectOrg(ctx, projectID)
+	if err != nil {
+		return fmt.Errorf("resolve project org: %w", err)
+	}
+
+	// AppID MUST be set — it becomes session.ParentApp, and Helix's
+	// external MCP proxy at /api/v1/mcp/external/{name} bails with
+	// "session has no associated agent" if ParentApp is empty
+	// (mcp_backend_external.go:272). Without that, the org-graph MCP
+	// we attached to the project's auto-provisioned Agent App never
+	// shows up in the desktop's Zed config — the agent then has only
+	// Helix's bundled MCPs and flounders when asked to call
+	// create_role / hire_worker.
+	req := helixclient.StartChatRequest{
+		ProjectID:           projectID,
+		OrganizationID:      orgID,
+		AppID:               agentAppID,
+		SessionRole:         b.sessionRole,
+		AgentType:           agenthelix.AgentType,
+		Type:                "text",
+		Provider:            b.provider,
+		Model:               b.model,
+		ExternalAgentConfig: &helixclient.ExternalAgentConfig{},
+		Messages:            []helixclient.SessionChatMessage{helixclient.NewTextMessage("user", msg)},
+	}
+	session, hadWSError, err := b.client.StartChatWithStatus(ctx, req)
+	if err != nil {
+		return fmt.Errorf("start helix chat: %w", err)
+	}
+	// Attach before broadcasting: attachSession resets the dedup map
+	// and starts the WS reader for the new session.
+	b.attachSession(session.ID)
+	b.logger.Info("chat helix session opened", "sid", session.ID, "project", projectID)
+
+	// Synchronous (helix_basic) sessions return the assistant reply
+	// inline; render it immediately. Streaming sessions populate
+	// Interactions later via the WS bridge.
+	b.broadcastInteractions(session.Interactions)
+
+	// Cold-start race: Helix's first /sessions/chat raced the desktop's
+	// WS connect, so the prompt is sitting in state=error. Re-queue the
+	// same message via the durable /messages endpoint — it'll be
+	// delivered on reconnect.
+	if hadWSError {
+		b.broadcast(renderAssistantText("_Warming up the Zed desktop. This usually takes a minute or two on a cold session..._"))
+		// Best effort: a failed re-queue is logged, not returned, so
+		// the caller still sees the session as opened.
+		if _, err := b.client.SendSessionMessage(ctx, session.ID, msg, helixclient.SendMessageOptions{}); err != nil {
+			b.logger.Warn("chat helix queue cold-start retry", "sid", session.ID, "err", err)
+		}
+	}
+	return nil
+}
+
+// broadcastInteractions covers the *synchronous* response shape
+// (helix_basic chat completions), where the assistant reply comes
+// back on `Session.Interactions[*].ResponseMessage` instead of over
+// the WebSocket. Each interaction is handled once per session, keyed
+// on its ID and generation in b.seen: a non-empty reply is broadcast
+// as an assistant bubble, and an error state is broadcast as a turn
+// error unless it is the cold-start "no external agent WebSocket
+// connection" failure. Nil entries are skipped. Holds b.mu for the
+// whole pass.
+func (b *HelixBridge) broadcastInteractions(ixs []*helixclient.Interaction) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	for _, ix := range ixs {
+		if ix == nil {
+			continue
+		}
+		dedupKey := fmt.Sprintf("sync:%s:%d", ix.ID, ix.GenerationID)
+		_, alreadySent := b.seen[dedupKey]
+		if alreadySent {
+			continue
+		}
+		b.seen[dedupKey] = struct{}{}
+
+		if reply := ix.ResponseMessage; reply != "" {
+			b.broadcastLocked(renderAssistantText(reply))
+		}
+		failed := ix.State == "error" && ix.Error != ""
+		if failed && !strings.Contains(ix.Error, "no external agent WebSocket connection") {
+			b.broadcastLocked(renderTurnError(ix.Error))
+		}
+	}
+}
+
+// broadcastLocked fans frag out to every listener without taking
+// b.mu — the caller must already hold it. Sends never block: a
+// listener with a full buffer simply misses this fragment.
+func (b *HelixBridge) broadcastLocked(frag string) {
+	for listener := range b.listeners {
+		select {
+		case listener <- frag:
+		default:
+			// slow listener: drop rather than stall the broadcaster
+		}
+	}
+}
+
+// attachSession records sid as the current session and starts a new
+// WS reader goroutine for it. Any prior reader is cancelled and
+// waited for first. The dedup map is reset because interaction IDs
+// only need to be unique within one session.
+//
+// The wait for the prior reader happens with b.mu released. The
+// reader publishes through broadcast, which takes b.mu, so waiting
+// for it while holding the lock could deadlock: the reader blocked on
+// the mutex, this function blocked on the reader.
+func (b *HelixBridge) attachSession(sid string) {
+	// Detach the previous reader's cancel func under the lock, then
+	// cancel and wait outside it.
+	b.mu.Lock()
+	stopPrev := b.wsCancel
+	b.wsCancel = nil
+	b.mu.Unlock()
+	if stopPrev != nil {
+		stopPrev()
+	}
+	b.wsWG.Wait()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	b.mu.Lock()
+	if b.wsCancel != nil {
+		// Another attach installed a reader while we were waiting;
+		// cancel it so only the reader started below stays current.
+		b.wsCancel()
+	}
+	b.sessionID = sid
+	b.freshFromNew = false
+	b.seen = make(map[string]struct{})
+	b.wsCancel = cancel
+	// Count the reader before publishing its cancel func outside the
+	// lock, so whoever cancels it next also waits for it.
+	b.wsWG.Add(1)
+	b.mu.Unlock()
+	go b.runWebsocket(ctx, sid)
+}
+
+// runWebsocket subscribes to /api/v1/ws/user for sid, applies each
+// frame to a per-session EntryStream, and broadcasts settled events
+// as HTML fragments to SSE listeners. Reconnects with capped
+// exponential backoff for the life of ctx.
+//
+// EntryStream's per-Index/MessageID dedup covers the WS path; the
+// synchronous OpenAI-shape path (broadcastInteractions) carries its
+// own dedup keyed on interaction ID. The two paths are mutually
+// exclusive in practice — a chat completion either streams patches
+// or returns inline.
+func (b *HelixBridge) runWebsocket(ctx context.Context, sid string) {
+	defer b.wsWG.Done()
+	stream := helixclient.NewEntryStream(func(e helixclient.Event) {
+		b.broadcast(b.renderEvent(e))
+	})
+	delay := time.Second
+	for {
+		ch, err := b.client.SubscribeUpdates(ctx, sid)
+		if err != nil {
+			b.logger.Warn("chat helix ws subscribe", "sid", sid, "err", err)
+		} else {
+			for u := range ch {
+				stream.Apply(u)
+			}
+		}
+		select {
+		case <-ctx.Done():
+			stream.Flush()
+			return
+		case <-time.After(delay):
+		}
+		if delay < 30*time.Second {
+			delay *= 2
+		}
+	}
+}
+
+// renderEvent maps one EntryStream event to the HTML fragment the
+// chat SSE bridge serves, using the same render functions as the
+// legacy claude bridge so both backends look alike. Unknown kinds and
+// suppressed errors yield "".
+func (b *HelixBridge) renderEvent(e helixclient.Event) string {
+	switch e.Kind {
+	case helixclient.EventAssistant:
+		return renderAssistantText(e.Text)
+	case helixclient.EventToolUse:
+		return renderToolUse(e.ToolName, e.Text)
+	case helixclient.EventToolResult, helixclient.EventToolResultError:
+		return renderToolResult(e.Text, e.Kind == helixclient.EventToolResultError)
+	case helixclient.EventError:
+		// The "no external agent WebSocket connection" error is the
+		// cold-start race: it only fires while the desktop's Zed
+		// agent is still booting, and send re-queues the prompt in
+		// that case. Showing it would leak a confusing, scary chip
+		// during warmup, so it is swallowed here.
+		coldStart := strings.Contains(e.Text, "no external agent WebSocket connection")
+		if !coldStart {
+			return renderTurnError(e.Text)
+		}
+	}
+	return ""
+}
+
+// NewHandler serves /ui/chat/new: it cancels the active WS reader (if
+// any), clears the current session pointer and dedup map, marks the
+// chat as fresh, then waits for the reader to exit before replying
+// with an HX-Redirect to /ui/. The next Send opens a fresh Helix
+// session. SSE listeners stay subscribed throughout.
+func (b *HelixBridge) NewHandler() http.Handler {
+	reset := func(w http.ResponseWriter, _ *http.Request) {
+		b.mu.Lock()
+		if stop := b.wsCancel; stop != nil {
+			stop()
+			b.wsCancel = nil
+		}
+		b.sessionID, b.freshFromNew = "", true
+		b.seen = make(map[string]struct{})
+		b.mu.Unlock()
+
+		// Wait outside the lock: the reader may need b.mu to finish.
+		b.wsWG.Wait()
+		b.logger.Info("chat helix session reset by user")
+		w.Header().Set("HX-Redirect", "/ui/")
+		w.WriteHeader(http.StatusOK)
+	}
+	return http.HandlerFunc(reset)
+}
+
+// SwitchHandler attaches the bridge to an existing Helix session at
+// /ui/chat/switch. The form field "sid" carries the target ID (body
+// capped at 4 KiB); a missing or blank sid is a 400. On success the
+// bridge is re-attached and the response carries an HX-Redirect to
+// /ui/?sid=<sid>; the next SSE listener picks up the new session's
+// stream.
+func (b *HelixBridge) SwitchHandler() http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		r.Body = http.MaxBytesReader(w, r.Body, 4<<10)
+		if err := r.ParseForm(); err != nil {
+			http.Error(w, err.Error(), http.StatusBadRequest)
+			return
+		}
+		sid := strings.TrimSpace(r.PostFormValue("sid"))
+		if sid == "" {
+			http.Error(w, "sid required", http.StatusBadRequest)
+			return
+		}
+		b.attachSession(sid)
+		// sid is client-supplied: escape it so characters such as
+		// '&', '#' or spaces cannot alter the redirect URL's query.
+		w.Header().Set("HX-Redirect", "/ui/?sid="+url.QueryEscape(sid))
+		w.WriteHeader(http.StatusOK)
+	})
+}
+
+// CommandsHandler renders the slash-command typeahead at
+// /ui/chat/commands, reusing renderSlashSuggestion so it looks the
+// same as the claude bridge's. The "message" form field must start
+// with "/"; the text up to the first space is matched as a
+// case-insensitive prefix against every registered prompt name, and
+// matches are written in name order. With no registry, an unparsable
+// form, or a non-slash message the response body is empty.
+func (b *HelixBridge) CommandsHandler() http.Handler {
+	suggest := func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/html; charset=utf-8")
+		if b.prompts == nil {
+			return
+		}
+		r.Body = http.MaxBytesReader(w, r.Body, 4<<10)
+		if r.ParseForm() != nil {
+			return
+		}
+		typed := r.PostFormValue("message")
+		if !strings.HasPrefix(typed, "/") {
+			return
+		}
+		head, _, _ := strings.Cut(typed[1:], " ")
+		wanted := strings.ToLower(head)
+
+		registered := b.prompts.All()
+		hits := make([]prompts.Prompt, 0, len(registered))
+		for _, candidate := range registered {
+			lowered := strings.ToLower(string(candidate.Name()))
+			if !strings.HasPrefix(lowered, wanted) {
+				continue
+			}
+			hits = append(hits, candidate)
+		}
+		sort.Slice(hits, func(i, j int) bool { return hits[i].Name() < hits[j].Name() })
+		for i := range hits {
+			_, _ = fmt.Fprint(w, renderSlashSuggestion(hits[i]))
+		}
+	}
+	return http.HandlerFunc(suggest)
+}
+
+// expandSlashCommand mirrors the claude bridge's behaviour. Slash
+// commands are resolved server-side by the prompt registry; the
+// rendered text replaces the user input before posting to Helix.
+func (b *HelixBridge) expandSlashCommand(ctx context.Context, msg string) (string, bool) {
+	if b.prompts == nil || !strings.HasPrefix(msg, "/") {
+		return "", false
+	}
+	name, rest, _ := strings.Cut(msg[1:], " ")
+	if name == "" {
+		return "", false
+	}
+	p, err := b.prompts.Get(prompts.Name(name))
+	if err != nil {
+		return "", false
+	}
+	args := map[string]string{}
+	rest = strings.TrimSpace(rest)
+	if rest != "" {
+		if a := p.Arguments(); len(a) > 0 {
+			args[a[0].Name] = rest
+		}
+	}
+	rendered, err := p.Render(ctx, args)
+	if err != nil {
+		b.logger.Info("chat slash command render failed", "name", name, "err", err)
+		return "", false
+	}
+	parts := make([]string, 0, len(rendered))
+	for _, m := range rendered {
+		parts = append(parts, m.Text)
+	}
+	return strings.Join(parts, "\n\n"), true
+}
+
+// resolveProjectOrg returns the organization_id of projectID. The
+// answer is cached in b.orgIDByProject after the first successful
+// GetProject call; failed lookups are not cached. We MUST send
+// organization_id on /sessions/chat — Helix's handler doesn't
+// auto-populate it from project_id, and the desktop quota check
+// defaults to the user's personal org (limit 2) when missing.
+//
+// The GetProject call runs without holding b.mu.
+func (b *HelixBridge) resolveProjectOrg(ctx context.Context, projectID string) (string, error) {
+	b.mu.Lock()
+	cached, hit := b.orgIDByProject[projectID]
+	b.mu.Unlock()
+	if hit {
+		return cached, nil
+	}
+
+	project, err := b.client.GetProject(ctx, projectID)
+	if err != nil {
+		return "", err
+	}
+	orgID := project.OrganizationID
+
+	b.mu.Lock()
+	b.orgIDByProject[projectID] = orgID
+	b.mu.Unlock()
+	return orgID, nil
+}
+
+// jsonField is a tiny helper used by render translation when peeking
+// at structured Helix payloads we don't fully model.
+func jsonField(raw json.RawMessage, key string) string {
+	if len(raw) == 0 {
+		return ""
+	}
+	var m map[string]any
+	if err := json.Unmarshal(raw, &m); err != nil {
+		return ""
+	}
+	if v, ok := m[key].(string); ok {
+		return v
+	}
+	return ""
+}
+
+// keep compiler happy if jsonField becomes unused as we evolve renderHelixFrames
+var _ = jsonField
diff --git a/helix-org/server/chat/helix_bridge_test.go b/helix-org/server/chat/helix_bridge_test.go
new file mode 100644
index 0000000000..dc2cf515dd
--- /dev/null
+++ b/helix-org/server/chat/helix_bridge_test.go
@@ -0,0 +1,192 @@
+package chat
+
+import (
+	"context"
+	"io"
+	"log/slog"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"strings"
+	"testing"
+
+	"github.com/helixml/helix-org/domain"
+	"github.com/helixml/helix-org/helix/helixclient"
+)
+
+// fakeEnsurer is a fixed ProjectEnsurer that returns canned IDs so
+// the bridge tests don't need a Helix or a store.
+type fakeEnsurer struct {
+	projectID, agentAppID, repoID string
+}
+
+// Ensure ignores its arguments and returns the canned IDs with a nil
+// error.
+func (f *fakeEnsurer) Ensure(_ context.Context, _ domain.WorkerID) (string, string, string, error) {
+	return f.projectID, f.agentAppID, f.repoID, nil
+}
+
+// fakeChatClient is a minimum-viable helixclient.Client used by the
+// helix bridge tests. Captures StartChat / SendSessionMessage calls
+// so the test can assert the bridge persists the session ID and
+// switches to follow-up on subsequent messages.
+//
+// The embedded helixclient.Client is left nil, so any interface
+// method not overridden below panics if the bridge calls it.
+//
+// NOTE(review): the counters are plain fields written from handler
+// goroutines and read by the test goroutine; the tests read them only
+// after the HTTP round trip has completed.
+type fakeChatClient struct {
+	helixclient.Client
+	startCalls     int
+	sendCalls      int
+	lastStartReq   helixclient.StartChatRequest
+	lastSendSID    string
+	lastSendBody   string
+	startSessionID string
+}
+
+// SendSessionMessage records the call (count, target session, body)
+// and returns a canned response.
+func (f *fakeChatClient) SendSessionMessage(_ context.Context, sid, content string, _ helixclient.SendMessageOptions) (helixclient.SendMessageResponse, error) {
+	f.sendCalls++
+	f.lastSendSID = sid
+	f.lastSendBody = content
+	return helixclient.SendMessageResponse{RequestID: "req_x", InteractionID: "ix_x"}, nil
+}
+
+// ServerStatus reports no desktop limit.
+func (f *fakeChatClient) ServerStatus(_ context.Context) (helixclient.ServerStatus, error) {
+	return helixclient.ServerStatus{MaxConcurrentDesktops: 0}, nil // 0 = unlimited
+}
+
+// StartChat records the request and returns a session whose ID is
+// startSessionID, defaulting it to "ses_test_1" when unset.
+func (f *fakeChatClient) StartChat(_ context.Context, req helixclient.StartChatRequest) (helixclient.Session, error) {
+	f.startCalls++
+	f.lastStartReq = req
+	if f.startSessionID == "" {
+		f.startSessionID = "ses_test_1"
+	}
+	return helixclient.Session{ID: f.startSessionID}, nil
+}
+
+// StartChatWithStatus delegates to StartChat and always reports no
+// WS error, so the bridge's cold-start retry path is never taken.
+func (f *fakeChatClient) StartChatWithStatus(ctx context.Context, req helixclient.StartChatRequest) (helixclient.Session, bool, error) {
+	s, err := f.StartChat(ctx, req)
+	return s, false, err
+}
+
+// CreateGitRepo returns a repo whose ID is derived from the requested
+// name.
+func (f *fakeChatClient) CreateGitRepo(_ context.Context, req helixclient.CreateGitRepoRequest) (helixclient.GitRepo, error) {
+	return helixclient.GitRepo{ID: "repo-" + req.Name, Name: req.Name}, nil
+}
+
+// AttachRepoToProject is a no-op that always succeeds.
+func (f *fakeChatClient) AttachRepoToProject(_ context.Context, _, _ string, _ bool) error {
+	return nil
+}
+
+// CreateBranch is a no-op that always succeeds.
+func (f *fakeChatClient) CreateBranch(_ context.Context, _, _, _ string) error { return nil }
+
+// GetProject returns a project with the requested ID in the fixed
+// organization "org-test".
+func (f *fakeChatClient) GetProject(_ context.Context, id string) (helixclient.Project, error) {
+	return helixclient.Project{ID: id, OrganizationID: "org-test"}, nil
+}
+
+// SubscribeUpdates returns a channel that never delivers an update
+// and is closed once ctx is cancelled.
+func (f *fakeChatClient) SubscribeUpdates(ctx context.Context, _ string) (<-chan helixclient.SessionUpdate, error) {
+	ch := make(chan helixclient.SessionUpdate)
+	go func() {
+		<-ctx.Done()
+		close(ch)
+	}()
+	return ch, nil
+}
+
+// newTestHelixBridge builds a HelixBridge around fc with a canned
+// project ensurer, a temp-dir CWD and a discarding logger. The test
+// is aborted if construction fails.
+func newTestHelixBridge(t *testing.T, fc *fakeChatClient) *HelixBridge {
+	t.Helper()
+	cfg := HelixConfig{
+		Client:      fc,
+		Ensure:      &fakeEnsurer{projectID: "prj_x", agentAppID: "app_x"},
+		OwnerID:     "w-owner",
+		SessionRole: "owner-chat",
+		CWD:         t.TempDir(),
+		Logger:      slog.New(slog.NewTextHandler(io.Discard, nil)),
+	}
+	bridge, err := NewHelix(cfg)
+	if err != nil {
+		t.Fatalf("NewHelix: %v", err)
+	}
+	return bridge
+}
+
+// TestHelixBridgeStartsThenFollowsUp pins the core invariant: the
+// first Send opens a fresh Helix session through StartChat, and every
+// later Send is queued on that same session via SendSessionMessage.
+func TestHelixBridgeStartsThenFollowsUp(t *testing.T) {
+	t.Parallel()
+	client := &fakeChatClient{startSessionID: "ses_42"}
+	bridge := newTestHelixBridge(t, client)
+	srv := httptest.NewServer(bridge.SendHandler())
+	defer srv.Close()
+
+	post := func(msg string) *http.Response {
+		form := url.Values{"message": {msg}}
+		resp, err := http.PostForm(srv.URL, form)
+		if err != nil {
+			t.Fatalf("post: %v", err)
+		}
+		return resp
+	}
+
+	// First turn: a session is created and the bubble is echoed.
+	first := post("hello")
+	if first.StatusCode != http.StatusOK {
+		t.Fatalf("first send: %d", first.StatusCode)
+	}
+	echoed, _ := io.ReadAll(first.Body)
+	first.Body.Close() //nolint:errcheck,gosec // test cleanup
+	if !strings.Contains(string(echoed), "hello") {
+		t.Errorf("expected user-bubble echo, got %q", echoed)
+	}
+	if client.startCalls != 1 || client.lastStartReq.SessionID != "" {
+		t.Errorf("first turn: startCalls=%d sid=%q (want 1, empty)", client.startCalls, client.lastStartReq.SessionID)
+	}
+
+	// Second turn: queued on the existing session, no new StartChat.
+	second := post("again")
+	second.Body.Close() //nolint:errcheck,gosec // test cleanup
+	if client.startCalls != 1 {
+		t.Errorf("followup must NOT call StartChat: %d (want 1)", client.startCalls)
+	}
+	if client.sendCalls != 1 {
+		t.Errorf("followup SendSessionMessage calls: %d (want 1)", client.sendCalls)
+	}
+	if client.lastSendSID != "ses_42" {
+		t.Errorf("followup target session: %q (want ses_42)", client.lastSendSID)
+	}
+	if client.lastSendBody != "again" {
+		t.Errorf("followup body: %q (want again)", client.lastSendBody)
+	}
+}
+
+// TestHelixBridgeNewResetsSession verifies that POST /ui/chat/new
+// clears the session pointer so the next Send opens a fresh Helix
+// session rather than following up on the prior one.
+func TestHelixBridgeNewResetsSession(t *testing.T) {
+	t.Parallel()
+	fc := &fakeChatClient{startSessionID: "ses_a"}
+	b := newTestHelixBridge(t, fc)
+	send := httptest.NewServer(b.SendHandler())
+	newSrv := httptest.NewServer(b.NewHandler())
+	defer send.Close()
+	defer newSrv.Close()
+
+	// post fails the test on a transport error or a non-200 reply
+	// instead of discarding them, so a broken request is reported at
+	// the request rather than as a confusing assertion further down.
+	post := func(target string, form url.Values) {
+		t.Helper()
+		resp, err := http.PostForm(target, form)
+		if err != nil {
+			t.Fatalf("post %s: %v", target, err)
+		}
+		defer resp.Body.Close() //nolint:errcheck // test cleanup
+		if resp.StatusCode != http.StatusOK {
+			t.Fatalf("post %s: status %d (want 200)", target, resp.StatusCode)
+		}
+	}
+
+	post(send.URL, url.Values{"message": {"first"}})
+	// Click "New chat".
+	post(newSrv.URL, url.Values{})
+	if !b.HistoryStartsFresh() {
+		t.Errorf("HistoryStartsFresh = false after New (want true)")
+	}
+	// Next send should open a brand-new session.
+	fc.startSessionID = "ses_b"
+	post(send.URL, url.Values{"message": {"second"}})
+	if fc.startCalls != 2 {
+		t.Errorf("StartChat calls: %d (want 2)", fc.startCalls)
+	}
+}
+
+// TestHelixBridgeRejectsMissingConfig checks that NewHelix refuses an
+// empty config and one that supplies only the Client.
+func TestHelixBridgeRejectsMissingConfig(t *testing.T) {
+	t.Parallel()
+	incomplete := []HelixConfig{
+		{},
+		{Client: &fakeChatClient{}},
+	}
+	for _, cfg := range incomplete {
+		_, err := NewHelix(cfg)
+		if err == nil {
+			t.Fatal("expected error")
+		}
+	}
+}
diff --git a/helix-org/server/chat/render.go b/helix-org/server/chat/render.go
new file mode 100644
index 0000000000..d0399c7492
--- /dev/null
+++ b/helix-org/server/chat/render.go
@@ -0,0 +1,230 @@
+package chat
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"html"
+	"strings"
+
+	"github.com/yuin/goldmark"
+
+	"github.com/helixml/helix-org/prompts"
+)
+
+// markdown is the shared parser/renderer for assistant text. With
+// default goldmark options, raw HTML the LLM emits is not rendered
+// (goldmark omits it from the output) — we never opt into WithUnsafe
+// because the assistant text is not trusted input.
+var markdown = goldmark.New()
+
+// renderMarkdown turns assistant-emitted markdown into safe HTML.
+// On parse error (which the default goldmark cannot really produce
+// for arbitrary text input) we fall back to escaped plaintext so the
+// bubble still renders something legible.
+func renderMarkdown(src string) string {
+	var buf bytes.Buffer
+	if err := markdown.Convert([]byte(src), &buf); err != nil {
+		return html.EscapeString(src)
+	}
+	return buf.String()
+}
+
+// streamEvent captures the parts of claude's stream-json format the
+// chat surface needs to render. The shape is shared between the
+// live SSE bridge (chat.go) and the historical-replay reader
+// (sessions.go) — both parse the same events out of either claude's
+// stdout or its on-disk session jsonl.
+//
+// renderFragments dispatches on Type ("user", "assistant", "result").
+// Message stays raw so the per-type renderer decodes it; Result and
+// IsError are consulted only for "result" events.
+type streamEvent struct {
+	Type    string          `json:"type"`
+	Subtype string          `json:"subtype,omitempty"`
+	Message json.RawMessage `json:"message,omitempty"`
+	Result  string          `json:"result,omitempty"`
+	IsError bool            `json:"is_error,omitempty"`
+}
+
+// messagePayload mirrors the message envelope inside both stream-json
+// (live) and session-jsonl (history) lines. content can be a string
+// (raw user prompt) or an array of contentSegment (everything else),
+// so it is kept raw and decoded by the consumer, which tries the
+// string shape first.
+type messagePayload struct {
+	Role    string          `json:"role"`
+	Content json.RawMessage `json:"content"`
+}
+
+// contentSegment is one element of an array-shaped message content.
+// Type selects how the segment is rendered; Input and Content are
+// kept as raw JSON for the renderer to decode.
+type contentSegment struct {
+	Type    string          `json:"type"`
+	Text    string          `json:"text,omitempty"`
+	Name    string          `json:"name,omitempty"`
+	Input   json.RawMessage `json:"input,omitempty"`
+	Content json.RawMessage `json:"content,omitempty"`
+	IsError bool            `json:"is_error,omitempty"`
+}
+
+// renderFragments converts one parsed stream-json event into zero or
+// more HTML fragments, one per atomic visual unit (user bubble,
+// assistant text bubble, tool-use chip, tool-result chip, error
+// banner). Shared by the live SSE bridge and historical replay.
+//
+// "user" and "assistant" events are delegated to their renderers; a
+// "result" event produces an error banner only when it is flagged as
+// an error. Everything else yields nil.
+//
+// The result is a slice so the caller can broadcast each fragment as
+// its own SSE message and the browser can stream them in one at a
+// time.
+func renderFragments(ev streamEvent) []string {
+	if ev.Type == "user" {
+		return renderUserEvent(ev.Message)
+	}
+	if ev.Type == "assistant" {
+		return renderAssistantEvent(ev.Message)
+	}
+	if ev.Type == "result" && ev.IsError {
+		return []string{renderTurnError(ev.Result)}
+	}
+	return nil
+}
+
+// renderUserEvent decodes the message of a "user" event and returns the HTML
+// fragments it yields. Content is either a plain string (the user's prompt)
+// or an array of segments. In the array form tool_result segments become
+// tool-result chips (the live stream-json sometimes wraps tool results in a
+// user envelope) and text segments become user bubbles, so resumed-history
+// messages with multipart content render correctly.
+//
+// CLI metadata blocks (whatever isMetaPrompt reports as scaffolding) are
+// silently dropped: claude wrote them into the transcript, they are not
+// real user prompts, and rendering them as bubbles would clutter the
+// resumed view. Undecodable input yields nil.
+func renderUserEvent(messageJSON json.RawMessage) []string {
+	var envelope messagePayload
+	if json.Unmarshal(messageJSON, &envelope) != nil {
+		return nil
+	}
+	// A JSON string body is the user's own prompt.
+	var prompt string
+	if json.Unmarshal(envelope.Content, &prompt) == nil {
+		prompt = strings.TrimSpace(prompt)
+		if prompt == "" || isMetaPrompt(prompt) {
+			return nil
+		}
+		return []string{renderUserBubble(prompt)}
+	}
+	// Anything else must decode as an array of segments.
+	var parts []contentSegment
+	if json.Unmarshal(envelope.Content, &parts) != nil {
+		return nil
+	}
+	var fragments []string
+	for _, part := range parts {
+		if part.Type == "tool_result" {
+			fragments = append(fragments, renderToolResult(string(part.Content), part.IsError))
+			continue
+		}
+		if part.Type != "text" {
+			continue
+		}
+		if body := strings.TrimSpace(part.Text); body != "" && !isMetaPrompt(body) {
+			fragments = append(fragments, renderUserBubble(body))
+		}
+	}
+	return fragments
+}
+
+// renderAssistantEvent decodes the message of an "assistant" event and
+// returns its HTML fragments: each non-empty text segment becomes an
+// assistant bubble and each tool_use segment a tool-use chip. thinking
+// segments are dropped (internal scratchpad, not for the chat surface).
+func renderAssistantEvent(messageJSON json.RawMessage) []string {
+	var envelope messagePayload
+	if json.Unmarshal(messageJSON, &envelope) != nil {
+		return nil
+	}
+	var parts []contentSegment
+	if json.Unmarshal(envelope.Content, &parts) != nil {
+		return nil
+	}
+	var fragments []string
+	for _, part := range parts {
+		if part.Type == "tool_use" {
+			fragments = append(fragments, renderToolUse(part.Name, string(part.Input)))
+			continue
+		}
+		// Only text is left to render; every other segment type is skipped.
+		if part.Type == "text" && part.Text != "" {
+			fragments = append(fragments, renderAssistantText(part.Text))
+		}
+	}
+	return fragments
+}
+
+// renderSlashSuggestion renders one row in the slash-command dropdown.
+// Clicking the row fills the textarea with `/` followed (presumably) by the
+// prompt name and a trailing space, so the user can keep typing arguments,
+// and clears the dropdown. The inline onclick is the smallest thing that
+// works: mutating the textarea value and hiding the suggestion list have no
+// reasonable server-rendered equivalent, and anything fancier would mean
+// adopting a JS framework, which we don't need.
+// NOTE(review): the format string below is empty in this copy although four
+// escaped values are passed to it; the row markup looks lost — confirm.
+func renderSlashSuggestion(p prompts.Prompt) string {
+	name := html.EscapeString(string(p.Name()))
+	title := html.EscapeString(p.Title())
+	desc := html.EscapeString(p.Description())
+	return fmt.Sprintf(
+		``,
+		name, name, title, desc,
+	)
+}
+
+func renderUserBubble(text string) string {
+	return fmt.Sprintf(
+		`
%s
`, + html.EscapeString(text), + ) +} + +func renderAssistantText(text string) string { + return fmt.Sprintf( + `
%s
`, + renderMarkdown(text), + ) +} + +func renderToolUse(name, input string) string { + return fmt.Sprintf( + `
%s %s
`, + html.EscapeString(name), + html.EscapeString(oneLine(input, 220)), + ) +} + +func renderToolResult(content string, isErr bool) string { + color := "var(--ink-muted)" + arrow := "◂" + if isErr { + color = "#A0432F" + arrow = "⚠" + } + return fmt.Sprintf( + `
%s %s
`, + color, arrow, html.EscapeString(oneLine(content, 220)), + ) +} + +func renderTurnError(msg string) string { + return fmt.Sprintf( + `
⚠ %s
`, + html.EscapeString(oneLine(msg, 500)), + ) +} + +func oneLine(s string, max int) string { + s = strings.Join(strings.Fields(s), " ") + if max > 0 && len(s) > max { + return s[:max] + "…" + } + return s +} diff --git a/helix-org/server/chat/render_test.go b/helix-org/server/chat/render_test.go new file mode 100644 index 0000000000..3315de1e3e --- /dev/null +++ b/helix-org/server/chat/render_test.go @@ -0,0 +1,76 @@ +package chat + +import ( + "strings" + "testing" +) + +// TestAssistantTextRendersMarkdown pins the contract that assistant +// bubbles render markdown — bold, lists, code, paragraphs — rather +// than dumping the literal markdown source as plaintext. +func TestAssistantTextRendersMarkdown(t *testing.T) { + t.Parallel() + cases := []struct { + name string + src string + want []string // substrings that MUST appear in the rendered HTML + }{ + { + name: "bold and italics", + src: "this is **bold** and *italic*", + want: []string{"bold", "italic"}, + }, + { + name: "bullet list", + src: "- alpha\n- beta\n- gamma", + want: []string{"
    ", "
  • alpha
  • ", "
  • gamma
  • "}, + }, + { + name: "fenced code block", + src: "```go\nfmt.Println(\"hi\")\n```", + want: []string{"
    ", "make test"},
    +		},
    +		{
    +			name: "heading",
    +			src:  "## Plan\nstep one",
    +			want: []string{"

    Plan

    ", "

    step one

    "}, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := renderAssistantText(tc.src) + for _, want := range tc.want { + if !strings.Contains(got, want) { + t.Errorf("missing %q in:\n%s", want, got) + } + } + }) + } +} + +// TestAssistantTextDropsRawHTML protects against an LLM emitting raw +// HTML (deliberately or via a hallucinated diagnostic) that would +// otherwise become live DOM in the user's browser. Goldmark in safe +// mode (the default — we never call WithUnsafe) replaces raw HTML +// blocks with an "" placeholder. Either +// outcome — escaping or omission — is fine; the only thing this test +// must catch is a live world`) + if strings.Contains(got, " pair. + if strings.Contains(got, "") { + t.Fatalf("closing survived rendering:\n%s", got) + } +} diff --git a/helix-org/server/chat/sessions.go b/helix-org/server/chat/sessions.go new file mode 100644 index 0000000000..2f38c02fdf --- /dev/null +++ b/helix-org/server/chat/sessions.go @@ -0,0 +1,328 @@ +package chat + +import ( + "bufio" + "encoding/json" + "os" + "path/filepath" + "sort" + "strings" + "time" +) + +// SessionInfo is one row in the Recents list — a per-cwd claude +// session jsonl summarized for the sidebar. +type SessionInfo struct { + SessionID string // sid extracted from the first line + Title string // best-effort title (custom-title, else first user prompt) + ModTime time.Time // file mtime, used for ordering +} + +// ListSessions returns the claude session jsonls under +// ~/.claude/projects// ordered most-recent first. +// Sessions whose first line cannot be decoded are skipped silently — +// a corrupt log shouldn't break the sidebar render. Files containing +// no user-visible turns (only meta events) are also skipped. 
+func ListSessions(cwd string) []SessionInfo { + dir := claudeProjectsDir(cwd) + if dir == "" { + return nil + } + entries, err := os.ReadDir(dir) + if err != nil { + return nil + } + var out []SessionInfo + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".jsonl") { + continue + } + info, err := e.Info() + if err != nil { + continue + } + path := filepath.Join(dir, e.Name()) + s, ok := summarize(path, info.ModTime()) + if !ok { + continue + } + out = append(out, s) + } + sort.Slice(out, func(i, j int) bool { + return out[i].ModTime.After(out[j].ModTime) + }) + return out +} + +// ReadHistory streams the claude session jsonl for sid (or the latest +// in cwd if sid is empty) and returns rendered HTML fragments — same +// format the live SSE bridge emits — so the chat page can mount the +// existing conversation on load. +// +// Lines that fail to decode or are meta events (custom-title, +// attachment, file-history-snapshot, system, etc.) are silently +// skipped. Returns nil if the session can't be located. +func ReadHistory(cwd, sid string) []string { + dir := claudeProjectsDir(cwd) + if dir == "" { + return nil + } + path := "" + if sid != "" { + candidate := filepath.Join(dir, sid+".jsonl") + if _, err := os.Stat(candidate); err == nil { + path = candidate + } + } + if path == "" { + path = newestJSONL(dir) + } + if path == "" { + return nil + } + f, err := os.Open(path) + if err != nil { + return nil + } + defer func() { _ = f.Close() }() + scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024) + var out []string + for scanner.Scan() { + var ev streamEvent + if err := json.Unmarshal(scanner.Bytes(), &ev); err != nil { + continue + } + out = append(out, renderFragments(ev)...) + } + return out +} + +// summarize reads the head of a session jsonl just deeply enough to +// extract sid + a display title. +// +// Title preference order, best to worst: +// +// 1. 
custom-title — user explicitly named the session via claude's +// /title slash command. Highest priority, never overridden. +// 2. ai-title — claude itself periodically generates a short label +// for the session and writes an "ai-title" event. This is what +// gives recents a real, descriptive name ("Set up new CEO role +// permissions") instead of a truncated first-prompt fragment. +// 3. firstUserText — fallback when neither title event has landed +// yet (very fresh session, or a session that hasn't earned a +// generated title). Truncated to a reasonable length. +// +// ok=false means this file should be skipped (no usable sid, or no +// human-visible turn). We can't break early any more — the user's +// first prompt usually appears before claude has emitted ai-title, +// so we have to scan the whole file to be sure the better title +// isn't waiting at the bottom. +func summarize(path string, mtime time.Time) (SessionInfo, bool) { + f, err := os.Open(path) //nolint:gosec // path is built from claudeProjectsDir + a known suffix + if err != nil { + return SessionInfo{}, false + } + defer func() { _ = f.Close() }() + scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024) + var ( + sid string + customTitle string + aiTitle string + fallbackText string + ) + for scanner.Scan() { + var head struct { + SessionID string `json:"sessionId"` + Type string `json:"type"` + CustomTitle string `json:"customTitle,omitempty"` + AITitle string `json:"aiTitle,omitempty"` + Message json.RawMessage `json:"message,omitempty"` + } + if err := json.Unmarshal(scanner.Bytes(), &head); err != nil { + continue + } + if sid == "" && head.SessionID != "" { + sid = head.SessionID + } + switch head.Type { + case "custom-title": + if t := strings.TrimSpace(head.CustomTitle); t != "" && customTitle == "" { + customTitle = t + } + case "ai-title": + // Claude rewrites this as the conversation evolves; keep + // the latest non-empty value rather than the first. 
+ if t := strings.TrimSpace(head.AITitle); t != "" { + aiTitle = t + } + case "user": + if fallbackText == "" { + if t := firstUserText(head.Message); t != "" { + fallbackText = t + } + } + } + // custom-title outranks everything; once seen, no later event + // can change the answer — bail out so long sessions don't pay + // for a full scan. + if customTitle != "" && sid != "" { + break + } + } + if sid == "" { + return SessionInfo{}, false + } + title := pickTitle(customTitle, aiTitle, fallbackText) + if title == "" { + // Skip jsonls with no user-visible content — they're + // almost always transient bookkeeping (custom-title only, + // abandoned spawns, etc.). + return SessionInfo{}, false + } + return SessionInfo{SessionID: sid, Title: shortenTitle(title), ModTime: mtime}, true +} + +// pickTitle returns the best title from the candidates in priority +// order. Extracted so the choice is unit-testable without spinning up +// a real session jsonl. +func pickTitle(custom, ai, fallback string) string { + if custom != "" { + return custom + } + if ai != "" { + return ai + } + return fallback +} + +// firstUserText extracts a user-visible string from a session jsonl +// "user" event's message envelope. It transparently handles the two +// shapes claude writes: plain string content, or an array of +// segments where the first text segment is the prompt. Tool-result +// segments and CLI metadata blocks (anything starting with "<") are +// skipped — those are scaffolding, not user prompts. 
+func firstUserText(messageJSON json.RawMessage) string { + if len(messageJSON) == 0 { + return "" + } + var msg messagePayload + if err := json.Unmarshal(messageJSON, &msg); err != nil { + return "" + } + var asString string + if err := json.Unmarshal(msg.Content, &asString); err == nil { + t := strings.TrimSpace(asString) + if isMetaPrompt(t) { + return "" + } + return t + } + var segs []contentSegment + if err := json.Unmarshal(msg.Content, &segs); err != nil { + return "" + } + for _, seg := range segs { + if seg.Type != "text" { + continue + } + t := strings.TrimSpace(seg.Text) + if t == "" || isMetaPrompt(t) { + continue + } + return t + } + return "" +} + +// isMetaPrompt reports whether the given user-message body is a CLI +// metadata block rather than a real prompt — e.g. /clear, /reload, +// the local-command-caveat preamble. Recents should ignore these +// when picking a title. +func isMetaPrompt(s string) bool { + if s == "" { + return true + } + if strings.HasPrefix(s, "") || strings.HasPrefix(s, "") { + return true + } + return false +} + +func shortenTitle(s string) string { + s = strings.Join(strings.Fields(s), " ") + const max = 60 + if len(s) > max { + return s[:max] + "…" + } + return s +} + +func claudeProjectsDir(cwd string) string { + if cwd == "" { + return "" + } + home, err := os.UserHomeDir() + if err != nil { + return "" + } + return filepath.Join(home, ".claude", "projects", strings.ReplaceAll(cwd, "/", "-")) +} + +func newestJSONL(dir string) string { + entries, err := os.ReadDir(dir) + if err != nil { + return "" + } + var ( + newestPath string + newestTime time.Time + ) + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".jsonl") { + continue + } + info, err := e.Info() + if err != nil { + continue + } + if info.ModTime().After(newestTime) { + newestTime = info.ModTime() + newestPath = filepath.Join(dir, e.Name()) + } + } + return newestPath +} + +// latestClaudeSessionID returns the sid of the most recently 
+// modified .jsonl in claude's per-cwd session store, or "" if none. +// Mirrors cmd/helix-org/chat.go's resolver. Used by the bridge to +// decide what to pass to claude --resume on lazy spawn. +func latestClaudeSessionID(cwd string) string { + dir := claudeProjectsDir(cwd) + if dir == "" { + return "" + } + path := newestJSONL(dir) + if path == "" { + return "" + } + f, err := os.Open(path) //nolint:gosec // path is built from a known prefix and a directory entry name + if err != nil { + return "" + } + defer func() { _ = f.Close() }() + scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + if !scanner.Scan() { + return "" + } + var record struct { + SessionID string `json:"sessionId"` + } + if err := json.Unmarshal(scanner.Bytes(), &record); err != nil { + return "" + } + return record.SessionID +} diff --git a/helix-org/server/chat/sessions_test.go b/helix-org/server/chat/sessions_test.go new file mode 100644 index 0000000000..f3a55bf530 --- /dev/null +++ b/helix-org/server/chat/sessions_test.go @@ -0,0 +1,29 @@ +package chat + +import "testing" + +// TestPickTitleHonoursPriority pins the rule: +// custom-title (user-set) > ai-title (claude-generated) > first user prompt. +// This is the contract recents render relies on. 
+func TestPickTitleHonoursPriority(t *testing.T) { + t.Parallel() + cases := []struct { + name string + custom, ai, fallback, want string + }{ + {"custom wins over ai", "Manual", "Generated", "first prompt", "Manual"}, + {"custom wins alone", "Manual", "", "", "Manual"}, + {"ai wins over fallback", "", "Generated", "first prompt", "Generated"}, + {"fallback only", "", "", "first prompt", "first prompt"}, + {"all empty", "", "", "", ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := pickTitle(tc.custom, tc.ai, tc.fallback) + if got != tc.want { + t.Fatalf("pickTitle(%q,%q,%q) = %q, want %q", tc.custom, tc.ai, tc.fallback, got, tc.want) + } + }) + } +} diff --git a/helix-org/server/mcp.go b/helix-org/server/mcp.go new file mode 100644 index 0000000000..5a7280e4c6 --- /dev/null +++ b/helix-org/server/mcp.go @@ -0,0 +1,162 @@ +package server + +import ( + "context" + "encoding/json" + "net/http" + + "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/prompts" +) + +// mcpHandler returns an http.Handler that speaks MCP over the Streamable +// HTTP transport. It is mounted at /workers/{id}/mcp; the worker ID in +// the URL identifies the caller, and the server exposes only the tools +// that worker holds grants for. +// +// Stateless mode is used: each request stands on its own. The server has +// no need to push notifications to clients, so session state buys us +// nothing here and adds an obligation to track session IDs. +func (s *Server) mcpHandler() http.Handler { + return mcp.NewStreamableHTTPHandler(s.buildMCPServer, &mcp.StreamableHTTPOptions{ + Stateless: true, + Logger: s.logger, + DisableLocalhostProtection: true, // helix-org is reverse-proxied through tunnels (cloudflared) when Helix's runner is on a different host; the SDK's DNS-rebinding guard rejects non-loopback Host headers, which kills the tunnel path. 
+ }) +} + +// buildMCPServer assembles a fresh *mcp.Server tailored to the worker in +// the request URL. Tools are filtered by the worker's grants — the LLM +// only ever sees what the owner authorised — and each tool handler +// closes over the grant so scope and enforcement mode are bound at +// registration time. +// +// Returning nil causes the SDK to respond 400 Bad Request. +func (s *Server) buildMCPServer(r *http.Request) *mcp.Server { + workerID := domain.WorkerID(r.PathValue("id")) + if workerID == "" { + return nil + } + + ctx := r.Context() + worker, err := s.store.Workers.Get(ctx, workerID) + if err != nil { + s.logger.Info("mcp.unknown_worker", "worker", workerID, "err", err.Error()) + return nil + } + + grants, err := s.store.Grants.ListByWorker(ctx, workerID) + if err != nil { + s.logger.Info("mcp.grants_lookup_failed", "worker", workerID, "err", err.Error()) + return nil + } + + srv := mcp.NewServer(&mcp.Implementation{ + Name: "helix-org", + Version: "0.1.0", + }, nil) + + heldTools := make(map[domain.ToolName]bool, len(grants)) + for _, g := range grants { + heldTools[g.ToolName] = true + tool, err := s.registry.Get(g.ToolName) + if err != nil { + // A grant pointing at a tool we don't know about. Skip silently; + // removing the grant is the owner's job. + s.logger.Info("mcp.unknown_tool_grant", "worker", workerID, "tool", g.ToolName) + continue + } + registerToolForWorker(srv, tool, worker, g, s.logger.With("worker", workerID, "tool", g.ToolName)) + } + + if s.prompts != nil { + for _, p := range s.prompts.All() { + if req := p.RequiresTool(); req != "" && !heldTools[req] { + continue + } + registerPromptForWorker(srv, p, s.logger.With("worker", workerID, "prompt", p.Name())) + } + } + + return srv +} + +// registerToolForWorker binds a single granted tool onto the per-worker +// MCP server. The handler closes over caller and grant so each call +// dispatches with the right Invocation without re-querying the store. 
+// The grant is what authorises the call; there's nothing else on it +// the tool needs at invocation time. +func registerToolForWorker(srv *mcp.Server, tool domain.Tool, caller domain.Worker, _ domain.ToolGrant, logger interface { + Info(msg string, args ...any) +}) { + srv.AddTool(&mcp.Tool{ + Name: string(tool.Name()), + Description: tool.Description(), + InputSchema: tool.InputSchema(), + }, func(ctx context.Context, req *mcp.CallToolRequest) (*mcp.CallToolResult, error) { + args := req.Params.Arguments + if len(args) == 0 { + args = json.RawMessage(`{}`) + } + result, err := tool.Invoke(ctx, domain.Invocation{ + Caller: caller, + Args: args, + }) + if err != nil { + logger.Info("mcp.tool_error", "err", err.Error()) + out := &mcp.CallToolResult{} + out.SetError(err) + return out, nil + } + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: string(result)}}, + }, nil + }) +} + +// registerPromptForWorker binds a single prompt onto the per-worker +// MCP server. The handler renders the prompt's template into seed +// messages; the LLM consumes those and drives the conversation, +// usually ending in a tool call (create_role, update_identity, …). +// +// Visibility is decided in buildMCPServer; by the time we get here the +// prompt is already in the worker's allowed set. 
+func registerPromptForWorker(srv *mcp.Server, p prompts.Prompt, logger interface { + Info(msg string, args ...any) +}) { + args := p.Arguments() + mcpArgs := make([]*mcp.PromptArgument, 0, len(args)) + for _, a := range args { + mcpArgs = append(mcpArgs, &mcp.PromptArgument{ + Name: a.Name, + Title: a.Title, + Description: a.Description, + Required: a.Required, + }) + } + srv.AddPrompt(&mcp.Prompt{ + Name: string(p.Name()), + Title: p.Title(), + Description: p.Description(), + Arguments: mcpArgs, + }, func(ctx context.Context, req *mcp.GetPromptRequest) (*mcp.GetPromptResult, error) { + messages, err := p.Render(ctx, req.Params.Arguments) + if err != nil { + logger.Info("mcp.prompt_error", "err", err.Error()) + return nil, err + } + out := make([]*mcp.PromptMessage, 0, len(messages)) + for _, m := range messages { + out = append(out, &mcp.PromptMessage{ + Role: mcp.Role(m.Role), + Content: &mcp.TextContent{Text: m.Text}, + }) + } + return &mcp.GetPromptResult{ + Description: p.Description(), + Messages: out, + }, nil + }) +} diff --git a/helix-org/server/server.go b/helix-org/server/server.go new file mode 100644 index 0000000000..f4c9992517 --- /dev/null +++ b/helix-org/server/server.go @@ -0,0 +1,124 @@ +// Package server exposes the HTTP surface. There is exactly one +// endpoint: /workers/{id}/mcp — every Worker is its own MCP server, +// scoped to the tools that Worker holds grants for, and used for both +// reads and mutations of the org graph. The CLI bootstraps by opening +// the store directly; there is no other HTTP write path. +package server + +import ( + "context" + "log/slog" + "net/http" + "time" + + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/prompts" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/tools" +) + +// Dispatcher is the subset of the dispatcher this package needs: +// fan an Event out to subscribed AI Workers. 
Defining the interface +// here (rather than importing dispatch) keeps the import edge +// one-directional — dispatch already imports server's siblings. +type Dispatcher interface { + Dispatch(ctx context.Context, event domain.Event) +} + +// Server wires handlers over a store and the tool registry. +type Server struct { + store *store.Store + registry *tools.Registry + prompts *prompts.Registry + broadcaster *broadcast.Broadcaster + dispatcher Dispatcher + logger *slog.Logger +} + +// New returns a Server bound to the given store, registry, broadcaster, +// dispatcher and logger. If logger is nil, a discard logger is used. +// The broadcaster wakes long-poll readers; it may be nil in tests. +// The dispatcher is required only for routes that fan-out events to +// subscribed Workers (e.g. /webhooks/{streamID}); leave it nil in +// tests that don't exercise those paths. +func New(s *store.Store, registry *tools.Registry, broadcaster *broadcast.Broadcaster, dispatcher Dispatcher, logger *slog.Logger) *Server { + if logger == nil { + logger = slog.New(slog.NewTextHandler(discardWriter{}, nil)) + } + return &Server{store: s, registry: registry, broadcaster: broadcaster, dispatcher: dispatcher, logger: logger} +} + +// WithPrompts attaches a prompts.Registry so the per-worker MCP server +// will surface MCP prompts (slash commands) alongside tools. Returns +// the same Server so the call can be chained off New. Passing nil is +// equivalent to no prompts registered — the MCP server just answers +// prompts/list with an empty list. +func (s *Server) WithPrompts(reg *prompts.Registry) *Server { + s.prompts = reg + return s +} + +// Route is a (pattern, handler) pair callers pass to Handler so +// transports can mount their own inbound endpoints (e.g. the email +// transport's /email/postmark) without server.go importing them. 
+type Route struct { + Pattern string + Handler http.Handler +} + +// Handler returns an http.Handler with all built-in routes registered +// (MCP per-worker, /webhooks/{streamID}) plus any extras passed in by +// the wiring layer. The request-logging middleware wraps the lot. +func (s *Server) Handler(extras ...Route) http.Handler { + mux := http.NewServeMux() + mux.Handle("/workers/{id}/mcp", s.mcpHandler()) + mux.Handle("POST /webhooks/{streamID}", s.webhookHandler()) + for _, r := range extras { + mux.Handle(r.Pattern, r.Handler) + } + return s.requestLogger(mux) +} + +// requestLogger logs one line per HTTP request at info level with method, +// path, status, and elapsed time. +func (s *Server) requestLogger(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + start := time.Now() + rw := &statusCapture{ResponseWriter: w, status: http.StatusOK} + next.ServeHTTP(rw, r) + s.logger.Info("http", + "method", r.Method, + "path", r.URL.Path, + "status", rw.status, + "elapsed", time.Since(start).Round(time.Millisecond), + ) + }) +} + +// statusCapture wraps http.ResponseWriter to record the status code +// that was written so the logging middleware can report it. Flush is +// passed through so streaming handlers (SSE, MCP streamable HTTP) keep +// working when the middleware is in the chain — without it, +// w.(http.Flusher) fails the type assertion and the handler errors +// out. +type statusCapture struct { + http.ResponseWriter + status int +} + +func (s *statusCapture) WriteHeader(code int) { + s.status = code + s.ResponseWriter.WriteHeader(code) +} + +func (s *statusCapture) Flush() { + if f, ok := s.ResponseWriter.(http.Flusher); ok { + f.Flush() + } +} + +// discardWriter is an io.Writer that throws away everything. 
+type discardWriter struct{} + +func (discardWriter) Write(p []byte) (int, error) { return len(p), nil } diff --git a/helix-org/server/server_test.go b/helix-org/server/server_test.go new file mode 100644 index 0000000000..b1f062ffad --- /dev/null +++ b/helix-org/server/server_test.go @@ -0,0 +1,279 @@ +package server_test + +import ( + "context" + "encoding/json" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/prompts" + "github.com/helixml/helix-org/server" + "github.com/helixml/helix-org/store/sqlite" + "github.com/helixml/helix-org/tools" +) + +// newTestServer seeds a CEO Worker with a ping grant and a hire_worker +// grant (the latter pointing at a tool deliberately not registered, so +// we can assert it's filtered out of the MCP list). Returns the running +// httptest.Server and the workerID to act as. +func newTestServer(t *testing.T) (*httptest.Server, domain.WorkerID) { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open sqlite: %v", err) + } + + reg := tools.NewRegistry() + if err := reg.Register(tools.Ping{}); err != nil { + t.Fatalf("register ping: %v", err) + } + + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + role, _ := domain.NewRole("r-ceo", "# CEO\nTop of org.", time.Now().UTC()) + if err := s.Roles.Create(ctx, role); err != nil { + t.Fatalf("seed role: %v", err) + } + root, _ := domain.NewPosition("p-root", "r-ceo", nil) + if err := s.Positions.Create(ctx, root); err != nil { + t.Fatalf("seed root: %v", err) + } + ai, _ := domain.NewAIWorker("w-ceo", []domain.PositionID{"p-root"}, "") + if err := s.Workers.Create(ctx, ai); err != nil { + t.Fatalf("seed worker: %v", err) + } + grant, _ := domain.NewToolGrant("g-1", "w-ceo", "hire_worker") + if err := s.Grants.Create(ctx, grant); err != nil { + 
t.Fatalf("seed grant: %v", err) + } + pingGrant, _ := domain.NewToolGrant("g-ping", "w-ceo", tools.PingName) + if err := s.Grants.Create(ctx, pingGrant); err != nil { + t.Fatalf("seed ping grant: %v", err) + } + return srv, "w-ceo" +} + +// connectMCP returns an MCP client session bound to the given worker's +// /mcp endpoint. The session is closed when the test ends. +func connectMCP(t *testing.T, baseURL string, workerID domain.WorkerID) *mcp.ClientSession { + t.Helper() + c := mcp.NewClient(&mcp.Implementation{Name: "helix-org-test", Version: "v0.0.0"}, nil) + transport := &mcp.StreamableClientTransport{ + Endpoint: baseURL + "/workers/" + string(workerID) + "/mcp", + DisableStandaloneSSE: true, + } + session, err := c.Connect(context.Background(), transport, nil) + if err != nil { + t.Fatalf("mcp connect %s: %v", workerID, err) + } + t.Cleanup(func() { _ = session.Close() }) + return session +} + +// TestMCPListTools confirms that the MCP tool list a worker sees is the +// intersection of (a) their grants and (b) tools the server has actually +// registered. The CEO holds grants for both ping and hire_worker, but +// only ping is registered on the test registry — so hire_worker must +// not appear. create_role is neither granted nor registered. 
+func TestMCPListTools(t *testing.T) { + t.Parallel() + srv, workerID := newTestServer(t) + session := connectMCP(t, srv.URL, workerID) + + res, err := session.ListTools(context.Background(), nil) + if err != nil { + t.Fatalf("list tools: %v", err) + } + got := make(map[string]bool, len(res.Tools)) + for _, tool := range res.Tools { + got[tool.Name] = true + } + if !got["ping"] { + t.Errorf("ping missing from list: %+v", got) + } + if got["hire_worker"] { + t.Errorf("granted-but-unregistered tool hire_worker leaked into list") + } + if got["create_role"] { + t.Errorf("ungranted tool create_role appeared in list") + } +} + +// TestMCPInvokePing exercises a granted tool over MCP end-to-end: the +// CEO holds a ping grant, so calling tools/call should succeed and echo +// the message back along with the caller ID. +func TestMCPInvokePing(t *testing.T) { + t.Parallel() + srv, workerID := newTestServer(t) + session := connectMCP(t, srv.URL, workerID) + + res, err := session.CallTool(context.Background(), &mcp.CallToolParams{ + Name: "ping", + Arguments: map[string]any{"message": "hello"}, + }) + if err != nil { + t.Fatalf("call tool: %v", err) + } + if res.IsError { + t.Fatalf("tool reported error: %+v", res.Content) + } + if len(res.Content) == 0 { + t.Fatalf("empty content: %+v", res) + } + text, ok := res.Content[0].(*mcp.TextContent) + if !ok { + t.Fatalf("content[0] = %T, want *TextContent", res.Content[0]) + } + var payload struct { + Echo string `json:"echo"` + Caller string `json:"caller"` + } + if err := json.Unmarshal([]byte(text.Text), &payload); err != nil { + t.Fatalf("decode tool result: %v", err) + } + if payload.Echo != "hello" || payload.Caller != "w-ceo" { + t.Fatalf("payload = %+v", payload) + } +} + +// TestMCPUngrantedToolHidden confirms that a tool the worker doesn't +// hold isn't visible. Calling a hidden tool surfaces as a protocol-level +// "tool not found", not a 403 — the LLM never sees ungranted tools at all. 
+func TestMCPUngrantedToolHidden(t *testing.T) { + t.Parallel() + srv, workerID := newTestServer(t) + session := connectMCP(t, srv.URL, workerID) + + _, err := session.CallTool(context.Background(), &mcp.CallToolParams{ + Name: "create_role", + Arguments: map[string]any{"id": "r-x", "title": "X"}, + }) + if err == nil { + t.Fatalf("expected error for ungranted tool, got nil") + } +} + +// newTestServerWithPrompts mirrors newTestServer but also attaches a +// prompts registry containing new_role. Whether the worker actually +// sees the prompt depends on whether they hold the gating grant +// (create_role); callers exercise both branches. +func newTestServerWithPrompts(t *testing.T, grantCreateRole bool) (*httptest.Server, domain.WorkerID) { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open sqlite: %v", err) + } + + reg := tools.NewRegistry() + if err := reg.Register(tools.Ping{}); err != nil { + t.Fatalf("register ping: %v", err) + } + if err := tools.RegisterBuiltins(reg, tools.DefaultDeps(s)); err != nil { + t.Fatalf("register builtins: %v", err) + } + + promptReg := prompts.NewRegistry() + if err := promptReg.Register(prompts.Role{}); err != nil { + t.Fatalf("register new_role: %v", err) + } + + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).WithPrompts(promptReg).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + role, _ := domain.NewRole("r-ceo", "# CEO", time.Now().UTC()) + _ = s.Roles.Create(ctx, role) + root, _ := domain.NewPosition("p-root", "r-ceo", nil) + _ = s.Positions.Create(ctx, root) + ai, _ := domain.NewAIWorker("w-ceo", []domain.PositionID{"p-root"}, "") + _ = s.Workers.Create(ctx, ai) + pingGrant, _ := domain.NewToolGrant("g-ping", "w-ceo", tools.PingName) + _ = s.Grants.Create(ctx, pingGrant) + if grantCreateRole { + g, _ := domain.NewToolGrant("g-create-role", "w-ceo", tools.CreateRoleName) + if err := s.Grants.Create(ctx, g); err != nil { + t.Fatalf("seed create_role grant: %v", 
err) + } + } + return srv, "w-ceo" +} + +// TestMCPListPromptsVisibleWithGrant confirms that a prompt gated on a +// tool grant shows up exactly when the worker holds that grant. +func TestMCPListPromptsVisibleWithGrant(t *testing.T) { + t.Parallel() + srv, workerID := newTestServerWithPrompts(t, true) + session := connectMCP(t, srv.URL, workerID) + + res, err := session.ListPrompts(context.Background(), nil) + if err != nil { + t.Fatalf("list prompts: %v", err) + } + got := make(map[string]bool, len(res.Prompts)) + for _, p := range res.Prompts { + got[p.Name] = true + } + if !got[string(prompts.RoleName)] { + t.Errorf("new_role missing from list: %+v", got) + } +} + +// TestMCPListPromptsHiddenWithoutGrant confirms the gating: a worker +// without create_role does NOT see the new_role prompt, because the +// final tool call would fail anyway. +func TestMCPListPromptsHiddenWithoutGrant(t *testing.T) { + t.Parallel() + srv, workerID := newTestServerWithPrompts(t, false) + session := connectMCP(t, srv.URL, workerID) + + res, err := session.ListPrompts(context.Background(), nil) + if err != nil { + t.Fatalf("list prompts: %v", err) + } + for _, p := range res.Prompts { + if p.Name == string(prompts.RoleName) { + t.Errorf("new_role visible without create_role grant: %+v", p) + } + } +} + +// TestMCPGetPromptReturnsSeedMessages exercises the full prompts/get +// round-trip: the rendered template lands as the user-role seed message +// in the conversation. 
+func TestMCPGetPromptReturnsSeedMessages(t *testing.T) { + t.Parallel() + srv, workerID := newTestServerWithPrompts(t, true) + session := connectMCP(t, srv.URL, workerID) + + res, err := session.GetPrompt(context.Background(), &mcp.GetPromptParams{ + Name: string(prompts.RoleName), + Arguments: map[string]string{"hint": "VP marketing"}, + }) + if err != nil { + t.Fatalf("get prompt: %v", err) + } + if len(res.Messages) != 1 { + t.Fatalf("messages = %d, want 1", len(res.Messages)) + } + msg := res.Messages[0] + if msg.Role != "user" { + t.Errorf("role = %q, want user", msg.Role) + } + text, ok := msg.Content.(*mcp.TextContent) + if !ok { + t.Fatalf("content = %T, want *TextContent", msg.Content) + } + if !strings.Contains(text.Text, "VP marketing") { + t.Errorf("hint not threaded through: %s", text.Text) + } + if !strings.Contains(text.Text, "create_role") { + t.Errorf("template missing create_role reference") + } +} diff --git a/helix-org/server/ui/orgchart.go b/helix-org/server/ui/orgchart.go new file mode 100644 index 0000000000..b48f9c9862 --- /dev/null +++ b/helix-org/server/ui/orgchart.go @@ -0,0 +1,216 @@ +package ui + +import ( + "fmt" + "html" + "strings" + + "github.com/helixml/helix-org/domain" +) + +// renderOrgChart returns an SVG fragment laying out positions as a +// top-down tree, with workers attached as small badges below their +// position node. Returns an empty string when there are no +// positions — the template hides the chart section in that case. +// +// Layout: each node is a 200×64 rectangle with rounded corners, +// connected to its parent by a vertical-then-horizontal edge. +// Subtree widths are computed bottom-up so siblings fan out without +// overlapping. +func renderOrgChart(positions []domain.Position, workers []domain.Worker) string { + if len(positions) == 0 { + return "" + } + + // Group workers by the positions they fill. 
+ byPos := make(map[domain.PositionID][]domain.Worker) + for _, w := range workers { + for _, pid := range w.Positions() { + byPos[pid] = append(byPos[pid], w) + } + } + + // Build tree from positions. Roots are positions with empty + // ParentID (or whose parent isn't in the slice — defensive). + idx := make(map[domain.PositionID]domain.Position, len(positions)) + for _, p := range positions { + idx[p.ID] = p + } + type node struct { + pos domain.Position + children []*node + w float64 // subtree width + x, y float64 // top-left after layout + } + nodes := make(map[domain.PositionID]*node, len(positions)) + for _, p := range positions { + nodes[p.ID] = &node{pos: p} + } + var roots []*node + for _, p := range positions { + if p.ParentID == nil { + roots = append(roots, nodes[p.ID]) + continue + } + if parent, ok := nodes[*p.ParentID]; ok { + parent.children = append(parent.children, nodes[p.ID]) + } else { + roots = append(roots, nodes[p.ID]) + } + } + + const ( + nodeW = 200.0 + nodeH = 64.0 + gapX = 32.0 + gapY = 56.0 + padding = 16.0 + ) + + var measure func(n *node) float64 + measure = func(n *node) float64 { + if len(n.children) == 0 { + n.w = nodeW + return nodeW + } + total := 0.0 + for i, c := range n.children { + total += measure(c) + if i > 0 { + total += gapX + } + } + if total < nodeW { + total = nodeW + } + n.w = total + return total + } + + totalW := 0.0 + for i, r := range roots { + totalW += measure(r) + if i > 0 { + totalW += gapX + } + } + + var place func(n *node, x, y float64) + place = func(n *node, x, y float64) { + n.x = x + (n.w-nodeW)/2 + n.y = y + cx := x + for _, c := range n.children { + place(c, cx, y+nodeH+gapY) + cx += c.w + gapX + } + } + rootX := padding + for _, r := range roots { + place(r, rootX, padding) + rootX += r.w + gapX + } + + // Compute total height = max y reached + nodeH + padding. 
+ maxY := 0.0 + var walk func(n *node) + walk = func(n *node) { + if n.y+nodeH > maxY { + maxY = n.y + nodeH + } + for _, c := range n.children { + walk(c) + } + } + for _, r := range roots { + walk(r) + } + width := totalW + 2*padding + height := maxY + padding + + var b strings.Builder + // width/height attributes pin the SVG to its natural pixel size so + // a one-node chart doesn't get stretched to fill the container. + // max-width:100% still caps it if the chart grows wider than the + // page; the parent has overflow-x-auto for horizontal scroll. + fmt.Fprintf(&b, ``, width, height, width, height) + // Edges first so nodes paint over them. + var drawEdges func(n *node) + drawEdges = func(n *node) { + px := n.x + nodeW/2 + py := n.y + nodeH + for _, c := range n.children { + cx := c.x + nodeW/2 + cy := c.y + midY := (py + cy) / 2 + fmt.Fprintf(&b, + ``, + px, py, px, midY, cx, midY, cx, cy, + ) + drawEdges(c) + } + } + for _, r := range roots { + drawEdges(r) + } + // Nodes. Each position group and worker badge gets htmx attrs that + // fetch /ui/org/detail and swap the result into #org-detail. We + // emit them on the outer <g> rather than wrapping in <a> because + // htmx 2 picks up clicks on any element with hx-* attrs and SVG <a> + // would also need xlink:href to route correctly. + var drawNodes func(n *node) + drawNodes = func(n *node) { + ws := byPos[n.pos.ID] + title := html.EscapeString(string(n.pos.ID)) + role := html.EscapeString(string(n.pos.RoleID)) + posID := html.EscapeString(string(n.pos.ID)) + fmt.Fprintf(&b, + ``+ + ``+ + `%s`+ + `%s`, + n.x, n.y, posID, nodeW, nodeH, title, role, + ) + // Worker badges along the bottom of the rect. Each badge is + // itself clickable; we stop event propagation on the badge so + // clicking it doesn't also trigger the parent position node. 
+ bx := 14.0 + by := 56.0 + for i, w := range ws { + if i >= 3 { // cap to keep the node tidy + fmt.Fprintf(&b, + `+%d`, + bx, by, len(ws)-3, + ) + break + } + label := string(w.ID()) + if len(label) > 14 { + label = label[:14] + "…" + } + labelEsc := html.EscapeString(label) + workerID := html.EscapeString(string(w.ID())) + padW := 6.0 + textW := float64(len(label)) * 6.0 + fmt.Fprintf(&b, + ``+ + ``+ + `%s`+ + ``, + bx, by-12, workerID, textW+2*padW, padW, labelEsc, + ) + bx += textW + 2*padW + 6 + } + fmt.Fprintf(&b, ``) + for _, c := range n.children { + drawNodes(c) + } + } + for _, r := range roots { + drawNodes(r) + } + b.WriteString(``) + return b.String() +} diff --git a/helix-org/server/ui/pages.go b/helix-org/server/ui/pages.go new file mode 100644 index 0000000000..360340ad19 --- /dev/null +++ b/helix-org/server/ui/pages.go @@ -0,0 +1,276 @@ +// Package ui serves the human-facing HTML surface at /ui/. It is a +// render-only view over the org graph — every mutation continues to +// flow through the per-Worker MCP endpoint. Templates are compiled +// from struct-embedded HTML at startup via tylermmorton/tmpl; daisyui +// v5 + Tailwind (browser CDN) + htmx are pulled in by the shared head +// partial. +package ui + +import ( + _ "embed" + "html/template" + + "github.com/tylermmorton/tmpl" +) + +//go:embed templates/head.html +var headHTML string + +//go:embed templates/sidebar.html +var sidebarHTML string + +//go:embed templates/chat.html +var chatHTML string + +//go:embed templates/org.html +var orgHTML string + +//go:embed templates/settings.html +var settingsHTML string + +//go:embed templates/streams.html +var streamsHTML string + +//go:embed templates/org_detail.html +var orgDetailHTML string + +// Head fills the document <head>. Title is the page-specific suffix +// rendered before the site name. +type Head struct { + Title string +} + +// TemplateText returns the head partial body. 
+func (Head) TemplateText() string { return headHTML } + +// Sidebar renders the left rail. Active is the slug of the current +// page ("chat", "org", "settings") so the rail can highlight the +// active item. Initial/DisplayName/WorkerID populate the identity +// pill at the bottom. Recents is the list of past chat sessions +// (most-recent first) — empty when no jsonls exist on disk yet. +type Sidebar struct { + Active string + Initial string + DisplayName string + WorkerID string + Recents []RecentRow + HasRecents bool +} + +// RecentRow is one entry in the Recents list — a clickable link that +// switches the chat bridge to that session ID and reloads /ui/. +type RecentRow struct { + SessionID string + Title string + IsActive bool +} + +// TemplateText returns the sidebar partial body. +func (Sidebar) TemplateText() string { return sidebarHTML } + +// shell groups the chrome partials every page composes. +type shell struct { + Head Head `tmpl:"head"` + Sidebar Sidebar `tmpl:"sidebar"` +} + +// ChatPage renders the chat-as-home entry point. Greeting is the +// short label rendered after "Back at it,". History is the +// pre-rendered HTML for prior turns when resuming a session — empty +// for a fresh chat. +type ChatPage struct { + shell + Greeting string + History template.HTML + // BackendLabel is the short footer label shown next to the send + // button — e.g. "helix · minimax-m2.7" or "claude · sonnet 4.6". + // Populated from the active chat.Backend so the UI never lies + // about which LLM stack the chat is actually running on. + BackendLabel string +} + +// TemplateText returns the chat page body. +func (*ChatPage) TemplateText() string { return chatHTML } + +// OrgPage renders the chart-driven org overview. The chart at the top +// is the index; clicking a position node or worker badge fires an +// htmx GET to /ui/org/detail and swaps the result into #org-detail. 
+// HasChart/HasFlash/IsEmpty are precomputed bool fields because tmpl's +// compile-time analyzer rejects slice/method/string values inside +// {{ if }} — it requires explicit bool fields. +type OrgPage struct { + shell + ChartSVG template.HTML + HasChart bool + IsEmpty bool + Flash string + FlashError string + HasFlash bool + + // DetailHTML is the pre-rendered org-detail fragment for + // initial-render with ?pos= or ?worker= set. HasDetail is true + // when the chart-side detail pane should show the inlined + // fragment instead of the empty-state placeholder. + DetailHTML template.HTML + HasDetail bool +} + +// TemplateText returns the org page body. +func (*OrgPage) TemplateText() string { return orgHTML } + +// OrgDetail is the htmx fragment rendered in #org-detail. Exactly one +// of IsPosition / IsWorker / IsHint is true. Position fragments carry +// the editable role markdown and a list of workers at that position; +// worker fragments carry the editable identity.md (persona) and the +// list of positions held. +type OrgDetail struct { + IsHint bool + + IsPosition bool + PositionID string + RoleID string + RoleContent string + ParentID string + Workers []OrgWorkerRef + HasWorkers bool + + IsWorker bool + WorkerID string + WorkerKind string + IdentityContent string + Positions []string + HasPositions bool + // Tools is the alphabetically-sorted set of tool names this Worker + // holds grants for. Each is what the agent sees as + // `mcp__helix__` over the per-worker MCP endpoint. + Tools []string + HasTools bool + + Flash string + FlashError string + HasFlash bool +} + +// TemplateText returns the org-detail fragment body. +func (*OrgDetail) TemplateText() string { return orgDetailHTML } + +// OrgWorkerRef is a compact reference to a worker rendered inside a +// position-detail fragment. Click opens the worker's detail. 
+type OrgWorkerRef struct { + ID string + Kind string +} + +// SettingsPage renders the operational-config view: the registered +// config specs (each editable in place) plus the live serve flags. +// Flash and FlashError carry success/error messages from the most +// recent set/delete redirect, so the page can confirm or surface +// validation errors without keeping a session. +type SettingsPage struct { + shell + Owner string + PublicURL string + DBPath string + EnvsDir string + Specs []SettingsSpecRow + HasSpecs bool + Flash string + FlashError string + HasFlash bool +} + +// SettingsSpecRow is one row in the config registry table. Value is +// the current redacted value (default if no row is set); IsObject +// flags whether the editor should render a textarea instead of an +// input. +type SettingsSpecRow struct { + Key string + Type string + Required bool + Configured bool + Description string + Value string + IsObject bool +} + +// TemplateText returns the settings page body. +func (*SettingsPage) TemplateText() string { return settingsHTML } + +// StreamsPage renders the streams tab: a list of streams on the left, +// the selected stream's metadata + send-form + recent events on the +// right. ?id= picks the active stream; absent or invalid id falls +// back to a usage hint (IsHint = true). All booleans are precomputed +// because tmpl's compile-time analyzer rejects slice/method values +// inside {{ if }}. 
+type StreamsPage struct { + shell + Owner string + Streams []StreamRow + HasStreams bool + IsHint bool + Flash string + FlashError string + HasFlash bool + + HasSelection bool + SelectedID string + SelectedName string + SelectedDesc string + SelectedKind string + SelectedCreatedBy string + SelectedCreatedAt string + Subscribers []string + HasSubscribers bool + CanPublish bool + PublishDisabledReason string + Events []EventCard + HasEvents bool + // IsAllStreams is true on the no-selection landing view, where the + // right pane shows a unified firehose across every Stream rather + // than a hint. Drives an alternate header in the template and + // surfaces each card's StreamID column for cross-stream context. + IsAllStreams bool +} + +// TemplateText returns the streams page body. +func (*StreamsPage) TemplateText() string { return streamsHTML } + +// StreamRow renders one entry in the left-hand stream list. IsActive +// drives the highlighted state for the currently-selected row. +type StreamRow struct { + ID string + Name string + Kind string + IsActive bool + CreatedAt string +} + +// EventCard renders one event in the recent-events list. The raw +// Event.Body is canonical Message JSON; HasMessage signals whether +// we successfully parsed it (always true for new events; older +// hand-poked rows may not parse). When HasMessage is false the +// template falls back to rendering the raw body. +type EventCard struct { + ID string + Source string + CreatedAt string + // StreamID is set only when EventCards from multiple Streams are + // rendered together (the "All streams" unified feed). Empty in + // the per-stream detail view, where the surrounding header + // already names the stream. 
+ StreamID string + Body string // raw Event.Body (Message JSON) + HasMessage bool + From string + To string + Subject string + MessageBody string +} + +var ( + chatTpl = tmpl.MustCompile(&ChatPage{}) + orgTpl = tmpl.MustCompile(&OrgPage{}) + orgDetailTpl = tmpl.MustCompile(&OrgDetail{}) + settingsTpl = tmpl.MustCompile(&SettingsPage{}) + streamsTpl = tmpl.MustCompile(&StreamsPage{}) +) diff --git a/helix-org/server/ui/templates/chat.html b/helix-org/server/ui/templates/chat.html new file mode 100644 index 0000000000..406d1c96a4 --- /dev/null +++ b/helix-org/server/ui/templates/chat.html @@ -0,0 +1,92 @@ + + +{{ template "head" .Head }} + +
    + {{ template "sidebar" .Sidebar }} + +
    + +
    +
    +
    + + + + + + + +
    + +

    + Back at it, {{ .Greeting }} +

    + +

    + Talk to a worker · hire a role · shape the org +

    + +
    {{ .History }}
    +
    +
    + + +
    +
    + +
    +
    +
    + +
    +
    + + {{ .BackendLabel }} + + +
    +
    + +
    +
    +
    +
    + + + + diff --git a/helix-org/server/ui/templates/head.html b/helix-org/server/ui/templates/head.html new file mode 100644 index 0000000000..d21329be98 --- /dev/null +++ b/helix-org/server/ui/templates/head.html @@ -0,0 +1,140 @@ + + + + {{ .Title }} · helix-org + + + + + + + + + + + + diff --git a/helix-org/server/ui/templates/org.html b/helix-org/server/ui/templates/org.html new file mode 100644 index 0000000000..9b1519a1fe --- /dev/null +++ b/helix-org/server/ui/templates/org.html @@ -0,0 +1,59 @@ + + +{{ template "head" .Head }} + +
    + {{ template "sidebar" .Sidebar }} + +
    +
    +
    +

    The org

    + click anything in the chart +
    +

    A prompt-driven hierarchy. Click a position to edit its role; click a worker to edit its persona.

    + + {{ if .HasFlash }} +
    + {{ if ne .FlashError "" }}{{ .FlashError }}{{ else }}{{ .Flash }}{{ end }} +
    + {{ end }} + +
    + {{ if .HasChart }} +
    +
    +

    Org chart

    + click a node to inspect · live +
    +
    {{ .ChartSVG }}
    +
    + {{ else }} +
    +

    The org is empty.

    +

    Open the chat and try “hire me a CEO”.

    +
    + {{ end }} +
    + +
    + {{ if .HasDetail }} + {{ .DetailHTML }} + {{ else }} +
    +

    Pick something to inspect.

    +

    Click a position node to edit its role. Click a worker badge to edit its persona.

    +
    + {{ end }} +
    +
    +
    +
    + + diff --git a/helix-org/server/ui/templates/org_detail.html b/helix-org/server/ui/templates/org_detail.html new file mode 100644 index 0000000000..56fb9b56d1 --- /dev/null +++ b/helix-org/server/ui/templates/org_detail.html @@ -0,0 +1,126 @@ +{{ if .HasFlash }} +
    + {{ if ne .FlashError "" }}{{ .FlashError }}{{ else }}{{ .Flash }}{{ end }} +
    +{{ end }} + +{{ if .IsHint }} +
    +

    Pick something to inspect.

    +

    Click a position node to edit its role. Click a worker badge to edit its persona.

    +
    +{{ end }} + +{{ if .IsPosition }} +
    +
    +
    +

    {{ .PositionID }}

    + position +
    +

    + role: {{ .RoleID }} + {{ if ne .ParentID "" }} · parent: {{ .ParentID }}{{ end }} +

    +
    + +
    +
    +

    Role markdown

    + edits fan out on next activation +
    +
    + + + +
    + + {{ .RoleID }} +
    +
    +
    + +
    +

    Workers here

    + {{ if .HasWorkers }} +
      + {{ range .Workers }} +
    • + +
    • + {{ end }} +
    + {{ else }} +

    No workers fill this position yet.

    + {{ end }} +
    +
    +{{ end }} + +{{ if .IsWorker }} +
    +
    +
    +

    {{ .WorkerID }}

    + {{ .WorkerKind }} +
    + {{ if .HasPositions }} +

    + positions: + {{ range $i, $p := .Positions }}{{ if $i }}, {{ end }}{{ end }} +

    + {{ end }} +
    + +
    +
    +

    Tools

    + {{ if .HasTools }}{{ len .Tools }} granted{{ else }}none{{ end }} · MCP +
    + {{ if .HasTools }} +
    + {{ range .Tools }} + {{ . }} + {{ end }} +
    + {{ else }} +

    No tool grants. This Worker can be activated but cannot mutate the org graph or publish on Streams.

    + {{ end }} +
    + +
    +
    +

    Persona — identity.md

    + edits fan out on next activation +
    +
    + + +
    + + stored in domain · projected to env on next activation +
    +
    +
    +
    +{{ end }} diff --git a/helix-org/server/ui/templates/settings.html b/helix-org/server/ui/templates/settings.html new file mode 100644 index 0000000000..7332cc0dce --- /dev/null +++ b/helix-org/server/ui/templates/settings.html @@ -0,0 +1,93 @@ + + +{{ template "head" .Head }} + +
    + {{ template "sidebar" .Sidebar }} + +
    +
    +
    +

    Settings

    +
    +

    Operational config — transports, claude, etc. Edit in place; the registry validates type and shape on save.

    + + {{ if .HasFlash }} +
    + {{ if ne .FlashError "" }}{{ .FlashError }}{{ else }}{{ .Flash }}{{ end }} +
    + {{ end }} + +
    +
    +
    +

    Config registry

    +

    {{ len .Specs }} spec{{ if ne (len .Specs) 1 }}s{{ end }}

    +
    +
    + {{ if .HasSpecs }} +
      + {{ range .Specs }} +
    • +
      + {{ .Key }} + {{ .Type }}{{ if .Required }} · required{{ end }} + + {{ if .Configured }}set{{ else if .Required }}missing{{ else }}default{{ end }} + +
      +

      {{ .Description }}

      +
      + + {{ if .IsObject }} + + {{ else }} + + {{ end }} +
      + + {{ if .Configured }} + + {{ end }} +
      +
      +
    • + {{ end }} +
    + {{ else }} +
    +

    No specs registered.

    +

    Specs are wired in cmd/helix-org/configspecs.go.

    +
    + {{ end }} +
    + +
    +
    +

    Server

    +

    flags captured at startup

    +
    +
    +
    Owner worker
    +
    {{ .Owner }}
    +
    Public URL
    +
    {{ .PublicURL }}
    +
    Database
    +
    {{ .DBPath }}
    +
    Envs directory
    +
    {{ .EnvsDir }}
    +
    +
    +
    +
    +
    + + diff --git a/helix-org/server/ui/templates/sidebar.html b/helix-org/server/ui/templates/sidebar.html new file mode 100644 index 0000000000..cec569d010 --- /dev/null +++ b/helix-org/server/ui/templates/sidebar.html @@ -0,0 +1,62 @@ + diff --git a/helix-org/server/ui/templates/streams.html b/helix-org/server/ui/templates/streams.html new file mode 100644 index 0000000000..9ff4f2a543 --- /dev/null +++ b/helix-org/server/ui/templates/streams.html @@ -0,0 +1,194 @@ + + +{{ template "head" .Head }} + +
    + {{ template "sidebar" .Sidebar }} + +
    +
    +
    +

    Streams

    + / events live here +
    +

    Pick a stream to inspect history or send a message. The owner is the publisher.

    + + {{ if .HasFlash }} +
    + {{ if ne .FlashError "" }}{{ .FlashError }}{{ else }}{{ .Flash }}{{ end }} +
    + {{ end }} + +
    + + +
    + {{ if .IsHint }} +
    +

    No streams yet.

    +

    Open chat and try “create a stream s-news”.

    +
    + {{ end }} + + {{ if .IsAllStreams }} +
    +
    +

    All streams

    + unified feed +
    +

    Recent events across every Stream — newest first, capped at 50. Pick a stream on the left to drill in.

    +
    +
    +
      + {{ range .Events }} +
    • +
      +
      + {{ .StreamID }} + {{ if ne .From "" }} · {{ .From }}{{ else if ne .Source "" }} · {{ .Source }}{{ end }} + {{ if ne .To "" }} → {{ .To }}{{ end }} +
      + {{ .CreatedAt }} +
      + {{ if ne .Subject "" }} +
      {{ .Subject }}
      + {{ end }} + {{ if .HasMessage }} +
      {{ .MessageBody }}
      + {{ else }} +
      {{ .Body }}
      + {{ end }} +
      {{ .ID }}
      +
    • + {{ end }} +
    +
    + {{ end }} + + {{ if .HasSelection }} +
    +
    +

    {{ .SelectedName }}

    + {{ .SelectedKind }} +
    +

    + {{ .SelectedID }} · created by {{ .SelectedCreatedBy }} · {{ .SelectedCreatedAt }} +

    + {{ if ne .SelectedDesc "" }} +

    {{ .SelectedDesc }}

    + {{ end }} + {{ if .HasSubscribers }} +

    subscribers: {{ range $i, $s := .Subscribers }}{{ if $i }}, {{ end }}{{ $s }}{{ end }}

    + {{ end }} +
    + +
    +

    Send

    + {{ if .CanPublish }} +
    + +
    + + +
    + +
    + + subscribed AI workers will be activated +
    +
    + {{ else }} +

    + Send disabled · {{ .PublishDisabledReason }} +

    + {{ end }} +
    + +
    +
    +

    Recent events

    + newest first · up to 50 · live +
    + {{ if .HasEvents }} +
      + {{ range .Events }} +
    • +
      +
      + {{ if ne .From "" }}{{ .From }}{{ else }}{{ .Source }}{{ end }} + {{ if ne .To "" }} → {{ .To }}{{ end }} +
      + {{ .CreatedAt }} +
      + {{ if ne .Subject "" }} +
      {{ .Subject }}
      + {{ end }} + {{ if .HasMessage }} +
      {{ .MessageBody }}
      + {{ else }} +
      {{ .Body }}
      + {{ end }} +
      {{ .ID }}
      +
    • + {{ end }} +
    + {{ else }} +

    No events on this stream yet.

    + {{ end }} +
    + {{ end }} +
    +
    +
    +
    +
    + + diff --git a/helix-org/server/ui/ui.go b/helix-org/server/ui/ui.go new file mode 100644 index 0000000000..dd907eec00 --- /dev/null +++ b/helix-org/server/ui/ui.go @@ -0,0 +1,721 @@ +package ui + +import ( + "context" + "fmt" + "html/template" + "net/http" + "sort" + "strings" + "time" + + "github.com/tylermmorton/tmpl" + + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/config" + "github.com/helixml/helix-org/dispatch" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/server/chat" + "github.com/helixml/helix-org/store" +) + +// Deps is everything the UI surface needs from its host. The wiring +// layer (cmd/helix-org/serve.go) builds this once at startup; the UI +// package treats it as an immutable snapshot. SettingsView and the +// store populate the org and settings pages; ChatCWD is the directory +// where claude's per-cwd session jsonls live, read for chat history +// and the Recents list in the sidebar; Configs lets the settings +// page read and mutate operational config in place; Bridge exposes +// chat-session state (e.g. "user just clicked New chat") so the +// chat page can suppress stale history rendering. +type Deps struct { + Store *store.Store + Configs *config.Registry + Bridge chat.Backend + ChatCWD string + Settings SettingsView + Broadcaster *broadcast.Broadcaster + Dispatcher *dispatch.Dispatcher + NewID func() string + Now func() time.Time +} + +// SettingsView is the snapshot of operational state rendered on the +// settings page. It is captured at server startup — the public URL, +// DB path, and envs dir come from CLI flags; the spec list comes +// from config.Registry.Specs(); the per-spec "configured" flag is +// resolved per-request against store.Configs. +type SettingsView struct { + Owner string // owner Worker ID (e.g. 
"w-owner") + PublicURL string // --public-url (resolved if defaulted) + DBPath string // --db + EnvsDir string // resolved absolute --envs-dir + Specs []SettingsSpec // registered config specs, sorted by Key +} + +// SettingsSpec is the rendered shape for one config registry entry. +type SettingsSpec struct { + Key string + Type string // "string" | "int" | "object" — display only + Required bool + Description string +} + +// Handler returns the HTTP handler for the /ui/ surface. Mount it on +// the main mux with `mux.Handle("/ui/", ui.Handler(deps))`. Chat is +// the entry point at /ui/{$}; /ui/org and /ui/settings render the +// org graph and operational config respectively. Unknown paths under +// /ui/ return 404. +func Handler(deps Deps) http.Handler { + u := &uiHandler{deps: deps} + mux := http.NewServeMux() + mux.HandleFunc("GET /ui/{$}", u.handleChat) + mux.HandleFunc("GET /ui/org", u.handleOrg) + mux.HandleFunc("GET /ui/settings", u.handleSettings) + mux.HandleFunc("POST /ui/settings/set", u.handleSettingsSet) + mux.HandleFunc("POST /ui/settings/delete", u.handleSettingsDelete) + mux.HandleFunc("POST /ui/org/roles/set", u.handleOrgRoleSet) + mux.HandleFunc("GET /ui/org/detail", u.handleOrgDetail) + mux.HandleFunc("POST /ui/org/identity/set", u.handleOrgIdentitySet) + mux.HandleFunc("GET /ui/streams", u.handleStreams) + mux.HandleFunc("POST /ui/streams/publish", u.handleStreamsPublish) + return mux +} + +type uiHandler struct { + deps Deps +} + +// ownerSidebar is the per-page sidebar shape. Identity values are +// constant for now: there is exactly one owner Worker, hardcoded at +// bootstrap as w-owner. When per-Worker UI sessions arrive, this +// becomes a per-request lookup. +// +// active is one of "chat", "org", "settings", "streams" — it drives +// the highlighted nav item. activeSID is the session ID currently +// being viewed (chat page only); when matched against a Recents +// entry, that row is rendered active. 
+func (u *uiHandler) ownerSidebar(active, activeSID string) Sidebar { + s := Sidebar{ + Active: active, + Initial: "O", + DisplayName: "Owner", + WorkerID: u.deps.Settings.Owner, + } + for _, info := range chat.ListSessions(u.deps.ChatCWD) { + s.Recents = append(s.Recents, RecentRow{ + SessionID: info.SessionID, + Title: info.Title, + IsActive: info.SessionID == activeSID, + }) + } + s.HasRecents = len(s.Recents) > 0 + return s +} + +func (u *uiHandler) handleChat(w http.ResponseWriter, r *http.Request) { + sid := strings.TrimSpace(r.URL.Query().Get("sid")) + label := "" + if u.deps.Bridge != nil { + label = u.deps.Bridge.Label() + } + page := &ChatPage{ + shell: shell{Head: Head{Title: "Chat"}, Sidebar: u.ownerSidebar("chat", sid)}, + Greeting: "Owner", + BackendLabel: label, + } + // When the user just clicked "New chat" and no new turn has been + // written yet, the latest jsonl is the *previous* conversation — + // rendering it would make New chat look broken. Skip history in + // that window unless the request explicitly resumes a sid. + if sid != "" || u.deps.Bridge == nil || !u.deps.Bridge.HistoryStartsFresh() { + if frags := chat.ReadHistory(u.deps.ChatCWD, sid); len(frags) > 0 { + page.History = template.HTML(strings.Join(frags, "\n")) //nolint:gosec // fragments are produced by chat.renderFragments which html-escapes user content + } + } + render(w, chatTpl, page) +} + +// handleOrg renders the chart-driven org page. The chart is the +// always-visible index; clicking a position node or worker badge +// fires an htmx request to /ui/org/detail and swaps the result into +// the #org-detail target. ?pos= or ?worker= on the URL inlines the +// matching detail fragment on initial render — used after a form +// submit redirects so the user lands back on the detail they were +// editing rather than the empty placeholder. 
+func (u *uiHandler) handleOrg(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + positions, err := u.deps.Store.Positions.List(ctx) + if err != nil { + http.Error(w, "list positions: "+err.Error(), http.StatusInternalServerError) + return + } + workers, err := u.deps.Store.Workers.List(ctx) + if err != nil { + http.Error(w, "list workers: "+err.Error(), http.StatusInternalServerError) + return + } + + flash := strings.TrimSpace(r.URL.Query().Get("flash")) + flashErr := strings.TrimSpace(r.URL.Query().Get("err")) + page := &OrgPage{ + shell: shell{Head: Head{Title: "Org"}, Sidebar: u.ownerSidebar("org", "")}, + Flash: flash, + FlashError: flashErr, + HasFlash: flash != "" || flashErr != "", + } + if svg := renderOrgChart(positions, workers); svg != "" { + page.ChartSVG = template.HTML(svg) //nolint:gosec // renderOrgChart escapes all dynamic content via html.EscapeString + page.HasChart = true + } + page.IsEmpty = !page.HasChart + + // Inline the detail fragment when a selector is present. We render + // the orgDetail template into a buffer and hand the resulting HTML + // to the page so org.html can drop it where it'd otherwise render + // the placeholder. The flash is consumed by the page's outer flash + // banner — clear it on the inlined fragment so it doesn't render + // twice. 
+ posID := strings.TrimSpace(r.URL.Query().Get("pos")) + workerID := strings.TrimSpace(r.URL.Query().Get("worker")) + if posID != "" || workerID != "" { + frag := &OrgDetail{} + switch { + case posID != "": + if err := u.fillPositionDetail(ctx, frag, domain.PositionID(posID)); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + case workerID != "": + if err := u.fillWorkerDetail(ctx, frag, domain.WorkerID(workerID)); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } + var buf strings.Builder + if err := orgDetailTpl.Render(&buf, frag); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + page.DetailHTML = template.HTML(buf.String()) //nolint:gosec // orgDetailTpl renders into HTML; its inputs are escaped at template time + page.HasDetail = true + } + + render(w, orgTpl, page) +} + +// handleOrgDetail renders the right-hand detail fragment for the +// chart-driven org page. ?pos=ID renders the position's role markdown +// editor plus the workers filling that position. ?worker=ID renders +// the worker's identity.md (persona) editor plus the positions held. +// Both paths read fresh from the store so the fragment reflects the +// post-save state when called from a redirect. 
+func (u *uiHandler) handleOrgDetail(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + posID := strings.TrimSpace(r.URL.Query().Get("pos")) + workerID := strings.TrimSpace(r.URL.Query().Get("worker")) + flash := strings.TrimSpace(r.URL.Query().Get("flash")) + flashErr := strings.TrimSpace(r.URL.Query().Get("err")) + + frag := &OrgDetail{Flash: flash, FlashError: flashErr, HasFlash: flash != "" || flashErr != ""} + + switch { + case posID != "": + if err := u.fillPositionDetail(ctx, frag, domain.PositionID(posID)); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + case workerID != "": + if err := u.fillWorkerDetail(ctx, frag, domain.WorkerID(workerID)); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + default: + frag.IsHint = true + } + + render(w, orgDetailTpl, frag) +} + +// fillPositionDetail populates frag with the role markdown for the +// position's Role and the workers currently assigned to it. 
+func (u *uiHandler) fillPositionDetail(ctx context.Context, frag *OrgDetail, posID domain.PositionID) error { + pos, err := u.deps.Store.Positions.Get(ctx, posID) + if err != nil { + return fmt.Errorf("get position %s: %w", posID, err) + } + role, err := u.deps.Store.Roles.Get(ctx, pos.RoleID) + if err != nil { + return fmt.Errorf("get role %s: %w", pos.RoleID, err) + } + workers, err := u.deps.Store.Workers.List(ctx) + if err != nil { + return fmt.Errorf("list workers: %w", err) + } + frag.IsPosition = true + frag.PositionID = string(pos.ID) + frag.RoleID = string(role.ID) + frag.RoleContent = role.Content + if pos.ParentID != nil { + frag.ParentID = string(*pos.ParentID) + } + for _, wk := range workers { + for _, pid := range wk.Positions() { + if pid == pos.ID { + frag.Workers = append(frag.Workers, OrgWorkerRef{ + ID: string(wk.ID()), + Kind: string(wk.Kind()), + }) + break + } + } + } + frag.HasWorkers = len(frag.Workers) > 0 + return nil +} + +// fillWorkerDetail populates frag with the worker's IdentityContent +// (the persona / profile, read from the domain) and the list of +// positions held. The spawner projects this content into the +// Environment as identity.md at activation time — disk is not the +// source of truth, so the editor talks straight to the DB. 
+func (u *uiHandler) fillWorkerDetail(ctx context.Context, frag *OrgDetail, workerID domain.WorkerID) error { + wk, err := u.deps.Store.Workers.Get(ctx, workerID) + if err != nil { + return fmt.Errorf("get worker %s: %w", workerID, err) + } + frag.IsWorker = true + frag.WorkerID = string(wk.ID()) + frag.WorkerKind = string(wk.Kind()) + frag.IdentityContent = wk.IdentityContent() + for _, pid := range wk.Positions() { + frag.Positions = append(frag.Positions, string(pid)) + } + frag.HasPositions = len(frag.Positions) > 0 + + grants, err := u.deps.Store.Grants.ListByWorker(ctx, workerID) + if err != nil { + return fmt.Errorf("list grants for %s: %w", workerID, err) + } + for _, g := range grants { + frag.Tools = append(frag.Tools, string(g.ToolName)) + } + sort.Strings(frag.Tools) + frag.HasTools = len(frag.Tools) > 0 + return nil +} + +// handleOrgIdentitySet rewrites a Worker's IdentityContent in the +// domain. The change takes effect on the Worker's next activation +// when the Spawner projects current state into the Environment — +// matches what update_role does for Role.Content. 
+func (u *uiHandler) handleOrgIdentitySet(w http.ResponseWriter, r *http.Request) { + r.Body = http.MaxBytesReader(w, r.Body, 256<<10) + if err := r.ParseForm(); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + id := strings.TrimSpace(r.PostFormValue("id")) + content := r.PostFormValue("content") + if id == "" { + http.Error(w, "id is required", http.StatusBadRequest) + return + } + existing, err := u.deps.Store.Workers.Get(r.Context(), domain.WorkerID(id)) + if err != nil { + http.Redirect(w, r, "/ui/org?worker="+id+"&err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + if err := u.deps.Store.Workers.Update(r.Context(), existing.WithIdentityContent(content)); err != nil { + http.Redirect(w, r, "/ui/org?worker="+id+"&err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + http.Redirect(w, r, "/ui/org?worker="+id+"&flash="+queryEscape("Saved identity for "+id), http.StatusSeeOther) +} + +// handleStreams renders the streams page: a list of streams on the +// left, the selected stream's detail (metadata + recent events + +// send box) on the right. ?id= picks the active stream; absent or +// unknown id falls back to "no selection". 
+func (u *uiHandler) handleStreams(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + streams, err := u.deps.Store.Streams.List(ctx) + if err != nil { + http.Error(w, "list streams: "+err.Error(), http.StatusInternalServerError) + return + } + sort.SliceStable(streams, func(i, j int) bool { return streams[i].CreatedAt.Before(streams[j].CreatedAt) }) + + selectedID := strings.TrimSpace(r.URL.Query().Get("id")) + flash := strings.TrimSpace(r.URL.Query().Get("flash")) + flashErr := strings.TrimSpace(r.URL.Query().Get("err")) + + page := &StreamsPage{ + shell: shell{Head: Head{Title: "Streams"}, Sidebar: u.ownerSidebar("streams", "")}, + Owner: u.deps.Settings.Owner, + Flash: flash, + FlashError: flashErr, + HasFlash: flash != "" || flashErr != "", + } + for _, s := range streams { + page.Streams = append(page.Streams, StreamRow{ + ID: string(s.ID), + Name: s.Name, + Kind: string(s.Transport.Kind), + IsActive: string(s.ID) == selectedID, + CreatedAt: s.CreatedAt.Format(time.RFC3339), + }) + } + page.HasStreams = len(page.Streams) > 0 + + if selectedID != "" { + if err := u.fillStreamDetail(ctx, page, domain.StreamID(selectedID)); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } else if err := u.fillAllStreamsFeed(ctx, page); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + render(w, streamsTpl, page) +} + +// fillAllStreamsFeed populates the no-selection landing view with a +// unified firehose of recent events across every Stream. Capped at 50 +// to keep the page tight; cross-stream context is surfaced via each +// card's StreamID. Falls back to the hint screen if there are no +// events at all (fresh org, nothing to show yet). 
+func (u *uiHandler) fillAllStreamsFeed(ctx context.Context, page *StreamsPage) error { + events, err := u.deps.Store.Events.ListAll(ctx, 50) + if err != nil { + return fmt.Errorf("list all events: %w", err) + } + if len(events) == 0 { + page.IsHint = true + return nil + } + page.IsAllStreams = true + for _, ev := range events { + card := EventCard{ + ID: string(ev.ID), + Source: string(ev.Source), + StreamID: string(ev.StreamID), + CreatedAt: ev.CreatedAt.Format(time.RFC3339), + Body: ev.Body, + } + if msg, err := ev.Message(); err == nil { + card.From = msg.From + card.Subject = msg.Subject + card.MessageBody = msg.Body + card.HasMessage = true + if len(msg.To) > 0 { + card.To = strings.Join(msg.To, ", ") + } + } + page.Events = append(page.Events, card) + } + page.HasEvents = true + return nil +} + +// fillStreamDetail loads the selected stream's metadata, subscribers, +// and recent events. The send-form's enabled state is decided here +// so the template stays trivial (a simple bool branch). +func (u *uiHandler) fillStreamDetail(ctx context.Context, page *StreamsPage, streamID domain.StreamID) error { + s, err := u.deps.Store.Streams.Get(ctx, streamID) + if err != nil { + // Treat a missing stream as "fall back to hint" — happens when + // the user lands on an old bookmark or a stream is deleted out + // of band. Don't 500 the whole page. 
+ page.IsHint = true + page.FlashError = err.Error() + page.HasFlash = true + return nil + } + subs, err := u.deps.Store.Subscriptions.ListForStream(ctx, streamID) + if err != nil { + return fmt.Errorf("list subscriptions for %s: %w", streamID, err) + } + events, err := u.deps.Store.Events.ListForStream(ctx, streamID, 50) + if err != nil { + return fmt.Errorf("list events for %s: %w", streamID, err) + } + + page.HasSelection = true + page.SelectedID = string(s.ID) + page.SelectedName = s.Name + page.SelectedDesc = s.Description + page.SelectedKind = string(s.Transport.Kind) + page.SelectedCreatedBy = string(s.CreatedBy) + page.SelectedCreatedAt = s.CreatedAt.Format(time.RFC3339) + for _, sub := range subs { + page.Subscribers = append(page.Subscribers, string(sub.WorkerID)) + } + page.HasSubscribers = len(page.Subscribers) > 0 + + // GitHub streams reject publish at the tool layer; mirror the same + // rule here so the UI matches the backend exactly. + page.CanPublish = s.Transport.Kind != domain.TransportGitHub + if !page.CanPublish { + page.PublishDisabledReason = "github transport is inbound only — act on the repo with `gh` from the worker's environment" + } + + for _, ev := range events { + card := EventCard{ + ID: string(ev.ID), + Source: string(ev.Source), + CreatedAt: ev.CreatedAt.Format(time.RFC3339), + Body: ev.Body, + } + if msg, err := ev.Message(); err == nil { + card.From = msg.From + card.Subject = msg.Subject + card.MessageBody = msg.Body + card.HasMessage = true + if len(msg.To) > 0 { + card.To = strings.Join(msg.To, ", ") + } + } + page.Events = append(page.Events, card) + } + page.HasEvents = len(page.Events) > 0 + return nil +} + +// handleStreamsPublish appends an Event attributed to the owner. The +// equivalent of the publish MCP tool, exposed here so the human in +// front of /ui/streams can send a message without going through +// claude. Mirrors the tool's validation: rejects empty body, rejects +// github transport. 
After append, notifies the broadcaster and fans +// out to subscribed AI workers via the dispatcher — same wake path +// as a publish from a worker. +func (u *uiHandler) handleStreamsPublish(w http.ResponseWriter, r *http.Request) { + r.Body = http.MaxBytesReader(w, r.Body, 256<<10) + if err := r.ParseForm(); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + streamID := strings.TrimSpace(r.PostFormValue("stream_id")) + body := r.PostFormValue("body") + subject := strings.TrimSpace(r.PostFormValue("subject")) + toRaw := strings.TrimSpace(r.PostFormValue("to")) + if streamID == "" { + http.Error(w, "stream_id is required", http.StatusBadRequest) + return + } + if strings.TrimSpace(body) == "" { + http.Redirect(w, r, "/ui/streams?id="+streamID+"&err="+queryEscape("body is required"), http.StatusSeeOther) + return + } + if u.deps.NewID == nil || u.deps.Now == nil { + http.Error(w, "ui not configured for publish (missing NewID/Now)", http.StatusInternalServerError) + return + } + + ctx := r.Context() + stream, err := u.deps.Store.Streams.Get(ctx, domain.StreamID(streamID)) + if err != nil { + http.Redirect(w, r, "/ui/streams?id="+streamID+"&err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + if stream.Transport.Kind == domain.TransportGitHub { + http.Redirect(w, r, "/ui/streams?id="+streamID+"&err="+queryEscape("github transport is inbound only"), http.StatusSeeOther) + return + } + + owner := domain.WorkerID(u.deps.Settings.Owner) + var to []string + if toRaw != "" { + for _, part := range strings.Split(toRaw, ",") { + if t := strings.TrimSpace(part); t != "" { + to = append(to, t) + } + } + } + msg := domain.Message{ + From: string(owner), + To: to, + Subject: subject, + Body: body, + } + event, err := domain.NewMessageEvent( + domain.EventID("e-"+u.deps.NewID()), + domain.StreamID(streamID), + owner, + msg, + u.deps.Now(), + ) + if err != nil { + http.Redirect(w, r, 
"/ui/streams?id="+streamID+"&err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + if err := u.deps.Store.Events.Append(ctx, event); err != nil { + http.Redirect(w, r, "/ui/streams?id="+streamID+"&err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + if u.deps.Broadcaster != nil { + u.deps.Broadcaster.Notify(domain.StreamID(streamID)) + } + if u.deps.Dispatcher != nil { + u.deps.Dispatcher.Dispatch(ctx, event) + } + http.Redirect(w, r, "/ui/streams?id="+streamID+"&flash="+queryEscape("Sent event "+string(event.ID)), http.StatusSeeOther) +} + +// handleOrgRoleSet updates an existing role's content. The new +// content fans out to every Worker filling a Position with this +// Role on next activation. Validation is done by the domain layer +// (NewRole rejects empty content); we surface its error as a flash. +func (u *uiHandler) handleOrgRoleSet(w http.ResponseWriter, r *http.Request) { + r.Body = http.MaxBytesReader(w, r.Body, 256<<10) + if err := r.ParseForm(); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + id := strings.TrimSpace(r.PostFormValue("id")) + pos := strings.TrimSpace(r.PostFormValue("pos")) + content := r.PostFormValue("content") + if id == "" { + http.Error(w, "id is required", http.StatusBadRequest) + return + } + // Redirect target preserves the originating position so the user + // lands back on the same detail. Falls back to the bare /ui/org + // page when the form had no position context (shouldn't happen + // from the chart-driven UI but keeps the handler defensive). + back := "/ui/org" + if pos != "" { + back = "/ui/org?pos=" + pos + } + sep := "&" + if pos == "" { + sep = "?" 
+ } + existing, err := u.deps.Store.Roles.Get(r.Context(), domain.RoleID(id)) + if err != nil { + http.Redirect(w, r, back+sep+"err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + existing.Content = content + if err := u.deps.Store.Roles.Update(r.Context(), existing); err != nil { + http.Redirect(w, r, back+sep+"err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + http.Redirect(w, r, back+sep+"flash="+queryEscape("Saved "+id), http.StatusSeeOther) +} + +func (u *uiHandler) handleSettings(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + flash := strings.TrimSpace(r.URL.Query().Get("flash")) + flashErr := strings.TrimSpace(r.URL.Query().Get("err")) + page := &SettingsPage{ + shell: shell{Head: Head{Title: "Settings"}, Sidebar: u.ownerSidebar("settings", "")}, + Owner: u.deps.Settings.Owner, + PublicURL: u.deps.Settings.PublicURL, + DBPath: u.deps.Settings.DBPath, + EnvsDir: u.deps.Settings.EnvsDir, + Flash: flash, + FlashError: flashErr, + HasFlash: flash != "" || flashErr != "", + } + for _, spec := range u.deps.Settings.Specs { + row := SettingsSpecRow{ + Key: spec.Key, + Type: spec.Type, + Required: spec.Required, + Description: spec.Description, + } + row.Configured = u.isConfigured(ctx, spec.Key) + row.Value = u.currentValue(ctx, spec.Key) + row.IsObject = spec.Type == "object" + page.Specs = append(page.Specs, row) + } + page.HasSpecs = len(page.Specs) > 0 + render(w, settingsTpl, page) +} + +// handleSettingsSet writes a config value via the registry. The +// registry validates type-shape and returns 400 on bad input; +// successful writes redirect back to /ui/settings with a flash. 
+func (u *uiHandler) handleSettingsSet(w http.ResponseWriter, r *http.Request) { + r.Body = http.MaxBytesReader(w, r.Body, 64<<10) + if err := r.ParseForm(); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + key := strings.TrimSpace(r.PostFormValue("key")) + value := r.PostFormValue("value") // intentionally not trimmed — JSON object bodies may contain meaningful whitespace + if key == "" { + http.Error(w, "key is required", http.StatusBadRequest) + return + } + if err := u.deps.Configs.Set(r.Context(), key, value, domain.WorkerID(u.deps.Settings.Owner)); err != nil { + http.Redirect(w, r, "/ui/settings?err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + http.Redirect(w, r, "/ui/settings?flash="+queryEscape("Saved "+key), http.StatusSeeOther) +} + +// handleSettingsDelete removes a config row, falling back to the +// spec's default. The registry rejects deleting unknown keys. +func (u *uiHandler) handleSettingsDelete(w http.ResponseWriter, r *http.Request) { + r.Body = http.MaxBytesReader(w, r.Body, 4<<10) + if err := r.ParseForm(); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + key := strings.TrimSpace(r.PostFormValue("key")) + if key == "" { + http.Error(w, "key is required", http.StatusBadRequest) + return + } + if err := u.deps.Configs.Delete(r.Context(), key); err != nil { + http.Redirect(w, r, "/ui/settings?err="+queryEscape(err.Error()), http.StatusSeeOther) + return + } + http.Redirect(w, r, "/ui/settings?flash="+queryEscape("Reset "+key), http.StatusSeeOther) +} + +// queryEscape escapes a string for use in a URL query value. +// net/url.QueryEscape would do this but pulling in net/url just for +// this is overkill — small helper. +func queryEscape(s string) string { + r := strings.NewReplacer(" ", "+", "&", "%26", "?", "%3F", "=", "%3D", "#", "%23") + return r.Replace(s) +} + +// isConfigured reports whether the given key has a row in the configs +// table. 
We swallow store errors here — a transient DB hiccup at page +// render time should not 500 the whole settings view; treating the +// row as missing is the safe default. +func (u *uiHandler) isConfigured(ctx context.Context, key string) bool { + _, err := u.deps.Store.Configs.Get(ctx, key) + return err == nil +} + +// currentValue returns the redacted value for a config key — falls +// through to the spec default when no row is set, returns "" on +// error so the form renders empty rather than leaking the error +// into the textarea. +func (u *uiHandler) currentValue(ctx context.Context, key string) string { + if u.deps.Configs == nil { + return "" + } + v, err := u.deps.Configs.GetRedacted(ctx, key) + if err != nil { + return "" + } + return v +} + +// render writes the page as text/html; on render failure it falls back +// to a 500 with the error string. tmpl.MustCompile already validated +// the template at startup, so a Render error here means a runtime data +// problem — surface it loudly rather than silently emitting partial +// HTML. +func render[T tmpl.TemplateProvider](w http.ResponseWriter, t tmpl.Template[T], data T) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := t.Render(w, data); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } +} diff --git a/helix-org/server/webhook.go b/helix-org/server/webhook.go new file mode 100644 index 0000000000..b45337e0a1 --- /dev/null +++ b/helix-org/server/webhook.go @@ -0,0 +1,101 @@ +package server + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "time" + + "github.com/google/uuid" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// nowUTC returns the current wall-clock time in UTC. Kept as a package +// helper so handlers stay short and the time source is easy to audit. +func nowUTC() time.Time { return time.Now().UTC() } + +// maxWebhookBody caps the body size we'll accept on a webhook POST. 
+// 1 MiB is comfortable for text payloads and prevents an obvious DoS. +const maxWebhookBody = 1 << 20 + +// webhookHandler accepts inbound POSTs on /webhooks/ and +// turns each request body into an Event on that Stream. The Stream +// must exist and have transport.kind == webhook; otherwise 404. +// +// Source attribution on the resulting Event is empty (system-emitted, +// per domain.NewEvent's contract). The dispatcher is invoked so AI +// Workers subscribed to the Stream are activated; the broadcaster is +// notified so any long-poll observer wakes. +func (s *Server) webhookHandler() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + streamID := domain.StreamID(r.PathValue("streamID")) + if streamID == "" { + http.Error(w, "missing streamID", http.StatusNotFound) + return + } + + stream, err := s.store.Streams.Get(r.Context(), streamID) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + http.Error(w, fmt.Sprintf("stream %q: not found", streamID), http.StatusNotFound) + return + } + s.logger.Error("webhook: lookup stream", "stream", streamID, "err", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + if stream.Transport.Kind != domain.TransportWebhook { + http.Error(w, fmt.Sprintf("stream %q is not a webhook stream", streamID), http.StatusNotFound) + return + } + + body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maxWebhookBody)) + if err != nil { + http.Error(w, "read body: "+err.Error(), http.StatusBadRequest) + return + } + if len(body) == 0 { + http.Error(w, "body is empty", http.StatusBadRequest) + return + } + + // Wrap the inbound bytes into the canonical Message envelope. + // From is empty — webhook callers are arbitrary external systems + // with no helix Worker identity; routing decisions about "who + // sent this" belong in the receiving Role's prompt. 
+ event, err := domain.NewMessageEvent( + domain.EventID("e-"+uuid.NewString()), + streamID, + "", // system-emitted; webhooks have no Worker source + domain.Message{Body: string(body)}, + nowUTC(), + ) + if err != nil { + http.Error(w, "build event: "+err.Error(), http.StatusBadRequest) + return + } + if err := s.store.Events.Append(r.Context(), event); err != nil { + s.logger.Error("webhook: append event", "stream", streamID, "err", err) + http.Error(w, "append event", http.StatusInternalServerError) + return + } + + if s.broadcaster != nil { + s.broadcaster.Notify(streamID) + } + if s.dispatcher != nil { + s.dispatcher.Dispatch(r.Context(), event) + } + + ack, _ := json.Marshal(map[string]string{ + "id": string(event.ID), + "streamId": string(streamID), + }) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write(ack) + }) +} diff --git a/helix-org/server/webhook_test.go b/helix-org/server/webhook_test.go new file mode 100644 index 0000000000..46dc994f7b --- /dev/null +++ b/helix-org/server/webhook_test.go @@ -0,0 +1,429 @@ +package server_test + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + "time" + + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/dispatch" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/server" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/store/sqlite" + "github.com/helixml/helix-org/tools" +) + +// recordingDispatcher captures every Dispatch call so tests can assert +// the webhook handler fans events out to subscribed Workers. Safe for +// concurrent calls — httptest.Server runs each request on its own +// goroutine. 
+type recordingDispatcher struct { + mu sync.Mutex + events []domain.Event +} + +func (d *recordingDispatcher) Dispatch(_ context.Context, e domain.Event) { + d.mu.Lock() + defer d.mu.Unlock() + d.events = append(d.events, e) +} + +func (d *recordingDispatcher) snapshot() []domain.Event { + d.mu.Lock() + defer d.mu.Unlock() + out := make([]domain.Event, len(d.events)) + copy(out, d.events) + return out +} + +// newWebhookServer wires an in-memory store, a real broadcaster, and +// the supplied dispatcher (may be nil) into a Server. Returns the +// running httptest.Server plus the store + broadcaster so tests can +// seed streams and observe wakeups. +func newWebhookServer(t *testing.T, dispatcher server.Dispatcher) (*httptest.Server, *store.Store, *broadcast.Broadcaster) { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + bc := broadcast.New() + srv := httptest.NewServer(server.New(s, tools.NewRegistry(), bc, dispatcher, nil).Handler()) + t.Cleanup(srv.Close) + return srv, s, bc +} + +// seedStream creates a Stream with the given transport kind. The +// caller's createdBy is a fixed test sentinel; we don't seed a +// matching Worker because the webhook path doesn't read it. +func seedStream(t *testing.T, s *store.Store, id domain.StreamID, kind domain.TransportKind) { + t.Helper() + stream, err := domain.NewStream(id, string(id), "", "w-owner", time.Now().UTC(), + domain.Transport{Kind: kind}) + if err != nil { + t.Fatalf("new stream %q: %v", id, err) + } + if err := s.Streams.Create(context.Background(), stream); err != nil { + t.Fatalf("seed stream %q: %v", id, err) + } +} + +// TestWebhookPostAppendsEvent walks the happy path: POSTing a body to +// /webhooks/ appends an event with empty source (system- +// emitted) and the raw body. The dispatcher receives the event, and a +// long-poll observer of that stream wakes. 
+func TestWebhookPostAppendsEvent(t *testing.T) { + t.Parallel() + rd := &recordingDispatcher{} + srv, s, bc := newWebhookServer(t, rd) + seedStream(t, s, "s-inbox", domain.TransportWebhook) + + wake := bc.Subscribe([]domain.StreamID{"s-inbox"}) + t.Cleanup(func() { bc.Unsubscribe([]domain.StreamID{"s-inbox"}, wake) }) + + body := "incoming text — anything goes here" + resp, err := http.Post(srv.URL+"/webhooks/s-inbox", "text/plain", strings.NewReader(body)) + if err != nil { + t.Fatalf("POST: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(resp.Body) + t.Fatalf("status = %d, body = %q", resp.StatusCode, string(b)) + } + + events, err := s.Events.ListForStream(context.Background(), "s-inbox", 10) + if err != nil { + t.Fatalf("list events: %v", err) + } + if len(events) != 1 { + t.Fatalf("events = %d, want 1", len(events)) + } + msg, err := events[0].Message() + if err != nil { + t.Fatalf("parse message body: %v", err) + } + if msg.Body != body { + t.Fatalf("message body = %q, want %q", msg.Body, body) + } + if msg.From != "" { + t.Fatalf("message from = %q, want empty (no helix originator)", msg.From) + } + if events[0].Source != "" { + t.Fatalf("source = %q, want empty (system-emitted)", events[0].Source) + } + if events[0].StreamID != "s-inbox" { + t.Fatalf("streamID = %q, want s-inbox", events[0].StreamID) + } + + dispatched := rd.snapshot() + if len(dispatched) != 1 || dispatched[0].ID != events[0].ID { + t.Fatalf("dispatched = %+v, want one event matching the appended one", dispatched) + } + + select { + case <-wake: + default: + t.Fatal("broadcaster did not wake long-poll observer") + } +} + +// TestWebhookPostErrors covers the rejection paths the handler must +// turn into HTTP errors: unknown streams, wrong-transport streams, +// empty bodies, and wrong HTTP methods. 
+func TestWebhookPostErrors(t *testing.T) { + t.Parallel() + srv, s, _ := newWebhookServer(t, nil) + seedStream(t, s, "s-inbox", domain.TransportWebhook) + seedStream(t, s, "s-local", domain.TransportLocal) + + cases := []struct { + name string + method string + path string + body string + wantCode int + }{ + {"unknown stream", "POST", "/webhooks/s-ghost", "x", http.StatusNotFound}, + {"wrong-transport stream (local) is not a webhook", "POST", "/webhooks/s-local", "x", http.StatusNotFound}, + {"empty body", "POST", "/webhooks/s-inbox", "", http.StatusBadRequest}, + {"GET not allowed", "GET", "/webhooks/s-inbox", "", http.StatusMethodNotAllowed}, + {"PUT not allowed", "PUT", "/webhooks/s-inbox", "x", http.StatusMethodNotAllowed}, + {"DELETE not allowed", "DELETE", "/webhooks/s-inbox", "", http.StatusMethodNotAllowed}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + req, err := http.NewRequest(tc.method, srv.URL+tc.path, strings.NewReader(tc.body)) + if err != nil { + t.Fatalf("build request: %v", err) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != tc.wantCode { + b, _ := io.ReadAll(resp.Body) + t.Fatalf("status = %d, want %d (body = %q)", resp.StatusCode, tc.wantCode, string(b)) + } + }) + } +} + +// TestWebhookErrorsLeaveStoreClean asserts that error paths don't +// half-create state. After a failed POST, the stream's event list +// must be empty. +func TestWebhookErrorsLeaveStoreClean(t *testing.T) { + t.Parallel() + srv, s, _ := newWebhookServer(t, nil) + seedStream(t, s, "s-inbox", domain.TransportWebhook) + + // Empty body → 400. No event should land. 
+ resp, err := http.Post(srv.URL+"/webhooks/s-inbox", "text/plain", strings.NewReader("")) + if err != nil { + t.Fatalf("POST: %v", err) + } + _ = resp.Body.Close() + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("status = %d, want 400", resp.StatusCode) + } + + events, err := s.Events.ListForStream(context.Background(), "s-inbox", 10) + if err != nil { + t.Fatalf("list events: %v", err) + } + if len(events) != 0 { + t.Fatalf("events = %d, want 0 — failed POST should not append", len(events)) + } +} + +// TestWebhookBodySizeBoundary verifies the 1 MiB cap is enforced +// exactly: a body at the limit is accepted, a body one byte over is +// rejected. +func TestWebhookBodySizeBoundary(t *testing.T) { + t.Parallel() + srv, s, _ := newWebhookServer(t, nil) + seedStream(t, s, "s-inbox", domain.TransportWebhook) + + atLimit := bytes.Repeat([]byte("a"), 1<<20) + resp, err := http.Post(srv.URL+"/webhooks/s-inbox", "text/plain", bytes.NewReader(atLimit)) + if err != nil { + t.Fatalf("POST at limit: %v", err) + } + _ = resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("at-limit status = %d, want 200", resp.StatusCode) + } + + overLimit := bytes.Repeat([]byte("a"), (1<<20)+1) + resp, err = http.Post(srv.URL+"/webhooks/s-inbox", "text/plain", bytes.NewReader(overLimit)) + if err != nil { + t.Fatalf("POST over limit: %v", err) + } + _ = resp.Body.Close() + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("over-limit status = %d, want 400", resp.StatusCode) + } +} + +// TestWebhookWithNilCollaborators verifies the handler tolerates a +// Server constructed without a broadcaster or dispatcher — common in +// tests and in degraded modes where one or both are deliberately +// unwired. The event still lands; nothing panics. 
+func TestWebhookWithNilCollaborators(t *testing.T) {
+	t.Parallel()
+	s, err := sqlite.Open(":memory:")
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	seedStream(t, s, "s-inbox", domain.TransportWebhook)
+	// Every optional collaborator is passed as nil: the server is built
+	// from the store and an empty tool registry only.
+	srv := httptest.NewServer(server.New(s, tools.NewRegistry(), nil, nil, nil).Handler())
+	t.Cleanup(srv.Close)
+
+	resp, err := http.Post(srv.URL+"/webhooks/s-inbox", "text/plain", strings.NewReader("x"))
+	if err != nil {
+		t.Fatalf("POST: %v", err)
+	}
+	_ = resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("status = %d, want 200", resp.StatusCode)
+	}
+
+	// Surface store failures as such instead of letting them show up as
+	// a misleading "events = 0" count mismatch.
+	events, err := s.Events.ListForStream(context.Background(), "s-inbox", 10)
+	if err != nil {
+		t.Fatalf("list events: %v", err)
+	}
+	if len(events) != 1 {
+		t.Fatalf("events = %d, want 1", len(events))
+	}
+}
+
+// TestWebhookPreservesBodyExactly verifies the handler stores bodies
+// verbatim — newlines, multibyte UTF-8, special characters all round-
+// trip without normalisation. The stored event is decoded with
+// Message() and its Body is compared byte-for-byte with what was sent.
+func TestWebhookPreservesBodyExactly(t *testing.T) {
+	t.Parallel()
+	srv, s, _ := newWebhookServer(t, nil)
+	seedStream(t, s, "s-inbox", domain.TransportWebhook)
+
+	// Mixes newlines, a tab, and multibyte runes so any trimming or
+	// re-encoding on the way into the store is visible.
+	body := "line one\nline two\n\ttabbed → emoji 🚀 — UTF-8 preserved"
+	resp, err := http.Post(srv.URL+"/webhooks/s-inbox", "application/json", strings.NewReader(body))
+	if err != nil {
+		t.Fatalf("POST: %v", err)
+	}
+	_ = resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("status = %d, want 200", resp.StatusCode)
+	}
+
+	// A listing error must fail the test on its own terms rather than
+	// being reported as a wrong event count.
+	events, err := s.Events.ListForStream(context.Background(), "s-inbox", 10)
+	if err != nil {
+		t.Fatalf("list events: %v", err)
+	}
+	if len(events) != 1 {
+		t.Fatalf("events = %d, want 1", len(events))
+	}
+	msg, err := events[0].Message()
+	if err != nil {
+		t.Fatalf("parse message: %v", err)
+	}
+	if msg.Body != body {
+		t.Fatalf("message body mismatch:\n got: %q\nwant: %q", msg.Body, body)
+	}
+}
+
+// TestWebhookConcurrentPosts fires many parallel POSTs to the same
+// Stream and asserts every one lands as a distinct event with a
+// matching dispatch.
+func TestWebhookConcurrentPosts(t *testing.T) {
+	t.Parallel()
+	recorder := &recordingDispatcher{}
+	srv, st, _ := newWebhookServer(t, recorder)
+	seedStream(t, st, "s-inbox", domain.TransportWebhook)
+
+	const N = 25
+	endpoint := srv.URL + "/webhooks/s-inbox"
+
+	// post sends one uniquely numbered payload. Failures go through
+	// t.Errorf, which is safe to call from goroutines other than the
+	// one running the test.
+	post := func(idx int) {
+		payload := fmt.Sprintf("payload %02d", idx)
+		resp, err := http.Post(endpoint, "text/plain", strings.NewReader(payload))
+		if err != nil {
+			t.Errorf("POST %d: %v", idx, err)
+			return
+		}
+		_ = resp.Body.Close()
+		if resp.StatusCode != http.StatusOK {
+			t.Errorf("POST %d: status = %d", idx, resp.StatusCode)
+		}
+	}
+
+	var wg sync.WaitGroup
+	wg.Add(N)
+	for i := 0; i < N; i++ {
+		go func(idx int) {
+			defer wg.Done()
+			post(idx)
+		}(i)
+	}
+	wg.Wait()
+
+	// Every POST must have produced exactly one stored event...
+	stored, err := st.Events.ListForStream(context.Background(), "s-inbox", 100)
+	if err != nil {
+		t.Fatalf("list events: %v", err)
+	}
+	if len(stored) != N {
+		t.Fatalf("events = %d, want %d", len(stored), N)
+	}
+	// ...and exactly one recorded dispatch.
+	dispatched := recorder.snapshot()
+	if len(dispatched) != N {
+		t.Fatalf("dispatched = %d, want %d", len(dispatched), N)
+	}
+
+	// Event IDs must be pairwise distinct.
+	ids := make(map[domain.EventID]struct{}, N)
+	for _, ev := range stored {
+		if _, dup := ids[ev.ID]; dup {
+			t.Fatalf("duplicate event ID %q", ev.ID)
+		}
+		ids[ev.ID] = struct{}{}
+	}
+}
+
+// TestWebhookDoesNotLeakAcrossStreams verifies that a POST to one
+// webhook stream lands only on that stream — not on a sibling
+// webhook stream that happens to exist.
+func TestWebhookDoesNotLeakAcrossStreams(t *testing.T) {
+	t.Parallel()
+	srv, s, _ := newWebhookServer(t, nil)
+	// Two sibling webhook streams; only s-inbox receives the POST.
+	seedStream(t, s, "s-inbox", domain.TransportWebhook)
+	seedStream(t, s, "s-other", domain.TransportWebhook)
+
+	resp, err := http.Post(srv.URL+"/webhooks/s-inbox", "text/plain", strings.NewReader("for inbox"))
+	if err != nil {
+		t.Fatalf("POST: %v", err)
+	}
+	_ = resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("status = %d", resp.StatusCode)
+	}
+
+	// Both listing errors are checked. If the s-other lookup failed and
+	// the error were dropped, the empty result would satisfy the
+	// "want 0" assertion below without the store ever being inspected.
+	ctx := context.Background()
+	inboxEvents, err := s.Events.ListForStream(ctx, "s-inbox", 10)
+	if err != nil {
+		t.Fatalf("list inbox events: %v", err)
+	}
+	otherEvents, err := s.Events.ListForStream(ctx, "s-other", 10)
+	if err != nil {
+		t.Fatalf("list other events: %v", err)
+	}
+	if len(inboxEvents) != 1 {
+		t.Fatalf("inbox events = %d, want 1", len(inboxEvents))
+	}
+	if len(otherEvents) != 0 {
+		t.Fatalf("other events = %d, want 0 (no leakage)", len(otherEvents))
+	}
+}
+
+// TestWebhookInboundDoesNotEcho proves that a bidirectional webhook
+// Stream (one with both inbound and outbound configured) does *not*
+// echo inbound POSTs back out to its own outbound URL. The dispatcher
+// skips emit for events with empty Source — i.e. events that came
+// from this transport's own inbound — so a stream that's
+// bidirectional doesn't loop. Only Worker-published events
+// (Source != "") emit outbound.
+func TestWebhookInboundDoesNotEcho(t *testing.T) {
+	t.Parallel()
+	// catcher stands in for the stream's outbound URL. Anything it
+	// receives is forwarded on caught; the buffer of 1 lets a single
+	// unexpected delivery complete without blocking the handler.
+	caught := make(chan string, 1)
+	catcher := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		_ = r.Body.Close()
+		caught <- string(body)
+		w.WriteHeader(http.StatusNoContent)
+	}))
+	t.Cleanup(catcher.Close)
+
+	st, err := sqlite.Open(":memory:")
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	// A real dispatcher (logging to io.Discard) is wired in here,
+	// because the dispatcher's emit decision is what this test exercises.
+	d := dispatch.New(st, nil, slog.New(slog.NewTextHandler(io.Discard, nil)))
+	srv := httptest.NewServer(server.New(st, tools.NewRegistry(), broadcast.New(), d, nil).Handler())
+	t.Cleanup(srv.Close)
+
+	// Webhook stream whose outbound URL points at the catcher.
+	// NOTE(review): the json.Marshal error is discarded here.
+	cfg, _ := json.Marshal(domain.WebhookConfig{OutboundURL: catcher.URL})
+	stream, err := domain.NewStream("s-bridge", "bridge", "", "w-owner", time.Now().UTC(),
+		domain.Transport{Kind: domain.TransportWebhook, Config: cfg})
+	if err != nil {
+		t.Fatalf("new stream: %v", err)
+	}
+	if err := st.Streams.Create(context.Background(), stream); err != nil {
+		t.Fatalf("create stream: %v", err)
+	}
+
+	body := "round-trip text"
+	resp, err := http.Post(srv.URL+"/webhooks/s-bridge", "text/plain", strings.NewReader(body))
+	if err != nil {
+		t.Fatalf("POST: %v", err)
+	}
+	_ = resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("inbound status = %d", resp.StatusCode)
+	}
+
+	// Absence is asserted by waiting: the test passes only if the
+	// catcher stays silent for the whole 500ms window.
+	select {
+	case got := <-caught:
+		t.Fatalf("inbound event echoed to outbound: %q", got)
+	case <-time.After(500 * time.Millisecond):
+		// Expected: nothing arrives at the catcher because inbound
+		// events have empty Source and the dispatcher skips emit.
+	}
+}
diff --git a/helix-org/store/sqlite/config.go b/helix-org/store/sqlite/config.go
new file mode 100644
index 0000000000..9ad0056d6d
--- /dev/null
+++ b/helix-org/store/sqlite/config.go
@@ -0,0 +1,96 @@
+package sqlite
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+
+	"github.com/helixml/helix-org/domain"
+	"github.com/helixml/helix-org/store"
+)
+
+// configRow is the GORM model for one row of the "configs" table.
+// Key is the primary key; UpdatedBy holds the string form of the
+// domain.Config's UpdatedBy value.
+type configRow struct {
+	Key       string `gorm:"primaryKey;type:text"`
+	Value     string `gorm:"not null"`
+	UpdatedAt time.Time
+	UpdatedBy string `gorm:"type:text"`
+}
+
+// TableName tells GORM to store configRow values in the "configs" table.
+func (configRow) TableName() string { return "configs" }
+
+// configsRepo is the GORM-backed repository for config rows.
+type configsRepo struct {
+	db *gorm.DB
+}
+
+// Set upserts a config row by key. The `helix-org config` CLI is the
+// only intended caller — there is no MCP tool path in.
+func (r *configsRepo) Set(ctx context.Context, cfg domain.Config) error {
+	row := configRow{
+		Key:       cfg.Key,
+		Value:     cfg.Value,
+		UpdatedAt: cfg.UpdatedAt,
+		UpdatedBy: string(cfg.UpdatedBy),
+	}
+	// Insert the row; on a key conflict, overwrite value, updated_at
+	// and updated_by on the existing row instead of failing.
+	err := r.db.WithContext(ctx).Clauses(clause.OnConflict{
+		Columns:   []clause.Column{{Name: "key"}},
+		DoUpdates: clause.AssignmentColumns([]string{"value", "updated_at", "updated_by"}),
+	}).Create(&row).Error
+	if err != nil {
+		return fmt.Errorf("set config %q: %w", cfg.Key, err)
+	}
+	return nil
+}
+
+// Get loads the config stored under key. A missing row is reported as
+// an error wrapping store.ErrNotFound; any other database failure is
+// wrapped with the key for context.
+func (r *configsRepo) Get(ctx context.Context, key string) (domain.Config, error) {
+	var row configRow
+	err := r.db.WithContext(ctx).First(&row, "key = ?", key).Error
+	if err != nil {
+		// Translate GORM's not-found sentinel into the store-level one
+		// so callers can match it with errors.Is.
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return domain.Config{}, fmt.Errorf("config %q: %w", key, store.ErrNotFound)
+		}
+		return domain.Config{}, fmt.Errorf("get config %q: %w", key, err)
+	}
+	return rowToConfig(row), nil
+}
+
+// List returns every config row whose key starts with prefix, ordered
+// by key. An empty prefix returns everything.
+func (r *configsRepo) List(ctx context.Context, prefix string) ([]domain.Config, error) { + var rows []configRow + q := r.db.WithContext(ctx).Order("key") + if prefix != "" { + q = q.Where("key LIKE ?", prefix+"%") + } + if err := q.Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list configs: %w", err) + } + out := make([]domain.Config, 0, len(rows)) + for _, row := range rows { + out = append(out, rowToConfig(row)) + } + return out, nil +} + +func (r *configsRepo) Delete(ctx context.Context, key string) error { + res := r.db.WithContext(ctx).Delete(&configRow{}, "key = ?", key) + if res.Error != nil { + return fmt.Errorf("delete config %q: %w", key, res.Error) + } + if res.RowsAffected == 0 { + return fmt.Errorf("config %q: %w", key, store.ErrNotFound) + } + return nil +} + +func rowToConfig(row configRow) domain.Config { + return domain.Config{ + Key: row.Key, + Value: row.Value, + UpdatedAt: row.UpdatedAt, + UpdatedBy: domain.WorkerID(row.UpdatedBy), + } +} diff --git a/helix-org/store/sqlite/config_test.go b/helix-org/store/sqlite/config_test.go new file mode 100644 index 0000000000..b8a362222f --- /dev/null +++ b/helix-org/store/sqlite/config_test.go @@ -0,0 +1,111 @@ +package sqlite_test + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +func TestConfigsSetGetUpsert(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC) + + cfg, err := domain.NewConfig("claude.bin", `"claude"`, now, "w-owner") + if err != nil { + t.Fatalf("NewConfig: %v", err) + } + if err := s.Configs.Set(ctx, cfg); err != nil { + t.Fatalf("Set: %v", err) + } + + got, err := s.Configs.Get(ctx, "claude.bin") + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.Value != `"claude"` { + t.Fatalf("value = %q", got.Value) + } + + // Upsert: change value, key stays the same. 
+ cfg2, _ := domain.NewConfig("claude.bin", `"/usr/local/bin/claude"`, now.Add(time.Hour), "w-owner") + if err := s.Configs.Set(ctx, cfg2); err != nil { + t.Fatalf("Set (update): %v", err) + } + got2, _ := s.Configs.Get(ctx, "claude.bin") + if got2.Value != `"/usr/local/bin/claude"` { + t.Fatalf("value after update = %q", got2.Value) + } +} + +func TestConfigsGetMissing(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + + _, err := s.Configs.Get(ctx, "nope") + if !errors.Is(err, store.ErrNotFound) { + t.Fatalf("err = %v, want ErrNotFound", err) + } +} + +func TestConfigsListPrefix(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC) + + for _, kv := range []struct{ k, v string }{ + {"claude.bin", `"claude"`}, + {"claude.model", `"opus"`}, + {"transport.postmark", `{"token":"x"}`}, + {"dispatcher.timeout", `300`}, + } { + c, _ := domain.NewConfig(kv.k, kv.v, now, "") + if err := s.Configs.Set(ctx, c); err != nil { + t.Fatalf("Set %q: %v", kv.k, err) + } + } + + all, _ := s.Configs.List(ctx, "") + if len(all) != 4 { + t.Fatalf("List() = %d, want 4", len(all)) + } + + claudeOnly, _ := s.Configs.List(ctx, "claude.") + if len(claudeOnly) != 2 { + t.Fatalf("List(claude.) = %d, want 2", len(claudeOnly)) + } + + none, _ := s.Configs.List(ctx, "missing.") + if len(none) != 0 { + t.Fatalf("List(missing.) 
= %d, want 0", len(none)) + } +} + +func TestConfigsDelete(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC) + + c, _ := domain.NewConfig("temp.x", `1`, now, "") + if err := s.Configs.Set(ctx, c); err != nil { + t.Fatalf("Set: %v", err) + } + if err := s.Configs.Delete(ctx, "temp.x"); err != nil { + t.Fatalf("Delete: %v", err) + } + if _, err := s.Configs.Get(ctx, "temp.x"); !errors.Is(err, store.ErrNotFound) { + t.Fatalf("Get after Delete = %v, want ErrNotFound", err) + } + + if err := s.Configs.Delete(ctx, "temp.x"); !errors.Is(err, store.ErrNotFound) { + t.Fatalf("Delete again = %v, want ErrNotFound", err) + } +} diff --git a/helix-org/store/sqlite/environment.go b/helix-org/store/sqlite/environment.go new file mode 100644 index 0000000000..bbe2744616 --- /dev/null +++ b/helix-org/store/sqlite/environment.go @@ -0,0 +1,50 @@ +package sqlite + +import ( + "context" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +type environmentRow struct { + WorkerID string `gorm:"primaryKey;type:text"` + Path string `gorm:"not null"` + CreatedAt time.Time + UpdatedAt time.Time +} + +func (environmentRow) TableName() string { return "environments" } + +type environmentsRepo struct { + db *gorm.DB +} + +func (r *environmentsRepo) Create(ctx context.Context, env domain.Environment) error { + row := environmentRow{ + WorkerID: string(env.WorkerID), + Path: env.Path, + CreatedAt: env.CreatedAt, + } + if err := r.db.WithContext(ctx).Create(&row).Error; err != nil { + return fmt.Errorf("create environment: %w", err) + } + return nil +} + +func (r *environmentsRepo) Get(ctx context.Context, workerID domain.WorkerID) (domain.Environment, error) { + var row environmentRow + err := r.db.WithContext(ctx).First(&row, "worker_id = ?", string(workerID)).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + 
return domain.Environment{}, fmt.Errorf("environment for worker %q: %w", workerID, store.ErrNotFound) + } + return domain.Environment{}, fmt.Errorf("get environment for worker %q: %w", workerID, err) + } + return domain.NewEnvironment(domain.WorkerID(row.WorkerID), row.Path, row.CreatedAt) +} diff --git a/helix-org/store/sqlite/event.go b/helix-org/store/sqlite/event.go new file mode 100644 index 0000000000..e9237dea4e --- /dev/null +++ b/helix-org/store/sqlite/event.go @@ -0,0 +1,153 @@ +package sqlite + +import ( + "context" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" +) + +type eventRow struct { + ID string `gorm:"primaryKey;type:text"` + StreamID string `gorm:"not null;index"` + Source string `gorm:"index"` // empty for system-emitted + Body string `gorm:"not null"` + CreatedAt time.Time `gorm:"index"` +} + +func (eventRow) TableName() string { return "events" } + +type eventsRepo struct { + db *gorm.DB +} + +func (r *eventsRepo) Append(ctx context.Context, e domain.Event) error { + row := eventToRow(e) + if err := r.db.WithContext(ctx).Create(&row).Error; err != nil { + return fmt.Errorf("append event: %w", err) + } + return nil +} + +func (r *eventsRepo) ListForStream(ctx context.Context, streamID domain.StreamID, limit int) ([]domain.Event, error) { + query := r.db.WithContext(ctx).Where("stream_id = ?", string(streamID)).Order("created_at DESC, id DESC") + if limit > 0 { + query = query.Limit(limit) + } + var rows []eventRow + if err := query.Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list events for stream %q: %w", streamID, err) + } + return rowsToEvents(rows) +} + +func (r *eventsRepo) ListSince(ctx context.Context, streamIDs []domain.StreamID, since domain.EventID, limit int) ([]domain.Event, error) { + if len(streamIDs) == 0 { + return nil, nil + } + ids := make([]string, 0, len(streamIDs)) + for _, s := range streamIDs { + ids = append(ids, string(s)) + } + + // Resolve `since` to its (created_at, id) 
pair. If the event is unknown + // (empty since, or stale), we fall back to "no lower bound" — same as if + // the caller passed nothing. + var ( + sinceTS time.Time + sinceID string + hasLB bool + ) + if since != "" { + var pivot eventRow + err := r.db.WithContext(ctx).Where("id = ?", string(since)).Take(&pivot).Error + if err == nil { + sinceTS = pivot.CreatedAt + sinceID = pivot.ID + hasLB = true + } + // gorm.ErrRecordNotFound and other errors fall through to "no lower + // bound" — tail callers tolerate this and just see recent history. + } + + query := r.db.WithContext(ctx).Where("stream_id IN ?", ids) + if hasLB { + // (created_at, id) > (sinceTS, sinceID) + query = query.Where("(created_at > ?) OR (created_at = ? AND id > ?)", sinceTS, sinceTS, sinceID) + } + query = query.Order("created_at ASC, id ASC") + if limit > 0 { + query = query.Limit(limit) + } + var rows []eventRow + if err := query.Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list events since %q: %w", since, err) + } + return rowsToEvents(rows) +} + +func (r *eventsRepo) ListAll(ctx context.Context, limit int) ([]domain.Event, error) { + query := r.db.WithContext(ctx).Order("created_at DESC, id DESC") + if limit > 0 { + query = query.Limit(limit) + } + var rows []eventRow + if err := query.Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list all events: %w", err) + } + return rowsToEvents(rows) +} + +func (r *eventsRepo) ListForWorker(ctx context.Context, workerID domain.WorkerID, limit int) ([]domain.Event, error) { + // Join events with subscriptions to return only events on streams the + // worker subscribes to, newest first. + query := r.db.WithContext(ctx). + Table("events AS e"). + Joins("JOIN subscriptions AS s ON s.stream_id = e.stream_id"). + Where("s.worker_id = ?", string(workerID)). + Order("e.created_at DESC, e.id DESC"). 
+ Select("e.*") + if limit > 0 { + query = query.Limit(limit) + } + var rows []eventRow + if err := query.Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list events for worker %q: %w", workerID, err) + } + return rowsToEvents(rows) +} + +func eventToRow(e domain.Event) eventRow { + return eventRow{ + ID: string(e.ID), + StreamID: string(e.StreamID), + Source: string(e.Source), + Body: e.Body, + CreatedAt: e.CreatedAt, + } +} + +func rowToEvent(row eventRow) (domain.Event, error) { + return domain.NewEvent( + domain.EventID(row.ID), + domain.StreamID(row.StreamID), + domain.WorkerID(row.Source), + row.Body, + row.CreatedAt, + ) +} + +func rowsToEvents(rows []eventRow) ([]domain.Event, error) { + out := make([]domain.Event, 0, len(rows)) + for _, row := range rows { + e, err := rowToEvent(row) + if err != nil { + return nil, err + } + out = append(out, e) + } + return out, nil +} diff --git a/helix-org/store/sqlite/grant.go b/helix-org/store/sqlite/grant.go new file mode 100644 index 0000000000..620d28c286 --- /dev/null +++ b/helix-org/store/sqlite/grant.go @@ -0,0 +1,102 @@ +package sqlite + +import ( + "context" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +type grantRow struct { + ID string `gorm:"primaryKey;type:text"` + WorkerID string `gorm:"not null;index"` + ToolName string `gorm:"not null"` + CreatedAt time.Time + UpdatedAt time.Time +} + +func (grantRow) TableName() string { return "grants" } + +type grantsRepo struct { + db *gorm.DB +} + +func (r *grantsRepo) Create(ctx context.Context, g domain.ToolGrant) error { + row := grantToRow(g) + if err := r.db.WithContext(ctx).Create(&row).Error; err != nil { + return fmt.Errorf("create grant: %w", err) + } + return nil +} + +func (r *grantsRepo) Get(ctx context.Context, id domain.GrantID) (domain.ToolGrant, error) { + var row grantRow + err := r.db.WithContext(ctx).First(&row, "id = ?", string(id)).Error + if err 
!= nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return domain.ToolGrant{}, fmt.Errorf("grant %q: %w", id, store.ErrNotFound) + } + return domain.ToolGrant{}, fmt.Errorf("get grant %q: %w", id, err) + } + return rowToGrant(row) +} + +func (r *grantsRepo) ListByWorker(ctx context.Context, workerID domain.WorkerID) ([]domain.ToolGrant, error) { + var rows []grantRow + if err := r.db.WithContext(ctx).Where("worker_id = ?", string(workerID)).Order("id").Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list grants for worker %q: %w", workerID, err) + } + out := make([]domain.ToolGrant, 0, len(rows)) + for _, row := range rows { + g, err := rowToGrant(row) + if err != nil { + return nil, err + } + out = append(out, g) + } + return out, nil +} + +func (r *grantsRepo) FindForWorkerAndTool(ctx context.Context, workerID domain.WorkerID, toolName domain.ToolName) (domain.ToolGrant, error) { + var row grantRow + err := r.db.WithContext(ctx).Where("worker_id = ? AND tool_name = ?", string(workerID), string(toolName)).First(&row).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return domain.ToolGrant{}, fmt.Errorf("grant for worker %q tool %q: %w", workerID, toolName, store.ErrNotFound) + } + return domain.ToolGrant{}, fmt.Errorf("find grant for worker %q tool %q: %w", workerID, toolName, err) + } + return rowToGrant(row) +} + +func (r *grantsRepo) Delete(ctx context.Context, id domain.GrantID) error { + res := r.db.WithContext(ctx).Delete(&grantRow{}, "id = ?", string(id)) + if res.Error != nil { + return fmt.Errorf("delete grant %q: %w", id, res.Error) + } + if res.RowsAffected == 0 { + return fmt.Errorf("grant %q: %w", id, store.ErrNotFound) + } + return nil +} + +func grantToRow(g domain.ToolGrant) grantRow { + return grantRow{ + ID: string(g.ID), + WorkerID: string(g.WorkerID), + ToolName: string(g.ToolName), + } +} + +func rowToGrant(row grantRow) (domain.ToolGrant, error) { + return domain.NewToolGrant( + domain.GrantID(row.ID), + 
domain.WorkerID(row.WorkerID), + domain.ToolName(row.ToolName), + ) +} diff --git a/helix-org/store/sqlite/position.go b/helix-org/store/sqlite/position.go new file mode 100644 index 0000000000..a13f0fc7c2 --- /dev/null +++ b/helix-org/store/sqlite/position.go @@ -0,0 +1,97 @@ +package sqlite + +import ( + "context" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +type positionRow struct { + ID string `gorm:"primaryKey;type:text"` + RoleID string `gorm:"not null;index"` + ParentID *string `gorm:"index"` + CreatedAt time.Time + UpdatedAt time.Time +} + +func (positionRow) TableName() string { return "positions" } + +type positionsRepo struct { + db *gorm.DB +} + +func (r *positionsRepo) Create(ctx context.Context, pos domain.Position) error { + row := positionToRow(pos) + if err := r.db.WithContext(ctx).Create(&row).Error; err != nil { + return fmt.Errorf("create position: %w", err) + } + return nil +} + +func (r *positionsRepo) Get(ctx context.Context, id domain.PositionID) (domain.Position, error) { + var row positionRow + err := r.db.WithContext(ctx).First(&row, "id = ?", string(id)).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return domain.Position{}, fmt.Errorf("position %q: %w", id, store.ErrNotFound) + } + return domain.Position{}, fmt.Errorf("get position %q: %w", id, err) + } + return rowToPosition(row) +} + +func (r *positionsRepo) List(ctx context.Context) ([]domain.Position, error) { + var rows []positionRow + if err := r.db.WithContext(ctx).Order("id").Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list positions: %w", err) + } + return rowsToPositions(rows) +} + +func (r *positionsRepo) ListChildren(ctx context.Context, parent domain.PositionID) ([]domain.Position, error) { + var rows []positionRow + if err := r.db.WithContext(ctx).Where("parent_id = ?", string(parent)).Order("id").Find(&rows).Error; err != nil { + return nil, 
fmt.Errorf("list children of %q: %w", parent, err) + } + return rowsToPositions(rows) +} + +func positionToRow(pos domain.Position) positionRow { + var parent *string + if pos.ParentID != nil { + s := string(*pos.ParentID) + parent = &s + } + return positionRow{ + ID: string(pos.ID), + RoleID: string(pos.RoleID), + ParentID: parent, + } +} + +func rowToPosition(row positionRow) (domain.Position, error) { + var parent *domain.PositionID + if row.ParentID != nil { + p := domain.PositionID(*row.ParentID) + parent = &p + } + return domain.NewPosition(domain.PositionID(row.ID), domain.RoleID(row.RoleID), parent) +} + +func rowsToPositions(rows []positionRow) ([]domain.Position, error) { + out := make([]domain.Position, 0, len(rows)) + for _, row := range rows { + pos, err := rowToPosition(row) + if err != nil { + return nil, err + } + out = append(out, pos) + } + return out, nil +} diff --git a/helix-org/store/sqlite/role.go b/helix-org/store/sqlite/role.go new file mode 100644 index 0000000000..db9d6846a8 --- /dev/null +++ b/helix-org/store/sqlite/role.go @@ -0,0 +1,90 @@ +package sqlite + +import ( + "context" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +type roleRow struct { + ID string `gorm:"primaryKey;type:text"` + Content string `gorm:"not null"` + CreatedAt time.Time + UpdatedAt time.Time +} + +func (roleRow) TableName() string { return "roles" } + +type rolesRepo struct { + db *gorm.DB +} + +func (r *rolesRepo) Create(ctx context.Context, role domain.Role) error { + if err := r.db.WithContext(ctx).Create(roleToRow(role)).Error; err != nil { + return fmt.Errorf("create role: %w", err) + } + return nil +} + +func (r *rolesRepo) Get(ctx context.Context, id domain.RoleID) (domain.Role, error) { + var row roleRow + err := r.db.WithContext(ctx).First(&row, "id = ?", string(id)).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return domain.Role{}, 
fmt.Errorf("role %q: %w", id, store.ErrNotFound) + } + return domain.Role{}, fmt.Errorf("get role %q: %w", id, err) + } + return rowToRole(row), nil +} + +func (r *rolesRepo) List(ctx context.Context) ([]domain.Role, error) { + var rows []roleRow + if err := r.db.WithContext(ctx).Order("id").Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list roles: %w", err) + } + out := make([]domain.Role, 0, len(rows)) + for _, row := range rows { + out = append(out, rowToRole(row)) + } + return out, nil +} + +func (r *rolesRepo) Update(ctx context.Context, role domain.Role) error { + row := roleToRow(role) + res := r.db.WithContext(ctx).Model(&roleRow{}).Where("id = ?", row.ID).Updates(map[string]any{ + "content": row.Content, + "updated_at": row.UpdatedAt, + }) + if res.Error != nil { + return fmt.Errorf("update role: %w", res.Error) + } + if res.RowsAffected == 0 { + return fmt.Errorf("role %q: %w", role.ID, store.ErrNotFound) + } + return nil +} + +func roleToRow(role domain.Role) roleRow { + return roleRow{ + ID: string(role.ID), + Content: role.Content, + CreatedAt: role.CreatedAt, + UpdatedAt: role.UpdatedAt, + } +} + +func rowToRole(row roleRow) domain.Role { + return domain.Role{ + ID: domain.RoleID(row.ID), + Content: row.Content, + CreatedAt: row.CreatedAt, + UpdatedAt: row.UpdatedAt, + } +} diff --git a/helix-org/store/sqlite/sqlite.go b/helix-org/store/sqlite/sqlite.go new file mode 100644 index 0000000000..6d973d4c89 --- /dev/null +++ b/helix-org/store/sqlite/sqlite.go @@ -0,0 +1,65 @@ +// Package sqlite is the GORM/SQLite implementation of the store interfaces. +package sqlite + +import ( + "fmt" + "time" + + "github.com/glebarez/sqlite" + "gorm.io/gorm" + "gorm.io/gorm/logger" + + "github.com/helixml/helix-org/store" +) + +// Open opens a SQLite database at the given path (use ":memory:" for tests) +// and runs AutoMigrate. It returns a Store bound to the concrete repos. 
+// +// For ":memory:" DSNs, the connection pool is pinned to a single +// connection. Without this, every new connection in the pool gets its +// own private in-memory database — concurrent HTTP requests would +// each see a different (empty) DB. File-backed DSNs are unaffected. +func Open(dsn string) (*store.Store, error) { + db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{ + Logger: logger.Default.LogMode(logger.Silent), + NowFunc: func() time.Time { + return time.Now().UTC() + }, + }) + if err != nil { + return nil, fmt.Errorf("open sqlite %q: %w", dsn, err) + } + if dsn == ":memory:" { + sqlDB, err := db.DB() + if err != nil { + return nil, fmt.Errorf("get sql.DB: %w", err) + } + sqlDB.SetMaxOpenConns(1) + } + if err := db.AutoMigrate( + &roleRow{}, + &positionRow{}, + &workerRow{}, + &workerRuntimeStateRow{}, + &grantRow{}, + &streamRow{}, + &subscriptionRow{}, + &eventRow{}, + &environmentRow{}, + &configRow{}, + ); err != nil { + return nil, fmt.Errorf("auto-migrate: %w", err) + } + return &store.Store{ + Roles: &rolesRepo{db: db}, + Positions: &positionsRepo{db: db}, + Workers: &workersRepo{db: db}, + WorkerRuntimeState: &workerRuntimeStateRepo{db: db}, + Grants: &grantsRepo{db: db}, + Streams: &streamsRepo{db: db}, + Subscriptions: &subscriptionsRepo{db: db}, + Events: &eventsRepo{db: db}, + Environments: &environmentsRepo{db: db}, + Configs: &configsRepo{db: db}, + }, nil +} diff --git a/helix-org/store/sqlite/sqlite_test.go b/helix-org/store/sqlite/sqlite_test.go new file mode 100644 index 0000000000..ff0b97bf1c --- /dev/null +++ b/helix-org/store/sqlite/sqlite_test.go @@ -0,0 +1,196 @@ +package sqlite_test + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/store/sqlite" +) + +func newStore(t *testing.T) *store.Store { + t.Helper() + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open sqlite: %v", err) + } + return s 
+} + +func TestRolesRoundTripAndUpdate(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + + created := time.Date(2026, 4, 25, 12, 0, 0, 0, time.UTC) + role, err := domain.NewRole("r-ceo", "# CEO\nTop of the org.", created) + if err != nil { + t.Fatalf("NewRole: %v", err) + } + if err := s.Roles.Create(ctx, role); err != nil { + t.Fatalf("Create: %v", err) + } + + got, err := s.Roles.Get(ctx, "r-ceo") + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.Content != "# CEO\nTop of the org." { + t.Fatalf("roundtrip mismatch: %+v", got) + } + if got.CreatedAt.IsZero() || got.UpdatedAt.IsZero() { + t.Fatalf("timestamps not persisted: created=%v updated=%v", got.CreatedAt, got.UpdatedAt) + } + + updated := domain.Role{ + ID: got.ID, + Content: "# CEO\nNow with more verve.", + CreatedAt: got.CreatedAt, + UpdatedAt: created.Add(time.Hour), + } + if err := s.Roles.Update(ctx, updated); err != nil { + t.Fatalf("Update: %v", err) + } + got, err = s.Roles.Get(ctx, "r-ceo") + if err != nil { + t.Fatalf("Get after update: %v", err) + } + if got.Content != "# CEO\nNow with more verve." 
{ + t.Fatalf("post-update content = %q", got.Content) + } + if !got.UpdatedAt.Equal(created.Add(time.Hour)) { + t.Fatalf("UpdatedAt = %v, want %v", got.UpdatedAt, created.Add(time.Hour)) + } + + list, err := s.Roles.List(ctx) + if err != nil { + t.Fatalf("List: %v", err) + } + if len(list) != 1 { + t.Fatalf("List length = %d, want 1", len(list)) + } +} + +func TestRolesNotFound(t *testing.T) { + t.Parallel() + s := newStore(t) + _, err := s.Roles.Get(context.Background(), "missing") + if !errors.Is(err, store.ErrNotFound) { + t.Fatalf("error = %v, want ErrNotFound", err) + } +} + +func TestPositionsRoundTripAndChildren(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + + root, _ := domain.NewPosition("p-root", "r-owner", nil) + if err := s.Positions.Create(ctx, root); err != nil { + t.Fatalf("Create root: %v", err) + } + rootID := root.ID + child, _ := domain.NewPosition("p-ceo", "r-ceo", &rootID) + if err := s.Positions.Create(ctx, child); err != nil { + t.Fatalf("Create child: %v", err) + } + + got, err := s.Positions.Get(ctx, "p-ceo") + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.ParentID == nil || *got.ParentID != "p-root" { + t.Fatalf("parent = %v, want p-root", got.ParentID) + } + + kids, err := s.Positions.ListChildren(ctx, "p-root") + if err != nil { + t.Fatalf("ListChildren: %v", err) + } + if len(kids) != 1 || kids[0].ID != "p-ceo" { + t.Fatalf("children = %+v, want [p-ceo]", kids) + } +} + +func TestWorkersHumanAndAI(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + + human, err := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "i am the owner") + if err != nil { + t.Fatalf("NewHumanWorker: %v", err) + } + if err := s.Workers.Create(ctx, human); err != nil { + t.Fatalf("Create human: %v", err) + } + + ai, err := domain.NewAIWorker("w-ceo", []domain.PositionID{"p-ceo"}, "you are the ceo") + if err != nil { + t.Fatalf("NewAIWorker: %v", err) + } + if err := 
s.Workers.Create(ctx, ai); err != nil { + t.Fatalf("Create ai: %v", err) + } + + gotHuman, err := s.Workers.Get(ctx, "w-owner") + if err != nil { + t.Fatalf("Get human: %v", err) + } + if gotHuman.Kind() != domain.WorkerKindHuman { + t.Fatalf("kind = %q, want human", gotHuman.Kind()) + } + if _, ok := gotHuman.(*domain.HumanWorker); !ok { + t.Fatalf("want *HumanWorker, got %T", gotHuman) + } + + gotAI, err := s.Workers.Get(ctx, "w-ceo") + if err != nil { + t.Fatalf("Get ai: %v", err) + } + if gotAI.Kind() != domain.WorkerKindAI { + t.Fatalf("kind = %q, want ai", gotAI.Kind()) + } + +} + +func TestGrants(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + + g, err := domain.NewToolGrant("g-1", "w-ceo", "hire_worker") + if err != nil { + t.Fatalf("NewToolGrant: %v", err) + } + if err := s.Grants.Create(ctx, g); err != nil { + t.Fatalf("Create: %v", err) + } + + got, err := s.Grants.Get(ctx, "g-1") + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.ToolName != "hire_worker" { + t.Fatalf("tool = %q", got.ToolName) + } + + list, err := s.Grants.ListByWorker(ctx, "w-ceo") + if err != nil { + t.Fatalf("ListByWorker: %v", err) + } + if len(list) != 1 { + t.Fatalf("list len = %d", len(list)) + } + + if err := s.Grants.Delete(ctx, "g-1"); err != nil { + t.Fatalf("Delete: %v", err) + } + _, err = s.Grants.Get(ctx, "g-1") + if !errors.Is(err, store.ErrNotFound) { + t.Fatalf("after delete err = %v, want ErrNotFound", err) + } +} diff --git a/helix-org/store/sqlite/stream.go b/helix-org/store/sqlite/stream.go new file mode 100644 index 0000000000..06e68ccd05 --- /dev/null +++ b/helix-org/store/sqlite/stream.go @@ -0,0 +1,100 @@ +package sqlite + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +type streamRow struct { + ID string `gorm:"primaryKey;type:text"` + Name string `gorm:"not null;uniqueIndex"` + Description 
string + CreatedBy string `gorm:"not null;index"` + CreatedAt time.Time + TransportKind string `gorm:"not null;default:local"` + TransportConfig string `gorm:"not null;default:''"` +} + +func (streamRow) TableName() string { return "streams" } + +type streamsRepo struct { + db *gorm.DB +} + +func (r *streamsRepo) Create(ctx context.Context, s domain.Stream) error { + row, err := streamToRow(s) + if err != nil { + return err + } + if err := r.db.WithContext(ctx).Create(&row).Error; err != nil { + return fmt.Errorf("create stream: %w", err) + } + return nil +} + +func (r *streamsRepo) Get(ctx context.Context, id domain.StreamID) (domain.Stream, error) { + var row streamRow + err := r.db.WithContext(ctx).First(&row, "id = ?", string(id)).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return domain.Stream{}, fmt.Errorf("stream %q: %w", id, store.ErrNotFound) + } + return domain.Stream{}, fmt.Errorf("get stream %q: %w", id, err) + } + return rowToStream(row) +} + +func (r *streamsRepo) List(ctx context.Context) ([]domain.Stream, error) { + var rows []streamRow + if err := r.db.WithContext(ctx).Order("id").Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list streams: %w", err) + } + out := make([]domain.Stream, 0, len(rows)) + for _, row := range rows { + s, err := rowToStream(row) + if err != nil { + return nil, err + } + out = append(out, s) + } + return out, nil +} + +func streamToRow(s domain.Stream) (streamRow, error) { + cfg := "" + if len(s.Transport.Config) > 0 { + cfg = string(s.Transport.Config) + } + return streamRow{ + ID: string(s.ID), + Name: s.Name, + Description: s.Description, + CreatedBy: string(s.CreatedBy), + CreatedAt: s.CreatedAt, + TransportKind: string(s.Transport.Kind), + TransportConfig: cfg, + }, nil +} + +func rowToStream(row streamRow) (domain.Stream, error) { + transport := domain.Transport{Kind: domain.TransportKind(row.TransportKind)} + if row.TransportConfig != "" { + transport.Config = 
json.RawMessage(row.TransportConfig) + } + return domain.NewStream( + domain.StreamID(row.ID), + row.Name, + row.Description, + domain.WorkerID(row.CreatedBy), + row.CreatedAt, + transport, + ) +} diff --git a/helix-org/store/sqlite/streams_and_events_test.go b/helix-org/store/sqlite/streams_and_events_test.go new file mode 100644 index 0000000000..44131a214e --- /dev/null +++ b/helix-org/store/sqlite/streams_and_events_test.go @@ -0,0 +1,183 @@ +package sqlite_test + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +func TestStreamsRoundTripAndByName(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + + st, err := domain.NewStream("s-general", "general", "all-hands", "w-owner", now, domain.Transport{}) + if err != nil { + t.Fatalf("NewStream: %v", err) + } + if err := s.Streams.Create(ctx, st); err != nil { + t.Fatalf("Create: %v", err) + } + + gotByID, err := s.Streams.Get(ctx, "s-general") + if err != nil { + t.Fatalf("Get: %v", err) + } + if gotByID.Name != "general" { + t.Fatalf("name = %q", gotByID.Name) + } + if gotByID.Transport.Kind != domain.TransportLocal { + t.Fatalf("Transport.Kind = %q, want %q", gotByID.Transport.Kind, domain.TransportLocal) + } +} + +func TestSubscriptionsUniqueWorkerStream(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + + sub, _ := domain.NewSubscription("w-1", "s-1", now) + if err := s.Subscriptions.Create(ctx, sub); err != nil { + t.Fatalf("Create: %v", err) + } + + dup, _ := domain.NewSubscription("w-1", "s-1", now) + if err := s.Subscriptions.Create(ctx, dup); err == nil { + t.Fatalf("Create duplicate (worker,stream) should fail") + } + + found, err := s.Subscriptions.Find(ctx, "w-1", "s-1") + if err != nil { + t.Fatalf("Find: %v", err) + } + if found.WorkerID != 
"w-1" || found.StreamID != "s-1" { + t.Fatalf("subscription = %+v", found) + } + + if err := s.Subscriptions.Delete(ctx, "w-1", "s-1"); err != nil { + t.Fatalf("Delete: %v", err) + } + _, err = s.Subscriptions.Find(ctx, "w-1", "s-1") + if !errors.Is(err, store.ErrNotFound) { + t.Fatalf("Find after delete: %v, want ErrNotFound", err) + } +} + +func TestEventsListForWorkerViaSubscriptions(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + base := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + + // Two streams, w-1 subscribed only to s-a. + sub, _ := domain.NewSubscription("w-1", "s-a", base) + if err := s.Subscriptions.Create(ctx, sub); err != nil { + t.Fatalf("Create subscription: %v", err) + } + + e1, _ := domain.NewEvent("e-1", "s-a", "w-owner", "hello on a", base.Add(time.Second)) + e2, _ := domain.NewEvent("e-2", "s-b", "w-owner", "hello on b", base.Add(2*time.Second)) + e3, _ := domain.NewEvent("e-3", "s-a", "w-owner", "hello again on a", base.Add(3*time.Second)) + for _, e := range []domain.Event{e1, e2, e3} { + if err := s.Events.Append(ctx, e); err != nil { + t.Fatalf("Append %s: %v", e.ID, err) + } + } + + got, err := s.Events.ListForWorker(ctx, "w-1", 0) + if err != nil { + t.Fatalf("ListForWorker: %v", err) + } + if len(got) != 2 { + t.Fatalf("got %d events, want 2 (only s-a visible)", len(got)) + } + if got[0].ID != "e-3" || got[1].ID != "e-1" { + t.Fatalf("order wrong: %v", []domain.EventID{got[0].ID, got[1].ID}) + } + + limited, err := s.Events.ListForWorker(ctx, "w-1", 1) + if err != nil { + t.Fatalf("ListForWorker limit: %v", err) + } + if len(limited) != 1 || limited[0].ID != "e-3" { + t.Fatalf("limit result = %v", limited) + } +} + +func TestEventsListSinceAcrossStreams(t *testing.T) { + t.Parallel() + s := newStore(t) + ctx := context.Background() + base := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + + // Three streams, four events, interleaved across s-a and s-b plus + // one on s-other (which the caller will 
exclude). + for _, e := range []struct { + id, st, body string + offset time.Duration + }{ + {"e-1", "s-a", "first on a", 1 * time.Second}, + {"e-2", "s-b", "first on b", 2 * time.Second}, + {"e-3", "s-other", "noise", 3 * time.Second}, + {"e-4", "s-a", "second on a", 4 * time.Second}, + {"e-5", "s-b", "second on b", 5 * time.Second}, + } { + ev, _ := domain.NewEvent(domain.EventID(e.id), domain.StreamID(e.st), "w-owner", e.body, base.Add(e.offset)) + if err := s.Events.Append(ctx, ev); err != nil { + t.Fatalf("Append %s: %v", e.id, err) + } + } + + // since="" returns all matching events oldest-first. + all, err := s.Events.ListSince(ctx, []domain.StreamID{"s-a", "s-b"}, "", 0) + if err != nil { + t.Fatalf("ListSince: %v", err) + } + gotIDs := make([]domain.EventID, len(all)) + for i, e := range all { + gotIDs[i] = e.ID + } + wantIDs := []domain.EventID{"e-1", "e-2", "e-4", "e-5"} + if len(gotIDs) != len(wantIDs) { + t.Fatalf("ids = %v, want %v", gotIDs, wantIDs) + } + for i := range wantIDs { + if gotIDs[i] != wantIDs[i] { + t.Fatalf("ids = %v, want %v", gotIDs, wantIDs) + } + } + + // since=e-2 returns only events strictly newer than e-2 on the + // matching streams. + tail, err := s.Events.ListSince(ctx, []domain.StreamID{"s-a", "s-b"}, "e-2", 0) + if err != nil { + t.Fatalf("ListSince since: %v", err) + } + if len(tail) != 2 || tail[0].ID != "e-4" || tail[1].ID != "e-5" { + t.Fatalf("since=e-2 result = %v", tail) + } + + // Empty stream set returns nothing. + empty, err := s.Events.ListSince(ctx, nil, "", 0) + if err != nil { + t.Fatalf("ListSince empty: %v", err) + } + if len(empty) != 0 { + t.Fatalf("expected no events, got %v", empty) + } + + // Unknown since falls through to "no lower bound". 
+ full, err := s.Events.ListSince(ctx, []domain.StreamID{"s-a"}, "e-stale", 0) + if err != nil { + t.Fatalf("ListSince stale: %v", err) + } + if len(full) != 2 { + t.Fatalf("stale-since dropped events: %v", full) + } +} diff --git a/helix-org/store/sqlite/subscription.go b/helix-org/store/sqlite/subscription.go new file mode 100644 index 0000000000..acc9ea8bf9 --- /dev/null +++ b/helix-org/store/sqlite/subscription.go @@ -0,0 +1,100 @@ +package sqlite + +import ( + "context" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +type subscriptionRow struct { + WorkerID string `gorm:"primaryKey;type:text"` + StreamID string `gorm:"primaryKey;type:text"` + CreatedAt time.Time +} + +func (subscriptionRow) TableName() string { return "subscriptions" } + +type subscriptionsRepo struct { + db *gorm.DB +} + +func (r *subscriptionsRepo) Create(ctx context.Context, sub domain.Subscription) error { + row := subscriptionToRow(sub) + if err := r.db.WithContext(ctx).Create(&row).Error; err != nil { + return fmt.Errorf("create subscription: %w", err) + } + return nil +} + +func (r *subscriptionsRepo) Delete(ctx context.Context, workerID domain.WorkerID, streamID domain.StreamID) error { + res := r.db.WithContext(ctx).Delete(&subscriptionRow{}, "worker_id = ? AND stream_id = ?", string(workerID), string(streamID)) + if res.Error != nil { + return fmt.Errorf("delete subscription (%q,%q): %w", workerID, streamID, res.Error) + } + if res.RowsAffected == 0 { + return fmt.Errorf("subscription (%q,%q): %w", workerID, streamID, store.ErrNotFound) + } + return nil +} + +func (r *subscriptionsRepo) Find(ctx context.Context, workerID domain.WorkerID, streamID domain.StreamID) (domain.Subscription, error) { + var row subscriptionRow + err := r.db.WithContext(ctx).Where("worker_id = ? 
AND stream_id = ?", string(workerID), string(streamID)).First(&row).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return domain.Subscription{}, fmt.Errorf("subscription (%q,%q): %w", workerID, streamID, store.ErrNotFound) + } + return domain.Subscription{}, fmt.Errorf("find subscription (%q,%q): %w", workerID, streamID, err) + } + return rowToSubscription(row) +} + +func (r *subscriptionsRepo) ListForWorker(ctx context.Context, workerID domain.WorkerID) ([]domain.Subscription, error) { + var rows []subscriptionRow + if err := r.db.WithContext(ctx).Where("worker_id = ?", string(workerID)).Order("stream_id").Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list subscriptions for worker %q: %w", workerID, err) + } + return rowsToSubscriptions(rows) +} + +func (r *subscriptionsRepo) ListForStream(ctx context.Context, streamID domain.StreamID) ([]domain.Subscription, error) { + var rows []subscriptionRow + if err := r.db.WithContext(ctx).Where("stream_id = ?", string(streamID)).Order("worker_id").Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list subscriptions for stream %q: %w", streamID, err) + } + return rowsToSubscriptions(rows) +} + +func subscriptionToRow(sub domain.Subscription) subscriptionRow { + return subscriptionRow{ + WorkerID: string(sub.WorkerID), + StreamID: string(sub.StreamID), + CreatedAt: sub.CreatedAt, + } +} + +func rowToSubscription(row subscriptionRow) (domain.Subscription, error) { + return domain.NewSubscription( + domain.WorkerID(row.WorkerID), + domain.StreamID(row.StreamID), + row.CreatedAt, + ) +} + +func rowsToSubscriptions(rows []subscriptionRow) ([]domain.Subscription, error) { + out := make([]domain.Subscription, 0, len(rows)) + for _, row := range rows { + s, err := rowToSubscription(row) + if err != nil { + return nil, err + } + out = append(out, s) + } + return out, nil +} diff --git a/helix-org/store/sqlite/worker.go b/helix-org/store/sqlite/worker.go new file mode 100644 index 
0000000000..75e9282bdd --- /dev/null +++ b/helix-org/store/sqlite/worker.go @@ -0,0 +1,125 @@ +package sqlite + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +type workerRow struct { + ID string `gorm:"primaryKey;type:text"` + Kind string `gorm:"not null"` // "human" or "ai" + Positions string // JSON array of position ids + IdentityContent string // markdown body — domain-owned persona/profile, projected by the spawner + CreatedAt time.Time + UpdatedAt time.Time +} + +func (workerRow) TableName() string { return "workers" } + +type workersRepo struct { + db *gorm.DB +} + +func (r *workersRepo) Create(ctx context.Context, worker domain.Worker) error { + row, err := workerToRow(worker) + if err != nil { + return err + } + if err := r.db.WithContext(ctx).Create(&row).Error; err != nil { + return fmt.Errorf("create worker: %w", err) + } + return nil +} + +func (r *workersRepo) Get(ctx context.Context, id domain.WorkerID) (domain.Worker, error) { + var row workerRow + err := r.db.WithContext(ctx).First(&row, "id = ?", string(id)).Error + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, fmt.Errorf("worker %q: %w", id, store.ErrNotFound) + } + return nil, fmt.Errorf("get worker %q: %w", id, err) + } + return rowToWorker(row) +} + +func (r *workersRepo) List(ctx context.Context) ([]domain.Worker, error) { + var rows []workerRow + if err := r.db.WithContext(ctx).Order("id").Find(&rows).Error; err != nil { + return nil, fmt.Errorf("list workers: %w", err) + } + out := make([]domain.Worker, 0, len(rows)) + for _, row := range rows { + w, err := rowToWorker(row) + if err != nil { + return nil, err + } + out = append(out, w) + } + return out, nil +} + +// Update rewrites the mutable fields of an existing worker row. 
+// Positions and Kind are not user-editable, but nothing here enforces +// that: identity_content, positions and kind are all overwritten from the +// supplied worker. Only IdentityContent is intended to change today. +func (r *workersRepo) Update(ctx context.Context, worker domain.Worker) error { + row, err := workerToRow(worker) + if err != nil { + return err + } + res := r.db.WithContext(ctx). + Model(&workerRow{}). + Where("id = ?", row.ID). + Updates(map[string]any{ + "identity_content": row.IdentityContent, + "positions": row.Positions, + "kind": row.Kind, + }) + if res.Error != nil { + return fmt.Errorf("update worker %q: %w", row.ID, res.Error) + } + if res.RowsAffected == 0 { + return fmt.Errorf("worker %q: %w", worker.ID(), store.ErrNotFound) + } + return nil +} + +func workerToRow(worker domain.Worker) (workerRow, error) { + positions := worker.Positions() + encoded, err := json.Marshal(positions) + if err != nil { + return workerRow{}, fmt.Errorf("marshal positions: %w", err) + } + return workerRow{ + ID: string(worker.ID()), + Kind: string(worker.Kind()), + Positions: string(encoded), + IdentityContent: worker.IdentityContent(), + }, nil +} + +func rowToWorker(row workerRow) (domain.Worker, error) { + var positions []domain.PositionID + if row.Positions != "" { + if err := json.Unmarshal([]byte(row.Positions), &positions); err != nil { + return nil, fmt.Errorf("unmarshal positions: %w", err) + } + } + switch domain.WorkerKind(row.Kind) { + case domain.WorkerKindHuman: + return domain.NewHumanWorker(domain.WorkerID(row.ID), positions, row.IdentityContent) + case domain.WorkerKindAI: + return domain.NewAIWorker(domain.WorkerID(row.ID), positions, row.IdentityContent) + default: + return nil, fmt.Errorf("unknown worker kind %q", row.Kind) + } +} diff --git a/helix-org/store/sqlite/worker_runtime.go b/helix-org/store/sqlite/worker_runtime.go new file mode 100644 index 0000000000..69f6f706a1 --- /dev/null +++ b/helix-org/store/sqlite/worker_runtime.go @@ -0,0 
+1,98 @@ +package sqlite + +import ( + "context" + "errors" + "fmt" + "time" + + "gorm.io/gorm" + "gorm.io/gorm/clause" + + "github.com/helixml/helix-org/domain" +) + +// workerRuntimeStateRow stores one (workerID, backend, key) → value +// triple. The composite primary key is the natural key — there is no +// synthetic ID. Backends own the key namespace inside their backend +// label; helix-org core never reads or writes here. +type workerRuntimeStateRow struct { + WorkerID string `gorm:"primaryKey;type:text"` + Backend string `gorm:"primaryKey;type:text"` + Key string `gorm:"primaryKey;type:text"` + Value string `gorm:"type:text"` + UpdatedAt time.Time `gorm:"autoUpdateTime"` +} + +func (workerRuntimeStateRow) TableName() string { return "worker_runtime_state" } + +type workerRuntimeStateRepo struct { + db *gorm.DB +} + +func (r *workerRuntimeStateRepo) Get(ctx context.Context, workerID domain.WorkerID, backend string) (map[string]string, error) { + if workerID == "" || backend == "" { + return nil, errors.New("worker_runtime_state: workerID and backend are required") + } + var rows []workerRuntimeStateRow + err := r.db.WithContext(ctx). + Where("worker_id = ? AND backend = ?", string(workerID), backend). 
+ Find(&rows).Error + if err != nil { + return nil, fmt.Errorf("worker_runtime_state get %s/%s: %w", workerID, backend, err) + } + out := make(map[string]string, len(rows)) + for _, row := range rows { + out[row.Key] = row.Value + } + return out, nil +} + +func (r *workerRuntimeStateRepo) Set(ctx context.Context, workerID domain.WorkerID, backend, key, value string) error { + return r.SetMany(ctx, workerID, backend, map[string]string{key: value}) +} + +func (r *workerRuntimeStateRepo) SetMany(ctx context.Context, workerID domain.WorkerID, backend string, kv map[string]string) error { + if workerID == "" || backend == "" { + return errors.New("worker_runtime_state: workerID and backend are required") + } + if len(kv) == 0 { + return nil + } + rows := make([]workerRuntimeStateRow, 0, len(kv)) + for k, v := range kv { + if k == "" { + return errors.New("worker_runtime_state: key is empty") + } + rows = append(rows, workerRuntimeStateRow{ + WorkerID: string(workerID), + Backend: backend, + Key: k, + Value: v, + }) + } + // Upsert on the natural key — preserves any keys not in kv. + err := r.db.WithContext(ctx). + Clauses(clause.OnConflict{ + Columns: []clause.Column{{Name: "worker_id"}, {Name: "backend"}, {Name: "key"}}, + DoUpdates: clause.AssignmentColumns([]string{"value", "updated_at"}), + }). + Create(&rows).Error + if err != nil { + return fmt.Errorf("worker_runtime_state set %s/%s: %w", workerID, backend, err) + } + return nil +} + +func (r *workerRuntimeStateRepo) Clear(ctx context.Context, workerID domain.WorkerID, backend string) error { + if workerID == "" || backend == "" { + return errors.New("worker_runtime_state: workerID and backend are required") + } + err := r.db.WithContext(ctx). + Where("worker_id = ? AND backend = ?", string(workerID), backend). 
+ Delete(&workerRuntimeStateRow{}).Error + if err != nil { + return fmt.Errorf("worker_runtime_state clear %s/%s: %w", workerID, backend, err) + } + return nil +} diff --git a/helix-org/store/store.go b/helix-org/store/store.go new file mode 100644 index 0000000000..f502811b36 --- /dev/null +++ b/helix-org/store/store.go @@ -0,0 +1,150 @@ +// Package store defines the persistence contracts used by the server and +// tools. Concrete implementations live in sub-packages (e.g. sqlite). +package store + +import ( + "context" + "errors" + + "github.com/helixml/helix-org/domain" +) + +// ErrNotFound signals that the requested record does not exist. +// Repos wrap this with %w so callers can errors.Is it. +var ErrNotFound = errors.New("record not found") + +// Roles persists job descriptions. +type Roles interface { + Create(ctx context.Context, role domain.Role) error + Get(ctx context.Context, id domain.RoleID) (domain.Role, error) + List(ctx context.Context) ([]domain.Role, error) + Update(ctx context.Context, role domain.Role) error +} + +// Positions persists slots in the org chart. +type Positions interface { + Create(ctx context.Context, pos domain.Position) error + Get(ctx context.Context, id domain.PositionID) (domain.Position, error) + List(ctx context.Context) ([]domain.Position, error) + ListChildren(ctx context.Context, parent domain.PositionID) ([]domain.Position, error) +} + +// Workers persists humans and AIs. Update mutates fields the system +// allows changing in place — currently just IdentityContent (set at +// hire by the caller, replaced wholesale by update_identity). Identity +// is the per-Worker description; the system holds it in the domain +// rather than on disk so it survives any change in env layout. 
+type Workers interface { + Create(ctx context.Context, worker domain.Worker) error + Get(ctx context.Context, id domain.WorkerID) (domain.Worker, error) + List(ctx context.Context) ([]domain.Worker, error) + Update(ctx context.Context, worker domain.Worker) error +} + +// WorkerRuntimeState is a sidecar key/value store keyed by +// (workerID, backend). Runtime backends (the Helix integration today, +// future local containers, etc.) write whatever per-Worker pointers +// they need — Helix uses keys like "session_id", "project_id", +// "agent_app_id", "repo_id" — without forcing the domain to grow a +// field every time. +// +// The "backend" component is a free-form string the runtime owns +// (e.g. "helix"); helix-org core never reads or writes it. +// +// Get returns an empty map if the (workerID, backend) pair has no +// entries. Set upserts a single key, leaving other keys for that +// (workerID, backend) untouched. SetMany upserts a batch in the +// same way. Clear removes every entry for the pair (used when a +// Worker is fired and the runtime tears down its per-Worker state). +type WorkerRuntimeState interface { + Get(ctx context.Context, workerID domain.WorkerID, backend string) (map[string]string, error) + Set(ctx context.Context, workerID domain.WorkerID, backend, key, value string) error + SetMany(ctx context.Context, workerID domain.WorkerID, backend string, kv map[string]string) error + Clear(ctx context.Context, workerID domain.WorkerID, backend string) error +} + +// Grants persists tool grants. 
+type Grants interface { + Create(ctx context.Context, g domain.ToolGrant) error + Get(ctx context.Context, id domain.GrantID) (domain.ToolGrant, error) + ListByWorker(ctx context.Context, workerID domain.WorkerID) ([]domain.ToolGrant, error) + FindForWorkerAndTool(ctx context.Context, workerID domain.WorkerID, toolName domain.ToolName) (domain.ToolGrant, error) + Delete(ctx context.Context, id domain.GrantID) error +} + +// Streams persists named event sources. Streams are created explicitly +// via the create_stream tool. Every Stream carries a Transport — the +// default (TransportLocal) keeps events in SQLite and notifies the +// in-process broadcaster; other transports compose external I/O over +// the same local store. +type Streams interface { + Create(ctx context.Context, s domain.Stream) error + Get(ctx context.Context, id domain.StreamID) (domain.Stream, error) + List(ctx context.Context) ([]domain.Stream, error) +} + +// Subscriptions persists (Worker, Stream) links. The pair is the key — +// there is no synthetic ID. +type Subscriptions interface { + Create(ctx context.Context, sub domain.Subscription) error + Delete(ctx context.Context, workerID domain.WorkerID, streamID domain.StreamID) error + Find(ctx context.Context, workerID domain.WorkerID, streamID domain.StreamID) (domain.Subscription, error) + ListForWorker(ctx context.Context, workerID domain.WorkerID) ([]domain.Subscription, error) + ListForStream(ctx context.Context, streamID domain.StreamID) ([]domain.Subscription, error) +} + +// Events persists entries published on a Stream. +type Events interface { + Append(ctx context.Context, e domain.Event) error + ListForStream(ctx context.Context, streamID domain.StreamID, limit int) ([]domain.Event, error) + // ListForWorker returns events on the Streams a Worker subscribes to, + // newest first. If limit <= 0, no limit is applied. 
+ ListForWorker(ctx context.Context, workerID domain.WorkerID, limit int) ([]domain.Event, error) + // ListSince returns events on the named Streams strictly newer than the + // `since` event, oldest first. If streamIDs is empty, returns nothing + // (caller's glob matched no streams). If `since` is empty, returns the + // most recent `limit` events on the named streams in oldest-first order. + // If `since` does not exist, returns the same as if it were empty. If + // limit <= 0, no limit is applied. + ListSince(ctx context.Context, streamIDs []domain.StreamID, since domain.EventID, limit int) ([]domain.Event, error) + // ListAll returns events across every Stream, newest first. Powers + // the unified "All streams" activity feed in the UI. If limit <= 0, + // no limit is applied — callers are expected to pass a sane cap. + ListAll(ctx context.Context, limit int) ([]domain.Event, error) +} + +// Environments persists the per-Worker directory handle. The manager +// populates the directory before hire; this table just tracks that a +// directory exists and which Worker owns it. +type Environments interface { + Create(ctx context.Context, env domain.Environment) error + Get(ctx context.Context, workerID domain.WorkerID) (domain.Environment, error) +} + +// Configs persists operational-config rows: transport credentials, +// claude binary path, model selection, etc. Keys are flat dot- +// namespaced strings; values are JSON-encoded. See design/config.md +// for the org-graph-vs-ops split. Configs are written exclusively +// through the helix-org config CLI — never via MCP. +type Configs interface { + Set(ctx context.Context, cfg domain.Config) error + Get(ctx context.Context, key string) (domain.Config, error) + List(ctx context.Context, prefix string) ([]domain.Config, error) + Delete(ctx context.Context, key string) error +} + +// Store bundles all repositories a single concrete implementation provides. 
+// Handlers and tools depend on the narrower interfaces above; Store is the +// wiring point. +type Store struct { + Roles Roles + Positions Positions + Workers Workers + WorkerRuntimeState WorkerRuntimeState + Grants Grants + Streams Streams + Subscriptions Subscriptions + Events Events + Environments Environments + Configs Configs +} diff --git a/helix-org/tools/builtins.go b/helix-org/tools/builtins.go new file mode 100644 index 0000000000..697f6ce7a0 --- /dev/null +++ b/helix-org/tools/builtins.go @@ -0,0 +1,121 @@ +package tools + +import ( + "context" + "fmt" + "time" + + "github.com/google/uuid" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// Clock returns the current time. Tests override it. +type Clock func() time.Time + +// IDGen generates new unique string IDs. Tests override it. +type IDGen func() string + +// EventDispatcher fans a freshly-published Event out to every +// subscribed AI Worker as a separate Spawner activation. Tools call it +// after persisting an Event. The interface keeps tools.Deps free of a +// dependency on the dispatch package (avoiding an import cycle: the +// dispatcher itself imports tools). +type EventDispatcher interface { + Dispatch(ctx context.Context, event domain.Event) + DispatchHire(ctx context.Context, workerID domain.WorkerID, envPath string) +} + +// Deps bundles the stores, clocks, and configuration tools need. +// +// EnvsDir is the directory under which each Worker's Environment lives: +// HireWorker creates // at hire time and writes the +// role.md / identity.md / agent.md trio into it. +// +// Broadcaster is optional: if set, event-emitting tools (publish) will +// call its Notify method so any long-poll readers blocked on those +// streams wake up immediately. 
+// +// Dispatcher is optional: if set, event-emitting tools also call its +// Dispatch method so subscribed AI Workers get re-activated. Tests +// that don't exercise the runtime can leave it nil. The dispatcher +// itself owns the Spawner. +// +// Workspace is required (use agent.NoopWorkspaceSync{} for tests). +// update_role and update_identity call PublishFile on it after +// persisting to the DB so the per-runtime view of role/identity stays +// in sync with the canonical domain copy. +type Deps struct { + Store *store.Store + Now Clock + NewID IDGen + EnvsDir string + Broadcaster *broadcast.Broadcaster + Dispatcher EventDispatcher + Workspace agent.WorkspaceSync +} + +// DefaultDeps wires production defaults: real UUIDs and wall-clock time, +// and a no-op WorkspaceSync that callers replace with the runtime- +// specific implementation. EnvsDir, Broadcaster, and Dispatcher are +// left zero — production callers wire them in cmd/helix-org/serve.go. +func DefaultDeps(s *store.Store) Deps { + return Deps{ + Store: s, + Now: func() time.Time { return time.Now().UTC() }, + NewID: uuid.NewString, + Workspace: agent.NoopWorkspaceSync{}, + } +} + +// RegisterBuiltins registers every built-in tool on the registry — +// mutations on the org graph plus the matching read tools. Test tools +// (like Ping) are not included. +func RegisterBuiltins(reg *Registry, deps Deps) error { + if deps.Workspace == nil { + return fmt.Errorf("tools.RegisterBuiltins: deps.Workspace is required (use agent.NoopWorkspaceSync{} for tests)") + } + builtins := []domain.Tool{ + // Mutations. + &CreateRole{deps: deps}, + &UpdateRole{deps: deps}, + &UpdateIdentity{deps: deps}, + &CreatePosition{deps: deps}, + &HireWorker{deps: deps}, + &GrantTool{deps: deps}, + &RevokeTool{deps: deps}, + &CreateStream{deps: deps}, + &StreamMembers{deps: deps}, + &Subscribe{deps: deps}, + &Unsubscribe{deps: deps}, + &InviteWorkers{deps: deps}, + &Publish{deps: deps}, + &DM{deps: deps}, + // Reads. 
Each is a thin wrapper around a store call; together + // they replace the jsonapi GET handlers the server used to expose. + &ListRoles{deps: deps}, + &GetRole{deps: deps}, + &ListPositions{deps: deps}, + &GetPosition{deps: deps}, + &ListPositionChildren{deps: deps}, + &ListWorkers{deps: deps}, + &GetWorker{deps: deps}, + &ListWorkerGrants{deps: deps}, + &GetWorkerEnvironment{deps: deps}, + &ListStreams{deps: deps}, + &GetStream{deps: deps}, + &ListStreamEvents{deps: deps}, + &GetGrant{deps: deps}, + &ReadEvents{deps: deps}, + &WorkerLog{deps: deps}, + } + for _, tool := range builtins { + if err := reg.Register(tool); err != nil { + return fmt.Errorf("register %q: %w", tool.Name(), err) + } + } + return nil +} diff --git a/helix-org/tools/builtins_test.go b/helix-org/tools/builtins_test.go new file mode 100644 index 0000000000..f9812fc8e7 --- /dev/null +++ b/helix-org/tools/builtins_test.go @@ -0,0 +1,878 @@ +package tools_test + +import ( + "context" + "encoding/json" + "fmt" + "net/http/httptest" + "os" + "path/filepath" + "testing" + "time" + + "github.com/modelcontextprotocol/go-sdk/mcp" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/server" + "github.com/helixml/helix-org/store/sqlite" + "github.com/helixml/helix-org/tools" +) + +// TestDemoOwnerHiresCEO walks the "manager does the orchestration" story +// over MCP: each tool does one primitive thing, and the test drives the +// hiring ritual step by step. +// +// Owner is pre-seeded. Owner creates a #general Stream, subscribes +// themselves, defines a CEO Role (markdown content), creates a Position, +// then hires the CEO with inline grants and an identityContent. The +// Worker's IdentityContent is stored in the domain alongside the Role — +// no env files are written at hire (the spawner projects them at +// activation). Owner publishes; CEO sees it. 
+func TestDemoOwnerHiresCEO(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + envsDir := t.TempDir() + + reg := tools.NewRegistry() + deps := tools.DefaultDeps(s) + deps.EnvsDir = envsDir + if err := tools.RegisterBuiltins(reg, deps); err != nil { + t.Fatalf("register builtins: %v", err) + } + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + + // Seed owner directly: role, position, worker, environment, structural grants. + now := time.Now().UTC() + ownerRole, err := domain.NewRole("r-owner", "# Owner\nBootstrap owner.", now) + if err != nil { + t.Fatalf("seed role: %v", err) + } + mustCreate(t, s.Roles.Create(ctx, ownerRole)) + rootPos, _ := domain.NewPosition("p-root", "r-owner", nil) + mustCreate(t, s.Positions.Create(ctx, rootPos)) + owner, _ := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, owner)) + ownerEnvPath := filepath.Join(envsDir, "w-owner") + if err := os.MkdirAll(ownerEnvPath, 0o750); err != nil { + t.Fatalf("mkdir owner env: %v", err) + } + ownerEnv, _ := domain.NewEnvironment("w-owner", ownerEnvPath, now) + mustCreate(t, s.Environments.Create(ctx, ownerEnv)) + for _, name := range []domain.ToolName{ + tools.CreateRoleName, + tools.UpdateRoleName, + tools.CreatePositionName, + tools.HireWorkerName, + tools.GrantToolName, + tools.CreateStreamName, + tools.SubscribeName, + tools.PublishName, + } { + grantID := domain.GrantID("g-owner-" + name) + g, _ := domain.NewToolGrant(grantID, "w-owner", name) + mustCreate(t, s.Grants.Create(ctx, g)) + } + + ownerSession := connectMCP(t, srv.URL, "w-owner") + + invokeExpectID(t, ownerSession, tools.CreateStreamName, map[string]any{ + "id": "s-general", + "name": "general", + }) + invokeOK(t, ownerSession, tools.SubscribeName, map[string]any{"streamId": "s-general"}) + + invokeExpectID(t, ownerSession, 
tools.CreateRoleName, map[string]any{ + "id": "r-ceo", + "content": "# CEO\nLead the company. Subscribe to s-general.", + }) + + invokeExpectID(t, ownerSession, tools.CreatePositionName, map[string]any{ + "id": "p-ceo", + "roleId": "r-ceo", + "parentId": "p-root", + }) + + invokeExpectID(t, ownerSession, tools.HireWorkerName, map[string]any{ + "id": "w-ceo", + "positionId": "p-ceo", + "kind": "ai", + "identityContent": "# Meina Gladstone\nCEO. Decisive, warm, direct.", + "grants": []map[string]any{ + {"toolName": "publish"}, + {"toolName": "subscribe"}, + }, + }) + + // hire_worker creates the env directory but does not write files — + // the spawner projects role.md / identity.md / agent.md at + // activation time. So we should see the directory exist but be + // empty after a hire. + ceoEnvPath := filepath.Join(envsDir, "w-ceo") + if entries, err := os.ReadDir(ceoEnvPath); err != nil { + t.Fatalf("expected env dir to exist: %v", err) + } else if len(entries) != 0 { + t.Fatalf("expected empty env dir, got %d entries", len(entries)) + } + // IdentityContent lives in the domain. + ceoWorker, err := s.Workers.Get(ctx, "w-ceo") + if err != nil { + t.Fatalf("get w-ceo: %v", err) + } + if ceoWorker.IdentityContent() != "# Meina Gladstone\nCEO. Decisive, warm, direct." { + t.Fatalf("ceo identity = %q", ceoWorker.IdentityContent()) + } + + // hire_worker also creates the activation stream and subscribes the + // hiring Worker (the owner) so they can audit by reading events. + if _, err := s.Streams.Get(ctx, "s-activations-w-ceo"); err != nil { + t.Fatalf("activation stream missing for w-ceo: %v", err) + } + if _, err := s.Subscriptions.Find(ctx, "w-owner", "s-activations-w-ceo"); err != nil { + t.Fatalf("owner not subscribed to w-ceo activations: %v", err) + } + // The new Worker themselves is intentionally NOT subscribed — + // otherwise self-published events would loop the dispatcher. 
+ if _, err := s.Subscriptions.Find(ctx, "w-ceo", "s-activations-w-ceo"); err == nil { + t.Fatalf("w-ceo should NOT be subscribed to its own activation stream") + } + + // Stand in for the CEO's hire activation: subscribe to the + // stream they were told about. The dispatcher isn't wired in + // this test, so we drive it manually. + ceoSession := connectMCP(t, srv.URL, "w-ceo") + invokeOK(t, ceoSession, tools.SubscribeName, map[string]any{"streamId": "s-general"}) + + if _, err := s.Subscriptions.Find(ctx, "w-ceo", "s-general"); err != nil { + t.Fatalf("CEO subscription on s-general missing: %v", err) + } + + invokeExpectID(t, ownerSession, tools.PublishName, map[string]any{ + "streamId": "s-general", + "body": "please hire all of your staff", + }) + ceoEvents, err := s.Events.ListForWorker(ctx, "w-ceo", 10) + if err != nil { + t.Fatalf("ceo events: %v", err) + } + if len(ceoEvents) != 1 { + t.Fatalf("ceo events = %+v, want 1", ceoEvents) + } + msg, err := ceoEvents[0].Message() + if err != nil { + t.Fatalf("parse ceo event message: %v", err) + } + if msg.Body != "please hire all of your staff" { + t.Fatalf("ceo event body = %q", msg.Body) + } +} + +// TestUpdateRoleAndIdentityAreDomainWrites pins the post-refactor +// contract: update_role and update_identity are pure DB mutations. +// The spawner is the only thing that projects state into envs, so +// after a tool call the on-disk files (if any) are stale and only +// the DB row reflects the change. This test hires two workers and +// asserts that both `update_role` and `update_identity` flow through +// the domain alone — no fan-out walks, no cross-env writes. 
+func TestUpdateRoleAndIdentityAreDomainWrites(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + envsDir := t.TempDir() + + reg := tools.NewRegistry() + deps := tools.DefaultDeps(s) + deps.EnvsDir = envsDir + if err := tools.RegisterBuiltins(reg, deps); err != nil { + t.Fatalf("register builtins: %v", err) + } + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + now := time.Now().UTC() + + ownerRole, _ := domain.NewRole("r-owner", "# Owner", now) + mustCreate(t, s.Roles.Create(ctx, ownerRole)) + rootPos, _ := domain.NewPosition("p-root", "r-owner", nil) + mustCreate(t, s.Positions.Create(ctx, rootPos)) + owner, _ := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, owner)) + for _, name := range []domain.ToolName{ + tools.CreateRoleName, + tools.UpdateRoleName, + tools.UpdateIdentityName, + tools.CreatePositionName, + tools.HireWorkerName, + } { + g, _ := domain.NewToolGrant(domain.GrantID("g-"+name), "w-owner", name) + mustCreate(t, s.Grants.Create(ctx, g)) + } + + ownerSession := connectMCP(t, srv.URL, "w-owner") + + invokeExpectID(t, ownerSession, tools.CreateRoleName, map[string]any{ + "id": "r-eng", + "content": "# Engineer v1\nBuild stuff.", + }) + invokeExpectID(t, ownerSession, tools.CreatePositionName, map[string]any{ + "id": "p-eng-a", "roleId": "r-eng", "parentId": "p-root", + }) + invokeExpectID(t, ownerSession, tools.CreatePositionName, map[string]any{ + "id": "p-eng-b", "roleId": "r-eng", "parentId": "p-root", + }) + invokeExpectID(t, ownerSession, tools.HireWorkerName, map[string]any{ + "id": "w-a", "positionId": "p-eng-a", "kind": "ai", + "identityContent": "# Alice", + }) + invokeExpectID(t, ownerSession, tools.HireWorkerName, map[string]any{ + "id": "w-b", "positionId": "p-eng-b", "kind": "ai", + "identityContent": "# Bob", + }) + + // hire_worker 
does not write env files; the dirs exist but are empty. + for _, id := range []string{"w-a", "w-b"} { + entries, err := os.ReadDir(filepath.Join(envsDir, id)) + if err != nil { + t.Fatalf("read %s env dir: %v", id, err) + } + if len(entries) != 0 { + t.Fatalf("%s env should be empty after hire, got %d entries", id, len(entries)) + } + } + + invokeExpectID(t, ownerSession, tools.UpdateRoleName, map[string]any{ + "roleId": "r-eng", + "content": "# Engineer v2\nBuild better stuff.", + }) + + // Role row in the DB now carries the new content; nothing was + // written to disk by the tool. + got, err := s.Roles.Get(ctx, "r-eng") + if err != nil { + t.Fatalf("get r-eng: %v", err) + } + if got.Content != "# Engineer v2\nBuild better stuff." { + t.Fatalf("r-eng content = %q", got.Content) + } + for _, id := range []string{"w-a", "w-b"} { + entries, _ := os.ReadDir(filepath.Join(envsDir, id)) + if len(entries) != 0 { + t.Fatalf("%s env should still be empty after update_role, got %d entries", id, len(entries)) + } + } + + // update_identity rewrites Worker.IdentityContent on the DB row. + invokeExpectID(t, ownerSession, tools.UpdateIdentityName, map[string]any{ + "workerId": "w-a", + "content": "# Alice (v2)\nNow with extra spice.", + }) + wa, err := s.Workers.Get(ctx, "w-a") + if err != nil { + t.Fatalf("get w-a: %v", err) + } + if wa.IdentityContent() != "# Alice (v2)\nNow with extra spice." { + t.Fatalf("w-a identity = %q", wa.IdentityContent()) + } + // w-b's identity is untouched. + wb, err := s.Workers.Get(ctx, "w-b") + if err != nil { + t.Fatalf("get w-b: %v", err) + } + if wb.IdentityContent() != "# Bob" { + t.Fatalf("w-b identity changed: %q", wb.IdentityContent()) + } +} + +// TestStreamMembers exercises the read-only stream_members tool: +// before any subscriber, members is empty; after a Worker subscribes, +// they appear in the list. 
This is the "wait until Renée is part of +// the stream" primitive — managers call this before publishing if +// they need to know whether a particular Worker is listening. +func TestStreamMembers(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + envsDir := t.TempDir() + + reg := tools.NewRegistry() + deps := tools.DefaultDeps(s) + deps.EnvsDir = envsDir + if err := tools.RegisterBuiltins(reg, deps); err != nil { + t.Fatalf("register builtins: %v", err) + } + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + now := time.Now().UTC() + + ownerRole, _ := domain.NewRole("r-owner", "# Owner", now) + mustCreate(t, s.Roles.Create(ctx, ownerRole)) + rootPos, _ := domain.NewPosition("p-root", "r-owner", nil) + mustCreate(t, s.Positions.Create(ctx, rootPos)) + owner, _ := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, owner)) + worker, _ := domain.NewAIWorker("w-listener", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, worker)) + for _, name := range []domain.ToolName{ + tools.CreateStreamName, + tools.StreamMembersName, + tools.SubscribeName, + } { + g, _ := domain.NewToolGrant(domain.GrantID("g-owner-"+name), "w-owner", name) + mustCreate(t, s.Grants.Create(ctx, g)) + } + g, _ := domain.NewToolGrant("g-listener-sub", "w-listener", tools.SubscribeName) + mustCreate(t, s.Grants.Create(ctx, g)) + + ownerSession := connectMCP(t, srv.URL, "w-owner") + listenerSession := connectMCP(t, srv.URL, "w-listener") + + invokeExpectID(t, ownerSession, tools.CreateStreamName, map[string]any{ + "id": "s-room", + "name": "room", + }) + + // Empty before anyone subscribes. 
+ if got := membersOf(t, ownerSession, "s-room"); len(got) != 0 { + t.Fatalf("members before subscribe = %v, want empty", got) + } + + invokeOK(t, listenerSession, tools.SubscribeName, map[string]any{"streamId": "s-room"}) + + if got := membersOf(t, ownerSession, "s-room"); len(got) != 1 || got[0] != "w-listener" { + t.Fatalf("members after subscribe = %v, want [w-listener]", got) + } +} + +// TestInviteWorkers verifies one Worker can subscribe others to a +// Stream — the primitive that lets the initiator open a DM by creating +// a Stream and adding both parties, without requiring the recipient to +// self-subscribe first. +func TestInviteWorkers(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + envsDir := t.TempDir() + + reg := tools.NewRegistry() + deps := tools.DefaultDeps(s) + deps.EnvsDir = envsDir + if err := tools.RegisterBuiltins(reg, deps); err != nil { + t.Fatalf("register builtins: %v", err) + } + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + now := time.Now().UTC() + ownerRole, _ := domain.NewRole("r-owner", "# Owner", now) + mustCreate(t, s.Roles.Create(ctx, ownerRole)) + rootPos, _ := domain.NewPosition("p-root", "r-owner", nil) + mustCreate(t, s.Positions.Create(ctx, rootPos)) + owner, _ := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, owner)) + alice, _ := domain.NewAIWorker("w-alice", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, alice)) + bob, _ := domain.NewAIWorker("w-bob", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, bob)) + for _, name := range []domain.ToolName{ + tools.CreateStreamName, + tools.InviteWorkersName, + tools.StreamMembersName, + } { + g, _ := domain.NewToolGrant(domain.GrantID("g-owner-"+name), "w-owner", name) + mustCreate(t, s.Grants.Create(ctx, g)) + } + + 
ownerSession := connectMCP(t, srv.URL, "w-owner") + + invokeExpectID(t, ownerSession, tools.CreateStreamName, map[string]any{ + "id": "s-dm", + "name": "alice ↔ bob", + }) + + // Owner adds both parties to the stream in one call. + invokeOK(t, ownerSession, tools.InviteWorkersName, map[string]any{ + "streamId": "s-dm", + "workerIds": []string{"w-alice", "w-bob"}, + }) + + got := membersOf(t, ownerSession, "s-dm") + if len(got) != 2 { + t.Fatalf("members after invite = %v, want two", got) + } + want := map[string]bool{"w-alice": true, "w-bob": true} + for _, m := range got { + if !want[m] { + t.Fatalf("unexpected member %q in %v", m, got) + } + } + + // Idempotent: re-inviting an already-subscribed worker alongside a + // new one is a no-op for the existing subscription and a success + // for the rest. + invokeOK(t, ownerSession, tools.InviteWorkersName, map[string]any{ + "streamId": "s-dm", + "workerIds": []string{"w-alice", "w-owner"}, + }) + got = membersOf(t, ownerSession, "s-dm") + if len(got) != 3 { + t.Fatalf("members after re-invite = %v, want three", got) + } + + // Unknown worker -> error, no partial subscription created. + if _, err := invokeTool(t, ownerSession, tools.InviteWorkersName, map[string]any{ + "streamId": "s-dm", + "workerIds": []string{"w-ghost"}, + }); err == nil { + t.Fatalf("inviting unknown worker should error") + } + if got = membersOf(t, ownerSession, "s-dm"); len(got) != 3 { + t.Fatalf("members after failed invite = %v, want three (unchanged)", got) + } +} + +// TestDM exercises the dm tool: a single call from Alice to Bob +// creates the per-pair Stream, subscribes both, and publishes the +// body. A second DM in the reverse direction reuses the same Stream. 
+func TestDM(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + envsDir := t.TempDir() + + reg := tools.NewRegistry() + deps := tools.DefaultDeps(s) + deps.EnvsDir = envsDir + if err := tools.RegisterBuiltins(reg, deps); err != nil { + t.Fatalf("register builtins: %v", err) + } + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + now := time.Now().UTC() + ownerRole, _ := domain.NewRole("r-owner", "# Owner", now) + mustCreate(t, s.Roles.Create(ctx, ownerRole)) + rootPos, _ := domain.NewPosition("p-root", "r-owner", nil) + mustCreate(t, s.Positions.Create(ctx, rootPos)) + alice, _ := domain.NewHumanWorker("w-alice", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, alice)) + bob, _ := domain.NewAIWorker("w-bob", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, bob)) + for _, name := range []domain.ToolName{tools.DMName, tools.ReadEventsName} { + g, _ := domain.NewToolGrant(domain.GrantID("g-alice-"+name), "w-alice", name) + mustCreate(t, s.Grants.Create(ctx, g)) + } + bobDMGrant, _ := domain.NewToolGrant("g-bob-dm", "w-bob", tools.DMName) + mustCreate(t, s.Grants.Create(ctx, bobDMGrant)) + + aliceSession := connectMCP(t, srv.URL, "w-alice") + bobSession := connectMCP(t, srv.URL, "w-bob") + + // Alice DMs Bob — single call does it all. 
+ raw, err := invokeTool(t, aliceSession, tools.DMName, map[string]any{ + "toWorkerId": "w-bob", + "body": "hey", + }) + if err != nil { + t.Fatalf("dm: %v", err) + } + var out struct { + ID string `json:"id"` + StreamID string `json:"streamId"` + To string `json:"to"` + } + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal dm: %v", err) + } + if out.StreamID != "s-dm-w-alice-w-bob" { + t.Fatalf("streamId = %q, want s-dm-w-alice-w-bob", out.StreamID) + } + if out.To != "w-bob" { + t.Fatalf("to = %q, want w-bob", out.To) + } + + // Both parties are subscribed; the event landed in the store. + for _, wid := range []domain.WorkerID{"w-alice", "w-bob"} { + if _, err := s.Subscriptions.Find(ctx, wid, domain.StreamID(out.StreamID)); err != nil { + t.Fatalf("%s not subscribed to %s: %v", wid, out.StreamID, err) + } + } + events, _ := s.Events.ListForWorker(ctx, "w-bob", 10) + if len(events) != 1 { + t.Fatalf("bob events = %+v, want one", events) + } + msg, err := events[0].Message() + if err != nil { + t.Fatalf("parse dm event: %v", err) + } + if msg.Body != "hey" { + t.Fatalf("dm body = %q, want hey", msg.Body) + } + if msg.From != "w-alice" || len(msg.To) != 1 || msg.To[0] != "w-bob" { + t.Fatalf("dm envelope = %+v, want from=w-alice to=[w-bob]", msg) + } + + // Bob replies. Reverse direction reuses the same Stream — the IDs + // are sorted, so A→B and B→A share one ordered conversation. + raw, err = invokeTool(t, bobSession, tools.DMName, map[string]any{ + "toWorkerId": "w-alice", + "body": "hi back", + }) + if err != nil { + t.Fatalf("reply dm: %v", err) + } + var reply struct { + StreamID string `json:"streamId"` + } + _ = json.Unmarshal(raw, &reply) + if reply.StreamID != out.StreamID { + t.Fatalf("reply streamId = %q, want %q (DM stream should be reused)", reply.StreamID, out.StreamID) + } + + // Alice can read the conversation through her own subscription. 
+ events, _ = s.Events.ListForWorker(ctx, "w-alice", 10) + if len(events) != 2 { + t.Fatalf("alice events = %+v, want two", events) + } + + // Self-DM is rejected up-front. + if _, err := invokeTool(t, aliceSession, tools.DMName, map[string]any{ + "toWorkerId": "w-alice", + "body": "hi me", + }); err == nil { + t.Fatalf("DM to self should error") + } +} + +// TestReadsOverMCP exercises the new read tools: an Owner with the +// full builtin grant set lists workers, lists streams, and reads back +// events on subscribed streams, all over MCP. +func TestReadsOverMCP(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + envsDir := t.TempDir() + + reg := tools.NewRegistry() + deps := tools.DefaultDeps(s) + deps.EnvsDir = envsDir + if err := tools.RegisterBuiltins(reg, deps); err != nil { + t.Fatalf("register builtins: %v", err) + } + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + now := time.Now().UTC() + ownerRole, _ := domain.NewRole("r-owner", "# Owner", now) + mustCreate(t, s.Roles.Create(ctx, ownerRole)) + rootPos, _ := domain.NewPosition("p-root", "r-owner", nil) + mustCreate(t, s.Positions.Create(ctx, rootPos)) + owner, _ := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, owner)) + for _, name := range []domain.ToolName{ + tools.CreateStreamName, + tools.SubscribeName, + tools.PublishName, + tools.ListWorkersName, + tools.ListStreamsName, + tools.ListStreamEventsName, + tools.ReadEventsName, + } { + g, _ := domain.NewToolGrant(domain.GrantID("g-owner-"+name), "w-owner", name) + mustCreate(t, s.Grants.Create(ctx, g)) + } + + ownerSession := connectMCP(t, srv.URL, "w-owner") + + // Reads work before any state change: list_workers should already see the owner. 
+ rawWorkers, err := invokeTool(t, ownerSession, tools.ListWorkersName, map[string]any{}) + if err != nil { + t.Fatalf("list_workers: %v", err) + } + var listWorkersOut struct { + Workers []struct { + ID string `json:"id"` + } `json:"workers"` + } + if err := json.Unmarshal(rawWorkers, &listWorkersOut); err != nil { + t.Fatalf("unmarshal list_workers: %v", err) + } + if len(listWorkersOut.Workers) != 1 || listWorkersOut.Workers[0].ID != "w-owner" { + t.Fatalf("list_workers = %+v, want [{w-owner}]", listWorkersOut.Workers) + } + + // Drive a small mutation through to populate read targets. + invokeExpectID(t, ownerSession, tools.CreateStreamName, map[string]any{ + "id": "s-news", + "name": "news", + }) + invokeOK(t, ownerSession, tools.SubscribeName, map[string]any{"streamId": "s-news"}) + invokeExpectID(t, ownerSession, tools.PublishName, map[string]any{ + "streamId": "s-news", + "body": "first event", + }) + + rawStreams, err := invokeTool(t, ownerSession, tools.ListStreamsName, map[string]any{}) + if err != nil { + t.Fatalf("list_streams: %v", err) + } + var listStreamsOut struct { + Streams []struct { + ID string `json:"id"` + } `json:"streams"` + } + if err := json.Unmarshal(rawStreams, &listStreamsOut); err != nil { + t.Fatalf("unmarshal list_streams: %v", err) + } + if len(listStreamsOut.Streams) != 1 || listStreamsOut.Streams[0].ID != "s-news" { + t.Fatalf("list_streams = %+v, want [{s-news}]", listStreamsOut.Streams) + } + + rawEvents, err := invokeTool(t, ownerSession, tools.ReadEventsName, map[string]any{}) + if err != nil { + t.Fatalf("read_events: %v", err) + } + var eventsOut struct { + Events []struct { + Body string `json:"body"` + } `json:"events"` + } + if err := json.Unmarshal(rawEvents, &eventsOut); err != nil { + t.Fatalf("unmarshal read_events: %v", err) + } + if len(eventsOut.Events) != 1 || eventsOut.Events[0].Body != "first event" { + t.Fatalf("read_events = %+v, want [{first event}]", eventsOut.Events) + } +} + +func membersOf(t *testing.T, 
session *mcp.ClientSession, streamID string) []string { + t.Helper() + raw, err := invokeTool(t, session, tools.StreamMembersName, map[string]any{"streamId": streamID}) + if err != nil { + t.Fatalf("stream_members %s: %v", streamID, err) + } + var out struct { + Members []string `json:"members"` + } + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal members: %v", err) + } + return out.Members +} + +// TestWorkerLog covers the worker_log shortcut: read a Worker's +// activation transcript by workerId without having to know the stream +// naming convention. The first call auto-subscribes the caller; later +// calls are pure reads. since/limit semantics mirror read_events. +func TestWorkerLog(t *testing.T) { + t.Parallel() + + s, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open: %v", err) + } + envsDir := t.TempDir() + + reg := tools.NewRegistry() + deps := tools.DefaultDeps(s) + deps.EnvsDir = envsDir + if err := tools.RegisterBuiltins(reg, deps); err != nil { + t.Fatalf("register builtins: %v", err) + } + srv := httptest.NewServer(server.New(s, reg, nil, nil, nil).Handler()) + t.Cleanup(srv.Close) + + ctx := context.Background() + now := time.Now().UTC() + + ownerRole, _ := domain.NewRole("r-owner", "# Owner", now) + mustCreate(t, s.Roles.Create(ctx, ownerRole)) + rootPos, _ := domain.NewPosition("p-root", "r-owner", nil) + mustCreate(t, s.Positions.Create(ctx, rootPos)) + owner, _ := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, owner)) + bot, _ := domain.NewAIWorker("w-bot", []domain.PositionID{"p-root"}, "") + mustCreate(t, s.Workers.Create(ctx, bot)) + + // Pre-create the activation stream + seed a couple of events. In + // production hire_worker creates the stream and the spawner + // publishes events; here we shortcut. 
+ streamID := domain.StreamID("s-activations-w-bot") + stream, _ := domain.NewStream(streamID, "Activations: w-bot", "", "w-owner", now, domain.Transport{}) + mustCreate(t, s.Streams.Create(ctx, stream)) + for i, body := range []string{"--- session start ---", "assistant: hello", "=== exit: ok ==="} { + ev, _ := domain.NewEvent( + domain.EventID(fmt.Sprintf("e-%d", i)), + streamID, + "w-bot", + body, + now.Add(time.Duration(i)*time.Second), + ) + mustCreate(t, s.Events.Append(ctx, ev)) + } + + for _, name := range []domain.ToolName{tools.WorkerLogName} { + g, _ := domain.NewToolGrant(domain.GrantID("g-owner-"+name), "w-owner", name) + mustCreate(t, s.Grants.Create(ctx, g)) + } + + ownerSession := connectMCP(t, srv.URL, "w-owner") + + // First call: returns events newest-first AND auto-subscribes owner. + raw, err := invokeTool(t, ownerSession, tools.WorkerLogName, map[string]any{ + "workerId": "w-bot", + }) + if err != nil { + t.Fatalf("worker_log: %v", err) + } + var out struct { + Events []struct { + ID string `json:"id"` + Body string `json:"body"` + } `json:"events"` + } + if err := json.Unmarshal(raw, &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if len(out.Events) != 3 { + t.Fatalf("events = %d, want 3", len(out.Events)) + } + // Newest first. + if out.Events[0].Body != "=== exit: ok ===" { + t.Fatalf("newest = %q, want exit marker", out.Events[0].Body) + } + if _, err := s.Subscriptions.Find(ctx, "w-owner", streamID); err != nil { + t.Fatalf("owner not subscribed after worker_log: %v", err) + } + + // since= filters out events at or before the given ID. Pass the + // middle event's ID; only the newer event ("exit") should remain. 
+ mid := out.Events[1].ID + raw, err = invokeTool(t, ownerSession, tools.WorkerLogName, map[string]any{ + "workerId": "w-bot", + "since": mid, + }) + if err != nil { + t.Fatalf("worker_log since: %v", err) + } + _ = json.Unmarshal(raw, &out) + if len(out.Events) != 1 || out.Events[0].Body != "=== exit: ok ===" { + t.Fatalf("since-filtered = %+v, want exit only", out.Events) + } + + // Unknown worker errors with a clear message. + if _, err := invokeTool(t, ownerSession, tools.WorkerLogName, map[string]any{ + "workerId": "w-ghost", + }); err == nil { + t.Fatalf("worker_log on unknown worker should error") + } + + // Human Worker has no activation stream — clear error, not a generic + // "stream not found". + if _, err := invokeTool(t, ownerSession, tools.WorkerLogName, map[string]any{ + "workerId": "w-owner", + }); err == nil { + t.Fatalf("worker_log on human worker should error") + } +} + +// Helpers + +func mustCreate(t *testing.T, err error) { + t.Helper() + if err != nil { + t.Fatalf("seed: %v", err) + } +} + +func connectMCP(t *testing.T, baseURL string, workerID domain.WorkerID) *mcp.ClientSession { + t.Helper() + c := mcp.NewClient(&mcp.Implementation{Name: "helix-org-test", Version: "v0.0.0"}, nil) + transport := &mcp.StreamableClientTransport{ + Endpoint: baseURL + "/workers/" + string(workerID) + "/mcp", + DisableStandaloneSSE: true, + } + session, err := c.Connect(context.Background(), transport, nil) + if err != nil { + t.Fatalf("mcp connect %s: %v", workerID, err) + } + t.Cleanup(func() { _ = session.Close() }) + return session +} + +func invokeTool(t *testing.T, session *mcp.ClientSession, toolName domain.ToolName, args map[string]any) (json.RawMessage, error) { + t.Helper() + res, err := session.CallTool(context.Background(), &mcp.CallToolParams{ + Name: string(toolName), + Arguments: args, + }) + if err != nil { + return nil, fmt.Errorf("call %s: %w", toolName, err) + } + if res.IsError { + var detail string + if len(res.Content) > 0 { + if tc, ok := 
res.Content[0].(*mcp.TextContent); ok { + detail = tc.Text + } + } + return nil, fmt.Errorf("%s: %s", toolName, detail) + } + if len(res.Content) == 0 { + return nil, fmt.Errorf("%s: empty content", toolName) + } + text, ok := res.Content[0].(*mcp.TextContent) + if !ok { + return nil, fmt.Errorf("%s: content[0] = %T, want *TextContent", toolName, res.Content[0]) + } + return json.RawMessage(text.Text), nil +} + +func invokeExpectID(t *testing.T, session *mcp.ClientSession, toolName domain.ToolName, args map[string]any) string { + t.Helper() + result, err := invokeTool(t, session, toolName, args) + if err != nil { + t.Fatalf("%s: %v", toolName, err) + } + var out struct { + ID string `json:"id"` + } + if err := json.Unmarshal(result, &out); err != nil { + t.Fatalf("unmarshal result: %v", err) + } + return out.ID +} + +// invokeOK is for tools that don't return an `id` (subscribe / unsubscribe). +func invokeOK(t *testing.T, session *mcp.ClientSession, toolName domain.ToolName, args map[string]any) { + t.Helper() + if _, err := invokeTool(t, session, toolName, args); err != nil { + t.Fatalf("%s: %v", toolName, err) + } +} diff --git a/helix-org/tools/create_position.go b/helix-org/tools/create_position.go new file mode 100644 index 0000000000..75e34d8876 --- /dev/null +++ b/helix-org/tools/create_position.go @@ -0,0 +1,66 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// CreatePosition instantiates a Role as a concrete slot in the org chart. 
+type CreatePosition struct { + deps Deps +} + +const CreatePositionName domain.ToolName = "create_position" + +var createPositionSchema = mustSchema[createPositionArgs]() + +func (t *CreatePosition) Name() domain.ToolName { return CreatePositionName } +func (t *CreatePosition) InputSchema() *jsonschema.Schema { return createPositionSchema } +func (t *CreatePosition) Description() string { + return "Instantiate a Role as a concrete slot in the org chart, optionally under a parent Position." +} + +type createPositionArgs struct { + ID string `json:"id,omitempty"` + RoleID string `json:"roleId"` + ParentID string `json:"parentId,omitempty"` +} + +func (t *CreatePosition) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args createPositionArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + + if _, err := t.deps.Store.Roles.Get(ctx, domain.RoleID(args.RoleID)); err != nil { + return nil, fmt.Errorf("role %q: %w", args.RoleID, err) + } + + var parent *domain.PositionID + if args.ParentID != "" { + p := domain.PositionID(args.ParentID) + if _, err := t.deps.Store.Positions.Get(ctx, p); err != nil { + return nil, fmt.Errorf("parent %q: %w", args.ParentID, err) + } + parent = &p + } + + id := domain.PositionID(args.ID) + if id == "" { + id = domain.PositionID("p-" + t.deps.NewID()) + } + + pos, err := domain.NewPosition(id, domain.RoleID(args.RoleID), parent) + if err != nil { + return nil, err + } + if err := t.deps.Store.Positions.Create(ctx, pos); err != nil { + return nil, err + } + return json.Marshal(map[string]string{"id": string(id)}) +} diff --git a/helix-org/tools/create_role.go b/helix-org/tools/create_role.go new file mode 100644 index 0000000000..af0fb79c5f --- /dev/null +++ b/helix-org/tools/create_role.go @@ -0,0 +1,53 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + 
"github.com/helixml/helix-org/domain" +) + +// CreateRole defines a new Role: an ID and the canonical markdown +// content that every Worker filling a Position with this Role will read +// at activation. Owner-only: holding the grant is the authorisation. +type CreateRole struct { + deps Deps +} + +const CreateRoleName domain.ToolName = "create_role" + +var createRoleSchema = mustSchema[createRoleArgs]() + +func (t *CreateRole) Name() domain.ToolName { return CreateRoleName } +func (t *CreateRole) InputSchema() *jsonschema.Schema { return createRoleSchema } +func (t *CreateRole) Description() string { + return "Define a new Role with markdown content. The content is what every Worker " + + "filling this Role reads on activation. Use update_role to amend it later." +} + +type createRoleArgs struct { + ID string `json:"id,omitempty"` + Content string `json:"content"` +} + +func (t *CreateRole) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args createRoleArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + id := domain.RoleID(args.ID) + if id == "" { + id = domain.RoleID("r-" + t.deps.NewID()) + } + role, err := domain.NewRole(id, args.Content, t.deps.Now()) + if err != nil { + return nil, err + } + if err := t.deps.Store.Roles.Create(ctx, role); err != nil { + return nil, err + } + return json.Marshal(map[string]string{"id": string(id)}) +} diff --git a/helix-org/tools/create_stream.go b/helix-org/tools/create_stream.go new file mode 100644 index 0000000000..06d0a738df --- /dev/null +++ b/helix-org/tools/create_stream.go @@ -0,0 +1,120 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// CreateStream creates a new named Stream. The caller becomes the +// creator. Stream names are unique across the org. 
// createStreamSchema is the input schema returned by CreateStream.InputSchema.
// It is built once at package initialisation: the schema derived from
// createStreamArgs is taken as-is, except that a "transport" property, when
// present, is swapped for a two-branch oneOf — an enum of transport kind
// strings, or the derived object shape.
var createStreamSchema = func() *jsonschema.Schema {
	s := mustSchema[createStreamArgs]()
	// transport accepts either the object form or a bare TransportKind
	// string shorthand. Replace the auto-derived object schema with a
	// oneOf so strict-schema MCP clients accept both shapes.
	if t, ok := s.Properties["transport"]; ok {
		object := *t // copy: object shape minus the union wrapper
		object.Type = "object"
		object.Types = nil // pointer field arrived as Types:["object","null"]; Type+Types together is a marshal error
		s.Properties["transport"] = &jsonschema.Schema{
			Description: "Transport for the new Stream. Either a bare string naming the kind (\"local\" / \"webhook\" / \"email\" / \"github\") or an object with kind and optional config.",
			OneOf: []*jsonschema.Schema{
				enumSchema(domain.TransportKindValues(), "Transport kind shorthand."),
				&object,
			},
		}
	}
	return s
}()
+} +func (t *CreateStream) InputSchema() *jsonschema.Schema { return createStreamSchema } + +type createStreamArgs struct { + ID string `json:"id,omitempty"` + Name string `json:"name"` + Description string `json:"description,omitempty"` + Transport *createStreamTransport `json:"transport,omitempty"` +} + +type createStreamTransport struct { + Kind domain.TransportKind `json:"kind"` + Config json.RawMessage `json:"config,omitempty"` +} + +// UnmarshalJSON accepts either the canonical object form +// (`{"kind":"webhook","config":{...}}`) or a bare string shorthand +// (`"webhook"`) that means `{"kind":"webhook"}`. Smaller chat +// models reliably collapse the object to its discriminator string +// once they've seen the `kind` enum on the schema; refusing the +// shorthand just makes them retry. Both shapes are unambiguous and +// mean the same thing. +func (t *createStreamTransport) UnmarshalJSON(data []byte) error { + if len(data) > 0 && data[0] == '"' { + var kind domain.TransportKind + if err := json.Unmarshal(data, &kind); err != nil { + return err + } + t.Kind = kind + t.Config = nil + return nil + } + type raw createStreamTransport + var r raw + if err := json.Unmarshal(data, &r); err != nil { + return err + } + *t = createStreamTransport(r) + return nil +} + +func (t *CreateStream) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args createStreamArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + id := domain.StreamID(args.ID) + if id == "" { + id = domain.StreamID("s-" + t.deps.NewID()) + } + transport := domain.Transport{} + if args.Transport != nil { + transport = domain.Transport{ + Kind: args.Transport.Kind, + Config: args.Transport.Config, + } + } + s, err := domain.NewStream(id, args.Name, args.Description, inv.Caller.ID(), t.deps.Now(), transport) + if err != nil { + return nil, err + } + if err := t.deps.Store.Streams.Create(ctx, s); err != nil { + 
return nil, err + } + return json.Marshal(map[string]string{"id": string(id)}) +} diff --git a/helix-org/tools/create_stream_test.go b/helix-org/tools/create_stream_test.go new file mode 100644 index 0000000000..00d63ad1da --- /dev/null +++ b/helix-org/tools/create_stream_test.go @@ -0,0 +1,104 @@ +package tools + +import ( + "encoding/json" + "testing" + + "github.com/helixml/helix-org/domain" +) + +// createStreamTransport accepts both the canonical object form and a +// bare string shorthand so smaller chat models that collapse the +// object to its discriminator string still get a working call. +func TestCreateStreamTransportUnmarshal(t *testing.T) { + t.Parallel() + cases := []struct { + name string + input string + want createStreamTransport + hasErr bool + }{ + { + name: "object form with kind only", + input: `{"kind":"webhook"}`, + want: createStreamTransport{Kind: domain.TransportWebhook}, + }, + { + name: "object form with kind and config", + input: `{"kind":"webhook","config":{"outbound_url":"http://x/in"}}`, + want: createStreamTransport{ + Kind: domain.TransportWebhook, + Config: json.RawMessage(`{"outbound_url":"http://x/in"}`), + }, + }, + { + name: "string shorthand webhook", + input: `"webhook"`, + want: createStreamTransport{Kind: domain.TransportWebhook}, + }, + { + name: "string shorthand local", + input: `"local"`, + want: createStreamTransport{Kind: domain.TransportLocal}, + }, + { + name: "malformed JSON", + input: `{not json`, + hasErr: true, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + var got createStreamTransport + err := json.Unmarshal([]byte(tc.input), &got) + if tc.hasErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unmarshal: %v", err) + } + if got.Kind != tc.want.Kind { + t.Errorf("Kind = %q, want %q", got.Kind, tc.want.Kind) + } + if string(got.Config) != string(tc.want.Config) { + t.Errorf("Config = %q, want %q", got.Config, 
tc.want.Config) + } + }) + } +} + +// The schema must declare both shapes so strict-validating MCP +// clients accept either input form. +func TestCreateStreamSchemaTransportOneOf(t *testing.T) { + t.Parallel() + tr, ok := createStreamSchema.Properties["transport"] + if !ok { + t.Fatal("schema is missing the transport property") + } + if len(tr.OneOf) != 2 { + t.Fatalf("transport.oneOf len = %d, want 2", len(tr.OneOf)) + } + // One branch must be the bare-string enum, the other the object. + var sawString, sawObject bool + for _, b := range tr.OneOf { + switch b.Type { + case "string": + sawString = true + if len(b.Enum) == 0 { + t.Errorf("string branch has no enum constraint") + } + case "object": + sawObject = true + if _, ok := b.Properties["kind"]; !ok { + t.Errorf("object branch missing kind property") + } + } + } + if !sawString || !sawObject { + t.Errorf("transport.oneOf must cover both string and object (got string=%v object=%v)", sawString, sawObject) + } +} diff --git a/helix-org/tools/dm.go b/helix-org/tools/dm.go new file mode 100644 index 0000000000..2a1c1992f8 --- /dev/null +++ b/helix-org/tools/dm.go @@ -0,0 +1,141 @@ +package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "sort" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// DM sends a direct message to a single other Worker. It bundles the +// underlying primitives — get-or-create a per-pair Stream, subscribe +// both parties, publish the body — into one Tool the agent can reach +// for from a "DM the fact-checker..." style instruction without having +// to chain four separate calls. +// +// The Stream ID is deterministic from the sorted (sender, recipient) +// pair, so subsequent DMs in either direction land on the same Stream +// and the back-and-forth stays ordered in one place. 
+type DM struct { + deps Deps +} + +const DMName domain.ToolName = "dm" + +var dmSchema = mustSchema[dmArgs]() + +func (t *DM) Name() domain.ToolName { return DMName } +func (t *DM) Description() string { + return "Send a direct message (DM/PM/private message) to a single other Worker. " + + "Reach for this whenever the user says to DM/message/ping a named colleague. " + + "Transparently creates a per-pair Stream the first time, subscribes both " + + "parties, and publishes the body; subsequent DMs to the same Worker reuse " + + "the same Stream so the conversation stays in one ordered place. Use " + + "list_workers first if you need to look up the recipient's ID. Returns the " + + "streamId — read_events on it to wait for a reply." +} +func (t *DM) InputSchema() *jsonschema.Schema { return dmSchema } + +type dmArgs struct { + ToWorkerID string `json:"toWorkerId"` + Body string `json:"body"` +} + +func (t *DM) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args dmArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.ToWorkerID == "" || args.Body == "" { + return nil, fmt.Errorf("toWorkerId and body are required") + } + sender := inv.Caller.ID() + recipient := domain.WorkerID(args.ToWorkerID) + if sender == recipient { + return nil, fmt.Errorf("cannot DM yourself") + } + if _, err := t.deps.Store.Workers.Get(ctx, recipient); err != nil { + return nil, fmt.Errorf("recipient %q: %w", recipient, err) + } + + streamID := dmStreamID(sender, recipient) + + // Get-or-create the per-pair Stream. Reuse it across DMs so the + // conversation stays ordered in one place. 
+ if _, err := t.deps.Store.Streams.Get(ctx, streamID); err != nil { + if !errors.Is(err, store.ErrNotFound) { + return nil, fmt.Errorf("lookup stream %q: %w", streamID, err) + } + name := fmt.Sprintf("dm: %s ↔ %s", sender, recipient) + s, err := domain.NewStream(streamID, name, "", sender, t.deps.Now(), domain.Transport{}) + if err != nil { + return nil, err + } + if err := t.deps.Store.Streams.Create(ctx, s); err != nil { + return nil, fmt.Errorf("create stream %q: %w", streamID, err) + } + } + + // Make sure both parties are subscribed (idempotent). The recipient + // might have unsubscribed since the last DM; re-subscribe them so + // the message actually reaches them. + for _, wid := range []domain.WorkerID{sender, recipient} { + if _, err := t.deps.Store.Subscriptions.Find(ctx, wid, streamID); err == nil { + continue + } else if !errors.Is(err, store.ErrNotFound) { + return nil, err + } + sub, err := domain.NewSubscription(wid, streamID, t.deps.Now()) + if err != nil { + return nil, err + } + if err := t.deps.Store.Subscriptions.Create(ctx, sub); err != nil { + return nil, err + } + } + + msg := domain.Message{ + From: string(sender), + To: []string{string(recipient)}, + Body: args.Body, + } + event, err := domain.NewMessageEvent( + domain.EventID("e-"+t.deps.NewID()), + streamID, + sender, + msg, + t.deps.Now(), + ) + if err != nil { + return nil, err + } + if err := t.deps.Store.Events.Append(ctx, event); err != nil { + return nil, err + } + if t.deps.Broadcaster != nil { + t.deps.Broadcaster.Notify(streamID) + } + if t.deps.Dispatcher != nil { + t.deps.Dispatcher.Dispatch(ctx, event) + } + + return json.Marshal(map[string]string{ + "id": string(event.ID), + "streamId": string(streamID), + "to": string(recipient), + }) +} + +// dmStreamID returns the deterministic Stream ID for a DM between two +// Workers, ordered by string compare so A→B and B→A share one Stream. 
+func dmStreamID(a, b domain.WorkerID) domain.StreamID { + pair := []string{string(a), string(b)} + sort.Strings(pair) + return domain.StreamID("s-dm-" + pair[0] + "-" + pair[1]) +} diff --git a/helix-org/tools/grant_tool.go b/helix-org/tools/grant_tool.go new file mode 100644 index 0000000000..8080651ace --- /dev/null +++ b/helix-org/tools/grant_tool.go @@ -0,0 +1,57 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// GrantTool creates a new ToolGrant for a Worker — boolean permission +// to call the named tool. Owner-only. Granularity comes from the +// design of tools; there is no per-grant scope. +type GrantTool struct { + deps Deps +} + +const GrantToolName domain.ToolName = "grant_tool" + +var grantToolSchema = mustSchema[grantToolArgs]() + +func (t *GrantTool) Name() domain.ToolName { return GrantToolName } +func (t *GrantTool) InputSchema() *jsonschema.Schema { return grantToolSchema } +func (t *GrantTool) Description() string { + return "Grant a tool to a Worker. The grant is a boolean permission — holding it lets the " + + "Worker call that tool however the tool's input schema allows." 
+} + +type grantToolArgs struct { + ID string `json:"id,omitempty"` + WorkerID string `json:"workerId"` + ToolName string `json:"toolName"` +} + +func (t *GrantTool) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args grantToolArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if _, err := t.deps.Store.Workers.Get(ctx, domain.WorkerID(args.WorkerID)); err != nil { + return nil, fmt.Errorf("worker %q: %w", args.WorkerID, err) + } + id := domain.GrantID(args.ID) + if id == "" { + id = domain.GrantID("g-" + t.deps.NewID()) + } + grant, err := domain.NewToolGrant(id, domain.WorkerID(args.WorkerID), domain.ToolName(args.ToolName)) + if err != nil { + return nil, err + } + if err := t.deps.Store.Grants.Create(ctx, grant); err != nil { + return nil, err + } + return json.Marshal(map[string]string{"id": string(id)}) +} diff --git a/helix-org/tools/hire_worker.go b/helix-org/tools/hire_worker.go new file mode 100644 index 0000000000..e2b4780aa1 --- /dev/null +++ b/helix-org/tools/hire_worker.go @@ -0,0 +1,207 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/domain" +) + +// HireWorker brings a Worker into existence: a Worker row carrying the +// per-hire IdentityContent, an Environment row pointing at +// //, any tool grants bundled inline, and — for +// AI Workers — a hire activation through the Dispatcher. +// +// State lives in the domain (DB), not on disk. role.md / identity.md / +// agent.md are projected into the Worker's Environment by the Spawner +// at activation time. This keeps every mutation a single DB write and +// lets the env layer evolve (local files today, remote workspaces +// tomorrow) without touching the tools. 
+// +// Grants are passed inline so the Worker is fully-authorised before the +// Spawner starts their process. Without this, claude would race the +// owner's follow-up grant_tool calls and hit 403s on its first action. +// Grants are data; the tool does not decide what to grant. The +// separate grant_tool tool stays for granting to Workers that already +// exist. +// +// hire_worker does not subscribe to Channels; the manager does that +// explicitly after the Worker is alive, typically via the Worker's own +// on-hire activation. +// +// For AI Workers, hire_worker also creates the per-Worker activation +// Stream (s-activations-) and subscribes the hiring Worker to +// it. The Spawner publishes one event per assistant message, tool call, +// and tool result to that Stream — the hiring Worker can audit their +// hires by calling read_events on it. The new Worker themselves is +// intentionally never subscribed to their own activation Stream +// (otherwise self-published events would re-trigger them indefinitely). +type HireWorker struct { + deps Deps +} + +const HireWorkerName domain.ToolName = "hire_worker" + +var hireWorkerSchema = mustSchema[hireWorkerArgs]() + +func (t *HireWorker) Name() domain.ToolName { return HireWorkerName } +func (t *HireWorker) InputSchema() *jsonschema.Schema { return hireWorkerSchema } +func (t *HireWorker) Description() string { + return "Hire a Worker into a Position. The Worker's identityContent (per-hire persona / " + + "profile) is stored in the domain alongside the Worker row; the spawner projects " + + "role and identity into the Environment at activation time. Optional `grants` are " + + "issued atomically with the hire so the Worker is authorised before the agent " + + "process boots.\n\n" + + "Always supply `id` as a short, real-sounding handle: a lowercase given name " + + "prefixed with `w-`, e.g. `w-mark`, `w-priya`, `w-jordan`. Pick a name that fits " + + "the Position and isn't already taken. 
Do NOT pass a UUID and do NOT omit `id` " + + "to let the server invent one — the auto-generated `w-` form is reserved as " + + "a last-resort fallback and is unpleasant to read in logs and UIs. If your first " + + "choice collides, try a variant (`w-mark-2`, `w-marko`) rather than falling back " + + "to a UUID." +} + +type hireWorkerGrant struct { + ToolName string `json:"toolName"` +} + +type hireWorkerArgs struct { + ID string `json:"id,omitempty"` + PositionID string `json:"positionId"` + Kind domain.WorkerKind `json:"kind"` + IdentityContent string `json:"identityContent"` + Grants []hireWorkerGrant `json:"grants,omitempty"` +} + +func (t *HireWorker) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args hireWorkerArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if err := args.Kind.Validate(); err != nil { + return nil, err + } + if args.IdentityContent == "" { + return nil, fmt.Errorf("identityContent is required") + } + if t.deps.EnvsDir == "" { + return nil, fmt.Errorf("server is not configured with an envs directory") + } + + pos, err := t.deps.Store.Positions.Get(ctx, domain.PositionID(args.PositionID)) + if err != nil { + return nil, fmt.Errorf("position %q: %w", args.PositionID, err) + } + + id := domain.WorkerID(args.ID) + if id == "" { + id = domain.WorkerID("w-" + t.deps.NewID()) + } + envPath := filepath.Join(t.deps.EnvsDir, string(id)) + + var worker domain.Worker + switch args.Kind { + case domain.WorkerKindHuman: + w, err := domain.NewHumanWorker(id, []domain.PositionID{pos.ID}, args.IdentityContent) + if err != nil { + return nil, err + } + worker = w + case domain.WorkerKindAI: + w, err := domain.NewAIWorker(id, []domain.PositionID{pos.ID}, args.IdentityContent) + if err != nil { + return nil, err + } + worker = w + default: + // Unreachable: Validate() above already rejected unknown kinds. 
+ return nil, args.Kind.Validate() + } + + // The env directory exists so it can be the Worker's cwd at + // activation; the spawner writes role.md / identity.md / agent.md + // into it just before exec'ing claude. Nothing on disk is the + // source of truth. + if err := os.MkdirAll(envPath, 0o750); err != nil { + return nil, fmt.Errorf("create env dir %q: %w", envPath, err) + } + + if err := t.deps.Store.Workers.Create(ctx, worker); err != nil { + return nil, err + } + + env, err := domain.NewEnvironment(id, envPath, t.deps.Now()) + if err != nil { + return nil, err + } + if err := t.deps.Store.Environments.Create(ctx, env); err != nil { + return nil, fmt.Errorf("create environment: %w", err) + } + + // Issue bundled grants before the Spawner runs. An AI Worker that + // comes up without its grants immediately fails on its first tool + // call. + for i, g := range args.Grants { + if g.ToolName == "" { + return nil, fmt.Errorf("grants[%d]: toolName is required", i) + } + grantID := domain.GrantID("g-" + t.deps.NewID()) + grant, err := domain.NewToolGrant(grantID, id, domain.ToolName(g.ToolName)) + if err != nil { + return nil, fmt.Errorf("grants[%d]: %w", i, err) + } + if err := t.deps.Store.Grants.Create(ctx, grant); err != nil { + return nil, fmt.Errorf("grants[%d] (%s): %w", i, g.ToolName, err) + } + } + + if args.Kind == domain.WorkerKindAI { + if err := createActivationStream(ctx, t.deps, id, inv.Caller.ID()); err != nil { + return nil, err + } + } + + if args.Kind == domain.WorkerKindAI && t.deps.Dispatcher != nil { + t.deps.Dispatcher.DispatchHire(ctx, id, envPath) + } + + return json.Marshal(map[string]string{"id": string(id)}) +} + +// createActivationStream creates the per-Worker activation Stream and +// subscribes the hiring Worker to it. The Stream ID is deterministic +// (s-activations-) so the Spawner can find it without an +// extra lookup. 
+func createActivationStream(ctx context.Context, deps Deps, workerID, hiringWorkerID domain.WorkerID) error { + streamID := agent.ActivationStreamID(workerID) + stream, err := domain.NewStream( + streamID, + "Activations: "+string(workerID), + "Per-message activation transcript for "+string(workerID)+ + " — assistant text, tool calls, tool results. "+ + "Read with read_events to audit a hire.", + hiringWorkerID, + deps.Now(), + domain.Transport{}, + ) + if err != nil { + return fmt.Errorf("activation stream: %w", err) + } + if err := deps.Store.Streams.Create(ctx, stream); err != nil { + return fmt.Errorf("create activation stream: %w", err) + } + sub, err := domain.NewSubscription(hiringWorkerID, streamID, deps.Now()) + if err != nil { + return fmt.Errorf("activation subscription: %w", err) + } + if err := deps.Store.Subscriptions.Create(ctx, sub); err != nil { + return fmt.Errorf("subscribe %q to activation stream: %w", hiringWorkerID, err) + } + return nil +} diff --git a/helix-org/tools/invite_workers.go b/helix-org/tools/invite_workers.go new file mode 100644 index 0000000000..569a30d895 --- /dev/null +++ b/helix-org/tools/invite_workers.go @@ -0,0 +1,94 @@ +package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// InviteWorkers subscribes one or more other Workers to a Stream. The +// counterpart to subscribe (which is self-only) — used to add others to +// a stream you've created, e.g. opening a DM by creating a stream and +// inviting both parties to it. +type InviteWorkers struct { + deps Deps +} + +const InviteWorkersName domain.ToolName = "invite_workers" + +var inviteWorkersSchema = mustSchema[inviteWorkersArgs]() + +func (t *InviteWorkers) Name() domain.ToolName { return InviteWorkersName } +func (t *InviteWorkers) Description() string { + return "Subscribe one or more Workers to a Stream. 
Use this to add others " + + "to a stream you control — e.g. opening a DM by creating a stream and " + + "inviting both parties, or pulling a colleague into an existing thread. " + + "Idempotent per worker: anyone already subscribed is a no-op." +} +func (t *InviteWorkers) InputSchema() *jsonschema.Schema { return inviteWorkersSchema } + +type inviteWorkersArgs struct { + StreamID string `json:"streamId"` + WorkerIDs []string `json:"workerIds"` +} + +func (t *InviteWorkers) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args inviteWorkersArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.StreamID == "" { + return nil, fmt.Errorf("streamId is required") + } + if len(args.WorkerIDs) == 0 { + return nil, fmt.Errorf("workerIds must contain at least one worker") + } + streamID := domain.StreamID(args.StreamID) + if _, err := t.deps.Store.Streams.Get(ctx, streamID); err != nil { + return nil, fmt.Errorf("stream %q: %w", streamID, err) + } + + // Validate all targets up-front so a typo in one ID doesn't leave + // the others half-subscribed. 
+ workerIDs := make([]domain.WorkerID, 0, len(args.WorkerIDs)) + for _, raw := range args.WorkerIDs { + if raw == "" { + return nil, fmt.Errorf("workerIds contains an empty entry") + } + wid := domain.WorkerID(raw) + if _, err := t.deps.Store.Workers.Get(ctx, wid); err != nil { + return nil, fmt.Errorf("worker %q: %w", wid, err) + } + workerIDs = append(workerIDs, wid) + } + + for _, wid := range workerIDs { + if _, err := t.deps.Store.Subscriptions.Find(ctx, wid, streamID); err == nil { + continue + } else if !errors.Is(err, store.ErrNotFound) { + return nil, err + } + sub, err := domain.NewSubscription(wid, streamID, t.deps.Now()) + if err != nil { + return nil, err + } + if err := t.deps.Store.Subscriptions.Create(ctx, sub); err != nil { + return nil, err + } + } + + workerIDStrings := make([]string, len(workerIDs)) + for i, wid := range workerIDs { + workerIDStrings[i] = string(wid) + } + return json.Marshal(map[string]any{ + "streamId": string(streamID), + "workerIds": workerIDStrings, + }) +} diff --git a/helix-org/tools/ping.go b/helix-org/tools/ping.go new file mode 100644 index 0000000000..156a587aab --- /dev/null +++ b/helix-org/tools/ping.go @@ -0,0 +1,42 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// Ping is a trivial built-in tool used to exercise the invocation pipeline. +// It echoes the args back. Not part of the structural tool set. +type Ping struct{} + +const PingName domain.ToolName = "ping" + +type pingArgs struct { + Message string `json:"message,omitempty"` +} + +var pingSchema = mustSchema[pingArgs]() + +func (Ping) Name() domain.ToolName { return PingName } +func (Ping) Description() string { return "Echo a message back. Used to exercise the tool pipeline." 
} +func (Ping) InputSchema() *jsonschema.Schema { return pingSchema } + +func (Ping) Invoke(_ context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args pingArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + out, err := json.Marshal(map[string]string{ + "echo": args.Message, + "caller": string(inv.Caller.ID()), + }) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return out, nil +} diff --git a/helix-org/tools/publish.go b/helix-org/tools/publish.go new file mode 100644 index 0000000000..91948bbc8f --- /dev/null +++ b/helix-org/tools/publish.go @@ -0,0 +1,107 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// Publish appends an Event to a named Stream, attributed to the caller. +// It does exactly one thing: append an event to an existing Stream. It +// does not create Streams or manage subscriptions; for the common +// "direct message a Worker" case, see the dm tool, which bundles +// create-stream + subscribe-both + publish into a single call. +// +// All events are stored as canonical Message JSON (see domain.Message). +// The minimal call form — streamId + body — yields a Message with +// From=caller and Body=body. Optional fields (to, subject, threadId, +// inReplyTo, messageId, bodyContentType, attachments) let the caller +// publish a richer envelope when threading or recipients matter. +type Publish struct { + deps Deps +} + +const PublishName domain.ToolName = "publish" + +var publishSchema = mustSchema[publishArgs]() + +func (t *Publish) Name() domain.ToolName { return PublishName } +func (t *Publish) Description() string { + return "Append an Event with the given body to a Stream. Wakes long-poll observers and " + + "activates every subscribed AI Worker. 
Optional fields (to, subject, threadId, " + + "inReplyTo, messageId, attachments) carry threading and recipient metadata for " + + "messaging streams; omit them for plain text publishes." +} +func (t *Publish) InputSchema() *jsonschema.Schema { return publishSchema } + +type publishArgs struct { + StreamID string `json:"streamId"` + Body string `json:"body"` + To []string `json:"to,omitempty"` + Subject string `json:"subject,omitempty"` + BodyContentType string `json:"bodyContentType,omitempty"` + ThreadID string `json:"threadId,omitempty"` + InReplyTo string `json:"inReplyTo,omitempty"` + MessageID string `json:"messageId,omitempty"` + Attachments []domain.Attachment `json:"attachments,omitempty"` +} + +func (t *Publish) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args publishArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.StreamID == "" || args.Body == "" { + return nil, fmt.Errorf("streamId and body are required") + } + streamID := domain.StreamID(args.StreamID) + stream, err := t.deps.Store.Streams.Get(ctx, streamID) + if err != nil { + return nil, fmt.Errorf("stream %q: %w", streamID, err) + } + // GitHub streams are inbound-only. Acting on a repo (label, + // comment, review, open PR) is the Worker's job via `gh` in its + // Environment — wrapping each github action behind publish would + // reinvent the gh CLI's flag set with worse ergonomics. Surface + // the mistake loudly rather than silently no-op'ing. 
+ if stream.Transport.Kind == domain.TransportGitHub { + return nil, fmt.Errorf("stream %q: publish is not supported on github transport streams; use `gh` from your Environment to act on the repo", streamID) + } + msg := domain.Message{ + From: string(inv.Caller.ID()), + To: args.To, + Subject: args.Subject, + Body: args.Body, + BodyContentType: args.BodyContentType, + ThreadID: args.ThreadID, + InReplyTo: args.InReplyTo, + MessageID: args.MessageID, + Attachments: args.Attachments, + } + event, err := domain.NewMessageEvent( + domain.EventID("e-"+t.deps.NewID()), + streamID, + inv.Caller.ID(), + msg, + t.deps.Now(), + ) + if err != nil { + return nil, err + } + if err := t.deps.Store.Events.Append(ctx, event); err != nil { + return nil, err + } + // Wake long-poll observers (read_events with wait>0). + if t.deps.Broadcaster != nil { + t.deps.Broadcaster.Notify(streamID) + } + // Activate every subscribed AI Worker. Background; returns immediately. + if t.deps.Dispatcher != nil { + t.deps.Dispatcher.Dispatch(ctx, event) + } + return json.Marshal(map[string]string{"id": string(event.ID), "streamId": string(streamID)}) +} diff --git a/helix-org/tools/publish_test.go b/helix-org/tools/publish_test.go new file mode 100644 index 0000000000..04e772b0b3 --- /dev/null +++ b/helix-org/tools/publish_test.go @@ -0,0 +1,105 @@ +package tools + +import ( + "context" + "encoding/json" + "strings" + "testing" + "time" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store/sqlite" +) + +// TestPublishRejectsGitHubStream: publishing to a github transport +// stream is rejected with an explanatory error rather than a silent +// no-op. GitHub streams are inbound-only; outbound action lives in +// the Worker's `gh`. See design/github-transport.md. 
+func TestPublishRejectsGitHubStream(t *testing.T) { + t.Parallel() + + st, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open store: %v", err) + } + ctx := context.Background() + + // Seed a github-transport Stream and a caller Worker. + cfg, _ := json.Marshal(map[string]any{ + "repo": "helixml/helix-org", + "events": []string{"issues"}, + }) + stream, err := domain.NewStream("s-github", "s-github", "", "w-owner", + time.Now().UTC(), + domain.Transport{Kind: domain.TransportGitHub, Config: cfg}) + if err != nil { + t.Fatalf("new stream: %v", err) + } + if err := st.Streams.Create(ctx, stream); err != nil { + t.Fatalf("create stream: %v", err) + } + caller, _ := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "") + + deps := DefaultDeps(st) + tool := &Publish{deps: deps} + + args, _ := json.Marshal(map[string]any{ + "streamId": "s-github", + "body": "this should be rejected", + }) + + _, err = tool.Invoke(ctx, domain.Invocation{Caller: caller, Args: args}) + if err == nil { + t.Fatalf("Invoke = nil, want error rejecting github publish") + } + if !strings.Contains(err.Error(), "github") { + t.Fatalf("err = %v, want error mentioning github", err) + } + if !strings.Contains(err.Error(), "gh") { + t.Fatalf("err = %v, want error pointing user at `gh`", err) + } + + // And no event was appended. + events, _ := st.Events.ListForStream(ctx, "s-github", 10) + if len(events) != 0 { + t.Fatalf("events = %d, want 0 (publish must not append on rejection)", len(events)) + } +} + +// TestPublishLocalStreamStillWorks: the rejection above must not +// regress publish to local streams. 
+func TestPublishLocalStreamStillWorks(t *testing.T) {
+	t.Parallel()
+
+	st, err := sqlite.Open(":memory:")
+	if err != nil {
+		t.Fatalf("open store: %v", err)
+	}
+	ctx := context.Background()
+
+	stream, err := domain.NewStream("s-general", "s-general", "", "w-owner",
+		time.Now().UTC(), domain.LocalTransport())
+	if err != nil {
+		t.Fatalf("new stream: %v", err)
+	}
+	if err := st.Streams.Create(ctx, stream); err != nil {
+		t.Fatalf("create stream: %v", err)
+	}
+	// Fixture errors are fatal: a nil caller or empty args would make
+	// Invoke fail for reasons unrelated to the transport check.
+	caller, err := domain.NewHumanWorker("w-owner", []domain.PositionID{"p-root"}, "")
+	if err != nil {
+		t.Fatalf("new worker: %v", err)
+	}
+
+	deps := DefaultDeps(st)
+	tool := &Publish{deps: deps}
+
+	args, err := json.Marshal(map[string]any{
+		"streamId": "s-general",
+		"body":     "hello",
+	})
+	if err != nil {
+		t.Fatalf("marshal args: %v", err)
+	}
+	if _, err := tool.Invoke(ctx, domain.Invocation{Caller: caller, Args: args}); err != nil {
+		t.Fatalf("Invoke = %v, want nil for local stream", err)
+	}
+
+	// Report a failed query as such instead of as "events = 0, want 1".
+	events, err := st.Events.ListForStream(ctx, "s-general", 10)
+	if err != nil {
+		t.Fatalf("list events: %v", err)
+	}
+	if len(events) != 1 {
+		t.Fatalf("events = %d, want 1", len(events))
+	}
+}
diff --git a/helix-org/tools/read_events.go b/helix-org/tools/read_events.go
new file mode 100644
index 0000000000..901ee84152
--- /dev/null
+++ b/helix-org/tools/read_events.go
@@ -0,0 +1,165 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// Defaults and caps for read_events pagination and long-polling.
+const (
+	readEventsDefaultLimit = 50
+	readEventsMaxLimit     = 200
+	readEventsMaxWaitSecs  = 60
+)
+
+// eventView is the on-the-wire shape returned by read_events /
+// worker_log. Body is the visible text — for messaging events, the
+// parsed Message.Body; for legacy events that fail to parse, the raw
+// stored Body. Message carries the full canonical envelope when it
+// parses cleanly, letting Roles inspect From/To/threading without
+// re-parsing.
+type eventView struct {
+	ID        domain.EventID  `json:"id"`
+	StreamID  domain.StreamID `json:"streamId"`
+	Source    domain.WorkerID `json:"source"`
+	Body      string          `json:"body"`
+	Message   *domain.Message `json:"message,omitempty"`
+	CreatedAt time.Time       `json:"createdAt"`
+}
+
+// eventViewOf converts a stored Event into its wire view. Body starts as
+// the raw stored body; when e.Message() parses without error, Body is
+// replaced by the message body and Message points at the parsed envelope.
+func eventViewOf(e domain.Event) eventView {
+	view := eventView{
+		ID:        e.ID,
+		StreamID:  e.StreamID,
+		Source:    e.Source,
+		Body:      e.Body,
+		CreatedAt: e.CreatedAt,
+	}
+	if msg, err := e.Message(); err == nil {
+		view.Body = msg.Body
+		view.Message = &msg
+	}
+	return view
+}
+
+// ReadEvents returns the events on the Streams the calling Worker
+// subscribes to, newest-first. With wait>0, blocks up to that many
+// seconds for new events on any subscribed Stream.
+type ReadEvents struct {
+	deps Deps
+}
+
+// ReadEventsName is the name the read_events tool is registered under.
+const ReadEventsName domain.ToolName = "read_events"
+
+var readEventsSchema = mustSchema[readEventsArgs]()
+
+// readEventsArgs is the JSON argument payload of read_events. All fields
+// are optional; Invoke clamps Limit and Wait to the package constants.
+type readEventsArgs struct {
+	Limit int    `json:"limit,omitempty"`
+	Since string `json:"since,omitempty"`
+	Wait  int    `json:"wait,omitempty"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ReadEvents) Name() domain.ToolName           { return ReadEventsName }
+func (t *ReadEvents) InputSchema() *jsonschema.Schema { return readEventsSchema }
+func (t *ReadEvents) Description() string {
+	return "Read events on the Streams you subscribe to, newest first. Pass since= " +
+		"to skip everything up to and including a previously-seen event. Pass wait= " +
+		"(0..60) to block for new events when nothing is currently waiting after applying " +
+		"`since`. limit defaults to 50, capped at 200."
+}
+
+// Invoke returns the caller's fresh events as {"events": [...]}.
+//
+// limit <= 0 falls back to readEventsDefaultLimit and is capped at
+// readEventsMaxLimit; wait is clamped to 0..readEventsMaxWaitSecs. The
+// call returns immediately when events are already available, when wait
+// is 0, or when no Broadcaster is configured. Otherwise it blocks until
+// a subscribed Stream is notified, the wait elapses, or ctx is done.
+// Cancellation yields an empty event list rather than an error.
+func (t *ReadEvents) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args readEventsArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	limit := args.Limit
+	if limit <= 0 {
+		limit = readEventsDefaultLimit
+	}
+	if limit > readEventsMaxLimit {
+		limit = readEventsMaxLimit
+	}
+	wait := args.Wait
+	if wait < 0 {
+		wait = 0
+	}
+	if wait > readEventsMaxWaitSecs {
+		wait = readEventsMaxWaitSecs
+	}
+	since := domain.EventID(args.Since)
+	workerID := inv.Caller.ID()
+
+	fresh, err := t.fresh(ctx, workerID, limit, since)
+	if err != nil {
+		return nil, err
+	}
+	if len(fresh) > 0 || wait == 0 || t.deps.Broadcaster == nil {
+		return marshalEvents(fresh), nil
+	}
+
+	subs, err := t.deps.Store.Subscriptions.ListForWorker(ctx, workerID)
+	if err != nil {
+		return nil, fmt.Errorf("list subscriptions for %q: %w", workerID, err)
+	}
+	streamIDs := make([]domain.StreamID, 0, len(subs))
+	for _, sub := range subs {
+		streamIDs = append(streamIDs, sub.StreamID)
+	}
+	wake := t.deps.Broadcaster.Subscribe(streamIDs)
+	defer t.deps.Broadcaster.Unsubscribe(streamIDs, wake)
+
+	// Re-check now that we are registered. An event appended between the
+	// first query and Subscribe was notified before we were listening;
+	// without this second look the caller would sit out the full wait
+	// even though an event is already stored.
+	fresh, err = t.fresh(ctx, workerID, limit, since)
+	if err != nil {
+		return nil, err
+	}
+	if len(fresh) > 0 {
+		return marshalEvents(fresh), nil
+	}
+
+	timer := time.NewTimer(time.Duration(wait) * time.Second)
+	defer timer.Stop()
+
+	select {
+	case <-wake:
+	case <-timer.C:
+	case <-ctx.Done():
+		// The caller went away: answer with an empty list, not an error.
+		return marshalEvents(nil), nil
+	}
+
+	// Woken or timed out: report whatever is there now (possibly nothing).
+	fresh, err = t.fresh(ctx, workerID, limit, since)
+	if err != nil {
+		return nil, err
+	}
+	return marshalEvents(fresh), nil
+}
+
+// fresh returns events newer than `since` (exclusive), newest-first, up
+// to `limit`. An empty `since` means "return everything".
+func (t *ReadEvents) fresh(ctx context.Context, workerID domain.WorkerID, limit int, since domain.EventID) ([]domain.Event, error) {
+	events, err := t.deps.Store.Events.ListForWorker(ctx, workerID, limit)
+	if err != nil {
+		return nil, fmt.Errorf("list events for %q: %w", workerID, err)
+	}
+	if since == "" {
+		return events, nil
+	}
+	// Cut the list just before the `since` event: everything ahead of it
+	// in the returned order is kept, `since` and what follows are dropped.
+	for i, e := range events {
+		if e.ID == since {
+			return events[:i], nil
+		}
+	}
+	// `since` is not among the fetched events: return the whole window.
+	return events, nil
+}
+
+// marshalEvents encodes events as {"events": [...]} using eventViewOf.
+// A nil or empty input produces an empty JSON array, not null. It panics
+// if encoding fails.
+func marshalEvents(events []domain.Event) json.RawMessage {
+	out := make([]eventView, 0, len(events))
+	for _, e := range events {
+		out = append(out, eventViewOf(e))
+	}
+	body, err := json.Marshal(map[string]any{"events": out})
+	if err != nil {
+		// All inputs are simple structs of primitives; a marshal failure
+		// here is a programming error, not a runtime condition.
+		panic(fmt.Sprintf("marshal events: %v", err))
+	}
+	return body
+}
diff --git a/helix-org/tools/read_grants.go b/helix-org/tools/read_grants.go
new file mode 100644
index 0000000000..2b70f73c17
--- /dev/null
+++ b/helix-org/tools/read_grants.go
@@ -0,0 +1,55 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// grantView is the JSON shape this package returns for a ToolGrant.
+type grantView struct {
+	ID       domain.GrantID  `json:"id"`
+	WorkerID domain.WorkerID `json:"workerId"`
+	ToolName domain.ToolName `json:"toolName"`
+}
+
+// grantViewOf copies the ID, WorkerID and ToolName of g into a grantView.
+func grantViewOf(g domain.ToolGrant) grantView {
+	return grantView{ID: g.ID, WorkerID: g.WorkerID, ToolName: g.ToolName}
+}
+
+// GetGrant returns one ToolGrant by ID.
+type GetGrant struct {
+	deps Deps
+}
+
+// GetGrantName is the name the get_grant tool is registered under.
+const GetGrantName domain.ToolName = "get_grant"
+
+var getGrantSchema = mustSchema[getGrantArgs]()
+
+// getGrantArgs is the JSON argument payload of get_grant.
+type getGrantArgs struct {
+	ID string `json:"id"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *GetGrant) Name() domain.ToolName           { return GetGrantName }
+func (t *GetGrant) InputSchema() *jsonschema.Schema { return getGrantSchema }
+func (t *GetGrant) Description() string {
+	return "Fetch one ToolGrant by id."
+}
+
+// Invoke parses the args, rejects an empty id, loads the grant from the
+// store and returns it encoded as a grantView.
+func (t *GetGrant) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args getGrantArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ID == "" {
+		return nil, fmt.Errorf("id is required")
+	}
+	g, err := t.deps.Store.Grants.Get(ctx, domain.GrantID(args.ID))
+	if err != nil {
+		return nil, fmt.Errorf("get grant %q: %w", args.ID, err)
+	}
+	return json.Marshal(grantViewOf(g))
+}
diff --git a/helix-org/tools/read_positions.go b/helix-org/tools/read_positions.go
new file mode 100644
index 0000000000..3e24b9f646
--- /dev/null
+++ b/helix-org/tools/read_positions.go
@@ -0,0 +1,123 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// positionView is the JSON shape this package returns for a Position.
+// ParentID is a pointer and has no omitempty, so a nil parent is encoded
+// as JSON null.
+type positionView struct {
+	ID       domain.PositionID  `json:"id"`
+	RoleID   domain.RoleID      `json:"roleId"`
+	ParentID *domain.PositionID `json:"parentId"`
+}
+
+// positionViewOf copies the ID, RoleID and ParentID of p into a positionView.
+func positionViewOf(p domain.Position) positionView {
+	return positionView{ID: p.ID, RoleID: p.RoleID, ParentID: p.ParentID}
+}
+
+// ListPositions returns every Position in the org chart.
+type ListPositions struct {
+	deps Deps
+}
+
+// ListPositionsName is the name the list_positions tool is registered under.
+const ListPositionsName domain.ToolName = "list_positions"
+
+var listPositionsSchema = mustSchema[listPositionsArgs]()
+
+// listPositionsArgs is empty: list_positions takes no arguments.
+type listPositionsArgs struct{}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ListPositions) Name() domain.ToolName           { return ListPositionsName }
+func (t *ListPositions) InputSchema() *jsonschema.Schema { return listPositionsSchema }
+func (t *ListPositions) Description() string {
+	return "List every Position: id, the Role it instantiates, and its parent. Use this to " +
+		"navigate the org chart."
+}
+
+// Invoke ignores the Invocation and returns every stored Position as
+// {"positions": [...]}; an empty store yields an empty array.
+func (t *ListPositions) Invoke(ctx context.Context, _ domain.Invocation) (json.RawMessage, error) {
+	positions, err := t.deps.Store.Positions.List(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("list positions: %w", err)
+	}
+	out := make([]positionView, 0, len(positions))
+	for _, p := range positions {
+		out = append(out, positionViewOf(p))
+	}
+	return json.Marshal(map[string]any{"positions": out})
+}
+
+// GetPosition returns one Position by ID.
+type GetPosition struct {
+	deps Deps
+}
+
+// GetPositionName is the name the get_position tool is registered under.
+const GetPositionName domain.ToolName = "get_position"
+
+var getPositionSchema = mustSchema[getPositionArgs]()
+
+// getPositionArgs is the JSON argument payload of get_position.
+type getPositionArgs struct {
+	ID string `json:"id"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *GetPosition) Name() domain.ToolName           { return GetPositionName }
+func (t *GetPosition) InputSchema() *jsonschema.Schema { return getPositionSchema }
+func (t *GetPosition) Description() string {
+	return "Fetch one Position by id."
+}
+
+// Invoke parses the args, rejects an empty id, loads the Position from
+// the store and returns it encoded as a positionView.
+func (t *GetPosition) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args getPositionArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ID == "" {
+		return nil, fmt.Errorf("id is required")
+	}
+	pos, err := t.deps.Store.Positions.Get(ctx, domain.PositionID(args.ID))
+	if err != nil {
+		return nil, fmt.Errorf("get position %q: %w", args.ID, err)
+	}
+	return json.Marshal(positionViewOf(pos))
+}
+
+// ListPositionChildren returns every direct subordinate of a Position.
+type ListPositionChildren struct {
+	deps Deps
+}
+
+// ListPositionChildrenName is the name the list_position_children tool
+// is registered under.
+const ListPositionChildrenName domain.ToolName = "list_position_children"
+
+var listPositionChildrenSchema = mustSchema[listPositionChildrenArgs]()
+
+// listPositionChildrenArgs is the JSON argument payload of
+// list_position_children.
+type listPositionChildrenArgs struct {
+	ParentID string `json:"parentId"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ListPositionChildren) Name() domain.ToolName           { return ListPositionChildrenName }
+func (t *ListPositionChildren) InputSchema() *jsonschema.Schema { return listPositionChildrenSchema }
+func (t *ListPositionChildren) Description() string {
+	return "List the direct children of a Position — the slots that report into it."
+}
+
+// Invoke parses the args, rejects an empty parentId, and returns the
+// store's children of that Position as {"positions": [...]}.
+func (t *ListPositionChildren) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args listPositionChildrenArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ParentID == "" {
+		return nil, fmt.Errorf("parentId is required")
+	}
+	positions, err := t.deps.Store.Positions.ListChildren(ctx, domain.PositionID(args.ParentID))
+	if err != nil {
+		return nil, fmt.Errorf("list children of %q: %w", args.ParentID, err)
+	}
+	out := make([]positionView, 0, len(positions))
+	for _, p := range positions {
+		out = append(out, positionViewOf(p))
+	}
+	return json.Marshal(map[string]any{"positions": out})
+}
diff --git a/helix-org/tools/read_roles.go b/helix-org/tools/read_roles.go
new file mode 100644
index 0000000000..be10aed56e
--- /dev/null
+++ b/helix-org/tools/read_roles.go
@@ -0,0 +1,87 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// roleView is the JSON shape this package returns for a Role.
+type roleView struct {
+	ID        domain.RoleID `json:"id"`
+	Content   string        `json:"content"`
+	CreatedAt time.Time     `json:"createdAt"`
+	UpdatedAt time.Time     `json:"updatedAt"`
+}
+
+// roleViewOf copies the ID, Content and timestamps of r into a roleView.
+func roleViewOf(r domain.Role) roleView {
+	return roleView{ID: r.ID, Content: r.Content, CreatedAt: r.CreatedAt, UpdatedAt: r.UpdatedAt}
+}
+
+// ListRoles returns every Role in the org.
+type ListRoles struct {
+	deps Deps
+}
+
+// ListRolesName is the name the list_roles tool is registered under.
+const ListRolesName domain.ToolName = "list_roles"
+
+var listRolesSchema = mustSchema[listRolesArgs]()
+
+// listRolesArgs is empty: list_roles takes no arguments.
+type listRolesArgs struct{}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ListRoles) Name() domain.ToolName           { return ListRolesName }
+func (t *ListRoles) InputSchema() *jsonschema.Schema { return listRolesSchema }
+func (t *ListRoles) Description() string {
+	return "List every Role: id, markdown content, and timestamps. Use this to discover what " +
+		"roles exist before creating a Position."
+}
+
+// Invoke ignores the Invocation and returns every stored Role as
+// {"roles": [...]}; an empty store yields an empty array.
+func (t *ListRoles) Invoke(ctx context.Context, _ domain.Invocation) (json.RawMessage, error) {
+	roles, err := t.deps.Store.Roles.List(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("list roles: %w", err)
+	}
+	out := make([]roleView, 0, len(roles))
+	for _, r := range roles {
+		out = append(out, roleViewOf(r))
+	}
+	return json.Marshal(map[string]any{"roles": out})
+}
+
+// GetRole returns one Role by ID.
+type GetRole struct {
+	deps Deps
+}
+
+// GetRoleName is the name the get_role tool is registered under.
+const GetRoleName domain.ToolName = "get_role"
+
+var getRoleSchema = mustSchema[getRoleArgs]()
+
+// getRoleArgs is the JSON argument payload of get_role.
+type getRoleArgs struct {
+	ID string `json:"id"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *GetRole) Name() domain.ToolName           { return GetRoleName }
+func (t *GetRole) InputSchema() *jsonschema.Schema { return getRoleSchema }
+func (t *GetRole) Description() string {
+	return "Fetch one Role by id and return its current markdown content."
+}
+
+// Invoke parses the args, rejects an empty id, loads the Role from the
+// store and returns it encoded as a roleView.
+func (t *GetRole) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args getRoleArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ID == "" {
+		return nil, fmt.Errorf("id is required")
+	}
+	role, err := t.deps.Store.Roles.Get(ctx, domain.RoleID(args.ID))
+	if err != nil {
+		return nil, fmt.Errorf("get role %q: %w", args.ID, err)
+	}
+	return json.Marshal(roleViewOf(role))
+}
diff --git a/helix-org/tools/read_streams.go b/helix-org/tools/read_streams.go
new file mode 100644
index 0000000000..2beb31731c
--- /dev/null
+++ b/helix-org/tools/read_streams.go
@@ -0,0 +1,152 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// streamView is the JSON shape this package returns for a Stream. Only
+// the transport's kind is exposed; the transport Config is not included.
+type streamView struct {
+	ID            domain.StreamID `json:"id"`
+	Name          string          `json:"name"`
+	Description   string          `json:"description"`
+	CreatedBy     domain.WorkerID `json:"createdBy"`
+	CreatedAt     time.Time       `json:"createdAt"`
+	TransportKind string          `json:"transportKind"`
+}
+
+// streamViewOf copies the descriptive fields of s into a streamView and
+// renders the transport kind as a plain string.
+func streamViewOf(s domain.Stream) streamView {
+	return streamView{
+		ID:            s.ID,
+		Name:          s.Name,
+		Description:   s.Description,
+		CreatedBy:     s.CreatedBy,
+		CreatedAt:     s.CreatedAt,
+		TransportKind: string(s.Transport.Kind),
+	}
+}
+
+// ListStreams returns every Stream.
+type ListStreams struct {
+	deps Deps
+}
+
+// ListStreamsName is the name the list_streams tool is registered under.
+const ListStreamsName domain.ToolName = "list_streams"
+
+var listStreamsSchema = mustSchema[listStreamsArgs]()
+
+// listStreamsArgs is empty: list_streams takes no arguments.
+type listStreamsArgs struct{}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ListStreams) Name() domain.ToolName           { return ListStreamsName }
+func (t *ListStreams) InputSchema() *jsonschema.Schema { return listStreamsSchema }
+func (t *ListStreams) Description() string {
+	return "List every Stream: id, name, description, creator, transport kind, and created-at."
+}
+
+// Invoke ignores the Invocation and returns every stored Stream as
+// {"streams": [...]}; an empty store yields an empty array.
+func (t *ListStreams) Invoke(ctx context.Context, _ domain.Invocation) (json.RawMessage, error) {
+	streams, err := t.deps.Store.Streams.List(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("list streams: %w", err)
+	}
+	out := make([]streamView, 0, len(streams))
+	for _, s := range streams {
+		out = append(out, streamViewOf(s))
+	}
+	return json.Marshal(map[string]any{"streams": out})
+}
+
+// GetStream returns one Stream by ID.
+type GetStream struct {
+	deps Deps
+}
+
+// GetStreamName is the name the get_stream tool is registered under.
+const GetStreamName domain.ToolName = "get_stream"
+
+var getStreamSchema = mustSchema[getStreamArgs]()
+
+// getStreamArgs is the JSON argument payload of get_stream.
+type getStreamArgs struct {
+	ID string `json:"id"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *GetStream) Name() domain.ToolName           { return GetStreamName }
+func (t *GetStream) InputSchema() *jsonschema.Schema { return getStreamSchema }
+func (t *GetStream) Description() string {
+	return "Fetch one Stream by id."
+}
+
+// Invoke parses the args, rejects an empty id, loads the Stream from the
+// store and returns it encoded as a streamView.
+func (t *GetStream) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args getStreamArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ID == "" {
+		return nil, fmt.Errorf("id is required")
+	}
+	s, err := t.deps.Store.Streams.Get(ctx, domain.StreamID(args.ID))
+	if err != nil {
+		return nil, fmt.Errorf("get stream %q: %w", args.ID, err)
+	}
+	return json.Marshal(streamViewOf(s))
+}
+
+// ListStreamEvents returns recent Events on one Stream, newest first.
+// Non-blocking — callers who want to wait for new events use read_events.
+type ListStreamEvents struct {
+	deps Deps
+}
+
+// ListStreamEventsName is the name the list_stream_events tool is
+// registered under.
+const ListStreamEventsName domain.ToolName = "list_stream_events"
+
+var listStreamEventsSchema = mustSchema[listStreamEventsArgs]()
+
+// Default and cap applied to the limit argument of list_stream_events.
+const (
+	listStreamEventsDefaultLimit = 50
+	listStreamEventsMaxLimit     = 200
+)
+
+// listStreamEventsArgs is the JSON argument payload of
+// list_stream_events; Limit is optional.
+type listStreamEventsArgs struct {
+	StreamID string `json:"streamId"`
+	Limit    int    `json:"limit,omitempty"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ListStreamEvents) Name() domain.ToolName           { return ListStreamEventsName }
+func (t *ListStreamEvents) InputSchema() *jsonschema.Schema { return listStreamEventsSchema }
+func (t *ListStreamEvents) Description() string {
+	return "List recent Events on a Stream, newest first. Returns immediately. limit defaults " +
+		"to 50, capped at 200."
+}
+
+// Invoke parses the args, rejects an empty streamId, clamps limit
+// (<= 0 becomes the default, values above the cap become the cap) and
+// returns the Stream's events as {"events": [...]}.
+func (t *ListStreamEvents) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args listStreamEventsArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.StreamID == "" {
+		return nil, fmt.Errorf("streamId is required")
+	}
+	limit := args.Limit
+	if limit <= 0 {
+		limit = listStreamEventsDefaultLimit
+	}
+	if limit > listStreamEventsMaxLimit {
+		limit = listStreamEventsMaxLimit
+	}
+	streamID := domain.StreamID(args.StreamID)
+	// Look the Stream up first so a failed lookup is reported as an
+	// error before any events are listed.
+	if _, err := t.deps.Store.Streams.Get(ctx, streamID); err != nil {
+		return nil, fmt.Errorf("stream %q: %w", streamID, err)
+	}
+	events, err := t.deps.Store.Events.ListForStream(ctx, streamID, limit)
+	if err != nil {
+		return nil, fmt.Errorf("list events for %q: %w", streamID, err)
+	}
+	out := make([]eventView, 0, len(events))
+	for _, e := range events {
+		out = append(out, eventViewOf(e))
+	}
+	return json.Marshal(map[string]any{"events": out})
+}
diff --git a/helix-org/tools/read_workers.go b/helix-org/tools/read_workers.go
new file mode 100644
index 0000000000..ba6e203947
--- /dev/null
+++ b/helix-org/tools/read_workers.go
@@ -0,0 +1,170 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// workerView is the JSON shape this package returns for a Worker.
+type workerView struct {
+	ID        domain.WorkerID     `json:"id"`
+	Kind      domain.WorkerKind   `json:"kind"`
+	Positions []domain.PositionID `json:"positions"`
+}
+
+// workerViewOf builds a workerView from the Worker's ID, Kind and
+// Positions accessors.
+func workerViewOf(w domain.Worker) workerView {
+	return workerView{ID: w.ID(), Kind: w.Kind(), Positions: w.Positions()}
+}
+
+// ListWorkers returns every Worker — humans and AIs.
+type ListWorkers struct {
+	deps Deps
+}
+
+// ListWorkersName is the name the list_workers tool is registered under.
+const ListWorkersName domain.ToolName = "list_workers"
+
+var listWorkersSchema = mustSchema[listWorkersArgs]()
+
+// listWorkersArgs is empty: list_workers takes no arguments.
+type listWorkersArgs struct{}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ListWorkers) Name() domain.ToolName           { return ListWorkersName }
+func (t *ListWorkers) InputSchema() *jsonschema.Schema { return listWorkersSchema }
+func (t *ListWorkers) Description() string {
+	return "List every Worker: id, kind (human|ai), and Positions held."
+}
+
+// Invoke ignores the Invocation and returns every stored Worker as
+// {"workers": [...]}; an empty store yields an empty array.
+func (t *ListWorkers) Invoke(ctx context.Context, _ domain.Invocation) (json.RawMessage, error) {
+	workers, err := t.deps.Store.Workers.List(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("list workers: %w", err)
+	}
+	out := make([]workerView, 0, len(workers))
+	for _, w := range workers {
+		out = append(out, workerViewOf(w))
+	}
+	return json.Marshal(map[string]any{"workers": out})
+}
+
+// GetWorker returns one Worker by ID.
+type GetWorker struct {
+	deps Deps
+}
+
+// GetWorkerName is the name the get_worker tool is registered under.
+const GetWorkerName domain.ToolName = "get_worker"
+
+var getWorkerSchema = mustSchema[getWorkerArgs]()
+
+// getWorkerArgs is the JSON argument payload of get_worker.
+type getWorkerArgs struct {
+	ID string `json:"id"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *GetWorker) Name() domain.ToolName           { return GetWorkerName }
+func (t *GetWorker) InputSchema() *jsonschema.Schema { return getWorkerSchema }
+func (t *GetWorker) Description() string {
+	return "Fetch one Worker by id."
+}
+
+// Invoke parses the args, rejects an empty id, loads the Worker from the
+// store and returns it encoded as a workerView.
+func (t *GetWorker) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args getWorkerArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.ID == "" {
+		return nil, fmt.Errorf("id is required")
+	}
+	w, err := t.deps.Store.Workers.Get(ctx, domain.WorkerID(args.ID))
+	if err != nil {
+		return nil, fmt.Errorf("get worker %q: %w", args.ID, err)
+	}
+	return json.Marshal(workerViewOf(w))
+}
+
+// ListWorkerGrants returns every ToolGrant held by one Worker.
+type ListWorkerGrants struct {
+	deps Deps
+}
+
+// ListWorkerGrantsName is the name the list_worker_grants tool is
+// registered under.
+const ListWorkerGrantsName domain.ToolName = "list_worker_grants"
+
+var listWorkerGrantsSchema = mustSchema[listWorkerGrantsArgs]()
+
+// listWorkerGrantsArgs is the JSON argument payload of list_worker_grants.
+type listWorkerGrantsArgs struct {
+	WorkerID string `json:"workerId"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *ListWorkerGrants) Name() domain.ToolName           { return ListWorkerGrantsName }
+func (t *ListWorkerGrants) InputSchema() *jsonschema.Schema { return listWorkerGrantsSchema }
+func (t *ListWorkerGrants) Description() string {
+	return "List the ToolGrants held by a Worker — i.e. the tools they may invoke over MCP."
+}
+
+// Invoke parses the args, rejects an empty workerId, and returns the
+// store's grants for that Worker as {"grants": [...]}.
+func (t *ListWorkerGrants) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args listWorkerGrantsArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.WorkerID == "" {
+		return nil, fmt.Errorf("workerId is required")
+	}
+	grants, err := t.deps.Store.Grants.ListByWorker(ctx, domain.WorkerID(args.WorkerID))
+	if err != nil {
+		return nil, fmt.Errorf("list grants for %q: %w", args.WorkerID, err)
+	}
+	out := make([]grantView, 0, len(grants))
+	for _, g := range grants {
+		out = append(out, grantViewOf(g))
+	}
+	return json.Marshal(map[string]any{"grants": out})
+}
+
+// GetWorkerEnvironment returns the on-disk Environment record for a Worker.
+type GetWorkerEnvironment struct {
+	deps Deps
+}
+
+// GetWorkerEnvironmentName is the name the get_worker_environment tool
+// is registered under.
+const GetWorkerEnvironmentName domain.ToolName = "get_worker_environment"
+
+var getWorkerEnvironmentSchema = mustSchema[getWorkerEnvironmentArgs]()
+
+// getWorkerEnvironmentArgs is the JSON argument payload of
+// get_worker_environment.
+type getWorkerEnvironmentArgs struct {
+	WorkerID string `json:"workerId"`
+}
+
+// environmentView is the JSON shape returned by get_worker_environment.
+type environmentView struct {
+	WorkerID  domain.WorkerID `json:"workerId"`
+	Path      string          `json:"path"`
+	CreatedAt time.Time       `json:"createdAt"`
+}
+
+// Name, InputSchema and Description expose the tool's static metadata.
+func (t *GetWorkerEnvironment) Name() domain.ToolName { return GetWorkerEnvironmentName }
+func (t *GetWorkerEnvironment) InputSchema() *jsonschema.Schema {
+	return getWorkerEnvironmentSchema
+}
+func (t *GetWorkerEnvironment) Description() string {
+	return "Fetch a Worker's Environment record: the path on disk where their role.md, " +
+		"identity.md, and agent.md live."
+}
+
+// Invoke parses the args, rejects an empty workerId, loads the Worker's
+// Environment record from the store and returns its WorkerID, Path and
+// CreatedAt as an environmentView.
+func (t *GetWorkerEnvironment) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args getWorkerEnvironmentArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.WorkerID == "" {
+		return nil, fmt.Errorf("workerId is required")
+	}
+	env, err := t.deps.Store.Environments.Get(ctx, domain.WorkerID(args.WorkerID))
+	if err != nil {
+		return nil, fmt.Errorf("get environment for %q: %w", args.WorkerID, err)
+	}
+	return json.Marshal(environmentView{
+		WorkerID:  env.WorkerID,
+		Path:      env.Path,
+		CreatedAt: env.CreatedAt,
+	})
+}
diff --git a/helix-org/tools/registry.go b/helix-org/tools/registry.go
new file mode 100644
index 0000000000..25557fc23e
--- /dev/null
+++ b/helix-org/tools/registry.go
@@ -0,0 +1,45 @@
+// Package tools holds the tool registry, the invocation pipeline, and
+// built-in tool implementations. The pipeline is scope-agnostic; individual
+// tools own their scope shape and enforcement logic.
+package tools
+
+import (
+	"fmt"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// Registry is an in-memory map of tool name to implementation.
+// Built-ins are registered at server startup; MCP or owner-defined tools
+// can be added later without changing the registry type.
+type Registry struct {
+	tools map[domain.ToolName]domain.Tool
+}
+
+// NewRegistry returns an empty Registry.
+func NewRegistry() *Registry {
+	return &Registry{tools: make(map[domain.ToolName]domain.Tool)}
+}
+
+// Register adds a tool. It fails if another tool is already registered under
+// the same name — the owner's map of possible capabilities must be unambiguous.
+// A tool whose Name() is empty is rejected as well.
+func (r *Registry) Register(tool domain.Tool) error {
+	name := tool.Name()
+	if name == "" {
+		return fmt.Errorf("tool name is empty")
+	}
+	if _, exists := r.tools[name]; exists {
+		return fmt.Errorf("tool %q already registered", name)
+	}
+	r.tools[name] = tool
+	return nil
+}
+
+// Get returns the tool by name, or an error if unknown.
+func (r *Registry) Get(name domain.ToolName) (domain.Tool, error) {
+	tool, ok := r.tools[name]
+	if !ok {
+		return nil, fmt.Errorf("tool %q not registered", name)
+	}
+	return tool, nil
+}
diff --git a/helix-org/tools/revoke_tool.go b/helix-org/tools/revoke_tool.go
new file mode 100644
index 0000000000..cc5fcec775
--- /dev/null
+++ b/helix-org/tools/revoke_tool.go
@@ -0,0 +1,39 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// RevokeTool deletes an existing ToolGrant. Owner-only.
+type RevokeTool struct {
+	deps Deps
+}
+
+// RevokeToolName is the name the revoke_tool tool is registered under.
+const RevokeToolName domain.ToolName = "revoke_tool"
+
+var revokeToolSchema = mustSchema[revokeToolArgs]()
+
+// Name, Description and InputSchema expose the tool's static metadata.
+func (t *RevokeTool) Name() domain.ToolName           { return RevokeToolName }
+func (t *RevokeTool) Description() string             { return "Revoke an existing tool grant by ID." }
+func (t *RevokeTool) InputSchema() *jsonschema.Schema { return revokeToolSchema }
+
+// revokeToolArgs is the JSON argument payload of revoke_tool.
+type revokeToolArgs struct {
+	GrantID string `json:"grantId"`
+}
+
+// Invoke deletes the grant named by grantId and echoes the id back as
+// {"id": ...}. An empty grantId is rejected before the store is touched,
+// and a store failure is wrapped with the grant id, matching the other
+// tools in this package. Wrapping uses %w, so errors.Is / errors.As on
+// the underlying store error keep working.
+func (t *RevokeTool) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) {
+	var args revokeToolArgs
+	if err := json.Unmarshal(inv.Args, &args); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if args.GrantID == "" {
+		return nil, fmt.Errorf("grantId is required")
+	}
+	if err := t.deps.Store.Grants.Delete(ctx, domain.GrantID(args.GrantID)); err != nil {
+		return nil, fmt.Errorf("revoke grant %q: %w", args.GrantID, err)
+	}
+	return json.Marshal(map[string]string{"id": args.GrantID})
+}
diff --git a/helix-org/tools/schema.go b/helix-org/tools/schema.go
new file mode 100644
index 0000000000..ca22853024
--- /dev/null
+++ b/helix-org/tools/schema.go
@@ -0,0 +1,66 @@
+package tools
+
+import (
+	"encoding/json"
+	"fmt"
+	"reflect"
+
+	"github.com/google/jsonschema-go/jsonschema"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// schemaOpts are passed to every jsonschema.For call so all our tool
+// schemas agree on how to render the special types we use.
+//
+// json.RawMessage is []byte under the hood; without an override the
+// generator emits "array of integer 0..255", which is technically true
+// of any byte slice but useless for an MCP client. We advertise it as
+// a JSON object instead.
+//
+// String-typed enum domains (WorkerKind, TransportKind) are surfaced as
+// JSON Schema `enum` constraints so the LLM sees the valid values in
+// the tool's input schema and never has to guess (or read source) — and
+// any client doing schema validation rejects bad calls *before* they
+// reach the tool.
+var schemaOpts = &jsonschema.ForOptions{
+	TypeSchemas: map[reflect.Type]*jsonschema.Schema{
+		// Raw JSON payloads are declared with type "object".
+		reflect.TypeFor[json.RawMessage](): {Type: "object"},
+		reflect.TypeFor[domain.WorkerKind](): enumSchema(
+			domain.WorkerKindValues(),
+			"Worker kind: human (a person) or ai (a software agent).",
+		),
+		reflect.TypeFor[domain.TransportKind](): enumSchema(
+			domain.TransportKindValues(),
+			"Stream transport: local (in-process), webhook (HTTP), email (Postmark), github (inbound).",
+		),
+	},
+}
+
+// enumSchema builds an "enum-constrained string" schema for a
+// string-typed domain enum. Centralising this means new enum domains
+// (e.g. a future GrantScope) get the right shape automatically.
+func enumSchema[T ~string](vals []T, description string) *jsonschema.Schema {
+	// Schema.Enum is filled from a []any, so each typed value is
+	// converted to a plain string first.
+	out := make([]any, len(vals))
+	for i, v := range vals {
+		out[i] = string(v)
+	}
+	return &jsonschema.Schema{
+		Type:        "string",
+		Enum:        out,
+		Description: description,
+	}
+}
+
+// mustSchema builds a JSON Schema from the given args type T at package
+// init time. A failure here is a build-time invariant violation (the
+// args type is not representable as JSON Schema), not a runtime data
+// problem — panicking is the right response and the test suite catches
+// it.
+func mustSchema[T any]() *jsonschema.Schema {
+	s, err := jsonschema.For[T](schemaOpts)
+	if err != nil {
+		// *new(T) is the zero value of T, used only so %T can print
+		// the type name in the panic message.
+		panic(fmt.Sprintf("jsonschema.For[%T]: %v", *new(T), err))
+	}
+	return s
+}
diff --git a/helix-org/tools/schema_test.go b/helix-org/tools/schema_test.go
new file mode 100644
index 0000000000..bb98f6ce00
--- /dev/null
+++ b/helix-org/tools/schema_test.go
@@ -0,0 +1,80 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/helixml/helix-org/domain"
+)
+
+// TestHireWorkerSchemaSurfacesKindEnum pins the contract that the
+// `kind` field appears as a JSON-Schema enum on the hire_worker input
+// schema. MCP clients (Claude Code, etc.)
consume this directly, so the
+// LLM sees the valid values up front rather than discovering them via
+// a server-side validation error round-trip.
+func TestHireWorkerSchemaSurfacesKindEnum(t *testing.T) {
+	t.Parallel()
+	schema := (&HireWorker{}).InputSchema()
+	props, ok := schema.Properties["kind"]
+	if !ok {
+		t.Fatalf("kind not in schema properties: %+v", schema.Properties)
+	}
+	if props.Type != "string" {
+		t.Errorf("kind type = %q, want string", props.Type)
+	}
+	// Collect the enum entries as a set; an entry that is not a string
+	// is recorded under the empty key.
+	got := make(map[string]bool, len(props.Enum))
+	for _, v := range props.Enum {
+		s, _ := v.(string)
+		got[s] = true
+	}
+	for _, want := range []string{"human", "ai"} {
+		if !got[want] {
+			t.Errorf("kind enum missing %q (got %v)", want, props.Enum)
+		}
+	}
+	// And nothing extra: "claude" famously does not belong here.
+	if got["claude"] {
+		t.Errorf("kind enum unexpectedly contains \"claude\"")
+	}
+}
+
+// TestHireWorkerInvokeRejectsUnknownKindWithValidList exercises the
+// runtime safety net for clients that ignore the schema and post a bad
+// `kind` anyway. The error must list the valid values verbatim — that
+// is the contract that lets a self-correcting agent retry without
+// reading source.
+func TestHireWorkerInvokeRejectsUnknownKindWithValidList(t *testing.T) {
+	t.Parallel()
+	// Only EnvsDir is set on Deps, and the Invocation carries no Caller.
+	tool := &HireWorker{deps: Deps{EnvsDir: t.TempDir()}}
+	args, _ := json.Marshal(map[string]any{
+		"id":              "w-bad",
+		"positionId":      "p-x",
+		"kind":            "claude",
+		"identityContent": "hi",
+	})
+	_, err := tool.Invoke(context.Background(), domain.Invocation{Args: args})
+	if err == nil {
+		t.Fatal("Invoke = nil, want error")
+	}
+	// The error must quote both valid kinds and echo the rejected one.
+	for _, want := range []string{`"human"`, `"ai"`, "claude"} {
+		if !strings.Contains(err.Error(), want) {
+			t.Errorf("err = %q, want it to mention %q", err, want)
+		}
+	}
+}
+
+// TestQuotedListRendersDomainEnums covers the helper used by every
+// "valid: …" error.
Pin both the WorkerKind and TransportKind cases so +// a future enum addition that breaks the generic constraint is caught +// here, not in a Slack-channel report. +func TestQuotedListRendersDomainEnums(t *testing.T) { + t.Parallel() + if got := domain.QuotedList(domain.WorkerKindValues()); got != `"human", "ai"` { + t.Errorf("WorkerKind QuotedList = %q", got) + } + if got := domain.QuotedList(domain.TransportKindValues()); !strings.Contains(got, `"local"`) || !strings.Contains(got, `"github"`) { + t.Errorf("TransportKind QuotedList = %q (missing local or github)", got) + } +} diff --git a/helix-org/tools/stream_members.go b/helix-org/tools/stream_members.go new file mode 100644 index 0000000000..d2577ae248 --- /dev/null +++ b/helix-org/tools/stream_members.go @@ -0,0 +1,62 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// StreamMembers returns the Worker IDs subscribed to a Stream right +// now. Read-only and non-blocking — the manager-style use case is "is +// the worker I'm about to message actually listening?". Composes with +// any outstanding-task tracking the caller does: see who's listening, +// and if the right party isn't, defer the work and reconcile later. +type StreamMembers struct { + deps Deps +} + +const StreamMembersName domain.ToolName = "stream_members" + +var streamMembersSchema = mustSchema[streamMembersArgs]() + +func (t *StreamMembers) Name() domain.ToolName { return StreamMembersName } +func (t *StreamMembers) InputSchema() *jsonschema.Schema { return streamMembersSchema } +func (t *StreamMembers) Description() string { + return "List the Worker IDs currently subscribed to a Stream. Returns immediately. " + + "Use this before publishing if you need to know whether a particular Worker is listening — " + + "e.g. before sending the first recruiting brief, check that the recruiter is subscribed." 
+} + +type streamMembersArgs struct { + StreamID string `json:"streamId"` +} + +func (t *StreamMembers) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args streamMembersArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.StreamID == "" { + return nil, fmt.Errorf("streamId is required") + } + streamID := domain.StreamID(args.StreamID) + if _, err := t.deps.Store.Streams.Get(ctx, streamID); err != nil { + return nil, fmt.Errorf("stream %q: %w", streamID, err) + } + subs, err := t.deps.Store.Subscriptions.ListForStream(ctx, streamID) + if err != nil { + return nil, fmt.Errorf("list subscriptions: %w", err) + } + members := make([]domain.WorkerID, 0, len(subs)) + for _, sub := range subs { + members = append(members, sub.WorkerID) + } + return json.Marshal(map[string]any{ + "streamId": string(streamID), + "members": members, + }) +} diff --git a/helix-org/tools/subscribe.go b/helix-org/tools/subscribe.go new file mode 100644 index 0000000000..40af97b138 --- /dev/null +++ b/helix-org/tools/subscribe.go @@ -0,0 +1,65 @@ +package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// Subscribe adds a Subscription between the caller and the given +// Stream. A Worker subscribes themselves; see invite_workers for +// adding other Workers to a Stream. +type Subscribe struct { + deps Deps +} + +const SubscribeName domain.ToolName = "subscribe" + +var subscribeSchema = mustSchema[subscribeArgs]() + +func (t *Subscribe) Name() domain.ToolName { return SubscribeName } +func (t *Subscribe) Description() string { + return "Subscribe the calling Worker to a Stream. Idempotent: a no-op if already subscribed." 
+} +func (t *Subscribe) InputSchema() *jsonschema.Schema { return subscribeSchema } + +type subscribeArgs struct { + StreamID string `json:"streamId"` +} + +func (t *Subscribe) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args subscribeArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.StreamID == "" { + return nil, fmt.Errorf("streamId is required") + } + streamID := domain.StreamID(args.StreamID) + if _, err := t.deps.Store.Streams.Get(ctx, streamID); err != nil { + return nil, fmt.Errorf("stream %q: %w", streamID, err) + } + + // Idempotent: if already subscribed, no-op. + workerID := inv.Caller.ID() + if _, err := t.deps.Store.Subscriptions.Find(ctx, workerID, streamID); err == nil { + return json.Marshal(map[string]string{"workerId": string(workerID), "streamId": string(streamID)}) + } else if !errors.Is(err, store.ErrNotFound) { + return nil, err + } + + sub, err := domain.NewSubscription(workerID, streamID, t.deps.Now()) + if err != nil { + return nil, err + } + if err := t.deps.Store.Subscriptions.Create(ctx, sub); err != nil { + return nil, err + } + return json.Marshal(map[string]string{"workerId": string(workerID), "streamId": string(streamID)}) +} diff --git a/helix-org/tools/unsubscribe.go b/helix-org/tools/unsubscribe.go new file mode 100644 index 0000000000..e6929d0b0f --- /dev/null +++ b/helix-org/tools/unsubscribe.go @@ -0,0 +1,44 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// Unsubscribe removes the caller's Subscription from the given Stream. 
+type Unsubscribe struct { + deps Deps +} + +const UnsubscribeName domain.ToolName = "unsubscribe" + +var unsubscribeSchema = mustSchema[unsubscribeArgs]() + +func (t *Unsubscribe) Name() domain.ToolName { return UnsubscribeName } +func (t *Unsubscribe) Description() string { return "Unsubscribe the calling Worker from a Stream." } +func (t *Unsubscribe) InputSchema() *jsonschema.Schema { return unsubscribeSchema } + +type unsubscribeArgs struct { + StreamID string `json:"streamId"` +} + +func (t *Unsubscribe) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args unsubscribeArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.StreamID == "" { + return nil, fmt.Errorf("streamId is required") + } + streamID := domain.StreamID(args.StreamID) + workerID := inv.Caller.ID() + if err := t.deps.Store.Subscriptions.Delete(ctx, workerID, streamID); err != nil { + return nil, err + } + return json.Marshal(map[string]string{"workerId": string(workerID), "streamId": string(streamID)}) +} diff --git a/helix-org/tools/update_identity.go b/helix-org/tools/update_identity.go new file mode 100644 index 0000000000..15af35a137 --- /dev/null +++ b/helix-org/tools/update_identity.go @@ -0,0 +1,62 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/domain" +) + +// UpdateIdentity rewrites a Worker's IdentityContent — the per-Worker +// description (persona for AI, profile for a human). It is a single DB +// write; the new content takes effect on the Worker's next activation, +// when the Spawner projects identity into the Environment. +// +// Identity is owner-managed: subordinate Workers don't get this grant. +// The identity supplied at hire time stays in place until rewritten. 
+type UpdateIdentity struct { + deps Deps +} + +const UpdateIdentityName domain.ToolName = "update_identity" + +var updateIdentitySchema = mustSchema[updateIdentityArgs]() + +func (t *UpdateIdentity) Name() domain.ToolName { return UpdateIdentityName } +func (t *UpdateIdentity) InputSchema() *jsonschema.Schema { return updateIdentitySchema } +func (t *UpdateIdentity) Description() string { + return "Replace a Worker's IdentityContent (persona / profile). The change takes effect " + + "on the Worker's next activation, when the Spawner projects current identity into " + + "their Environment. Owner-only." +} + +type updateIdentityArgs struct { + WorkerID string `json:"workerId"` + Content string `json:"content"` +} + +func (t *UpdateIdentity) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args updateIdentityArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.WorkerID == "" { + return nil, fmt.Errorf("workerId is required") + } + if args.Content == "" { + return nil, fmt.Errorf("content is required") + } + + existing, err := t.deps.Store.Workers.Get(ctx, domain.WorkerID(args.WorkerID)) + if err != nil { + return nil, fmt.Errorf("worker %q: %w", args.WorkerID, err) + } + if err := t.deps.Store.Workers.Update(ctx, existing.WithIdentityContent(args.Content)); err != nil { + return nil, fmt.Errorf("update worker: %w", err) + } + _ = t.deps.Workspace.PublishFile(ctx, domain.WorkerID(args.WorkerID), "identity.md", args.Content, fmt.Sprintf("update_identity: %s", args.WorkerID)) + return json.Marshal(map[string]string{"id": args.WorkerID}) +} diff --git a/helix-org/tools/update_role.go b/helix-org/tools/update_role.go new file mode 100644 index 0000000000..17b944f046 --- /dev/null +++ b/helix-org/tools/update_role.go @@ -0,0 +1,91 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/google/jsonschema-go/jsonschema" + + 
"github.com/helixml/helix-org/domain" +) + +// UpdateRole rewrites the canonical content of a Role. It is a single +// DB write — the new content takes effect on the next activation of +// every Worker filling a Position with this Role, because the Spawner +// projects current Role state into the Environment at the start of +// every activation. There is no fan-out, no cross-Environment write, +// and no on-disk source of truth. +// +// Workers can never modify their own Role — only the owner does, via +// this tool. +type UpdateRole struct { + deps Deps +} + +const UpdateRoleName domain.ToolName = "update_role" + +var updateRoleSchema = mustSchema[updateRoleArgs]() + +func (t *UpdateRole) Name() domain.ToolName { return UpdateRoleName } +func (t *UpdateRole) InputSchema() *jsonschema.Schema { return updateRoleSchema } +func (t *UpdateRole) Description() string { + return "Replace a Role's markdown content. The change takes effect on each Worker's " + + "next activation, when the Spawner projects current Role state into their " + + "Environment. Owner-only." 
+} + +type updateRoleArgs struct { + RoleID string `json:"roleId"` + Content string `json:"content"` +} + +func (t *UpdateRole) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args updateRoleArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.RoleID == "" { + return nil, fmt.Errorf("roleId is required") + } + if args.Content == "" { + return nil, fmt.Errorf("content is required") + } + roleID := domain.RoleID(args.RoleID) + + existing, err := t.deps.Store.Roles.Get(ctx, roleID) + if err != nil { + return nil, fmt.Errorf("role %q: %w", roleID, err) + } + + updated := domain.Role{ + ID: existing.ID, + Content: args.Content, + CreatedAt: existing.CreatedAt, + UpdatedAt: t.deps.Now(), + } + if err := t.deps.Store.Roles.Update(ctx, updated); err != nil { + return nil, fmt.Errorf("update role: %w", err) + } + // Mirror role content into every Worker holding a Position with + // this Role. Each runtime backend resolves the per-Worker target + // from its own state — the claude runtime writes a file in + // envsDir; the Helix runtime pushes to the per-Worker repo. 
+ positions, _ := t.deps.Store.Positions.List(ctx) + workers, _ := t.deps.Store.Workers.List(ctx) + positionWorkers := map[domain.PositionID][]domain.WorkerID{} + for _, w := range workers { + for _, p := range w.Positions() { + positionWorkers[p] = append(positionWorkers[p], w.ID()) + } + } + for _, p := range positions { + if p.RoleID != roleID { + continue + } + for _, wid := range positionWorkers[p.ID] { + _ = t.deps.Workspace.PublishFile(ctx, wid, "role.md", args.Content, fmt.Sprintf("update_role: %s", roleID)) + } + } + return json.Marshal(map[string]string{"id": string(roleID)}) +} diff --git a/helix-org/tools/worker_log.go b/helix-org/tools/worker_log.go new file mode 100644 index 0000000000..74670245a0 --- /dev/null +++ b/helix-org/tools/worker_log.go @@ -0,0 +1,155 @@ +package tools + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "time" + + "github.com/google/jsonschema-go/jsonschema" + + "github.com/helixml/helix-org/agent" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// WorkerLog reads a single AI Worker's activation transcript — assistant +// text, tool calls, tool results — newest first. It's a shortcut over +// the underlying primitives: resolves the deterministic activation +// Stream (s-activations-), auto-subscribes the caller (so +// the agent doesn't have to chain subscribe + read_events), then +// returns the events scoped to that one Worker. +// +// Same pagination/long-poll semantics as read_events: pass since= +// to skip what you've already seen, wait= (0..60) to block for +// new events. 
+type WorkerLog struct { + deps Deps +} + +const WorkerLogName domain.ToolName = "worker_log" + +var workerLogSchema = mustSchema[workerLogArgs]() + +func (t *WorkerLog) Name() domain.ToolName { return WorkerLogName } +func (t *WorkerLog) InputSchema() *jsonschema.Schema { return workerLogSchema } +func (t *WorkerLog) Description() string { + return "Read a Worker's activation log — assistant text, tool calls, tool results — " + + "newest first. Reach for this whenever the user wants to watch/audit/tail/" + + "observe what a named Worker is doing or did. Auto-subscribes the caller to " + + "the Worker's activation Stream on first call; subsequent calls reuse the " + + "subscription. Same since/wait/limit semantics as read_events but scoped to " + + "one Worker. AI Workers only — Human Workers don't have activation logs." +} + +type workerLogArgs struct { + WorkerID string `json:"workerId"` + Limit int `json:"limit,omitempty"` + Since string `json:"since,omitempty"` + Wait int `json:"wait,omitempty"` +} + +func (t *WorkerLog) Invoke(ctx context.Context, inv domain.Invocation) (json.RawMessage, error) { + var args workerLogArgs + if err := json.Unmarshal(inv.Args, &args); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if args.WorkerID == "" { + return nil, fmt.Errorf("workerId is required") + } + + target := domain.WorkerID(args.WorkerID) + worker, err := t.deps.Store.Workers.Get(ctx, target) + if err != nil { + return nil, fmt.Errorf("worker %q: %w", target, err) + } + if worker.Kind() != domain.WorkerKindAI { + return nil, fmt.Errorf("worker %q is %s; only AI workers have activation logs", + target, worker.Kind()) + } + + streamID := agent.ActivationStreamID(target) + if _, err := t.deps.Store.Streams.Get(ctx, streamID); err != nil { + return nil, fmt.Errorf("activation stream for %q: %w", target, err) + } + + // Auto-subscribe the caller. Idempotent; harmless to re-run. 
After + // this, plain read_events will also include this Worker's + // transcript, which is usually the desired follow-up behaviour. + caller := inv.Caller.ID() + if _, err := t.deps.Store.Subscriptions.Find(ctx, caller, streamID); err != nil { + if !errors.Is(err, store.ErrNotFound) { + return nil, err + } + sub, err := domain.NewSubscription(caller, streamID, t.deps.Now()) + if err != nil { + return nil, err + } + if err := t.deps.Store.Subscriptions.Create(ctx, sub); err != nil { + return nil, fmt.Errorf("subscribe %q to %q: %w", caller, streamID, err) + } + } + + limit := args.Limit + if limit <= 0 { + limit = readEventsDefaultLimit + } + if limit > readEventsMaxLimit { + limit = readEventsMaxLimit + } + wait := args.Wait + if wait < 0 { + wait = 0 + } + if wait > readEventsMaxWaitSecs { + wait = readEventsMaxWaitSecs + } + since := domain.EventID(args.Since) + + fresh, err := t.fresh(ctx, streamID, limit, since) + if err != nil { + return nil, err + } + if len(fresh) > 0 || wait == 0 || t.deps.Broadcaster == nil { + return marshalEvents(fresh), nil + } + + wake := t.deps.Broadcaster.Subscribe([]domain.StreamID{streamID}) + defer t.deps.Broadcaster.Unsubscribe([]domain.StreamID{streamID}, wake) + + timer := time.NewTimer(time.Duration(wait) * time.Second) + defer timer.Stop() + + select { + case <-wake: + case <-timer.C: + case <-ctx.Done(): + return marshalEvents(nil), nil + } + + fresh, err = t.fresh(ctx, streamID, limit, since) + if err != nil { + return nil, err + } + return marshalEvents(fresh), nil +} + +// fresh returns events on the activation stream newer than `since` +// (exclusive), newest-first, up to `limit`. Empty `since` means +// "return everything up to limit". 
+func (t *WorkerLog) fresh(ctx context.Context, streamID domain.StreamID, limit int, since domain.EventID) ([]domain.Event, error) { + events, err := t.deps.Store.Events.ListForStream(ctx, streamID, limit) + if err != nil { + return nil, fmt.Errorf("list events on %q: %w", streamID, err) + } + if since == "" { + return events, nil + } + for i, e := range events { + if e.ID == since { + return events[:i], nil + } + } + return events, nil +} diff --git a/helix-org/transports/github/github.go b/helix-org/transports/github/github.go new file mode 100644 index 0000000000..ac7da04b59 --- /dev/null +++ b/helix-org/transports/github/github.go @@ -0,0 +1,450 @@ +// Package github implements helix-org's inbound GitHub webhooks +// transport. A single HTTP handler at /github/webhook turns every +// signed delivery into Events on the Streams configured for that +// repo. +// +// Server-level configuration lives in the operational config +// registry under `transport.github`: +// +// { +// "token": "", // PAT used by Workers' gh +// "webhook_secret": "" // HMAC-SHA256 over body +// } +// +// Streams declare `{"repo":"owner/name","events":[...]}`. The +// transport HMAC-verifies the delivery, fans it out to every Stream +// whose `repo` matches `payload.repository.full_name` and whose +// `events` whitelist contains the X-GitHub-Event header value, and +// builds a canonical Message envelope per the design doc. +// +// Outbound is intentionally not supported. Workers act on the repo +// via `gh` in their Environment; publish to a github stream is +// rejected at the publish tool with an explanatory error. See +// design/github-transport.md. 
// Config is the decoded form of the operational-config entry
// `transport.github`.
type Config struct {
	// Token is the GitHub credential Workers use for outbound `gh` actions.
	// Validate only checks that it is non-empty.
	Token string `json:"token"`

	// WebhookSecret is the shared secret used to check each delivery's
	// X-Hub-Signature-256 header.
	WebhookSecret string `json:"webhook_secret"`
}

// Validate reports the first missing required field: an empty Token yields
// "token is empty", otherwise an empty WebhookSecret yields
// "webhook_secret is empty". It returns nil when both are set.
func (c Config) Validate() error {
	switch {
	case c.Token == "":
		return errors.New("token is empty")
	case c.WebhookSecret == "":
		return errors.New("webhook_secret is empty")
	default:
		return nil
	}
}
+type Transport struct { + registry *config.Registry + store *store.Store + broadcaster *broadcast.Broadcaster + dispatcher Dispatcher + logger *slog.Logger +} + +// New returns a Transport bound to the given config registry, store, +// broadcaster (for waking long-poll observers on inbound) and +// dispatcher (for activating subscribed Workers on inbound). +// dispatcher and broadcaster may be nil for tests that don't +// exercise those paths. +func New(reg *config.Registry, st *store.Store, bc *broadcast.Broadcaster, d Dispatcher, logger *slog.Logger) *Transport { + return &Transport{ + registry: reg, + store: st, + broadcaster: bc, + dispatcher: d, + logger: logger, + } +} + +func (t *Transport) config(ctx context.Context) (Config, error) { + var c Config + if err := t.registry.GetObject(ctx, "transport.github", &c); err != nil { + return Config{}, err + } + if err := c.Validate(); err != nil { + return Config{}, fmt.Errorf("transport.github: %w", err) + } + return c, nil +} + +// maxBody caps webhook body size. GitHub's hard limit is 25 MiB; we +// match that for safety. +const maxBody = 25 << 20 + +// HandleInbound is the http.Handler GitHub POSTs each signed +// delivery to. It HMAC-verifies the body, then fans the parsed +// payload out to every Stream whose repo + events whitelist +// matches. 
+// +// Status codes: +// - 401 on missing or mismatched signature +// - 400 on unparseable body +// - 405 on non-POST +// - 204 on success (event appended) and on no-op (delivery for a +// repo we have no streams for, or for an event type no stream +// wants — both 2xx so GitHub stops retrying) +func (t *Transport) HandleInbound() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + cfg, err := t.config(r.Context()) + if err != nil { + t.logger.Error("github.inbound: config", "err", err) + http.Error(w, "transport not configured", http.StatusServiceUnavailable) + return + } + + body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maxBody)) + if err != nil { + http.Error(w, "read body: "+err.Error(), http.StatusBadRequest) + return + } + + // Signature first. Fail closed on missing or bad signature — + // we don't want to parse adversarial bodies before we know + // they came from GitHub. + if !verifySignature(cfg.WebhookSecret, body, r.Header.Get("X-Hub-Signature-256")) { + t.logger.Warn("github.inbound: bad signature", + "delivery", r.Header.Get("X-GitHub-Delivery"), + "event", r.Header.Get("X-GitHub-Event")) + http.Error(w, "invalid signature", http.StatusUnauthorized) + return + } + + var payload map[string]any + if err := json.Unmarshal(body, &payload); err != nil { + http.Error(w, "parse json: "+err.Error(), http.StatusBadRequest) + return + } + + eventType := r.Header.Get("X-GitHub-Event") + deliveryID := r.Header.Get("X-GitHub-Delivery") + repo := repoFullName(payload) + if repo == "" { + // Some legitimate event types (`ping`, `meta`) carry no + // repository field. Accept and log, but nothing to route to. 
+ t.logger.Info("github.inbound: no repository in payload", "event", eventType, "delivery", deliveryID) + w.WriteHeader(http.StatusNoContent) + return + } + + // Find every stream this delivery should fan out to. + streams, err := t.matchingStreams(r.Context(), repo, eventType) + if err != nil { + t.logger.Error("github.inbound: match streams", "repo", repo, "err", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + if len(streams) == 0 { + // Either no Stream is configured for this repo, or none of + // them want this event type. Log so misconfigurations are + // visible; respond 2xx so GitHub stops retrying. + t.logger.Info("github.inbound: no matching streams", "repo", repo, "event", eventType, "delivery", deliveryID) + w.WriteHeader(http.StatusNoContent) + return + } + + // Inject the event header into the body's top level — GitHub + // puts the event type in X-GitHub-Event rather than the JSON, + // and we want roles to read it from one place. Safe: GitHub + // payloads do not have a top-level `event` field of their own. 
+ payload["event"] = eventType + extraJSON, err := json.Marshal(payload) + if err != nil { + t.logger.Error("github.inbound: re-marshal payload", "err", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + msg := domain.Message{ + From: sender(payload), + Subject: subjectFor(eventType, payload), + Body: bodyFor(eventType, payload), + ThreadID: threadIDFor(payload), + MessageID: deliveryID, + Extra: extraJSON, + } + + now := nowUTC() + for _, s := range streams { + event, err := domain.NewMessageEvent( + domain.EventID("e-"+uuid.NewString()), + s.ID, + "", // system-emitted: external sender, no helix Worker source + msg, + now, + ) + if err != nil { + t.logger.Error("github.inbound: build event", "stream", s.ID, "err", err) + continue + } + if err := t.store.Events.Append(r.Context(), event); err != nil { + t.logger.Error("github.inbound: append", "stream", s.ID, "err", err) + continue + } + if t.broadcaster != nil { + t.broadcaster.Notify(s.ID) + } + if t.dispatcher != nil { + t.dispatcher.Dispatch(r.Context(), event) + } + t.logger.Info("github.inbound", + "stream", s.ID, "repo", repo, "event", eventType, + "delivery", deliveryID, "from", msg.From) + } + + w.WriteHeader(http.StatusNoContent) + }) +} + +// matchingStreams returns every github-transport Stream whose repo +// matches `repo` (case-insensitive) and whose events whitelist +// contains `eventType`. Linear scan is fine at the scale we expect; +// indexed lookups are an obvious follow-on if installations ever +// grow many github streams. 
// verifySignature reports whether header is "sha256=" followed by the
// hex-encoded HMAC-SHA256 of body keyed by secret. The digests are compared
// with hmac.Equal. Any malformed header — empty, wrong prefix, or invalid
// hex — yields false.
func verifySignature(secret string, body []byte, header string) bool {
	hexDigest, ok := strings.CutPrefix(header, "sha256=")
	if !ok {
		return false
	}
	claimed, err := hex.DecodeString(hexDigest)
	if err != nil {
		return false
	}

	h := hmac.New(sha256.New, []byte(secret))
	h.Write(body)
	expected := h.Sum(nil)

	return hmac.Equal(claimed, expected)
}

// repoFullName returns repository.full_name from a webhook payload, or ""
// when the repository object or the field is absent or of the wrong type.
func repoFullName(p map[string]any) string {
	if repo, ok := p["repository"].(map[string]any); ok {
		if name, ok := repo["full_name"].(string); ok {
			return name
		}
	}
	return ""
}

// sender returns sender.login from a webhook payload, or "" when the sender
// object or the field is absent or of the wrong type.
func sender(p map[string]any) string {
	if who, ok := p["sender"].(map[string]any); ok {
		if login, ok := who["login"].(string); ok {
			return login
		}
	}
	return ""
}
// subjectFor picks the title-like field of a webhook payload. It checks, in
// order, pull_request.title, issue.title and release.name, and returns the
// first one that is a non-empty string; otherwise it returns "". The event
// type argument is ignored: selection is by payload shape only.
//
// pull_request is checked before issue so that a payload carrying both
// objects resolves to the pull request's title.
func subjectFor(_ string, p map[string]any) string {
	candidates := [...]struct{ object, field string }{
		{"pull_request", "title"},
		{"issue", "title"},
		{"release", "name"},
	}
	for _, c := range candidates {
		obj, ok := p[c.object].(map[string]any)
		if !ok {
			continue
		}
		if text, _ := obj[c.field].(string); text != "" {
			return text
		}
	}
	return ""
}
+func bodyFor(eventType string, p map[string]any) string { + switch eventType { + case "issue_comment", "pull_request_review_comment": + if c, ok := p["comment"].(map[string]any); ok { + if b, _ := c["body"].(string); b != "" { + return b + } + } + case "pull_request_review": + if rev, ok := p["review"].(map[string]any); ok { + if b, _ := rev["body"].(string); b != "" { + return b + } + } + case "issues": + if iss, ok := p["issue"].(map[string]any); ok { + if b, _ := iss["body"].(string); b != "" { + return b + } + } + case "pull_request": + if pr, ok := p["pull_request"].(map[string]any); ok { + if b, _ := pr["body"].(string); b != "" { + return b + } + } + } + return "" +} + +// threadIDFor returns "#" for events scoped to one issue or +// PR (or one of their comments/reviews); empty for repo-level +// events. Lets a role read all events for one PR via `read_events` +// filtered by ThreadID. +// +// Number resolution prefers payload.number (set on PR events, +// payload-level), then pull_request.number, then issue.number. +func threadIDFor(p map[string]any) string { + if n, ok := numberFromAny(p["number"]); ok { + return fmt.Sprintf("#%d", n) + } + if pr, ok := p["pull_request"].(map[string]any); ok { + if n, ok := numberFromAny(pr["number"]); ok { + return fmt.Sprintf("#%d", n) + } + } + if iss, ok := p["issue"].(map[string]any); ok { + if n, ok := numberFromAny(iss["number"]); ok { + return fmt.Sprintf("#%d", n) + } + } + return "" +} + +// numberFromAny coerces JSON-decoded numeric values (which come in +// as float64) into an int64. Returns false for anything else. 
+func numberFromAny(v any) (int64, bool) { + switch n := v.(type) { + case float64: + return int64(n), true + case int: + return int64(n), true + case int64: + return n, true + case json.Number: + i, err := n.Int64() + return i, err == nil + } + return 0, false +} + +func contains(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + return false +} + +// nowUTC returns the current wall-clock time in UTC. Matches the +// helper convention used elsewhere; trivial to override in a test +// if a deterministic timestamp is ever needed. +func nowUTC() time.Time { return time.Now().UTC() } diff --git a/helix-org/transports/github/github_test.go b/helix-org/transports/github/github_test.go new file mode 100644 index 0000000000..cb05c0779e --- /dev/null +++ b/helix-org/transports/github/github_test.go @@ -0,0 +1,644 @@ +// These tests pin down the github transport's behaviour as specified +// in design/github-transport.md. They cover: +// +// - HMAC-SHA256 signature verification on /github/webhook (good / +// bad / missing). +// - Repo routing: deliveries route to every Stream whose +// Transport.Config.Repo equals payload.repository.full_name. +// - Per-stream event filter: only event types listed in the +// stream's `events` whitelist become Events; others are accepted +// (200) but dropped. +// - Envelope mapping: From=sender.login, Subject=upstream title +// verbatim, Body=upstream user-typed text verbatim, +// ThreadID=#, MessageID=X-GitHub-Delivery, Extra is the +// full webhook body verbatim with one synthetic top-level key +// (`event`) injected from the X-GitHub-Event header. +// - Inbound-only: dispatcher fires; broadcaster wakes; helix's +// Source on the resulting Event is empty. +// - Method/body validation: GET → 405, malformed JSON → 400. +// - Domain transport validation: stream config requires repo + +// non-empty events whitelist (and rejects unknown event names). 
+package github_test + +import ( + "bytes" + "context" + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + "time" + + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/config" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/store/sqlite" + githubtransport "github.com/helixml/helix-org/transports/github" +) + +const testWebhookSecret = "abc123" // shared secret used in HMAC computations + +// recordingDispatcher captures Dispatch calls so tests can assert +// the dispatcher was woken (and how many times) for a given inbound +// delivery. +type recordingDispatcher struct { + mu sync.Mutex + events []domain.Event +} + +func (d *recordingDispatcher) Dispatch(_ context.Context, e domain.Event) { + d.mu.Lock() + defer d.mu.Unlock() + d.events = append(d.events, e) +} + +func (d *recordingDispatcher) snapshot() []domain.Event { + d.mu.Lock() + defer d.mu.Unlock() + out := make([]domain.Event, len(d.events)) + copy(out, d.events) + return out +} + +func newTestTransport(t *testing.T) (*githubtransport.Transport, *store.Store, *recordingDispatcher, *broadcast.Broadcaster, *config.Registry) { + t.Helper() + st, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open store: %v", err) + } + bc := broadcast.New() + rd := &recordingDispatcher{} + reg := config.New(st.Configs) + reg.Register(config.Spec{ + Key: "transport.github", + Type: config.TypeObject, + Secrets: []string{"token", "webhook_secret"}, + }) + tp := githubtransport.New(reg, st, bc, rd, slog.New(slog.NewTextHandler(io.Discard, nil))) + return tp, st, rd, bc, reg +} + +func setGitHubConfig(t *testing.T, reg *config.Registry, token, secret string) { + t.Helper() + val, _ := json.Marshal(map[string]string{"token": token, "webhook_secret": secret}) + if err := reg.Set(context.Background(), "transport.github", 
string(val), ""); err != nil { + t.Fatalf("set config: %v", err) + } +} + +// seedGitHubStream creates a github-transport Stream with the given +// repo and event whitelist. Returns the persisted Stream. +func seedGitHubStream(t *testing.T, st *store.Store, id domain.StreamID, repo string, events []string) domain.Stream { + t.Helper() + cfg, _ := json.Marshal(map[string]any{"repo": repo, "events": events}) + stream, err := domain.NewStream(id, string(id), "", "w-owner", time.Now().UTC(), + domain.Transport{Kind: domain.TransportGitHub, Config: cfg}) + if err != nil { + t.Fatalf("new stream: %v", err) + } + if err := st.Streams.Create(context.Background(), stream); err != nil { + t.Fatalf("create stream: %v", err) + } + return stream +} + +// signBody returns the value of the X-Hub-Signature-256 header for +// the given body and secret. GitHub's exact format: "sha256=". +func signBody(secret string, body []byte) string { + mac := hmac.New(sha256.New, []byte(secret)) + mac.Write(body) + return "sha256=" + hex.EncodeToString(mac.Sum(nil)) +} + +// postResult is what `post` returns — closing the response body +// inside the helper keeps the bodyclose linter satisfied at every +// call site. +type postResult struct { + StatusCode int + Body []byte +} + +// post POSTs body to the test transport's HandleInbound, optionally +// setting the GitHub headers. Sig defaults to a correct HMAC of body +// using testWebhookSecret if empty; "-" sends no signature header at +// all (for the missing-signature test). 
+func post(t *testing.T, tp http.Handler, body []byte, eventType, deliveryID, sig string) postResult { + t.Helper() + srv := httptest.NewServer(tp) + t.Cleanup(srv.Close) + + req, err := http.NewRequest(http.MethodPost, srv.URL, bytes.NewReader(body)) + if err != nil { + t.Fatalf("new request: %v", err) + } + req.Header.Set("Content-Type", "application/json") + if eventType != "" { + req.Header.Set("X-GitHub-Event", eventType) + } + if deliveryID != "" { + req.Header.Set("X-GitHub-Delivery", deliveryID) + } + if sig == "" { + sig = signBody(testWebhookSecret, body) + } + if sig != "-" { + req.Header.Set("X-Hub-Signature-256", sig) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer func() { _ = resp.Body.Close() }() + respBody, _ := io.ReadAll(resp.Body) + return postResult{StatusCode: resp.StatusCode, Body: respBody} +} + +// issuesOpenedPayload returns a representative `issues.opened` +// webhook body for repo `owner/name`. +func issuesOpenedPayload(repo string) map[string]any { + return map[string]any{ + "action": "opened", + "issue": map[string]any{ + "id": 12345, + "number": 42, + "title": "README setup steps mention an env var that no longer exists", + "body": "Step 3 references HELIX_FOO; the code reads HELIX_BAR now.", + }, + "sender": map[string]any{"login": "philwinder"}, + "repository": map[string]any{"full_name": repo}, + } +} + +// pullRequestLabeledPayload returns a representative +// `pull_request.labeled` body for repo `owner/name` with `label.name` +// = labelName. 
+func pullRequestLabeledPayload(repo, labelName string) map[string]any { + return map[string]any{ + "action": "labeled", + "number": 7, + "pull_request": map[string]any{ + "id": 987, + "number": 7, + "title": "Fix typo in setup section", + "body": "Spotted while running through README.", + }, + "label": map[string]any{"name": labelName, "color": "0075ca"}, + "sender": map[string]any{"login": "octocat"}, + "repository": map[string]any{"full_name": repo}, + } +} + +// issueCommentCreatedPayload returns an `issue_comment.created` body +// — note `comment.body` is the user text, while the parent `issue` +// object carries the title that goes into Subject. +func issueCommentCreatedPayload(repo string) map[string]any { + return map[string]any{ + "action": "created", + "issue": map[string]any{ + "id": 999, + "number": 42, + "title": "README setup steps mention an env var that no longer exists", + }, + "comment": map[string]any{ + "id": 55, + "body": "I hit the same thing — happy to send a PR.", + }, + "sender": map[string]any{"login": "alice"}, + "repository": map[string]any{"full_name": repo}, + } +} + +// TestInboundIssuesOpened: full envelope mapping for a representative +// event. Subject = issue title, Body = issue body, ThreadID = #42, +// MessageID = delivery UUID, From = sender.login, Source on the +// stored Event is empty (system-emitted), and Extra is the body +// verbatim with one synthetic top-level key (`event`). 
+func TestInboundIssuesOpened(t *testing.T) { + t.Parallel() + tp, st, rd, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", + []string{"issues", "issue_comment", "pull_request", "pull_request_review", "pull_request_review_comment"}) + + body, _ := json.Marshal(issuesOpenedPayload("helixml/helix-org")) + + resp := post(t, tp.HandleInbound(), body, "issues", "delivery-uuid-1", "") + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("status = %d, body = %q, want 204", resp.StatusCode, resp.Body) + } + + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + if len(events) != 1 { + t.Fatalf("events = %d, want 1", len(events)) + } + ev := events[0] + if ev.Source != "" { + t.Fatalf("Source = %q, want empty", ev.Source) + } + + msg, err := ev.Message() + if err != nil { + t.Fatalf("parse message: %v", err) + } + if msg.From != "philwinder" { + t.Fatalf("From = %q, want philwinder", msg.From) + } + if msg.Subject != "README setup steps mention an env var that no longer exists" { + t.Fatalf("Subject = %q", msg.Subject) + } + if !strings.Contains(msg.Body, "HELIX_BAR") { + t.Fatalf("Body = %q", msg.Body) + } + if msg.ThreadID != "#42" { + t.Fatalf("ThreadID = %q, want #42", msg.ThreadID) + } + if msg.MessageID != "delivery-uuid-1" { + t.Fatalf("MessageID = %q, want delivery-uuid-1", msg.MessageID) + } + if len(msg.Extra) == 0 { + t.Fatalf("Extra is empty") + } + + // Extra is the full body with `event` injected at the top level. 
+ var extra map[string]any + if err := json.Unmarshal(msg.Extra, &extra); err != nil { + t.Fatalf("Extra not JSON: %v", err) + } + if extra["event"] != "issues" { + t.Fatalf("Extra.event = %v, want issues", extra["event"]) + } + if extra["action"] != "opened" { + t.Fatalf("Extra.action = %v, want opened (preserved from upstream body)", extra["action"]) + } + if extra["issue"] == nil { + t.Fatalf("Extra.issue missing — body should be passed through verbatim") + } + repo, _ := extra["repository"].(map[string]any) + if repo == nil || repo["full_name"] != "helixml/helix-org" { + t.Fatalf("Extra.repository.full_name = %v, want helixml/helix-org", repo) + } + + // Dispatcher fired exactly once. + if got := len(rd.snapshot()); got != 1 { + t.Fatalf("dispatcher fired %d times, want 1", got) + } +} + +// TestInboundPullRequestLabeled: action / sender / title / number all +// flow through; the role's `Extra.label.name` lookup works because +// Extra is the body verbatim. +func TestInboundPullRequestLabeled(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"pull_request"}) + + body, _ := json.Marshal(pullRequestLabeledPayload("helixml/helix-org", "docs")) + resp := post(t, tp.HandleInbound(), body, "pull_request", "d-2", "") + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("status = %d", resp.StatusCode) + } + + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + if len(events) != 1 { + t.Fatalf("events = %d, want 1", len(events)) + } + msg, _ := events[0].Message() + if msg.From != "octocat" { + t.Fatalf("From = %q", msg.From) + } + if msg.Subject != "Fix typo in setup section" { + t.Fatalf("Subject = %q (want PR title verbatim)", msg.Subject) + } + if !strings.Contains(msg.Body, "Spotted while") { + t.Fatalf("Body = %q (want PR body verbatim)", msg.Body) + } + if msg.ThreadID != "#7" { + 
t.Fatalf("ThreadID = %q", msg.ThreadID) + } + var extra map[string]any + _ = json.Unmarshal(msg.Extra, &extra) + if extra["event"] != "pull_request" || extra["action"] != "labeled" { + t.Fatalf("Extra event/action = %v / %v", extra["event"], extra["action"]) + } + label, _ := extra["label"].(map[string]any) + if label == nil || label["name"] != "docs" { + t.Fatalf("Extra.label.name = %v, want docs", label) + } +} + +// TestInboundIssueCommentMapsBodyToCommentBody: for comment events, +// Body is `comment.body` (the user-typed text), Subject is the +// parent issue's title (so a reader skimming the stream sees what +// thread the comment is on). +func TestInboundIssueCommentMapsBodyToCommentBody(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"issue_comment"}) + + body, _ := json.Marshal(issueCommentCreatedPayload("helixml/helix-org")) + resp := post(t, tp.HandleInbound(), body, "issue_comment", "d-3", "") + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("status = %d", resp.StatusCode) + } + + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + msg, _ := events[0].Message() + if msg.Body != "I hit the same thing — happy to send a PR." { + t.Fatalf("Body = %q (want comment.body verbatim)", msg.Body) + } + if msg.Subject != "README setup steps mention an env var that no longer exists" { + t.Fatalf("Subject = %q (want parent issue title)", msg.Subject) + } + if msg.ThreadID != "#42" { + t.Fatalf("ThreadID = %q", msg.ThreadID) + } +} + +// TestInboundBadSignatureReturns401: HMAC mismatch is rejected with +// 401; no event is appended; dispatcher is not called. 
+func TestInboundBadSignatureReturns401(t *testing.T) { + t.Parallel() + tp, st, rd, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"issues"}) + + body, _ := json.Marshal(issuesOpenedPayload("helixml/helix-org")) + resp := post(t, tp.HandleInbound(), body, "issues", "d-1", "sha256=deadbeef") + if resp.StatusCode != http.StatusUnauthorized { + t.Fatalf("status = %d, want 401", resp.StatusCode) + } + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + if len(events) != 0 { + t.Fatalf("events = %d, want 0 (bad sig must not append)", len(events)) + } + if got := len(rd.snapshot()); got != 0 { + t.Fatalf("dispatcher fired %d times, want 0", got) + } +} + +// TestInboundMissingSignatureReturns401: a request without +// X-Hub-Signature-256 is rejected. We fail closed; never trust an +// unsigned webhook. +func TestInboundMissingSignatureReturns401(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"issues"}) + + body, _ := json.Marshal(issuesOpenedPayload("helixml/helix-org")) + resp := post(t, tp.HandleInbound(), body, "issues", "d-1", "-") + if resp.StatusCode != http.StatusUnauthorized { + t.Fatalf("status = %d, want 401", resp.StatusCode) + } +} + +// TestInboundUnknownRepoReturns200NoAppend: a delivery for a repo +// that no Stream is configured for is accepted (200) so GitHub stops +// retrying, but no event is appended. Operators should see this in +// the logs; tests just assert the no-op. 
+func TestInboundUnknownRepoReturns200NoAppend(t *testing.T) { + t.Parallel() + tp, st, rd, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"issues"}) + + body, _ := json.Marshal(issuesOpenedPayload("someone-else/their-repo")) + resp := post(t, tp.HandleInbound(), body, "issues", "d-1", "") + if resp.StatusCode/100 != 2 { + t.Fatalf("status = %d, want 2xx", resp.StatusCode) + } + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + if len(events) != 0 { + t.Fatalf("events = %d, want 0", len(events)) + } + if got := len(rd.snapshot()); got != 0 { + t.Fatalf("dispatcher fired %d times, want 0", got) + } +} + +// TestInboundEventTypeFilterDrops: a delivery for an event type not +// in the stream's `events` whitelist is accepted (so GitHub stops +// retrying) but does not become an Event. +func TestInboundEventTypeFilterDrops(t *testing.T) { + t.Parallel() + tp, st, rd, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + // Stream wants only `issues`; we'll send a `pull_request`. + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"issues"}) + + body, _ := json.Marshal(pullRequestLabeledPayload("helixml/helix-org", "docs")) + resp := post(t, tp.HandleInbound(), body, "pull_request", "d-1", "") + if resp.StatusCode/100 != 2 { + t.Fatalf("status = %d, want 2xx", resp.StatusCode) + } + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + if len(events) != 0 { + t.Fatalf("events = %d, want 0 (filtered out)", len(events)) + } + if got := len(rd.snapshot()); got != 0 { + t.Fatalf("dispatcher fired %d times, want 0", got) + } +} + +// TestInboundFanOutToMultipleStreams: two streams configured for the +// same repo with overlapping event whitelists both receive a copy of +// the event. 
+func TestInboundFanOutToMultipleStreams(t *testing.T) { + t.Parallel() + tp, st, rd, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-docs", "helixml/helix-org", []string{"issues", "pull_request"}) + seedGitHubStream(t, st, "s-triage", "helixml/helix-org", []string{"issues"}) + + body, _ := json.Marshal(issuesOpenedPayload("helixml/helix-org")) + resp := post(t, tp.HandleInbound(), body, "issues", "d-1", "") + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("status = %d", resp.StatusCode) + } + + docsEv, _ := st.Events.ListForStream(context.Background(), "s-docs", 10) + triageEv, _ := st.Events.ListForStream(context.Background(), "s-triage", 10) + if len(docsEv) != 1 || len(triageEv) != 1 { + t.Fatalf("fan-out = %d / %d, want 1 / 1", len(docsEv), len(triageEv)) + } + if got := len(rd.snapshot()); got != 2 { + t.Fatalf("dispatcher fired %d times, want 2 (one per stream)", got) + } +} + +// TestInboundMethodNotAllowed: GET (and other non-POSTs) → 405. +func TestInboundMethodNotAllowed(t *testing.T) { + t.Parallel() + tp, _, _, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + + srv := httptest.NewServer(tp.HandleInbound()) + t.Cleanup(srv.Close) + resp, err := http.Get(srv.URL) + if err != nil { + t.Fatalf("GET: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusMethodNotAllowed { + t.Fatalf("status = %d, want 405", resp.StatusCode) + } +} + +// TestInboundMalformedJSONReturns400: a syntactically broken body is +// rejected. (Note: signature must still verify, since HMAC is over +// raw bytes.) 
+func TestInboundMalformedJSONReturns400(t *testing.T) { + t.Parallel() + tp, _, _, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + + body := []byte(`{not valid json`) + resp := post(t, tp.HandleInbound(), body, "issues", "d-1", "") + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("status = %d, want 400", resp.StatusCode) + } +} + +// TestInboundDeliveryIDIsMessageID: the X-GitHub-Delivery header +// value lands in Message.MessageID, mirroring email's MessageID +// preservation. +func TestInboundDeliveryIDIsMessageID(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"issues"}) + + body, _ := json.Marshal(issuesOpenedPayload("helixml/helix-org")) + resp := post(t, tp.HandleInbound(), body, "issues", "particular-uuid-here", "") + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("status = %d", resp.StatusCode) + } + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + msg, _ := events[0].Message() + if msg.MessageID != "particular-uuid-here" { + t.Fatalf("MessageID = %q, want particular-uuid-here", msg.MessageID) + } +} + +// TestInboundEmptySenderTolerated: events without a sender (some +// system events) leave Message.From empty rather than erroring. 
+func TestInboundEmptySenderTolerated(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setGitHubConfig(t, reg, "tok", testWebhookSecret) + seedGitHubStream(t, st, "s-github", "helixml/helix-org", []string{"issues"}) + + payload := issuesOpenedPayload("helixml/helix-org") + delete(payload, "sender") + body, _ := json.Marshal(payload) + + resp := post(t, tp.HandleInbound(), body, "issues", "d-1", "") + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("status = %d", resp.StatusCode) + } + events, _ := st.Events.ListForStream(context.Background(), "s-github", 10) + msg, _ := events[0].Message() + if msg.From != "" { + t.Fatalf("From = %q, want empty for sender-less event", msg.From) + } +} + +// TestTransportValidateGitHub: stream-config validation. Required: +// non-empty repo of form "owner/name", non-empty events whitelist +// drawn from the supported set; rejects unknown event names. +func TestTransportValidateGitHub(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + cfg string + wantErr string // substring; "" = no error + }{ + { + name: "valid", + cfg: `{"repo":"helixml/helix-org","events":["issues","pull_request"]}`, + }, + { + name: "valid all known events", + cfg: `{"repo":"helixml/helix-org","events":["issues","issue_comment","pull_request",` + + `"pull_request_review","pull_request_review_comment"]}`, + }, + { + name: "missing repo", + cfg: `{"events":["issues"]}`, + wantErr: "repo", + }, + { + name: "repo without slash", + cfg: `{"repo":"helix-org","events":["issues"]}`, + wantErr: "owner/name", + }, + { + name: "repo with extra path segment", + cfg: `{"repo":"helixml/helix-org/extra","events":["issues"]}`, + wantErr: "owner/name", + }, + { + name: "missing events", + cfg: `{"repo":"helixml/helix-org"}`, + wantErr: "events", + }, + { + name: "empty events", + cfg: `{"repo":"helixml/helix-org","events":[]}`, + wantErr: "events", + }, + { + name: "unknown event name", + cfg: 
`{"repo":"helixml/helix-org","events":["not_a_real_event"]}`, + wantErr: "unknown", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + tr := domain.Transport{Kind: domain.TransportGitHub, Config: json.RawMessage(tc.cfg)} + err := tr.Validate() + if tc.wantErr == "" { + if err != nil { + t.Fatalf("Validate() = %v, want nil", err) + } + return + } + if err == nil { + t.Fatalf("Validate() = nil, want error containing %q", tc.wantErr) + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("Validate() = %q, want error containing %q", err, tc.wantErr) + } + }) + } +} + +// TestGitHubConfigRoundTrip: parse Transport.Config back into a +// GitHubConfig with all fields populated. +func TestGitHubConfigRoundTrip(t *testing.T) { + t.Parallel() + + raw := json.RawMessage(`{"repo":"helixml/helix-org","events":["issues","pull_request"]}`) + c, err := domain.Transport{Kind: domain.TransportGitHub, Config: raw}.GitHubConfig() + if err != nil { + t.Fatalf("GitHubConfig() = %v", err) + } + if c.Repo != "helixml/helix-org" { + t.Fatalf("Repo = %q", c.Repo) + } + if len(c.Events) != 2 || c.Events[0] != "issues" || c.Events[1] != "pull_request" { + t.Fatalf("Events = %v", c.Events) + } +} diff --git a/helix-org/transports/postmark/postmark.go b/helix-org/transports/postmark/postmark.go new file mode 100644 index 0000000000..23b0f2d276 --- /dev/null +++ b/helix-org/transports/postmark/postmark.go @@ -0,0 +1,442 @@ +// Package postmark implements helix-org's email transport using +// Postmark (postmarkapp.com) as the provider. It handles inbound +// webhooks (Postmark POSTing parsed mail to us) and outbound emits +// (we POST to Postmark's /email API to send mail). 
+// +// Server-level configuration lives in the operational config +// registry under `transport.postmark`: +// +// { +// "token": "", +// "inbound": "@inbound.postmarkapp.com", +// "from": "you@gmail.com" +// } +// +// Streams declare just an alias (`{"alias":"sam"}`); the transport +// joins server-level config with stream-level alias at runtime. +// Inbound mail addressed to `+@inbound.postmarkapp.com` +// routes to the stream with that alias. Outbound mail is sent +// `From: ` with `Reply-To: +@…` so +// customers' replies land back on the right stream. +package postmark + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log/slog" + "net/http" + "strings" + "time" + + "github.com/google/uuid" + + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/config" + "github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" +) + +// Config is the parsed shape of the operational-config row +// `transport.postmark`. The transport reads it on every operation, +// so live updates via `helix-org config set` apply immediately. +type Config struct { + Token string `json:"token"` + Inbound string `json:"inbound"` + From string `json:"from"` + + // DisableReplyTo, when true, skips the Reply-To header on outbound + // sends. Useful while a Postmark account is in pending-approval + // state — Postmark counts Reply-To as a recipient for its "all + // recipients must share the From domain" restriction, so a + // Reply-To at inbound.postmarkapp.com (the no-domain hash form) + // causes outbound sends to a winder.ai From to be blocked. With + // Reply-To off, replies go to whatever mail client default replies + // route to (usually From), so customer→Sam threading via helix is + // degraded until the account is approved. + DisableReplyTo bool `json:"disable_reply_to,omitempty"` +} + +// Validate checks the Config has the fields the transport needs. 
+// Loose: the token is opaque to us, the inbound is checked for an @, +// the from is checked for an @. Strict shape validation is the CLI's +// concern (registry schema). +func (c Config) Validate() error { + if c.Token == "" { + return errors.New("token is empty") + } + if !strings.Contains(c.Inbound, "@") { + return fmt.Errorf("inbound %q is not an email address", c.Inbound) + } + if !strings.Contains(c.From, "@") { + return fmt.Errorf("from %q is not an email address", c.From) + } + return nil +} + +// AliasAddress composes the full inbound address for a given alias. +// "abc123@inbound.postmarkapp.com" + "sam" → "abc123+sam@inbound.postmarkapp.com". +func (c Config) AliasAddress(alias string) string { + at := strings.Index(c.Inbound, "@") + if at < 0 { + return alias + "@" + c.Inbound // domain form fallback + } + return c.Inbound[:at] + "+" + alias + c.Inbound[at:] +} + +// Dispatcher is the subset of the dispatcher this transport needs: +// fan an Event out to subscribed AI Workers after appending it. +// Defining the interface here keeps the import edge one-directional. +type Dispatcher interface { + Dispatch(ctx context.Context, event domain.Event) +} + +// Transport is the long-lived email transport. One instance per +// running helix-org server. Both the inbound HTTP handler and the +// outbound emitter are methods on it. +type Transport struct { + registry *config.Registry + store *store.Store + broadcaster *broadcast.Broadcaster + dispatcher Dispatcher + logger *slog.Logger + client *http.Client + sendURL string +} + +// DefaultSendURL is Postmark's transactional /email endpoint. New +// constructs Transports with this; tests use SetSendURL to redirect. +const DefaultSendURL = "https://api.postmarkapp.com/email" + +// New returns a Transport bound to the given config registry, store, +// broadcaster (for waking long-poll observers on inbound) and +// dispatcher (for activating subscribed Workers on inbound). 
+// dispatcher and broadcaster may be nil for tests that don't exercise +// those paths. +func New(reg *config.Registry, st *store.Store, bc *broadcast.Broadcaster, d Dispatcher, logger *slog.Logger) *Transport { + return &Transport{ + registry: reg, + store: st, + broadcaster: bc, + dispatcher: d, + logger: logger, + client: &http.Client{Timeout: 10 * time.Second}, + sendURL: DefaultSendURL, + } +} + +// SetHTTPClient replaces the HTTP client used to call Postmark's API. +// Tests use this to substitute an httptest.Server. +func (t *Transport) SetHTTPClient(c *http.Client) { t.client = c } + +// SetSendURL replaces the Postmark /email endpoint this Transport +// posts to. Intended for tests that point at a fake httptest.Server. +func (t *Transport) SetSendURL(u string) { t.sendURL = u } + +func (t *Transport) config(ctx context.Context) (Config, error) { + var c Config + if err := t.registry.GetObject(ctx, "transport.postmark", &c); err != nil { + return Config{}, err + } + if err := c.Validate(); err != nil { + return Config{}, fmt.Errorf("transport.postmark: %w", err) + } + return c, nil +} + +// findStreamByAlias scans email-transport streams for one whose alias +// matches. With small N this linear scan is fine; if installations +// ever grow many email streams a denormalised alias column on the +// streams table is the obvious follow-on. +func (t *Transport) findStreamByAlias(ctx context.Context, alias string) (domain.Stream, error) { + streams, err := t.store.Streams.List(ctx) + if err != nil { + return domain.Stream{}, fmt.Errorf("list streams: %w", err) + } + for _, s := range streams { + if s.Transport.Kind != domain.TransportEmail { + continue + } + cfg, err := s.Transport.EmailConfig() + if err != nil || cfg.Alias != alias { + continue + } + return s, nil + } + return domain.Stream{}, fmt.Errorf("no email stream with alias %q", alias) +} + +// parseAlias extracts the "+alias" suffix from a recipient local-part. 
// Returns "" if the address has no "+suffix" or no "@".
func parseAlias(recipient string) string {
	// Everything before the first "@" is the local part.
	localPart, _, hasAt := strings.Cut(recipient, "@")
	if !hasAt {
		return ""
	}
	// The alias is whatever follows the first "+" of the local part.
	if _, alias, hasPlus := strings.Cut(localPart, "+"); hasPlus {
		return alias
	}
	return ""
}

// ---------- Inbound ----------

// inboundPayload holds the fields of Postmark's inbound webhook JSON
// that this transport decodes. Extra is tagged `json:"-"`, so it is
// never filled by JSON decoding. Postmark's field reference:
// https://postmarkapp.com/developer/webhooks/inbound-webhook
type inboundPayload struct {
	From              string              `json:"From"`
	OriginalRecipient string              `json:"OriginalRecipient"`
	To                string              `json:"To"`
	Subject           string              `json:"Subject"`
	MessageID         string              `json:"MessageID"`
	TextBody          string              `json:"TextBody"`
	HtmlBody          string              `json:"HtmlBody"` //nolint:stylecheck // Postmark API uses this casing
	Headers           []inboundHeader     `json:"Headers"`
	Attachments       []inboundAttachment `json:"Attachments"`
	Date              string              `json:"Date"`
	MessageStream     string              `json:"MessageStream"`
	Extra             map[string]any      `json:"-"`
}

// inboundHeader is one name/value pair from the payload's Headers list.
type inboundHeader struct {
	Name  string `json:"Name"`
	Value string `json:"Value"`
}

// inboundAttachment carries attachment metadata only.
type inboundAttachment struct {
	Name        string `json:"Name"`
	ContentType string `json:"ContentType"`
	// Postmark inlines attachments as base64 in `Content`. We don't
	// take ownership of the bytes — for now we record the metadata
	// and a pointer to wherever the bytes live (currently nowhere
	// addressable; this is a known follow-on).
	ContentLength int64 `json:"ContentLength"`
}

// header looks name up in p.Headers, ignoring case, and returns the
// value of the first match. It returns "" when no header matches.
func (p inboundPayload) header(name string) string {
	for i := range p.Headers {
		if hdr := p.Headers[i]; strings.EqualFold(hdr.Name, name) {
			return hdr.Value
		}
	}
	return ""
}

// HandleInbound is the http.Handler Postmark POSTs parsed inbound
// mail to.
It extracts the alias from the recipient address, looks +// up the matching Stream, builds a Message envelope, and appends it. +// Returns 204 on success (Postmark needs a 2xx to mark the inbound +// delivered) and 4xx/5xx on errors. +func (t *Transport) HandleInbound() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 25<<20)) // 25MiB cap + if err != nil { + http.Error(w, "read body: "+err.Error(), http.StatusBadRequest) + return + } + var p inboundPayload + if err := json.Unmarshal(body, &p); err != nil { + http.Error(w, "parse postmark json: "+err.Error(), http.StatusBadRequest) + return + } + recipient := p.OriginalRecipient + if recipient == "" { + recipient = p.To + } + alias := parseAlias(recipient) + if alias == "" { + t.logger.Warn("postmark.inbound: no alias", "recipient", recipient) + http.Error(w, "no alias on recipient", http.StatusBadRequest) + return + } + stream, err := t.findStreamByAlias(r.Context(), alias) + if err != nil { + t.logger.Warn("postmark.inbound: stream lookup", "alias", alias, "err", err) + http.Error(w, err.Error(), http.StatusNotFound) + return + } + + msg := domain.Message{ + From: p.From, + To: []string{recipient}, + Subject: p.Subject, + Body: p.TextBody, + MessageID: p.MessageID, + InReplyTo: p.header("In-Reply-To"), + ThreadID: threadIDFromHeaders(p, p.MessageID), + } + if msg.Body == "" && p.HtmlBody != "" { + msg.Body = p.HtmlBody + msg.BodyContentType = "text/html" + } + for _, a := range p.Attachments { + msg.Attachments = append(msg.Attachments, domain.Attachment{ + Filename: a.Name, + ContentType: a.ContentType, + SizeBytes: a.ContentLength, + }) + } + + event, err := domain.NewMessageEvent( + domain.EventID("e-"+uuid.NewString()), + stream.ID, + "", // system-emitted: external sender, no helix Worker source + 
msg, + time.Now().UTC(), + ) + if err != nil { + http.Error(w, "build event: "+err.Error(), http.StatusBadRequest) + return + } + if err := t.store.Events.Append(r.Context(), event); err != nil { + t.logger.Error("postmark.inbound: append", "stream", stream.ID, "err", err) + http.Error(w, "append event", http.StatusInternalServerError) + return + } + if t.broadcaster != nil { + t.broadcaster.Notify(stream.ID) + } + if t.dispatcher != nil { + t.dispatcher.Dispatch(r.Context(), event) + } + t.logger.Info("postmark.inbound", "stream", stream.ID, "alias", alias, "from", p.From, "subject", p.Subject) + + w.WriteHeader(http.StatusNoContent) + }) +} + +// threadIDFromHeaders picks a stable conversation identifier from +// References (root) or falls back to the message's own ID. Mail +// clients honour Message-ID / In-Reply-To consistently; ThreadID is +// helix's normalised handle for the conversation. +func threadIDFromHeaders(p inboundPayload, fallback string) string { + refs := p.header("References") + if refs == "" { + // First reply also lacks References; In-Reply-To is the seed. + return p.header("In-Reply-To") + } + // References is space-separated; the root of the thread is the first. + if i := strings.Index(refs, " "); i > 0 { + return refs[:i] + } + if refs != "" { + return refs + } + return fallback +} + +// ---------- Outbound ---------- + +type sendPayload struct { + From string `json:"From"` + To string `json:"To"` + ReplyTo string `json:"ReplyTo,omitempty"` + Subject string `json:"Subject"` + TextBody string `json:"TextBody,omitempty"` + HtmlBody string `json:"HtmlBody,omitempty"` //nolint:stylecheck // Postmark API uses this casing + Headers []sendHeader `json:"Headers,omitempty"` + MessageID string `json:"MessageID,omitempty"` + Metadata map[string]any `json:"Metadata,omitempty"` +} + +type sendHeader struct { + Name string `json:"Name"` + Value string `json:"Value"` +} + +// Emit renders an Event's Message envelope to a Postmark /email API +// call. 
Idempotent failures (network, 5xx) are returned to the caller +// (the dispatcher's emit hook) which logs and drops; the underlying +// append has already succeeded so the system stays consistent. +// +// Returns nil on 2xx, an error otherwise. +func (t *Transport) Emit(ctx context.Context, e domain.Event) error { + msg, err := e.Message() + if err != nil { + return fmt.Errorf("parse event message: %w", err) + } + if len(msg.To) == 0 { + return errors.New("no recipient (Message.To is empty)") + } + cfg, err := t.config(ctx) + if err != nil { + return err + } + stream, err := t.store.Streams.Get(ctx, e.StreamID) + if err != nil { + return fmt.Errorf("get stream: %w", err) + } + streamCfg, err := stream.Transport.EmailConfig() + if err != nil { + return fmt.Errorf("stream email config: %w", err) + } + + from := cfg.From + if msg.From != "" && strings.Contains(msg.From, "@") && !strings.HasPrefix(msg.From, "w-") { + // Allow per-message From override only if it looks like a real + // address (not a WorkerID like "w-sam"). Sender Signatures must + // be verified; this trusts the role to send valid addresses. 
+ from = msg.From + } + payload := sendPayload{ + From: from, + To: strings.Join(msg.To, ", "), + Subject: msg.Subject, + TextBody: msg.Body, + } + if !cfg.DisableReplyTo { + payload.ReplyTo = cfg.AliasAddress(streamCfg.Alias) + } + if msg.BodyContentType == "text/html" { + payload.TextBody = "" + payload.HtmlBody = msg.Body + } + if msg.InReplyTo != "" { + references := msg.ThreadID + if references == "" { + references = msg.InReplyTo + } + payload.Headers = []sendHeader{ + {Name: "In-Reply-To", Value: msg.InReplyTo}, + {Name: "References", Value: references}, + } + } + + body, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal send payload: %w", err) + } + req, err := http.NewRequestWithContext(ctx, http.MethodPost, t.sendURL, bytes.NewReader(body)) + if err != nil { + return fmt.Errorf("build send request: %w", err) + } + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Postmark-Server-Token", cfg.Token) + + resp, err := t.client.Do(req) + if err != nil { + return fmt.Errorf("postmark send: %w", err) + } + defer func() { _ = resp.Body.Close() }() + respBody, _ := io.ReadAll(resp.Body) + + if resp.StatusCode >= 400 { + return fmt.Errorf("postmark %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody))) + } + t.logger.Info("postmark.emit", "stream", e.StreamID, "to", payload.To, "subject", payload.Subject, "status", resp.StatusCode) + return nil +} diff --git a/helix-org/transports/postmark/postmark_test.go b/helix-org/transports/postmark/postmark_test.go new file mode 100644 index 0000000000..f357868d2e --- /dev/null +++ b/helix-org/transports/postmark/postmark_test.go @@ -0,0 +1,414 @@ +package postmark_test + +import ( + "context" + "encoding/json" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "sync" + "testing" + "time" + + "github.com/helixml/helix-org/broadcast" + "github.com/helixml/helix-org/config" + 
"github.com/helixml/helix-org/domain" + "github.com/helixml/helix-org/store" + "github.com/helixml/helix-org/store/sqlite" + "github.com/helixml/helix-org/transports/postmark" +) + +// recordingDispatcher captures Dispatch calls for assertion. +type recordingDispatcher struct { + mu sync.Mutex + events []domain.Event +} + +func (d *recordingDispatcher) Dispatch(_ context.Context, e domain.Event) { + d.mu.Lock() + defer d.mu.Unlock() + d.events = append(d.events, e) +} + +func (d *recordingDispatcher) snapshot() []domain.Event { + d.mu.Lock() + defer d.mu.Unlock() + out := make([]domain.Event, len(d.events)) + copy(out, d.events) + return out +} + +func newTestTransport(t *testing.T) (*postmark.Transport, *store.Store, *recordingDispatcher, *broadcast.Broadcaster, *config.Registry) { + t.Helper() + st, err := sqlite.Open(":memory:") + if err != nil { + t.Fatalf("open store: %v", err) + } + bc := broadcast.New() + rd := &recordingDispatcher{} + reg := config.New(st.Configs) + reg.Register(config.Spec{ + Key: "transport.postmark", + Type: config.TypeObject, + Secrets: []string{"token"}, + }) + tp := postmark.New(reg, st, bc, rd, slog.New(slog.NewTextHandler(io.Discard, nil))) + return tp, st, rd, bc, reg +} + +func setPostmarkConfig(t *testing.T, reg *config.Registry, token, inbound, from string) { + t.Helper() + val, _ := json.Marshal(map[string]string{"token": token, "inbound": inbound, "from": from}) + if err := reg.Set(context.Background(), "transport.postmark", string(val), ""); err != nil { + t.Fatalf("set config: %v", err) + } +} + +func seedEmailStream(t *testing.T, st *store.Store, id domain.StreamID, alias string) domain.Stream { + t.Helper() + cfg, _ := json.Marshal(domain.EmailConfig{Alias: alias}) + stream, err := domain.NewStream(id, string(id), "", "w-owner", time.Now().UTC(), + domain.Transport{Kind: domain.TransportEmail, Config: cfg}) + if err != nil { + t.Fatalf("new stream: %v", err) + } + if err := st.Streams.Create(context.Background(), stream); 
err != nil { + t.Fatalf("create stream: %v", err) + } + return stream +} + +// TestInboundHappyPath: a Postmark inbound POST with `+sam` alias +// lands as an Event on the s-support stream, with all envelope +// fields populated and the dispatcher fired. +func TestInboundHappyPath(t *testing.T) { + t.Parallel() + tp, st, rd, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "tok", "abc123@inbound.postmarkapp.com", "you@gmail.com") + seedEmailStream(t, st, "s-support", "sam") + + srv := httptest.NewServer(tp.HandleInbound()) + t.Cleanup(srv.Close) + + payload := map[string]any{ + "From": "alice@example.com", + "OriginalRecipient": "abc123+sam@inbound.postmarkapp.com", + "To": "abc123+sam@inbound.postmarkapp.com", + "Subject": "Webhook stream isn't firing", + "MessageID": "", + "TextBody": "I've got a stream set up but POSTs don't wake the worker.", + "Headers": []map[string]string{ + {"Name": "In-Reply-To", "Value": ""}, + }, + } + body, _ := json.Marshal(payload) + resp, err := http.Post(srv.URL, "application/json", strings.NewReader(string(body))) + if err != nil { + t.Fatalf("POST: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusNoContent { + got, _ := io.ReadAll(resp.Body) + t.Fatalf("status = %d, body = %q", resp.StatusCode, got) + } + + events, _ := st.Events.ListForStream(context.Background(), "s-support", 10) + if len(events) != 1 { + t.Fatalf("events = %d, want 1", len(events)) + } + msg, err := events[0].Message() + if err != nil { + t.Fatalf("parse message: %v", err) + } + if msg.From != "alice@example.com" { + t.Fatalf("From = %q", msg.From) + } + if msg.Subject != "Webhook stream isn't firing" { + t.Fatalf("Subject = %q", msg.Subject) + } + if !strings.Contains(msg.Body, "POSTs don't wake the worker") { + t.Fatalf("Body = %q", msg.Body) + } + if msg.MessageID != "" { + t.Fatalf("MessageID = %q", msg.MessageID) + } + if events[0].Source != "" { + t.Fatalf("Source should be empty for inbound webhook 
events, got %q", events[0].Source) + } + if len(rd.snapshot()) != 1 { + t.Fatalf("dispatcher fired %d times, want 1", len(rd.snapshot())) + } +} + +func TestInboundNoAliasReturns400(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "tok", "abc123@inbound.postmarkapp.com", "you@gmail.com") + seedEmailStream(t, st, "s-support", "sam") + srv := httptest.NewServer(tp.HandleInbound()) + t.Cleanup(srv.Close) + + body, _ := json.Marshal(map[string]any{ + "From": "alice@example.com", + "OriginalRecipient": "abc123@inbound.postmarkapp.com", // no +alias + "Subject": "...", + "TextBody": "...", + }) + resp, _ := http.Post(srv.URL, "application/json", strings.NewReader(string(body))) + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("status = %d, want 400", resp.StatusCode) + } +} + +func TestInboundUnknownAliasReturns404(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "tok", "abc123@inbound.postmarkapp.com", "you@gmail.com") + seedEmailStream(t, st, "s-support", "sam") // alias=sam exists + srv := httptest.NewServer(tp.HandleInbound()) + t.Cleanup(srv.Close) + + body, _ := json.Marshal(map[string]any{ + "From": "alice@example.com", + "OriginalRecipient": "abc123+marketing@inbound.postmarkapp.com", // alias=marketing missing + "Subject": "...", + "TextBody": "...", + }) + resp, _ := http.Post(srv.URL, "application/json", strings.NewReader(string(body))) + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusNotFound { + t.Fatalf("status = %d, want 404", resp.StatusCode) + } +} + +func TestInboundMethodNotAllowed(t *testing.T) { + t.Parallel() + tp, _, _, _, _ := newTestTransport(t) + srv := httptest.NewServer(tp.HandleInbound()) + t.Cleanup(srv.Close) + + resp, _ := http.Get(srv.URL) + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusMethodNotAllowed { + t.Fatalf("status 
= %d, want 405", resp.StatusCode) + } +} + +func TestInboundReplyPopulatesInReplyTo(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "tok", "abc123@inbound.postmarkapp.com", "you@gmail.com") + seedEmailStream(t, st, "s-support", "sam") + srv := httptest.NewServer(tp.HandleInbound()) + t.Cleanup(srv.Close) + + body, _ := json.Marshal(map[string]any{ + "From": "alice@example.com", + "OriginalRecipient": "abc123+sam@inbound.postmarkapp.com", + "Subject": "Re: Webhook stream isn't firing", + "MessageID": "", + "TextBody": "tried that, still broken", + "Headers": []map[string]string{ + {"Name": "In-Reply-To", "Value": ""}, + {"Name": "References", "Value": " "}, + }, + }) + resp, _ := http.Post(srv.URL, "application/json", strings.NewReader(string(body))) + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusNoContent { + t.Fatalf("status = %d", resp.StatusCode) + } + + events, _ := st.Events.ListForStream(context.Background(), "s-support", 10) + msg, _ := events[0].Message() + if msg.InReplyTo != "" { + t.Fatalf("InReplyTo = %q", msg.InReplyTo) + } + // References has multiple IDs space-separated; ThreadID = root. + if msg.ThreadID != "" { + t.Fatalf("ThreadID = %q, want ", msg.ThreadID) + } +} + +// fakePostmark records the inbound /email POSTs (the reverse direction +// from the transport's perspective — we *send* outbound, Postmark +// receives). Tests use this to assert outbound payload shape without +// hitting the real API. 
+type fakePostmark struct { + mu sync.Mutex + requests []fakePostmarkRequest + status int +} +type fakePostmarkRequest struct { + headers http.Header + payload map[string]any +} + +func newFakePostmark(t *testing.T) (*httptest.Server, *fakePostmark) { + t.Helper() + fp := &fakePostmark{status: http.StatusOK} + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + _ = r.Body.Close() + var p map[string]any + _ = json.Unmarshal(body, &p) + fp.mu.Lock() + fp.requests = append(fp.requests, fakePostmarkRequest{headers: r.Header.Clone(), payload: p}) + s := fp.status + fp.mu.Unlock() + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(s) + _, _ = w.Write([]byte(`{"ErrorCode":0,"Message":"OK","MessageID":"abc-fake"}`)) + })) + t.Cleanup(srv.Close) + return srv, fp +} + +func (fp *fakePostmark) snapshot() []fakePostmarkRequest { + fp.mu.Lock() + defer fp.mu.Unlock() + out := make([]fakePostmarkRequest, len(fp.requests)) + copy(out, fp.requests) + return out +} + +// TestEmitOutbound: a Message published to an email stream POSTs to +// Postmark with all the right fields — From from server config, +// To/Subject/Body from the Message, ReplyTo derived from alias, +// InReplyTo / References headers when threading. 
+func TestEmitOutbound(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "secret-token", "abc123@inbound.postmarkapp.com", "you@gmail.com") + stream := seedEmailStream(t, st, "s-support", "sam") + + fakeSrv, fp := newFakePostmark(t) + tp.SetSendURL(fakeSrv.URL) + + msg := domain.Message{ + From: "w-sam", + To: []string{"alice@example.com"}, + Subject: "Re: Webhook question", + Body: "Most webhook flow issues are config or subscription mismatches.", + InReplyTo: "", + ThreadID: "", + } + event, err := domain.NewMessageEvent( + domain.EventID("e-1"), + stream.ID, + "w-sam", + msg, + time.Now().UTC(), + ) + if err != nil { + t.Fatalf("new event: %v", err) + } + + if err := tp.Emit(context.Background(), event); err != nil { + t.Fatalf("Emit: %v", err) + } + + got := fp.snapshot() + if len(got) != 1 { + t.Fatalf("postmark requests = %d, want 1", len(got)) + } + req := got[0] + if h := req.headers.Get("X-Postmark-Server-Token"); h != "secret-token" { + t.Fatalf("token header = %q", h) + } + if req.payload["From"] != "you@gmail.com" { + t.Fatalf("From = %v, want you@gmail.com (server-config from)", req.payload["From"]) + } + if req.payload["To"] != "alice@example.com" { + t.Fatalf("To = %v", req.payload["To"]) + } + if req.payload["ReplyTo"] != "abc123+sam@inbound.postmarkapp.com" { + t.Fatalf("ReplyTo = %v", req.payload["ReplyTo"]) + } + if req.payload["Subject"] != "Re: Webhook question" { + t.Fatalf("Subject = %v", req.payload["Subject"]) + } + if !strings.Contains(req.payload["TextBody"].(string), "Most webhook flow issues") { + t.Fatalf("TextBody = %v", req.payload["TextBody"]) + } + headers, ok := req.payload["Headers"].([]any) + if !ok || len(headers) != 2 { + t.Fatalf("Headers = %v, want 2 entries (In-Reply-To, References)", req.payload["Headers"]) + } +} + +// TestEmitOverridesFromIfRealAddress: when the role's Message.From is +// a real email address (not a WorkerID), use it as the From header +// instead of 
the server-config default. Lets a future "billing" agent +// send From a different verified Sender Signature. +func TestEmitOverridesFromIfRealAddress(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "tok", "abc123@inbound.postmarkapp.com", "default@x.com") + stream := seedEmailStream(t, st, "s-billing", "billing") + + fakeSrv, fp := newFakePostmark(t) + tp.SetSendURL(fakeSrv.URL) + + msg := domain.Message{ + From: "billing@x.com", + To: []string{"alice@example.com"}, + Body: "...", + } + event, _ := domain.NewMessageEvent("e-1", stream.ID, "w-billing", msg, time.Now().UTC()) + if err := tp.Emit(context.Background(), event); err != nil { + t.Fatalf("Emit: %v", err) + } + got := fp.snapshot() + if got[0].payload["From"] != "billing@x.com" { + t.Fatalf("From = %v, want override", got[0].payload["From"]) + } +} + +func TestEmitNoRecipient(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "tok", "abc123@inbound.postmarkapp.com", "you@gmail.com") + stream := seedEmailStream(t, st, "s-support", "sam") + + msg := domain.Message{ + Body: "I forgot the recipient", + } + event, _ := domain.NewMessageEvent("e-1", stream.ID, "", msg, time.Now().UTC()) + err := tp.Emit(context.Background(), event) + if err == nil || !strings.Contains(err.Error(), "no recipient") { + t.Fatalf("err = %v", err) + } +} + +func TestEmitPostmarkError(t *testing.T) { + t.Parallel() + tp, st, _, _, reg := newTestTransport(t) + setPostmarkConfig(t, reg, "tok", "abc123@inbound.postmarkapp.com", "you@gmail.com") + stream := seedEmailStream(t, st, "s-support", "sam") + + fakeSrv, fp := newFakePostmark(t) + fp.status = http.StatusUnprocessableEntity + tp.SetSendURL(fakeSrv.URL) + + msg := domain.Message{ + To: []string{"alice@example.com"}, + Body: "...", + } + event, _ := domain.NewMessageEvent("e-1", stream.ID, "w-sam", msg, time.Now().UTC()) + err := tp.Emit(context.Background(), event) + if err == 
nil || !strings.Contains(err.Error(), "postmark 422") { + t.Fatalf("err = %v, want postmark 422", err) + } +} + +func TestAliasAddressHashForm(t *testing.T) { + t.Parallel() + c := postmark.Config{Inbound: "abc123@inbound.postmarkapp.com"} + if got := c.AliasAddress("sam"); got != "abc123+sam@inbound.postmarkapp.com" { + t.Fatalf("got %q", got) + } +}