Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## 0.0.11-beta.2 — 2026-05-21

### Features
- Add `failproofai audit` command — retrospectively scan past agent transcripts across all 7 CLIs and report wasteful/risky behavior via the 39 builtin policies + 8 new audit-only detectors (`redundant-cd-cwd`, `prefer-edit-over-read-cat`, `prefer-edit-over-sed-awk`, `prefer-write-over-heredoc`, `sleep-polling-loop`, `find-from-root`, `git-commit-no-verify`, `reread-after-edit`). Outputs ANSI table + markdown report; supports `--cli`, `--project`, `--since`, `--policy`, `--limit`, `--show-examples`, `--report`, `--no-report`, `--json`, `--no-cache`. Per-transcript cache at `~/.failproofai/cache/audit/` auto-invalidates on policy/detector code changes (#377).

### Breaking
- Remove the undocumented cloud auth + event relay subsystem ahead of a from-scratch redesign. Deletes `src/auth/` (OAuth 2.0 device-flow login against `api.befailproof.ai`, `~/.failproofai/auth.json` token store) and `src/relay/` (WebSocket event relay daemon, sanitized JSONL queue at `~/.failproofai/cache/server-queue/`, PID tracking). Strips the `failproofai login` / `logout` / `whoami` / `relay start|stop|status` / `sync` subcommands and the internal `--relay-daemon` mode from `bin/failproofai.mjs`, along with their `--help` entries and "did you mean" suggestions. Removes the fire-and-forget `appendToServerQueue` + `ensureRelayRunning` calls from `src/hooks/handler.ts` so hook evaluation no longer enqueues events or lazy-spawns a daemon. The whole subsystem had zero references in `README.md`, `docs/`, `examples/`, or `__tests__/`, and only had internal cross-imports — `tsc`, `eslint`, `vitest` (1623 tests), and the `bun run build` bundles all stay green. Users who ran `failproofai login` should also wipe `~/.failproofai/{auth.json,cache/server-queue,relay.pid}` and stop any running relay daemon by hand; new auth/cloud surface will land in a follow-up.

Expand Down
166 changes: 166 additions & 0 deletions __tests__/audit/detectors.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// @vitest-environment node
import { describe, it, expect } from "vitest";
import type { NormalizedToolEvent } from "../../src/audit/types";
import { redundantCdCwd } from "../../src/audit/detectors/redundant-cd-cwd";
import { preferEditOverReadCat } from "../../src/audit/detectors/prefer-edit-over-read-cat";
import { preferEditOverSedAwk } from "../../src/audit/detectors/prefer-edit-over-sed-awk";
import { preferWriteOverHeredoc } from "../../src/audit/detectors/prefer-write-over-heredoc";
import { sleepPollingLoop } from "../../src/audit/detectors/sleep-polling-loop";
import { findFromRoot } from "../../src/audit/detectors/find-from-root";
import { gitCommitNoVerify } from "../../src/audit/detectors/git-commit-no-verify";
import { rereadAfterEdit } from "../../src/audit/detectors/reread-after-edit";

/**
 * Fixture factory: wraps a shell command in a Claude `Bash` tool event.
 *
 * @param cmd - Shell command stored under `toolInput.command`.
 * @param cwd - Working directory recorded on the event (defaults to `/home/u/proj`).
 * @returns An event whose session, transcript path and timestamp are fixed constants.
 */
function bash(cmd: string, cwd = "/home/u/proj"): NormalizedToolEvent {
  // Normalized and raw tool names are identical for this fixture.
  const bashTool = "Bash";
  const event: NormalizedToolEvent = {
    cli: "claude",
    sessionId: "sess-1",
    transcriptPath: "/tmp/t.jsonl",
    cwd: cwd,
    timestamp: "2026-05-21T00:00:00.000Z",
    toolName: bashTool,
    rawToolName: bashTool,
    toolInput: { command: cmd },
  };
  return event;
}

/**
 * Fixture factory for an arbitrary tool call (e.g. `Edit`, `Read`, `Write`).
 *
 * @param name - Used as both the normalized and the raw tool name.
 * @param input - Passed through untouched as the event's `toolInput`.
 * @returns An event with the fixed cwd `/home/u/proj` and constant session metadata.
 */
function tool(name: string, input: Record<string, unknown>): NormalizedToolEvent {
  const event: NormalizedToolEvent = {
    cli: "claude",
    sessionId: "sess-1",
    transcriptPath: "/tmp/t.jsonl",
    cwd: "/home/u/proj",
    timestamp: "2026-05-21T00:00:00.000Z",
    toolName: name,
    rawToolName: name,
    toolInput: input,
  };
  return event;
}

describe("redundant-cd-cwd", () => {
  // Each call builds a Bash event with the helper's default cwd (/home/u/proj)
  // and hands the detector a fresh, empty state object.
  const run = (command: string) => redundantCdCwd.detect(bash(command), {});

  it("matches `cd <cwd> && cmd`", () => {
    const hit = run("cd /home/u/proj && pnpm test");
    // The reported example is expected to contain the offending command.
    expect(hit?.example).toContain("cd /home/u/proj && pnpm test");
  });

  it("does not match cd to a different path", () => {
    const hit = run("cd /tmp && ls");
    expect(hit).toBeNull();
  });

  it("does not match bare cmd without cd", () => {
    const hit = run("pnpm test");
    expect(hit).toBeNull();
  });
});

describe("prefer-edit-over-read-cat", () => {
  // Runs the detector on a Bash event with a fresh, empty state object.
  const run = (command: string) => preferEditOverReadCat.detect(bash(command), {});

  it("matches `cat foo.ts`", () => {
    // The example reported for a hit is expected to be the command verbatim.
    expect(run("cat src/foo.ts")?.example).toBe("cat src/foo.ts");
  });
  it("matches `head -50 bar.py`", () => {
    expect(run("head -50 bar.py")).not.toBeNull();
  });
  it("does not match `cat .env`", () => {
    expect(run("cat .env")).toBeNull();
  });
  it("does not match piped `cat`", () => {
    expect(run("cat foo.ts | wc -l")).toBeNull();
  });
  // Title names the exact command under test so a failure report is not misleading.
  it("does not match `cat foo.ts > /tmp/out`", () => {
    expect(run("cat foo.ts > /tmp/out")).toBeNull();
  });
  it("does not match `cat unknownext`", () => {
    // `README` has no file extension at all.
    expect(run("cat README")).toBeNull();
  });
});

describe("prefer-edit-over-sed-awk", () => {
  // [test title, shell command] pairs the detector is expected to flag.
  const flagged: [string, string][] = [
    ["matches `sed -i`", "sed -i 's/foo/bar/g' file.ts"],
    ["matches `awk '...' file > out`", "awk '{print $1}' file > out"],
  ];

  for (const [title, command] of flagged) {
    it(title, () => {
      const hit = preferEditOverSedAwk.detect(bash(command), {});
      expect(hit).not.toBeNull();
    });
  }

  it("does not match `sed 's/x/y/'` without -i", () => {
    // sed used as a stream filter in a pipeline, with no in-place flag.
    const hit = preferEditOverSedAwk.detect(bash("echo x | sed 's/x/y/'"), {});
    expect(hit).toBeNull();
  });
});

describe("prefer-write-over-heredoc", () => {
  // Runs the detector on a Bash event with a fresh, empty state object.
  const run = (command: string) => preferWriteOverHeredoc.detect(bash(command), {});

  it("matches `cat <<EOF > file`", () => {
    const heredocToFile = "cat <<'EOF' > out.md\nhello\nEOF";
    expect(run(heredocToFile)).not.toBeNull();
  });

  it("does not match `cat <<EOF` inside `$()`", () => {
    // Heredoc feeding a command substitution (commit message), not a file redirect.
    const heredocInSubshell = `git commit -m "$(cat <<'EOF'\nfeat\nEOF\n)"`;
    expect(run(heredocInSubshell)).toBeNull();
  });

  it("matches `echo \"multi\\nline\" > file`", () => {
    // The `\n` here is a real newline inside the echoed string.
    const multilineEcho = 'echo "a\nb" > out';
    expect(run(multilineEcho)).not.toBeNull();
  });
});

describe("sleep-polling-loop", () => {
  // [test title, shell command] pairs the detector is expected to flag.
  const flagged: [string, string][] = [
    ["matches `sleep 60`", "sleep 60"],
    ["matches `sleep 5m`", "sleep 5m"],
    ["matches while-sleep loop", "while true; do echo x; sleep 5; done"],
  ];

  for (const [title, command] of flagged) {
    it(title, () => {
      const hit = sleepPollingLoop.detect(bash(command), {});
      expect(hit).not.toBeNull();
    });
  }

  it("does not match `sleep 1`", () => {
    // A standalone one-second sleep is expected to pass without a hit.
    const hit = sleepPollingLoop.detect(bash("sleep 1"), {});
    expect(hit).toBeNull();
  });
});

describe("find-from-root", () => {
  // Runs the detector on a Bash event with a fresh, empty state object.
  const run = (command: string) => findFromRoot.detect(bash(command), {});

  // Absolute search roots: expected to be flagged.
  it("matches `find /`", () => {
    const hit = run("find / -name '*.ts'");
    expect(hit).not.toBeNull();
  });
  it("matches `find /home`", () => {
    const hit = run("find /home -name foo");
    expect(hit).not.toBeNull();
  });

  // Relative search roots: expected to pass.
  it("does not match `find . -name foo`", () => {
    const hit = run("find . -name foo");
    expect(hit).toBeNull();
  });
  it("does not match `find src`", () => {
    const hit = run("find src -name foo");
    expect(hit).toBeNull();
  });
});

describe("git-commit-no-verify", () => {
  // [test title, shell command] pairs covering the long and short flag spellings.
  const flagged: [string, string][] = [
    ["matches `git commit --no-verify`", "git commit --no-verify -m foo"],
    ["matches short `git commit -n`", "git commit -n -m foo"],
  ];

  for (const [title, command] of flagged) {
    it(title, () => {
      const hit = gitCommitNoVerify.detect(bash(command), {});
      expect(hit).not.toBeNull();
    });
  }

  it("does not match plain `git commit -m`", () => {
    const hit = gitCommitNoVerify.detect(bash("git commit -m foo"), {});
    expect(hit).toBeNull();
  });
});

describe("reread-after-edit", () => {
  // Path shared by every scenario; the detector is fed a sequence of events
  // against ONE state object per test, so earlier calls influence later ones.
  const target = "/a/b.ts";
  const onTarget = (toolName: string) => tool(toolName, { file_path: target });

  it("matches Read of file just Edited", () => {
    const state = {};
    const afterEdit = rereadAfterEdit.detect(onTarget("Edit"), state);
    // The Edit itself must not be reported.
    expect(afterEdit).toBeNull();
    const hit = rereadAfterEdit.detect(onTarget("Read"), state);
    expect(hit?.example).toContain("/a/b.ts");
  });

  it("matches Read after Write", () => {
    const state = {};
    rereadAfterEdit.detect(onTarget("Write"), state);
    const hit = rereadAfterEdit.detect(onTarget("Read"), state);
    expect(hit).not.toBeNull();
  });

  it("does not match Read of a different file", () => {
    const state = {};
    rereadAfterEdit.detect(onTarget("Edit"), state);
    const hit = rereadAfterEdit.detect(tool("Read", { file_path: "/a/other.ts" }), state);
    expect(hit).toBeNull();
  });

  it("decays after window of 5 tool calls", () => {
    const state = {};
    rereadAfterEdit.detect(onTarget("Edit"), state);
    // Six unrelated tool calls between the Edit and the Read.
    let remaining = 6;
    while (remaining > 0) {
      rereadAfterEdit.detect(tool("Bash", { command: "x" }), state);
      remaining -= 1;
    }
    const hit = rereadAfterEdit.detect(onTarget("Read"), state);
    expect(hit).toBeNull();
  });
});
87 changes: 87 additions & 0 deletions __tests__/audit/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// @vitest-environment node
import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { mkdtempSync, writeFileSync, rmSync, mkdirSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { runAudit } from "../../src/audit";
import { resetReplay } from "../../src/audit/replay";

/**
 * Builds a minimal Claude JSONL transcript with four tool-use events:
 * 1. Bash(env) — should trigger protect-env-vars (builtin)
 * 2. Bash(cd <cwd> && pnpm test) — should trigger redundant-cd-cwd (detector)
 * 3. Edit(file_path) — the edit that precedes the re-read
 * 4. Read(file_path) of the same file — should trigger reread-after-edit
 *
 * Each event is an `assistant` line whose `parentUuid` points at the previous
 * line (the first one has `null`). Timestamps are built in UTC, one hour apart
 * starting at 2026-05-21T00:00:00.000Z, so the fixture is byte-identical
 * regardless of the machine's local timezone.
 *
 * @param cwd - Working directory recorded on every line and used in the commands/paths.
 * @param sessionId - Session id recorded on every line.
 * @returns The transcript as newline-joined JSON objects (no trailing newline).
 */
function buildFixtureTranscript(cwd: string, sessionId: string): string {
  const lines: object[] = [];
  let prevUuid: string | null = null;

  // Appends one assistant message carrying a single tool_use content item and
  // links it to the previously pushed line.
  function pushAssistantToolUse(name: string, input: Record<string, unknown>) {
    const index = lines.length;
    const uuid = `uuid-${index}`;
    lines.push({
      type: "assistant",
      uuid,
      parentUuid: prevUuid,
      sessionId,
      cwd,
      // Date.UTC (not the local-time Date constructor) keeps the ISO string
      // independent of the host timezone; the line index is the hour offset.
      timestamp: new Date(Date.UTC(2026, 4, 21, index)).toISOString(),
      message: {
        role: "assistant",
        content: [{ type: "tool_use", id: `tu-${index}`, name, input }],
      },
    });
    prevUuid = uuid;
  }

  pushAssistantToolUse("Bash", { command: "env" });
  pushAssistantToolUse("Bash", { command: `cd ${cwd} && pnpm test` });
  pushAssistantToolUse("Edit", { file_path: `${cwd}/foo.ts`, old_string: "a", new_string: "b" });
  pushAssistantToolUse("Read", { file_path: `${cwd}/foo.ts` });
  return lines.map((l) => JSON.stringify(l)).join("\n");
}

describe("runAudit() end-to-end on a fixture transcript", () => {
  let tmpRoot: string;
  // Value of CLAUDE_PROJECTS_PATH before the suite ran; undefined means "was not set".
  let origEnv: string | undefined;

  beforeAll(() => {
    // Point CLAUDE_PROJECTS_PATH at a throwaway directory for the duration of the suite.
    tmpRoot = mkdtempSync(join(tmpdir(), "failproofai-audit-fixture-"));
    origEnv = process.env.CLAUDE_PROJECTS_PATH;
    process.env.CLAUDE_PROJECTS_PATH = tmpRoot;

    // Create one project with one transcript.
    const projectDir = join(tmpRoot, "-tmp-myproj");
    mkdirSync(projectDir, { recursive: true });
    const sessionId = "11111111-2222-3333-4444-555555555555";
    const transcriptPath = join(projectDir, `${sessionId}.jsonl`);
    const transcriptCwd = "/tmp/myproj";
    writeFileSync(transcriptPath, buildFixtureTranscript(transcriptCwd, sessionId));
    resetReplay();
  });

  afterAll(() => {
    // Compare against undefined rather than truthiness: an originally empty
    // string ("") must be restored, not deleted.
    if (origEnv !== undefined) process.env.CLAUDE_PROJECTS_PATH = origEnv;
    else delete process.env.CLAUDE_PROJECTS_PATH;
    rmSync(tmpRoot, { recursive: true, force: true });
  });

  it("counts builtin + detector hits across the fixture transcript", async () => {
    const result = await runAudit({ clis: ["claude"], noCache: true, noReport: true });
    expect(result.transcripts.scanned).toBeGreaterThanOrEqual(1);

    const names = result.results.map((r) => r.name);
    // Builtin policy hit.
    expect(names.some((n) => n.includes("protect-env-vars"))).toBe(true);
    // Audit-only detector hits.
    expect(names).toContain("redundant-cd-cwd");
    expect(names).toContain("reread-after-edit");
  });

  it("filters by --policy", async () => {
    const result = await runAudit({
      clis: ["claude"],
      noCache: true,
      noReport: true,
      policies: ["redundant-cd-cwd"],
    });
    // With a single policy requested, it must be the only result row.
    expect(result.results.map((r) => r.name)).toEqual(["redundant-cd-cwd"]);
  });
});
52 changes: 52 additions & 0 deletions __tests__/audit/replay.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// @vitest-environment node
import { describe, it, expect, beforeEach } from "vitest";
import { resetReplay, replayEvent } from "../../src/audit/replay";
import type { NormalizedToolEvent } from "../../src/audit/types";

/**
 * Fixture factory: wraps a shell command in a Claude `Bash` tool event.
 *
 * @param command - Shell command stored under `toolInput.command`.
 * @returns An event with the fixed cwd `/home/u/proj` and constant session metadata.
 */
function bash(command: string): NormalizedToolEvent {
  // Normalized and raw tool names are identical for this fixture.
  const bashTool = "Bash";
  const event: NormalizedToolEvent = {
    cli: "claude",
    sessionId: "sess-1",
    transcriptPath: "/tmp/t.jsonl",
    cwd: "/home/u/proj",
    timestamp: "2026-05-21T00:00:00.000Z",
    toolName: bashTool,
    rawToolName: bashTool,
    toolInput: { command: command },
  };
  return event;
}

describe("replay engine", () => {
  // Every test starts from a freshly reset replay engine.
  beforeEach(() => {
    resetReplay();
  });

  it("triggers protect-env-vars on `env`", async () => {
    const hits = await replayEvent(bash("env"));
    const matched = hits.map((h) => h.policyName).some((n) => n.includes("protect-env-vars"));
    expect(matched).toBe(true);
  });

  it("triggers block-force-push on `git push --force` to a non-protected branch", async () => {
    // Push to `feature` (not main/master) so block-push-master doesn't
    // short-circuit before block-force-push gets a chance to fire.
    const hits = await replayEvent(bash("git push --force origin feature"));
    const matched = hits.map((h) => h.policyName).some((n) => n.includes("block-force-push"));
    expect(matched).toBe(true);
  });

  it("does not fire on a plain `ls`", async () => {
    const hits = await replayEvent(bash("ls -la"));
    // Only deny decisions count as "firing" here; other hit kinds are ignored.
    const denials = hits.filter((h) => h.decision === "deny");
    expect(denials).toHaveLength(0);
  });

  it("synthesizes PostToolUse when toolResultText is set", async () => {
    // Fake JWT shape — three dot-separated base64 chunks — to trigger
    // sanitize-jwt on PostToolUse without using a real-looking API-key shape.
    const jwtParts = ["eyJhbGciOiJIUzI1NiJ9", "eyJzdWIiOiJ0ZXN0In0", "test-sig-xyz"];
    const fakeJwt = jwtParts.join(".");
    const withResult = bash("echo token");
    withResult.toolResultText = `Authorization: Bearer ${fakeJwt}`;
    const hits = await replayEvent(withResult);
    const sawPostToolUse = hits.some((h) => h.eventType === "PostToolUse");
    expect(sawPostToolUse).toBe(true);
  });
});
Loading