From 9463b751151add2e2bbbd4ad578ce41d45ea54a7 Mon Sep 17 00:00:00 2001 From: Jayesh Betala Date: Sat, 25 Apr 2026 15:54:01 +0530 Subject: [PATCH] fix(codex): avoid review prompt with --base --- codex/SKILL.md | 8 ++++++-- codex/SKILL.md.tmpl | 8 ++++++-- review/SKILL.md | 2 +- scripts/resolvers/review.ts | 2 +- ship/SKILL.md | 2 +- test/fixtures/golden-ship-claude.md | 2 +- test/fixtures/golden/claude-ship-SKILL.md | 2 +- test/fixtures/golden/factory-ship-SKILL.md | 2 +- test/gen-skill-docs.test.ts | 16 ++++++++++++++++ test/skill-validation.test.ts | 8 ++++++++ 10 files changed, 42 insertions(+), 10 deletions(-) diff --git a/codex/SKILL.md b/codex/SKILL.md index e90ec7e89e..26c4b8a984 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -843,7 +843,9 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" cd "$_REPO_ROOT" # Fix 1: wrap with timeout. 330s (5.5min) is slightly longer than the Bash 300s # so the shell wrapper only fires if Bash's own timeout doesn't. -_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only. + +Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" _CODEX_EXIT=$? if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "330" @@ -861,7 +863,9 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" cd "$_REPO_ROOT" codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only. -focus on security" --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes. + +focus on security" -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` 3. Capture the output. Then parse cost from stderr: diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl index c311fc80b7..77f1bffb9f 100644 --- a/codex/SKILL.md.tmpl +++ b/codex/SKILL.md.tmpl @@ -152,7 +152,9 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" cd "$_REPO_ROOT" # Fix 1: wrap with timeout. 330s (5.5min) is slightly longer than the Bash 300s # so the shell wrapper only fires if Bash's own timeout doesn't. -_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +_gstack_codex_timeout_wrapper 330 codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only. + +Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" _CODEX_EXIT=$? if [ "$_CODEX_EXIT" = "124" ]; then _gstack_codex_log_event "codex_timeout" "330" @@ -170,7 +172,9 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" cd "$_REPO_ROOT" codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. Do NOT modify agents/openai.yaml. Stay focused on repository code only. -focus on security" --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes. + +focus on security" -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` 3. Capture the output. Then parse cost from stderr: diff --git a/review/SKILL.md b/review/SKILL.md index f21a401213..27c60af78b 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -1521,7 +1521,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`: TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index a0f29e1746..ef159df043 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -487,7 +487,7 @@ If \`DIFF_TOTAL >= 200\` AND Codex is available AND \`OLD_CFG\` is NOT \`disable TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "${CODEX_BOUNDARY}Review the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "${CODEX_BOUNDARY}Review the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" \`\`\` Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. Present output under \`CODEX SAYS (code review):\` header. diff --git a/ship/SKILL.md b/ship/SKILL.md index 1030ef9938..68cef7643e 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -2247,7 +2247,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`: TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. diff --git a/test/fixtures/golden-ship-claude.md b/test/fixtures/golden-ship-claude.md index 05fff9871b..29bd21b1b8 100644 --- a/test/fixtures/golden-ship-claude.md +++ b/test/fixtures/golden-ship-claude.md @@ -2050,7 +2050,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`: TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" +codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. diff --git a/test/fixtures/golden/claude-ship-SKILL.md b/test/fixtures/golden/claude-ship-SKILL.md index 1030ef9938..68cef7643e 100644 --- a/test/fixtures/golden/claude-ship-SKILL.md +++ b/test/fixtures/golden/claude-ship-SKILL.md @@ -2247,7 +2247,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`: TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. diff --git a/test/fixtures/golden/factory-ship-SKILL.md b/test/fixtures/golden/factory-ship-SKILL.md index c361b59cba..fa10107271 100644 --- a/test/fixtures/golden/factory-ship-SKILL.md +++ b/test/fixtures/golden/factory-ship-SKILL.md @@ -2238,7 +2238,7 @@ If `DIFF_TOTAL >= 200` AND Codex is available AND `OLD_CFG` is NOT `disabled`: TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } cd "$_REPO_ROOT" -codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .factory/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the diff against the base branch." --base -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" +codex review "IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .factory/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\n\nReview the changes on this branch against the base branch . Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD to see the diff and review only those changes." -c 'model_reasoning_effort="high"' --enable web_search_cached < /dev/null 2>"$TMPERR" ``` Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 4c20343581..6fb4c852e4 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -2674,6 +2674,22 @@ describe('codex commands must not use inline $(git rev-parse --show-toplevel) fo } expect(violations).toEqual([]); }); + + test('codex review commands pass diff scope through prompt, not --base', () => { + const checkedFiles = [ + 'codex/SKILL.md.tmpl', + 'codex/SKILL.md', + 'scripts/resolvers/review.ts', + 'review/SKILL.md', + 'ship/SKILL.md', + ]; + + for (const rel of checkedFiles) { + const content = fs.readFileSync(path.join(ROOT, rel), 'utf-8'); + expect(content).not.toContain('--base -c \'model_reasoning_effort="high"\''); + expect(content).toContain('Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD'); + } + }); }); // ─── Learnings + Confidence Resolver Tests ───────────────────── diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 24e5e8badc..6936d88fce 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -1400,6 +1400,14 @@ describe('Codex skill', () => { expect(content).toContain('codex exec'); }); + test('codex review invocations avoid the prompt plus --base argument shape', () => { + for (const rel of ['codex/SKILL.md', 'review/SKILL.md', 'ship/SKILL.md']) { + const content = fs.readFileSync(path.join(ROOT, rel), 'utf-8'); + expect(content).not.toContain('--base -c \'model_reasoning_effort="high"\''); + expect(content).toContain('Run git diff origin/...HEAD 2>/dev/null || git diff ...HEAD'); + } + }); + test('/review persists a review-log entry for ship readiness', () => { const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8'); expect(content).toContain('"skill":"review"');