diff --git a/apps/openant-cli/cmd/analyze.go b/apps/openant-cli/cmd/analyze.go index 986213b..e9daf80 100644 --- a/apps/openant-cli/cmd/analyze.go +++ b/apps/openant-cli/cmd/analyze.go @@ -66,6 +66,9 @@ func runAnalyze(cmd *cobra.Command, args []string) { if analyzeAnalyzerOutput == "" { analyzeAnalyzerOutput = ctx.scanFile("analyzer_output.json") } + if analyzeAppContext == "" { + analyzeAppContext = ctx.scanFile("application_context.json") + } if analyzeRepoPath == "" { analyzeRepoPath = ctx.RepoPath } diff --git a/apps/openant-cli/cmd/generatecontext.go b/apps/openant-cli/cmd/generatecontext.go new file mode 100644 index 0000000..633610b --- /dev/null +++ b/apps/openant-cli/cmd/generatecontext.go @@ -0,0 +1,243 @@ +package cmd + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/knostic/open-ant-cli/internal/output" + "github.com/knostic/open-ant-cli/internal/python" + "github.com/spf13/cobra" +) + +// promptTimeout is how long the interactive override-mode prompt waits for +// user input before falling back to the default ("use"). This protects +// against indefinite hangs if a TTY is detected but no user is actually +// available to respond (e.g. some CI runners, detached terminals). +const promptTimeout = 30 * time.Second + +var generateContextCmd = &cobra.Command{ + Use: "generate-context [repository-path]", + Short: "Generate application security context for a repository", + Long: `Generate analyzes a repository and produces an application_context.json +file that describes the application type, trust boundaries, intended +behaviors, and patterns that should not be flagged as vulnerabilities. + +This context is automatically used by the analyze and verify commands +to reduce false positives. + +If no repository path is given, the active project is used (see: openant init). + +The command checks for a manual override file (OPENANT.md or OPENANT.json) +in the repository root before falling back to LLM-based generation. 
+ +When an override file is found, you are prompted to choose how to handle it: + use - Use the override file as-is (skip LLM generation) + merge - Feed the override file into the LLM alongside other sources + ignore - Ignore the override and generate from scratch + +Use --override-mode to skip the prompt, or --force as a shortcut for --override-mode=ignore.`, + Args: cobra.MaximumNArgs(1), + Run: runGenerateContext, +} + +var ( + gcOutput string + gcForce bool + gcOverrideMode string + gcShowPrompt bool +) + +// overrideFiles lists manual override filenames checked in the target repo. +var overrideFiles = []string{"OPENANT.md", "OPENANT.json", ".openant.md", ".openant.json"} + +func init() { + generateContextCmd.Flags().StringVarP(&gcOutput, "output", "o", "", "Output path (default: /application_context.json or /application_context.json)") + generateContextCmd.Flags().BoolVar(&gcForce, "force", false, "Force regeneration, ignoring OPENANT.md override files") + generateContextCmd.Flags().StringVar(&gcOverrideMode, "override-mode", "", "How to handle OPENANT.md: use, merge, or ignore (skips interactive prompt)") + generateContextCmd.Flags().BoolVar(&gcShowPrompt, "show-prompt", false, "Include formatted prompt text in output") +} + +func runGenerateContext(cmd *cobra.Command, args []string) { + repoPath, ctx, err := resolveRepoArg(args) + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + + // Apply project defaults + if ctx != nil { + if gcOutput == "" { + gcOutput = ctx.scanFile("application_context.json") + } + } + + // Resolve effective override mode + effectiveMode, err := resolveOverrideMode(repoPath) + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + + rt, err := ensurePython() + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + + // Build Python CLI args + pyArgs := []string{"generate-context", repoPath} + if gcOutput != "" { + pyArgs = append(pyArgs, "--output", gcOutput) + } + if effectiveMode != "" { + 
pyArgs = append(pyArgs, "--override-mode", effectiveMode) + } + if gcShowPrompt { + pyArgs = append(pyArgs, "--show-prompt") + } + + result, err := python.Invoke(rt.Path, pyArgs, "", quiet, requireAPIKey()) + if err != nil { + output.PrintError(err.Error()) + os.Exit(2) + } + + if jsonOutput { + output.PrintJSON(result.Envelope) + } else if result.Envelope.Status == "success" { + if data, ok := result.Envelope.Data.(map[string]any); ok { + printGenerateContextSummary(data) + } + } else { + output.PrintErrors(result.Envelope.Errors) + } + + os.Exit(result.ExitCode) +} + +// resolveOverrideMode determines the effective override mode based on flags +// and interactive prompting. +func resolveOverrideMode(repoPath string) (string, error) { + // --force and --override-mode are mutually exclusive + if gcForce && gcOverrideMode != "" { + return "", fmt.Errorf("--force and --override-mode are mutually exclusive") + } + + // --force is a shortcut for --override-mode=ignore + if gcForce { + return "ignore", nil + } + + // Explicit --override-mode takes precedence + if gcOverrideMode != "" { + mode := strings.ToLower(gcOverrideMode) + if mode != "use" && mode != "merge" && mode != "ignore" { + return "", fmt.Errorf("invalid --override-mode %q: must be use, merge, or ignore", gcOverrideMode) + } + return mode, nil + } + + // No explicit flag — check for override file + overrideFile := findOverrideFile(repoPath) + if overrideFile == "" { + // No override file exists; let Python use default LLM generation + return "", nil + } + + // Override file found — prompt if interactive, else default to "use" + if !isInteractiveTerminal() { + return "use", nil + } + + return promptOverrideMode(overrideFile), nil +} + +// findOverrideFile checks for manual override files in the repo root. +// Returns the filename if found, empty string otherwise. 
+func findOverrideFile(repoPath string) string { + for _, name := range overrideFiles { + path := filepath.Join(repoPath, name) + if info, err := os.Stat(path); err == nil && !info.IsDir() { + return name + } + } + return "" +} + +// isInteractiveTerminal returns true if stdin is a terminal (not piped/CI). +func isInteractiveTerminal() bool { + stat, err := os.Stdin.Stat() + if err != nil { + return false + } + return (stat.Mode() & os.ModeCharDevice) != 0 +} + +// promptOverrideMode shows an interactive prompt for how to handle the override file. +// The prompt times out after promptTimeout and falls back to the default ("use") +// if no input is received, so the CLI can never hang indefinitely waiting on a +// detached or unattended terminal. +func promptOverrideMode(filename string) string { + fmt.Fprintf(os.Stderr, "\nFound manual override: %s\n\n", filename) + fmt.Fprintln(os.Stderr, " [u]se — Use as-is (skip LLM generation)") + fmt.Fprintln(os.Stderr, " [m]erge — Feed into LLM alongside other sources") + fmt.Fprintln(os.Stderr, " [i]gnore — Ignore, generate from scratch") + fmt.Fprintln(os.Stderr, "") + fmt.Fprintf(os.Stderr, "Choice [u/m/i] (default: u, %ds timeout): ", + int(promptTimeout.Seconds())) + + // Read on a goroutine so we can race against a timeout. 
+ type readResult struct { + line string + err error + } + ch := make(chan readResult, 1) + go func() { + reader := bufio.NewReader(os.Stdin) + line, err := reader.ReadString('\n') + ch <- readResult{line: line, err: err} + }() + + var answer string + select { + case res := <-ch: + answer = strings.TrimSpace(strings.ToLower(res.line)) + case <-time.After(promptTimeout): + fmt.Fprintln(os.Stderr, "\nNo response — defaulting to 'use'.") + return "use" + } + + switch answer { + case "m", "merge": + return "merge" + case "i", "ignore": + return "ignore" + default: + // "u", "use", or empty (default) + return "use" + } +} + +func printGenerateContextSummary(data map[string]any) { + output.PrintHeader("Application Context Generated") + if v, ok := data["application_type"].(string); ok { + output.PrintKeyValue("Type", v) + } + if v, ok := data["purpose"].(string); ok { + output.PrintKeyValue("Purpose", v) + } + if v, ok := data["confidence"].(float64); ok { + output.PrintKeyValue("Confidence", fmt.Sprintf("%.0f%%", v*100)) + } + if v, ok := data["source"].(string); ok { + output.PrintKeyValue("Source", v) + } + if v, ok := data["app_context_path"].(string); ok { + output.PrintKeyValue("Output", v) + } + fmt.Println() +} diff --git a/apps/openant-cli/cmd/root.go b/apps/openant-cli/cmd/root.go index 334dc9a..e584308 100644 --- a/apps/openant-cli/cmd/root.go +++ b/apps/openant-cli/cmd/root.go @@ -31,16 +31,17 @@ Stage 1: Detect potential vulnerabilities via code analysis Stage 2: Simulate an attacker to eliminate false positives Commands: - scan Full pipeline: parse → enhance → detect → verify → report - diff Scan only code changed vs a base ref or GitHub PR - parse Extract code units from a repository - enhance Add security context to a parsed dataset - analyze Run Stage 1 vulnerability detection - verify Run Stage 2 attacker simulation - build-output Assemble pipeline_output.json from verified results - dynamic-test Docker-isolated exploit testing - report Generate reports 
from analysis results - config Manage CLI configuration (API key, etc.)`, + scan Full pipeline: parse → enhance → detect → verify → report + diff Scan only code changed vs a base ref or GitHub PR + parse Extract code units from a repository + generate-context Generate application security context + enhance Add security context to a parsed dataset + analyze Run Stage 1 vulnerability detection + verify Run Stage 2 attacker simulation + build-output Assemble pipeline_output.json from verified results + dynamic-test Docker-isolated exploit testing + report Generate reports from analysis results + config Manage CLI configuration (API key, etc.)`, } // Execute adds all child commands to the root command and sets flags appropriately. @@ -82,6 +83,7 @@ func init() { rootCmd.AddCommand(scanCmd) rootCmd.AddCommand(diffCmd) rootCmd.AddCommand(parseCmd) + rootCmd.AddCommand(generateContextCmd) rootCmd.AddCommand(enhanceCmd) rootCmd.AddCommand(analyzeCmd) rootCmd.AddCommand(verifyCmd) diff --git a/apps/openant-cli/cmd/verify.go b/apps/openant-cli/cmd/verify.go index cad9b8a..b486db5 100644 --- a/apps/openant-cli/cmd/verify.go +++ b/apps/openant-cli/cmd/verify.go @@ -61,6 +61,9 @@ func runVerify(cmd *cobra.Command, args []string) { if verifyAnalyzerOutput == "" { verifyAnalyzerOutput = ctx.scanFile("analyzer_output.json") } + if verifyAppContext == "" { + verifyAppContext = ctx.scanFile("application_context.json") + } if verifyRepoPath == "" { verifyRepoPath = ctx.RepoPath } diff --git a/libs/openant-core/CLAUDE.md b/libs/openant-core/CLAUDE.md index 3c61665..56c5754 100644 --- a/libs/openant-core/CLAUDE.md +++ b/libs/openant-core/CLAUDE.md @@ -68,6 +68,13 @@ python -m context.generate_context /path/to/repo --list-types # Show supported **Manual override:** Create `OPENANT.md` or `OPENANT.json` in repo root. See `context/OPENANT_TEMPLATE.md` for format. 
+**Override modes:** When a manual override file is detected, the CLI prompts for how to handle it: +- `use` — Use override as-is, skip LLM (default) +- `merge` — Feed override into LLM alongside other sources +- `ignore` — Ignore override, generate from scratch + +Use `--override-mode <mode>` to skip the prompt, or `--force` as a shortcut for `--override-mode ignore`. + **Unsupported types:** If a repository doesn't match supported types, OpenAnt exits with error code 2 and instructions for creating a manual override. # Autopilot (Autonomous Pipeline) diff --git a/libs/openant-core/CURRENT_IMPLEMENTATION.md b/libs/openant-core/CURRENT_IMPLEMENTATION.md index f2524c3..a10f390 100644 --- a/libs/openant-core/CURRENT_IMPLEMENTATION.md +++ b/libs/openant-core/CURRENT_IMPLEMENTATION.md @@ -227,13 +227,17 @@ Unsupported types (desktop apps, mobile apps, games, embedded systems) are rejec **Usage:** ```bash -# List supported types -python -m context.generate_context --list-types +# Generate context via CLI (recommended) +openant generate-context /path/to/repo -# Generate context for a repository +# Generate context via Python module python -m context.generate_context /path/to/repo -# Context is saved to application_context.json in the dataset directory +# List supported types +python -m context.generate_context --list-types + +# Context is saved to application_context.json in the scan/dataset directory +# analyze and verify auto-discover it when using a project ``` **Generated Context Structure:** @@ -251,6 +255,13 @@ python -m context.generate_context /path/to/repo **Manual Override:** Place `OPENANT.md` or `OPENANT.json` in repo root to provide explicit context. Manual overrides bypass type validation. 
+**Override Modes:** When a manual override file is detected, the CLI prompts for how to handle it: +- `use` — Use override as-is, skip LLM generation (default) +- `merge` — Feed override content into LLM alongside other sources (source="merged") +- `ignore` — Ignore override, generate from scratch + +Use `--override-mode <mode>` to skip the prompt, or `--force` as a shortcut for `--override-mode ignore`. + **Integration:** Context automatically loaded in `experiment.py` and injected into Stage 1 and Stage 2 prompts. **Results on LangChain:** diff --git a/libs/openant-core/DOCUMENTATION.md b/libs/openant-core/DOCUMENTATION.md index 5f1f434..beb1761 100644 --- a/libs/openant-core/DOCUMENTATION.md +++ b/libs/openant-core/DOCUMENTATION.md @@ -221,7 +221,8 @@ For AI assistants working on the code, here are the key source files: | File | Purpose | |------|---------| | `context/application_context.py` | Context detection & formatting | -| `context/generate_context.py` | CLI for context generation | +| `context/generate_context.py` | Python module CLI for context generation | +| `openant/cli.py` (`generate-context`) | Primary CLI command (`openant generate-context`) | ### Report Generator diff --git a/libs/openant-core/PIPELINE_MANUAL.md b/libs/openant-core/PIPELINE_MANUAL.md index fe77b78..facb1d9 100644 --- a/libs/openant-core/PIPELINE_MANUAL.md +++ b/libs/openant-core/PIPELINE_MANUAL.md @@ -534,15 +534,26 @@ For typical web applications, entry-point filtering achieves 60-95% reduction. Classifies the repository type to reduce false positives. 
-**Location:** `context/generate_context.py` +**Location:** `context/application_context.py`, `openant/cli.py` -**Command:** +**Command (via CLI):** +```bash +openant generate-context # Uses active project +openant generate-context /path/to/repo # Explicit repo path +openant generate-context /path/to/repo -o ctx.json # Custom output path +openant generate-context --force # Skip OPENANT.md override +openant generate-context --show-prompt # Include prompt format in output +``` + +**Command (via Python module):** ```bash python -m context.generate_context /path/to/repo python -m context.generate_context /path/to/repo -o application_context.json python -m context.generate_context --list-types # Show supported types ``` +When using a project (`openant init`), the output defaults to the project scan directory and is automatically discovered by `analyze` and `verify` — no need to pass `--app-context`. + **Supported Application Types:** | Type | Description | Attack Model | @@ -575,7 +586,14 @@ python -m context.generate_context --list-types # Show supported types **Manual Override:** -Create `OPENANT.md` or `OPENANT.json` in repo root to override automatic detection. +Create `OPENANT.md` or `OPENANT.json` in repo root to provide explicit context. + +When a manual override file is detected, the CLI prompts for how to handle it: +- `use` — Use override as-is, skip LLM generation (default) +- `merge` — Feed override content into LLM alongside other sources +- `ignore` — Ignore override, generate from scratch + +Use `--override-mode <mode>` to skip the prompt, or `--force` as a shortcut for `--override-mode ignore`. --- @@ -885,7 +903,7 @@ python parsers/python/parse_repository.py /path/to/flask-app \ python validate_dataset_schema.py datasets/flask-app/dataset.json # 3. Generate application context -python -m context.generate_context /path/to/flask-app +openant generate-context /path/to/flask-app # 4. 
Run Stage 1 + Stage 2 on first 20 units python experiment.py --dataset flask-app --verify --limit 20 @@ -907,7 +925,7 @@ python parsers/javascript/test_pipeline.py /path/to/node-app \ python validate_dataset_schema.py datasets/node-app/dataset.json # 3. Generate application context -python -m context.generate_context /path/to/node-app +openant generate-context /path/to/node-app # 4. Run full analysis python experiment.py --dataset node-app --verify @@ -953,7 +971,7 @@ python parsers/python/parse_repository.py /repo --output datasets/name/dataset.j python parsers/javascript/test_pipeline.py /repo --analyzer-path /analyzer.js --output datasets/name --processing-level codeql # Generate app context -python -m context.generate_context /repo +openant generate-context /repo # Run Stage 1 python experiment.py --dataset name diff --git a/libs/openant-core/README.md b/libs/openant-core/README.md index 9d466ed..fdc2d80 100644 --- a/libs/openant-core/README.md +++ b/libs/openant-core/README.md @@ -131,16 +131,18 @@ OpenAnt generates application context to understand what type of application is ### Generate Context ```bash -# Generate context for a repository -python -m context.generate_context /path/to/repo - -# View formatted prompt output -python -m context.generate_context /path/to/repo --show-prompt +# Generate context via CLI (recommended) +openant generate-context /path/to/repo +openant generate-context /path/to/repo --show-prompt # Include prompt format +openant generate-context --force # Skip OPENANT.md override -# List supported types -python -m context.generate_context --list-types +# Generate context via Python module +python -m context.generate_context /path/to/repo +python -m context.generate_context --list-types # Show supported types ``` +When using a project (`openant init`), `analyze` and `verify` auto-discover the generated context — no need to pass `--app-context`. 
+ ### Manual Override Create `OPENANT.md` or `OPENANT.json` in your repository root to provide manual security context. This is useful when: diff --git a/libs/openant-core/context/OPENANT_TEMPLATE.md b/libs/openant-core/context/OPENANT_TEMPLATE.md index 751093b..d649b79 100644 --- a/libs/openant-core/context/OPENANT_TEMPLATE.md +++ b/libs/openant-core/context/OPENANT_TEMPLATE.md @@ -16,6 +16,13 @@ OpenAnt supports these four application types: **Note:** Manual overrides can use any `application_type` value (validation is skipped for manual overrides). Use this to analyze unsupported application types by mapping them to the closest supported type. +**Override modes:** When this file is detected, the `generate-context` command prompts for how to handle it: +- `use` — Use this file as-is, skip LLM generation (default) +- `merge` — Feed this file into the LLM alongside other repo sources (README, etc.) +- `ignore` — Ignore this file and generate context from scratch + +Use `--override-mode <mode>` to skip the prompt. + ## Format Include a JSON code block with the following structure: diff --git a/libs/openant-core/context/application_context.py b/libs/openant-core/context/application_context.py index f7fa55d..c5f1c23 100644 --- a/libs/openant-core/context/application_context.py +++ b/libs/openant-core/context/application_context.py @@ -153,6 +153,12 @@ def get_type_info(self) -> dict: ".openant.json", ] +# Maximum size (chars) of an override file when included in merge-mode LLM +# input. Larger files are truncated with a marker so they don't blow the +# prompt budget. 10 KB is comfortably above a hand-written notes file but +# well below the 200K-token context window. 
+MAX_OVERRIDE_MERGE_CHARS = 10000 + # Priority files to read for context generation CONTEXT_FILES = [ "README.md", @@ -192,17 +198,49 @@ def get_type_info(self) -> dict: } -def gather_context_sources(repo_path: Path) -> dict[str, str]: +def find_override_file(repo_path: Path) -> Path | None: + """Return path to first existing manual override file, or None. + + Only regular files are considered — directories that happen to share + an override filename are skipped (matches the Go CLI's behavior). + + Args: + repo_path: Path to repository root. + + Returns: + Path to override file if found, None otherwise. + """ + for filename in MANUAL_OVERRIDE_FILES: + filepath = repo_path / filename + if filepath.is_file(): + return filepath + return None + + +def gather_context_sources(repo_path: Path, override_path: Path | None = None) -> dict[str, str]: """Gather relevant files for context generation. Args: repo_path: Path to the repository root. + override_path: Optional path to override file to include as a source. Returns: Dictionary mapping filename to content. """ sources = {} + # Include override file content if provided (merge mode) + if override_path is not None: + try: + content = override_path.read_text(errors="ignore") + if len(content) > MAX_OVERRIDE_MERGE_CHARS: + content = ( + content[:MAX_OVERRIDE_MERGE_CHARS] + "\n\n[... truncated ...]" + ) + sources[override_path.name] = content + except Exception as e: + print(f"Warning: Could not read {override_path.name}: {e}", file=sys.stderr) + # Read priority files for filename in CONTEXT_FILES: filepath = repo_path / filename @@ -384,12 +422,21 @@ def _build_type_descriptions() -> str: return "\n".join(lines) +MERGE_CONTEXT_SUPPLEMENT = """ +## Developer-Provided Context + +The repository maintainer provided a manual security context file (listed above +in the sources). Treat their classification of intended behaviors, trust +boundaries, and not-a-vulnerability entries as authoritative hints. 
Validate the +application type against the other source files and reconcile any conflicts. +""" + CONTEXT_GENERATION_PROMPT = """Analyze this software repository and generate a security analysis context. ## Repository Information {sources} - +{developer_context} --- ## Task @@ -464,6 +511,7 @@ def generate_application_context( repo_path: Path, model: str = "claude-sonnet-4-20250514", force_regenerate: bool = False, + override_mode: str | None = None, ) -> ApplicationContext: """Generate application context using LLM analysis. @@ -472,7 +520,9 @@ def generate_application_context( Args: repo_path: Path to the repository root. model: Anthropic model to use for generation. - force_regenerate: If True, skip manual override check. + force_regenerate: If True, skip manual override check (legacy, use override_mode). + override_mode: How to handle override files: "use" (verbatim), "merge" (feed + into LLM), "ignore" (skip override), or None (legacy behavior). Returns: ApplicationContext with security-relevant information. 
@@ -482,16 +532,29 @@ def generate_application_context( """ repo_path = Path(repo_path) - # Check for manual override first - if not force_regenerate: + # Resolve effective mode from override_mode or legacy force_regenerate + if override_mode is None: + effective_mode = "ignore" if force_regenerate else "use" + else: + effective_mode = override_mode + + # "use" mode: return manual override verbatim if found + if effective_mode == "use": manual_context = check_manual_override(repo_path) if manual_context: - print(f"Using manual override from repository", file=sys.stderr) + print("Using manual override from repository", file=sys.stderr) return manual_context - # Gather sources + # "merge" mode: find override file to include as LLM source + override_path = None + if effective_mode == "merge": + override_path = find_override_file(repo_path) + if override_path: + print(f"Merging {override_path.name} into LLM context", file=sys.stderr) + + # Gather sources (includes override file in merge mode) print(f"Gathering context sources from {repo_path}...", file=sys.stderr) - sources = gather_context_sources(repo_path) + sources = gather_context_sources(repo_path, override_path=override_path) if not sources: raise ValueError(f"No context sources found in {repo_path}") @@ -501,6 +564,9 @@ def generate_application_context( for name, content in sources.items(): sources_text += f"\n### {name}\n```\n{content}\n```\n" + # Add developer context supplement when merging + developer_context = MERGE_CONTEXT_SUPPLEMENT if override_path else "" + # Call LLM print(f"Generating context with {model}...", file=sys.stderr) client = Anthropic() @@ -509,7 +575,10 @@ def generate_application_context( max_tokens=2000, messages=[{ "role": "user", - "content": CONTEXT_GENERATION_PROMPT.format(sources=sources_text) + "content": CONTEXT_GENERATION_PROMPT.format( + sources=sources_text, + developer_context=developer_context, + ) }] ) @@ -529,7 +598,7 @@ def generate_application_context( except 
json.JSONDecodeError as e: raise ValueError(f"Failed to parse LLM response as JSON: {e}\nResponse: {response_text}") - data['source'] = 'llm' + data['source'] = 'merged' if override_path else 'llm' # Validate and create context (will raise UnsupportedApplicationTypeError if invalid) return ApplicationContext(**data) diff --git a/libs/openant-core/openant/cli.py b/libs/openant-core/openant/cli.py index b0ce345..66822ec 100644 --- a/libs/openant-core/openant/cli.py +++ b/libs/openant-core/openant/cli.py @@ -5,6 +5,7 @@ Commands: openant scan /path/to/repo --output /tmp/results openant parse /path/to/repo --output /tmp/results + openant generate-context /path/to/repo -o /tmp/results/application_context.json openant enhance dataset.json --analyzer-output ao.json --repo-path /repo -o enhanced.json openant analyze dataset.json --output /tmp/results openant verify results.json --analyzer-output ao.json --output /tmp/results @@ -29,6 +30,20 @@ def _output_json(data: dict): sys.stdout.write("\n") +def _find_app_context(*candidate_dirs: str) -> str | None: + """Search candidate directories for application_context.json. + + Returns the first existing path, or None. + """ + for d in candidate_dirs: + if not d: + continue + path = os.path.join(d, "application_context.json") + if os.path.isfile(path): + return path + return None + + def _load_step_reports(directory: str) -> list[dict]: """Load all {step}.report.json files from a directory. 
@@ -152,6 +167,66 @@ def cmd_parse(args): return 2 +def cmd_generate_context(args): + """Generate application security context for a repository.""" + from pathlib import Path + from context.application_context import ( + generate_application_context, + save_context, + format_context_for_prompt, + ) + from core.schemas import success, error + from core.step_report import step_context + + output_path = args.output or os.path.join(args.repo, "application_context.json") + output_dir = os.path.dirname(os.path.abspath(output_path)) + + # Resolve effective override mode + if args.override_mode: + effective_mode = args.override_mode + elif args.force: + effective_mode = "ignore" + else: + effective_mode = None # legacy default behavior + + try: + with step_context("generate-context", output_dir, inputs={ + "repo_path": os.path.abspath(args.repo), + "force": args.force, + "override_mode": effective_mode, + }) as ctx: + app_context = generate_application_context( + Path(args.repo), + override_mode=effective_mode, + ) + save_context(app_context, Path(output_path)) + + ctx.summary = { + "application_type": app_context.application_type, + "confidence": app_context.confidence, + "source": app_context.source, + } + ctx.outputs = {"app_context_path": os.path.abspath(output_path)} + + result = { + "app_context_path": os.path.abspath(output_path), + "application_type": app_context.application_type, + "purpose": app_context.purpose, + "confidence": app_context.confidence, + "source": app_context.source, + } + + if args.show_prompt: + result["prompt_format"] = format_context_for_prompt(app_context) + + _output_json(success(result)) + return 0 + + except Exception as e: + _output_json(error(str(e))) + return 2 + + def cmd_enhance(args): """Enhance a dataset with security context.""" from core.enhancer import enhance_dataset @@ -225,6 +300,18 @@ def cmd_analyze(args): exploitable_filter = "all" if args.exploitable_all else ("strict" if args.exploitable_only else None) + # Auto-discover 
application context if not explicitly provided + app_context_path = args.app_context + if not app_context_path: + app_context_path = _find_app_context( + output_dir, + args.repo_path, + os.path.dirname(os.path.abspath(args.dataset)), + ) + if app_context_path: + print(f"[Analyze] Auto-discovered application context: {app_context_path}", + file=sys.stderr) + try: with step_context("analyze", output_dir, inputs={ "dataset_path": os.path.abspath(args.dataset), @@ -236,7 +323,7 @@ def cmd_analyze(args): dataset_path=args.dataset, output_dir=output_dir, analyzer_output_path=args.analyzer_output, - app_context_path=args.app_context, + app_context_path=app_context_path, repo_path=args.repo_path, limit=args.limit, model=args.model, @@ -277,7 +364,7 @@ def cmd_analyze(args): results_path=result.results_path, output_dir=output_dir, analyzer_output_path=args.analyzer_output, - app_context_path=args.app_context, + app_context_path=app_context_path, repo_path=args.repo_path, workers=args.workers, backoff_seconds=args.backoff, @@ -322,18 +409,30 @@ def cmd_verify(args): output_dir = args.output or tempfile.mkdtemp(prefix="open_ant_verify_") + # Auto-discover application context if not explicitly provided + app_context_path = args.app_context + if not app_context_path: + app_context_path = _find_app_context( + output_dir, + args.repo_path, + os.path.dirname(os.path.abspath(args.results)), + ) + if app_context_path: + print(f"[Verify] Auto-discovered application context: {app_context_path}", + file=sys.stderr) + try: with step_context("verify", output_dir, inputs={ "results_path": os.path.abspath(args.results), "analyzer_output_path": os.path.abspath(args.analyzer_output), - "app_context_path": os.path.abspath(args.app_context) if args.app_context else None, + "app_context_path": os.path.abspath(app_context_path) if app_context_path else None, "repo_path": os.path.abspath(args.repo_path) if args.repo_path else None, }) as ctx: result = run_verification( results_path=args.results, 
output_dir=output_dir, analyzer_output_path=args.analyzer_output, - app_context_path=args.app_context, + app_context_path=app_context_path, repo_path=args.repo_path, workers=args.workers, checkpoint_path=getattr(args, "checkpoint", None), @@ -1019,6 +1118,25 @@ def main(): parse_p.add_argument("--diff-manifest", help="Path to diff_manifest.json; tags units with diff_selected") parse_p.set_defaults(func=cmd_parse) + # --------------------------------------------------------------- + # generate-context — generate application security context + # --------------------------------------------------------------- + gc_p = subparsers.add_parser( + "generate-context", + help="Generate application security context for a repository", + ) + gc_p.add_argument("repo", help="Path to repository") + gc_p.add_argument("--output", "-o", + help="Output path (default: /application_context.json)") + gc_p.add_argument("--force", action="store_true", + help="Force regeneration, ignoring OPENANT.md override files") + gc_p.add_argument("--override-mode", choices=["use", "ignore", "merge"], + default=None, + help="How to handle OPENANT.md: use (as-is), merge (into LLM), ignore") + gc_p.add_argument("--show-prompt", action="store_true", + help="Include formatted prompt text in output") + gc_p.set_defaults(func=cmd_generate_context) + # --------------------------------------------------------------- # enhance — add security context to a dataset # --------------------------------------------------------------- diff --git a/libs/openant-core/tests/test_app_context_discovery.py b/libs/openant-core/tests/test_app_context_discovery.py new file mode 100644 index 0000000..74949d6 --- /dev/null +++ b/libs/openant-core/tests/test_app_context_discovery.py @@ -0,0 +1,86 @@ +"""Tests for application_context.json auto-discovery in the Python CLI. 
+ +These tests exercise the `_find_app_context` helper used by `analyze` and +`verify` to locate `application_context.json` automatically when +`--app-context` is not passed. +""" +import json +from pathlib import Path + +from openant.cli import _find_app_context + + +def _write_dummy_context(path: Path) -> None: + path.write_text(json.dumps({ + "application_type": "web_app", + "purpose": "test", + "confidence": "high", + "source": "test", + })) + + +class TestFindAppContext: + def test_returns_none_when_no_dirs(self): + assert _find_app_context() is None + + def test_returns_none_when_dirs_empty(self): + assert _find_app_context("", None) is None + + def test_returns_none_when_no_file_present(self, tmp_path): + d1 = tmp_path / "out" + d1.mkdir() + d2 = tmp_path / "repo" + d2.mkdir() + assert _find_app_context(str(d1), str(d2)) is None + + def test_finds_in_first_dir(self, tmp_path): + out_dir = tmp_path / "out" + out_dir.mkdir() + ctx_path = out_dir / "application_context.json" + _write_dummy_context(ctx_path) + + result = _find_app_context(str(out_dir), str(tmp_path / "repo")) + assert result == str(ctx_path) + + def test_finds_in_second_dir_when_first_missing(self, tmp_path): + out_dir = tmp_path / "out" + out_dir.mkdir() + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + ctx_path = repo_dir / "application_context.json" + _write_dummy_context(ctx_path) + + result = _find_app_context(str(out_dir), str(repo_dir)) + assert result == str(ctx_path) + + def test_first_match_wins(self, tmp_path): + out_dir = tmp_path / "out" + out_dir.mkdir() + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + first = out_dir / "application_context.json" + second = repo_dir / "application_context.json" + _write_dummy_context(first) + _write_dummy_context(second) + + result = _find_app_context(str(out_dir), str(repo_dir)) + assert result == str(first) + + def test_skips_falsy_dirs(self, tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + ctx_path = repo_dir / 
"application_context.json" + _write_dummy_context(ctx_path) + + # First two are falsy (empty / None) — should be skipped without error + result = _find_app_context("", None, str(repo_dir)) + assert result == str(ctx_path) + + def test_ignores_directory_named_application_context_json(self, tmp_path): + """A *directory* with the magic name should not be treated as a hit.""" + out_dir = tmp_path / "out" + out_dir.mkdir() + # Create a directory (not file) with the target name + (out_dir / "application_context.json").mkdir() + + assert _find_app_context(str(out_dir)) is None diff --git a/libs/openant-core/tests/test_go_cli.py b/libs/openant-core/tests/test_go_cli.py index fc92113..fc9c9f9 100644 --- a/libs/openant-core/tests/test_go_cli.py +++ b/libs/openant-core/tests/test_go_cli.py @@ -23,8 +23,12 @@ ) -def run_cli(*args, env_override=None): - """Run the openant CLI binary and return the CompletedProcess.""" +def run_cli(*args, env_override=None, stdin_input=""): + """Run the openant CLI binary and return the CompletedProcess. + + `stdin_input` defaults to "" (a piped, non-TTY stdin). Pass `stdin_input=None` + to inherit the parent's stdin instead. 
+ """ env = os.environ.copy() # Don't let the test hit any real API env.pop("ANTHROPIC_API_KEY", None) @@ -45,6 +49,7 @@ def run_cli(*args, env_override=None): text=True, timeout=30, env=env, + input=stdin_input, ) @@ -163,6 +168,90 @@ def test_parse_json_output_is_valid(self, sample_python_repo, tmp_path): assert "status" in envelope +class TestGenerateContextHelp: + """Tests for `openant generate-context --help`.""" + + def test_help(self): + result = run_cli("generate-context", "--help") + assert result.returncode == 0 + output = result.stdout + result.stderr + assert "repository" in output.lower() + assert "context" in output.lower() + + def test_help_shows_override_mode(self): + result = run_cli("generate-context", "--help") + assert result.returncode == 0 + output = result.stdout + result.stderr + assert "override-mode" in output + + +class TestGenerateContext: + """Tests for `openant generate-context` (no API key).""" + + def test_requires_api_key(self, sample_python_repo): + """generate-context should fail without an API key.""" + result = run_cli("generate-context", sample_python_repo) + output = result.stderr + result.stdout + assert result.returncode != 0 + assert "api key" in output.lower() + + def test_force_and_override_mode_mutually_exclusive(self, sample_python_repo): + """--force and --override-mode together should error.""" + result = run_cli( + "generate-context", sample_python_repo, + "--force", "--override-mode", "merge", + ) + output = result.stderr + result.stdout + assert result.returncode != 0 + assert "mutually exclusive" in output.lower() + + @pytest.mark.parametrize("mode", ["use", "merge", "ignore"]) + def test_override_mode_accepts_valid_values(self, sample_python_repo, mode): + """All three valid override-mode values are accepted by the Go CLI. + + We don't have an API key in this environment so the call still fails, + but the failure should NOT be a flag-validation error — it should be + the API-key check downstream. 
+ """ + result = run_cli( + "generate-context", sample_python_repo, + "--override-mode", mode, + ) + output = (result.stderr + result.stdout).lower() + # Should NOT be rejected as an invalid mode value. + assert "invalid --override-mode" not in output + assert "must be use, merge, or ignore" not in output + + def test_override_mode_rejects_invalid_value(self, sample_python_repo): + """Unknown --override-mode value is rejected before any LLM call.""" + result = run_cli( + "generate-context", sample_python_repo, + "--override-mode", "bogus", + ) + output = (result.stderr + result.stdout).lower() + assert result.returncode != 0 + assert "invalid" in output + assert "use, merge, or ignore" in output + + def test_no_tty_default_is_use(self, sample_python_repo, tmp_path): + """When stdin is not a TTY (subprocess) and an override file exists, + the CLI should silently default to 'use' rather than prompt — i.e. + it should NOT print the interactive prompt menu.""" + # Copy the sample repo into tmp_path so we don't pollute the fixture + repo_copy = tmp_path / "repo" + shutil.copytree(sample_python_repo, repo_copy) + (repo_copy / "OPENANT.md").write_text( + '# manual override\n' + 'application_type: web_app\n' + ) + result = run_cli("generate-context", str(repo_copy)) + output = result.stderr + result.stdout + # The interactive prompt's text should NOT appear under non-TTY stdin. + assert "[u]se" not in output + assert "[m]erge" not in output + assert "Choice [u/m/i]" not in output + + class TestApiKeyHandling: def test_scan_requires_api_key(self, sample_python_repo): """Scan should fail without an API key.""" diff --git a/libs/openant-core/tests/test_override_mode.py b/libs/openant-core/tests/test_override_mode.py new file mode 100644 index 0000000..9abeafc --- /dev/null +++ b/libs/openant-core/tests/test_override_mode.py @@ -0,0 +1,258 @@ +"""Unit tests for the override-mode functionality of generate-context. 
+ +These tests cover the Python-side logic for `find_override_file()`, +`gather_context_sources()` merge behavior, and the override-mode dispatch +inside `generate_application_context()`. They do not invoke the LLM — +network calls are mocked or avoided by exercising the early-return paths. +""" +from pathlib import Path +from unittest.mock import patch + +import pytest + +from context.application_context import ( + MANUAL_OVERRIDE_FILES, + MERGE_CONTEXT_SUPPLEMENT, + find_override_file, + gather_context_sources, + generate_application_context, +) + + +class TestFindOverrideFile: + """Tests for the `find_override_file()` helper.""" + + def test_returns_none_when_no_override(self, tmp_path): + """No override files in repo -> returns None.""" + # Create a non-override file to ensure the directory is real + (tmp_path / "README.md").write_text("# repo") + assert find_override_file(tmp_path) is None + + def test_finds_openant_md(self, tmp_path): + """OPENANT.md is detected.""" + path = tmp_path / "OPENANT.md" + path.write_text("# override") + result = find_override_file(tmp_path) + assert result == path + + def test_finds_openant_json(self, tmp_path): + """OPENANT.json is detected when no OPENANT.md exists.""" + path = tmp_path / "OPENANT.json" + path.write_text('{"application_type": "web_app"}') + result = find_override_file(tmp_path) + assert result == path + + def test_finds_dot_openant_md(self, tmp_path): + """.openant.md is detected.""" + path = tmp_path / ".openant.md" + path.write_text("# override") + result = find_override_file(tmp_path) + assert result == path + + def test_priority_md_over_json(self, tmp_path): + """When both OPENANT.md and OPENANT.json exist, MD is preferred.""" + md = tmp_path / "OPENANT.md" + md.write_text("# md override") + js = tmp_path / "OPENANT.json" + js.write_text('{"application_type": "web_app"}') + result = find_override_file(tmp_path) + # OPENANT.md is listed first in MANUAL_OVERRIDE_FILES + assert result == md + assert 
MANUAL_OVERRIDE_FILES.index("OPENANT.md") < MANUAL_OVERRIDE_FILES.index("OPENANT.json") + + def test_directory_with_override_name_is_skipped(self, tmp_path): + """A directory named OPENANT.md must NOT be returned — only regular + files are valid overrides, matching the Go CLI's behavior. Otherwise + merge mode would crash trying to read_text() on a directory.""" + d = tmp_path / "OPENANT.md" + d.mkdir() + # A real override file lower in priority should be picked up instead. + json_override = tmp_path / "OPENANT.json" + json_override.write_text('{"application_type": "web_app"}') + + result = find_override_file(tmp_path) + assert result == json_override + + def test_directory_only_returns_none(self, tmp_path): + """If the only matching path is a directory, return None — not crash.""" + (tmp_path / "OPENANT.md").mkdir() + assert find_override_file(tmp_path) is None + + def test_accepts_str_path(self, tmp_path): + """Helper accepts a Path; calling with str via Path() conversion works.""" + (tmp_path / "OPENANT.md").write_text("# override") + result = find_override_file(Path(str(tmp_path))) + assert result is not None + assert result.name == "OPENANT.md" + + +class TestGatherContextSourcesMerge: + """Tests for `gather_context_sources()` with override_path (merge mode).""" + + def test_no_override_path(self, tmp_path): + """Without override_path, override file is not included as a source.""" + (tmp_path / "README.md").write_text("# readme") + sources = gather_context_sources(tmp_path) + assert "README.md" in sources + # No OPENANT.md key because we didn't pass override_path + assert "OPENANT.md" not in sources + + def test_override_path_included(self, tmp_path): + """When override_path is provided, its content is included.""" + readme = tmp_path / "README.md" + readme.write_text("# readme") + override = tmp_path / "OPENANT.md" + override.write_text("# manual override\nIntended behavior") + + sources = gather_context_sources(tmp_path, override_path=override) + assert 
"OPENANT.md" in sources + assert "manual override" in sources["OPENANT.md"] + + def test_override_truncated_when_huge(self, tmp_path): + """Override content >10000 chars is truncated.""" + override = tmp_path / "OPENANT.md" + override.write_text("x" * 12000) + sources = gather_context_sources(tmp_path, override_path=override) + content = sources["OPENANT.md"] + assert "[... truncated ...]" in content + # 10000 + truncation marker + assert len(content) < 12000 + + +class TestGenerateApplicationContextDispatch: + """Tests for the override-mode dispatch inside generate_application_context. + + These avoid hitting the LLM by exercising the "use" path (which returns + early when an override file is found). + """ + + def _write_valid_override_md(self, repo_path: Path) -> Path: + """Write a minimal valid OPENANT.md that check_manual_override accepts.""" + # check_manual_override prefers OPENANT.json for structured input; + # use OPENANT.json with the schema generate_application_context expects. + path = repo_path / "OPENANT.json" + path.write_text( + '{"application_type": "web_app", "purpose": "test app", ' + '"confidence": "high", "intended_behaviors": [], ' + '"trust_boundaries": [], "not_a_vulnerability": []}' + ) + return path + + def test_use_mode_returns_override_without_llm(self, tmp_path): + """override_mode='use' with an override file returns it verbatim + without ever calling the LLM.""" + self._write_valid_override_md(tmp_path) + + # If the LLM is called, this will blow up because we don't patch it. + # Test passes if we get a context back without any Anthropic call. 
+ with patch("context.application_context.Anthropic") as mock_anth: + ctx = generate_application_context(tmp_path, override_mode="use") + mock_anth.assert_not_called() + assert ctx.application_type == "web_app" + + def test_force_regenerate_ignores_override(self, tmp_path): + """force_regenerate=True (legacy) should NOT short-circuit to override.""" + self._write_valid_override_md(tmp_path) + + with patch("context.application_context.Anthropic") as mock_anth: + # The LLM would be called — we don't actually want to wait for it. + # We just confirm the early-return for "use" did NOT happen by + # asserting Anthropic was instantiated. We then bail with an + # exception inside the mock to avoid running the rest. + mock_anth.side_effect = RuntimeError("LLM-call-attempted") + with pytest.raises(RuntimeError, match="LLM-call-attempted"): + generate_application_context(tmp_path, force_regenerate=True) + mock_anth.assert_called_once() + + def test_override_mode_ignore_skips_override(self, tmp_path): + """override_mode='ignore' should NOT short-circuit to override.""" + self._write_valid_override_md(tmp_path) + + with patch("context.application_context.Anthropic") as mock_anth: + mock_anth.side_effect = RuntimeError("LLM-call-attempted") + with pytest.raises(RuntimeError, match="LLM-call-attempted"): + generate_application_context(tmp_path, override_mode="ignore") + mock_anth.assert_called_once() + + def test_override_mode_merge_includes_supplement(self, tmp_path): + """override_mode='merge' should send the override content + supplement + to the LLM.""" + override = tmp_path / "OPENANT.md" + override.write_text("# manual override\nIntended behavior") + # Need at least one source so gather_context_sources doesn't raise + (tmp_path / "README.md").write_text("# readme") + + captured_prompt = {} + + class _FakeContent: + def __init__(self, text): + self.text = text + + class _FakeResponse: + def __init__(self, text): + self.content = [_FakeContent(text)] + + def 
_fake_create(**kwargs): + captured_prompt["content"] = kwargs["messages"][0]["content"] + return _FakeResponse( + '```json\n' + '{"application_type": "web_app", "purpose": "x", ' + '"confidence": "high", "intended_behaviors": [], ' + '"trust_boundaries": [], "not_a_vulnerability": []}\n' + '```' + ) + + with patch("context.application_context.Anthropic") as mock_anth: + instance = mock_anth.return_value + instance.messages.create.side_effect = _fake_create + ctx = generate_application_context(tmp_path, override_mode="merge") + + assert "OPENANT.md" in captured_prompt["content"] + assert MERGE_CONTEXT_SUPPLEMENT.strip() in captured_prompt["content"] + # Source should be marked as 'merged' when an override is merged. + assert ctx.source == "merged" + + +class TestPythonCLIArgparse: + """Verify the argparse-level wiring of --override-mode and --force. + + Invokes the CLI via subprocess to verify the public surface. + """ + + @staticmethod + def _run_cli(*args): + import os + import subprocess + import sys + + env = os.environ.copy() + # Don't let the test reach a real LLM + env.pop("ANTHROPIC_API_KEY", None) + return subprocess.run( + [sys.executable, "-m", "openant.cli"] + list(args), + capture_output=True, + text=True, + timeout=15, + env=env, + ) + + def test_override_mode_choices_validation(self): + """--override-mode rejects values outside use/ignore/merge.""" + result = self._run_cli( + "generate-context", "/tmp/nonexistent-repo", + "--override-mode", "bogus", + ) + assert result.returncode != 0 + # argparse error mentions invalid choice and the offending value + assert "invalid choice" in (result.stderr + result.stdout).lower() + + def test_override_mode_help_lists_choices(self): + """`generate-context --help` advertises the override-mode flag.""" + result = self._run_cli("generate-context", "--help") + assert result.returncode == 0 + out = result.stdout + result.stderr + assert "--override-mode" in out + # All three valid values appear in help text + assert 
"use" in out + assert "merge" in out + assert "ignore" in out