diff --git a/apps/openant-cli/cmd/report.go b/apps/openant-cli/cmd/report.go index d2b34b7..5060c19 100644 --- a/apps/openant-cli/cmd/report.go +++ b/apps/openant-cli/cmd/report.go @@ -26,6 +26,7 @@ Formats: summary Narrative security overview (uses LLM) html Interactive HTML report with charts and filters csv Spreadsheet export of all findings + sarif SARIF 2.1.0 log for GitHub Code Scanning / GitLab SAST upload If no results path is given, the active project's results_verified.json is used. Python owns default output paths — you only need -o to override. @@ -50,7 +51,7 @@ var ( func init() { reportCmd.Flags().StringVarP(&reportOutput, "output", "o", "", "Output path (default: derived from format)") reportCmd.Flags().StringVar(&reportDataset, "dataset", "", "Path to dataset JSON (for html/csv)") - reportCmd.Flags().StringVarP(&reportFormat, "format", "f", "", "Report format: disclosure, summary, html, csv") + reportCmd.Flags().StringVarP(&reportFormat, "format", "f", "", "Report format: disclosure, summary, html, csv, sarif") reportCmd.Flags().StringVar(&reportPipelineOutput, "pipeline-output", "", "Path to pipeline_output.json (for summary/disclosure)") reportCmd.Flags().StringVar(&reportRepoName, "repo-name", "", "Repository name (used when auto-building pipeline_output)") reportCmd.Flags().StringVar(&reportExtraDest, "copy-to", "", "Copy reports to an additional location") @@ -213,6 +214,31 @@ func runReport(cmd *cobra.Command, args []string) { output.PrintReportSummary(data) } allResults = append(allResults, data) + } else if fmt == "sarif" { + // SARIF reports use the Go renderer for the same reason HTML + // does: it's a deterministic data transformation, not an + // LLM-generated narrative, so there's no need to round-trip + // through Python. + outputPath := reportOutput + if outputPath == "" { + resultsDir := filepath.Dir(resultsPath) + outputPath = filepath.Join(resultsDir, "final-reports", "report.sarif") + } + + if err := runSARIFReport(rt, resultsPath, outputPath); err != nil { + output.PrintError("sarif: " + err.Error()) + exitCode = 2 + continue + } + + data := map[string]any{ + "output_path": outputPath, + "format": "sarif", + } + if !jsonOutput { + output.PrintReportSummary(data) + } + allResults = append(allResults, data) } else { // Other formats delegate to Python pyArgs := buildReportArgs(resultsPath, fmt) @@ -262,6 +288,7 @@ func promptFormats() ([]string, error) { huh.NewOption("Summary — narrative security overview written by AI ($)", "summary"), huh.NewOption("HTML — interactive report with charts and filters", "html"), huh.NewOption("CSV — spreadsheet export of all findings", "csv"), + huh.NewOption("SARIF — GitHub Code Scanning / GitLab SAST upload", "sarif"), ). Value(&selected), ), @@ -349,6 +376,48 @@ func runHTMLReport(rt *python.RuntimeInfo, resultsPath string, outputPath string return nil } +// runSARIFReport generates a SARIF 2.1.0 log using the Go renderer. Like +// runHTMLReport, it asks Python's report-data subcommand for pre-computed +// data, then transforms it deterministically here. Driver version is wired +// to the CLI's `version` (set via -ldflags at build time). +func runSARIFReport(rt *python.RuntimeInfo, resultsPath string, outputPath string) error { + pyArgs := []string{"report-data", resultsPath} + if reportDataset != "" { + pyArgs = append(pyArgs, "--dataset", reportDataset) + } + + result, err := python.Invoke(rt.Path, pyArgs, "", quiet, resolvedAPIKey()) + if err != nil { + return fmt.Errorf("report-data failed: %w", err) + } + if result.Envelope.Status != "success" { + msg := "report-data returned error" + if len(result.Envelope.Errors) > 0 { + msg = result.Envelope.Errors[0] + } + return fmt.Errorf("%s", msg) + } + + dataBytes, err := json.Marshal(result.Envelope.Data) + if err != nil { + return fmt.Errorf("failed to marshal report data: %w", err) + } + + var reportData report.ReportData + if err := json.Unmarshal(dataBytes, &reportData); err != nil { + return fmt.Errorf("failed to parse report data: %w", err) + } + + opts := report.SARIFOptions{ + ToolVersion: version, + InformationURI: "https://github.com/knostic/OpenAnt", + } + if err := report.GenerateSARIF(reportData, outputPath, opts); err != nil { + return fmt.Errorf("failed to render SARIF: %w", err) + } + return nil +} + // buildReportArgs constructs the Python CLI arguments for a single format. func buildReportArgs(resultsPath string, format string) []string { pyArgs := []string{"report", resultsPath, "--format", format} diff --git a/apps/openant-cli/internal/report/sarif.go b/apps/openant-cli/internal/report/sarif.go new file mode 100644 index 0000000..88c903b --- /dev/null +++ b/apps/openant-cli/internal/report/sarif.go @@ -0,0 +1,297 @@ +package report + +import ( + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// sarifVersion is the SARIF spec version we emit. 2.1.0 is what GitHub Code +// Scanning, GitLab SAST, and most third-party SARIF consumers expect. +const sarifVersion = "2.1.0" + +// sarifSchema points at the OASIS-published JSON schema for SARIF 2.1.0. +// Consumers that schema-validate the upload (e.g. GitHub Code Scanning's +// pre-ingest check) read this URL. +const sarifSchema = "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json" + +// SARIFOptions controls extra metadata baked into the emitted log. All fields +// are optional; sensible defaults are chosen when empty. +type SARIFOptions struct { + // ToolVersion is `tool.driver.version`. Defaults to "dev" when empty. + ToolVersion string + // InformationURI is `tool.driver.informationUri`. Points reviewers at + // the project for context on the verdicts. + InformationURI string + // ToolName overrides `tool.driver.name`. Defaults to "OpenAnt". + ToolName string +} + +// BuildSARIF turns a ReportData (the same struct that drives the HTML report) +// into a SARIF 2.1.0 log. The returned value is plain map[string]any so it +// round-trips cleanly through json.Marshal — keeping the schema explicit at +// the call site instead of fragmenting it across a dozen typed structs. +func BuildSARIF(data ReportData, opts SARIFOptions) map[string]any { + if opts.ToolName == "" { + opts.ToolName = "OpenAnt" + } + if opts.ToolVersion == "" { + opts.ToolVersion = "dev" + } + + rules, ruleIndex := sarifRulesFor(data) + results := make([]map[string]any, 0, len(data.Findings)) + for _, f := range data.Findings { + results = append(results, sarifResultFor(f, ruleIndex)) + } + + driver := map[string]any{ + "name": opts.ToolName, + "version": opts.ToolVersion, + "semanticVersion": opts.ToolVersion, + "rules": rules, + } + if opts.InformationURI != "" { + driver["informationUri"] = opts.InformationURI + } + + run := map[string]any{ + "tool": map[string]any{ + "driver": driver, + }, + "results": results, + } + if data.RepoURL != "" { + // versionControlProvenance is consumed by GitHub Code Scanning to + // associate the upload with a specific commit. Skip when we don't + // have it rather than emitting an empty/misleading object. + prov := map[string]any{ + "repositoryUri": data.RepoURL, + } + if data.CommitSHA != "" { + prov["revisionId"] = data.CommitSHA + } + run["versionControlProvenance"] = []any{prov} + } + + return map[string]any{ + "$schema": sarifSchema, + "version": sarifVersion, + "runs": []any{run}, + } +} + +// sarifRulesFor returns the SARIF `rules` array plus a map from verdict +// string to its index in that array. We synthesize one rule per distinct +// verdict (vulnerable, bypassable, …) since OpenAnt findings are not yet +// keyed by a stable per-rule taxonomy. Categories from ReportData supply +// the rule descriptions. +func sarifRulesFor(data ReportData) ([]map[string]any, map[string]int) { + descByVerdict := make(map[string]string, len(data.Categories)) + for _, c := range data.Categories { + descByVerdict[c.Verdict] = c.Description + } + + seen := make(map[string]int) + rules := make([]map[string]any, 0) + for _, f := range data.Findings { + v := normalizedVerdict(f.Verdict) + if _, ok := seen[v]; ok { + continue + } + seen[v] = len(rules) + + desc := descByVerdict[v] + if desc == "" { + desc = fmt.Sprintf("Finding with verdict %q.", v) + } + + rules = append(rules, map[string]any{ + "id": "openant.verdict." + v, + "name": "OpenAntVerdict_" + strings.ReplaceAll(v, "-", "_"), + "shortDescription": map[string]any{ + "text": fmt.Sprintf("OpenAnt %s finding", v), + }, + "fullDescription": map[string]any{ + "text": desc, + }, + "defaultConfiguration": map[string]any{ + "level": sarifLevelForVerdict(v), + }, + "properties": map[string]any{ + "verdict": v, + "tags": []string{"security", "openant"}, + }, + }) + } + + return rules, seen +} + +// sarifResultFor renders a single Finding as a SARIF result object. +// +// We intentionally emit a file-scoped location with no startLine because the +// current Finding struct does not carry line numbers; emitting startLine: 1 +// (or any synthetic value) would cause GitHub Code Scanning to anchor the +// alert to the wrong row, which is worse than no anchor at all. When line +// data lands in ReportData, the region payload here is the only place that +// needs to grow. +func sarifResultFor(f Finding, ruleIndex map[string]int) map[string]any { + v := normalizedVerdict(f.Verdict) + + result := map[string]any{ + "ruleId": "openant.verdict." + v, + "level": sarifLevelForVerdict(v), + "message": map[string]any{ + "text": findingMessage(f), + }, + "locations": []any{ + sarifLocationFor(f), + }, + } + + if idx, ok := ruleIndex[v]; ok { + result["ruleIndex"] = idx + } + + props := map[string]any{ + "verdict": v, + "function": f.Function, + } + if f.DynamicTestStatus != "" { + props["dynamicTestStatus"] = f.DynamicTestStatus + } + if f.DynamicTestDetails != "" { + props["dynamicTestDetails"] = f.DynamicTestDetails + } + result["properties"] = props + + // PartialFingerprints is what makes SARIF de-dup work across runs in + // GitHub Code Scanning. Without these, the same finding from successive + // scans shows up as a fresh alert each time. + result["partialFingerprints"] = map[string]any{ + "openant/file/function/verdict/v1": fingerprintFor(f, v), + } + + return result +} + +// sarifLocationFor builds the SARIF `location` object for a finding. The +// physicalLocation has only artifactLocation + a logicalLocations entry for +// the function name (so SARIF consumers that care about logical scope still +// get something). +func sarifLocationFor(f Finding) map[string]any { + loc := map[string]any{ + "physicalLocation": map[string]any{ + "artifactLocation": map[string]any{ + "uri": sarifURI(f.File), + "uriBaseId": "%SRCROOT%", + }, + }, + } + if f.Function != "" { + loc["logicalLocations"] = []any{ + map[string]any{ + "name": f.Function, + "kind": "function", + }, + } + } + return loc +} + +// findingMessage condenses the Finding's narrative fields into a single +// `message.text` line. SARIF allows arbitrary length here, but we cap so +// CI inboxes don't drown. +func findingMessage(f Finding) string { + parts := []string{} + if f.AttackVector != "" { + parts = append(parts, strings.TrimSpace(f.AttackVector)) + } + if f.Analysis != "" { + parts = append(parts, strings.TrimSpace(f.Analysis)) + } + if len(parts) == 0 { + // Fall back so the result still passes SARIF schema validation, + // which requires `message.text` to be non-empty. + return fmt.Sprintf("OpenAnt %s finding in %s", f.Verdict, f.File) + } + msg := strings.Join(parts, "\n\n") + const cap = 4096 + if len(msg) > cap { + msg = msg[:cap-1] + "…" + } + return msg +} + +// fingerprintFor returns a stable string used as the SARIF result's +// `partialFingerprints` value. Order of fields is fixed and explicit so +// that adding a new Finding field later cannot silently invalidate +// existing fingerprints. +func fingerprintFor(f Finding, verdict string) string { + return fmt.Sprintf("%s|%s|%s", f.File, f.Function, verdict) +} + +// sarifLevelForVerdict maps an OpenAnt verdict to a SARIF result.level. +// Vulnerable + bypassable surface as `error`; inconclusive/unclear as +// `warning`; everything else (safe, protected, etc.) as `note` so they +// don't pollute Code-Scanning alert lists. +func sarifLevelForVerdict(v string) string { + switch v { + case "vulnerable", "bypassable": + return "error" + case "inconclusive", "unclear": + return "warning" + default: + return "note" + } +} + +// normalizedVerdict trims/lowercases the verdict so casing or whitespace +// drift in upstream pipeline output cannot fan rules out. +func normalizedVerdict(v string) string { + v = strings.TrimSpace(strings.ToLower(v)) + if v == "" { + return "unknown" + } + return v +} + +// sarifURI normalizes a file path into a SARIF artifactLocation.uri value. +// SARIF wants forward slashes and stable relative paths; we strip any +// leading "./" but otherwise preserve the path as-recorded so consumers can +// match it against the working tree. +func sarifURI(path string) string { + p := strings.ReplaceAll(path, "\\", "/") + p = strings.TrimPrefix(p, "./") + return p +} + +// GenerateSARIF renders a SARIF log to the given output path, creating +// parent directories as needed. The file is overwritten if present. +func GenerateSARIF(data ReportData, outputPath string, opts SARIFOptions) error { + if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil { + return err + } + + f, err := os.Create(outputPath) + if err != nil { + return err + } + defer f.Close() + + return RenderSARIF(data, f, opts) +} + +// RenderSARIF writes a SARIF log to the given writer. Indented for human +// review; consumers that care about size can pass through `jq -c` to +// minify. +func RenderSARIF(data ReportData, w io.Writer, opts SARIFOptions) error { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + enc.SetEscapeHTML(false) + return enc.Encode(BuildSARIF(data, opts)) +} diff --git a/apps/openant-cli/internal/report/sarif_test.go b/apps/openant-cli/internal/report/sarif_test.go new file mode 100644 index 0000000..896fc7c --- /dev/null +++ b/apps/openant-cli/internal/report/sarif_test.go @@ -0,0 +1,269 @@ +package report + +import ( + "bytes" + "encoding/json" + "strings" + "testing" +) + +func sarifFixtureData() ReportData { + return ReportData{ + Title: "demo", + RepoName: "knostic/demo", + CommitSHA: "deadbeefcafebabe1234567890abcdefdeadbeef", + RepoURL: "https://github.com/knostic/demo", + Language: "python", + Findings: []Finding{ + { + Number: 1, + Verdict: "vulnerable", + File: "src/auth/login.py", + Function: "do_login", + AttackVector: "Unsanitized input flows into eval().", + Analysis: "User-controlled username is passed to eval, allowing RCE.", + }, + { + Number: 2, + Verdict: "BYPASSABLE", // exercises normalizedVerdict + File: "./src/api/handler.py", + Function: "handle_get", + AttackVector: "Auth bypass via header injection.", + DynamicTestStatus: "CONFIRMED", + DynamicTestDetails: "PoC succeeded.", + }, + { + Number: 3, + Verdict: "safe", + File: "src/util/helpers.py", + Function: "noop", + }, + }, + Categories: []Category{ + {Verdict: "vulnerable", Description: "Confirmed exploitable code path."}, + {Verdict: "bypassable", Description: "Has guard but reachable around it."}, + {Verdict: "safe", Description: "No exploitable path identified."}, + }, + } +} + +func TestBuildSARIF_TopLevelEnvelope(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{ToolVersion: "1.2.3"}) + + if got["version"] != sarifVersion { + t.Fatalf("version: got %v, want %s", got["version"], sarifVersion) + } + if got["$schema"] != sarifSchema { + t.Fatalf("$schema: got %v, want %s", got["$schema"], sarifSchema) + } + runs, ok := got["runs"].([]any) + if !ok || len(runs) != 1 { + t.Fatalf("runs: expected one run, got %v", got["runs"]) + } +} + +func TestBuildSARIF_DriverNameAndVersion(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{ + ToolVersion: "1.2.3", + InformationURI: "https://github.com/knostic/OpenAnt", + }) + driver := got["runs"].([]any)[0].(map[string]any)["tool"].(map[string]any)["driver"].(map[string]any) + if driver["name"] != "OpenAnt" { + t.Errorf("driver.name: got %v, want OpenAnt", driver["name"]) + } + if driver["version"] != "1.2.3" { + t.Errorf("driver.version: got %v, want 1.2.3", driver["version"]) + } + if driver["informationUri"] != "https://github.com/knostic/OpenAnt" { + t.Errorf("driver.informationUri: got %v", driver["informationUri"]) + } +} + +func TestBuildSARIF_RulesDeduplicatedByVerdict(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{}) + rules := got["runs"].([]any)[0].(map[string]any)["tool"].(map[string]any)["driver"].(map[string]any)["rules"].([]map[string]any) + if len(rules) != 3 { + t.Fatalf("expected 3 rules (one per verdict), got %d", len(rules)) + } + + wantIDs := map[string]bool{ + "openant.verdict.vulnerable": false, + "openant.verdict.bypassable": false, + "openant.verdict.safe": false, + } + for _, r := range rules { + id, _ := r["id"].(string) + if _, ok := wantIDs[id]; ok { + wantIDs[id] = true + } + } + for id, seen := range wantIDs { + if !seen { + t.Errorf("rule %s missing", id) + } + } +} + +func TestBuildSARIF_ResultLevelMapping(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{}) + results := got["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) + if len(results) != 3 { + t.Fatalf("expected 3 results, got %d", len(results)) + } + + wantLevels := []string{"error", "error", "note"} + for i, r := range results { + if r["level"] != wantLevels[i] { + t.Errorf("result[%d].level: got %v, want %s", i, r["level"], wantLevels[i]) + } + } +} + +func TestBuildSARIF_FilePathsNormalized(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{}) + results := got["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) + uri := results[1]["locations"].([]any)[0].(map[string]any)["physicalLocation"].(map[string]any)["artifactLocation"].(map[string]any)["uri"] + if uri != "src/api/handler.py" { + t.Errorf("artifactLocation.uri: got %q, want %q (./ should be stripped)", uri, "src/api/handler.py") + } +} + +func TestBuildSARIF_LogicalLocationCarriesFunction(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{}) + results := got["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) + logicals, ok := results[0]["locations"].([]any)[0].(map[string]any)["logicalLocations"].([]any) + if !ok || len(logicals) != 1 { + t.Fatalf("logicalLocations missing on result[0]") + } + got0 := logicals[0].(map[string]any) + if got0["name"] != "do_login" || got0["kind"] != "function" { + t.Errorf("logicalLocations[0]: got %v", got0) + } +} + +func TestBuildSARIF_DynamicTestPropertiesPropagate(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{}) + results := got["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) + props := results[1]["properties"].(map[string]any) + if props["dynamicTestStatus"] != "CONFIRMED" { + t.Errorf("dynamicTestStatus: got %v", props["dynamicTestStatus"]) + } + if props["dynamicTestDetails"] != "PoC succeeded." { + t.Errorf("dynamicTestDetails: got %v", props["dynamicTestDetails"]) + } +} + +func TestBuildSARIF_PartialFingerprintsStable(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{}) + results := got["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) + for _, r := range results { + fps, ok := r["partialFingerprints"].(map[string]any) + if !ok { + t.Fatalf("partialFingerprints missing on %v", r["ruleId"]) + } + if _, ok := fps["openant/file/function/verdict/v1"].(string); !ok { + t.Fatalf("expected v1 fingerprint key on every result") + } + } +} + +func TestBuildSARIF_VersionControlProvenanceWhenRepoURLPresent(t *testing.T) { + got := BuildSARIF(sarifFixtureData(), SARIFOptions{}) + run := got["runs"].([]any)[0].(map[string]any) + prov, ok := run["versionControlProvenance"].([]any) + if !ok || len(prov) != 1 { + t.Fatalf("expected versionControlProvenance with one entry") + } + entry := prov[0].(map[string]any) + if entry["repositoryUri"] != "https://github.com/knostic/demo" { + t.Errorf("repositoryUri: got %v", entry["repositoryUri"]) + } + if entry["revisionId"] != "deadbeefcafebabe1234567890abcdefdeadbeef" { + t.Errorf("revisionId: got %v", entry["revisionId"]) + } +} + +func TestBuildSARIF_NoVCSWhenRepoURLEmpty(t *testing.T) { + d := sarifFixtureData() + d.RepoURL = "" + got := BuildSARIF(d, SARIFOptions{}) + run := got["runs"].([]any)[0].(map[string]any) + if _, has := run["versionControlProvenance"]; has { + t.Fatalf("versionControlProvenance must be omitted when RepoURL is empty") + } +} + +func TestBuildSARIF_MessageFallbackWhenAttackVectorEmpty(t *testing.T) { + d := ReportData{ + Findings: []Finding{ + {Verdict: "vulnerable", File: "src/x.py"}, + }, + } + got := BuildSARIF(d, SARIFOptions{}) + results := got["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) + msg := results[0]["message"].(map[string]any)["text"].(string) + if msg == "" { + t.Fatalf("message.text must never be empty per SARIF schema") + } + if !strings.Contains(msg, "src/x.py") { + t.Errorf("expected fallback message to reference file path, got %q", msg) + } +} + +func TestBuildSARIF_MessageTruncationCap(t *testing.T) { + huge := strings.Repeat("a", 8000) + d := ReportData{ + Findings: []Finding{{Verdict: "vulnerable", File: "src/x.py", AttackVector: huge}}, + } + got := BuildSARIF(d, SARIFOptions{}) + results := got["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) + msg := results[0]["message"].(map[string]any)["text"].(string) + if len(msg) >= len(huge) { + t.Errorf("message must be truncated, got %d bytes", len(msg)) + } +} + +func TestRenderSARIF_RoundTripsThroughJSONUnmarshal(t *testing.T) { + var buf bytes.Buffer + if err := RenderSARIF(sarifFixtureData(), &buf, SARIFOptions{ToolVersion: "1.0.0"}); err != nil { + t.Fatalf("RenderSARIF: %v", err) + } + + var anyVal map[string]interface{} + if err := json.Unmarshal(buf.Bytes(), &anyVal); err != nil { + t.Fatalf("emitted SARIF must be valid JSON: %v", err) + } + if anyVal["version"] != sarifVersion { + t.Errorf("round-trip version drift: %v", anyVal["version"]) + } +} + +func TestNormalizedVerdict_HandlesEdgeCases(t *testing.T) { + cases := []struct { + in, want string + }{ + {"VULNERABLE", "vulnerable"}, + {" Bypassable ", "bypassable"}, + {"", "unknown"}, + } + for _, c := range cases { + if got := normalizedVerdict(c.in); got != c.want { + t.Errorf("normalizedVerdict(%q): got %q, want %q", c.in, got, c.want) + } + } +} + +func TestSARIFURI_StripsLeadingDotSlashAndNormalizesBackslashes(t *testing.T) { + cases := []struct { + in, want string + }{ + {"./src/x.py", "src/x.py"}, + {`src\nested\file.py`, "src/nested/file.py"}, + {"src/x.py", "src/x.py"}, + } + for _, c := range cases { + if got := sarifURI(c.in); got != c.want { + t.Errorf("sarifURI(%q): got %q, want %q", c.in, got, c.want) + } + } +}