From 9de4429cff995d41c0bb4774421e90dc7a4ebe8a Mon Sep 17 00:00:00 2001 From: duriantaco Date: Wed, 13 May 2026 12:08:32 +0800 Subject: [PATCH] feat: import sarif security evidence --- README.md | 5 +- ROADMAP.md | 2 +- docs/COMPILER.md | 5 + docs/site/compiler.md | 8 + docs/site/index.md | 5 +- internal/vouch/evidence.go | 5 +- internal/vouch/evidence_artifacts.go | 7 + internal/vouch/evidence_test.go | 118 +++++++++ internal/vouch/onboarding.go | 3 + internal/vouch/onboarding_test.go | 34 +++ internal/vouch/sarif.go | 357 +++++++++++++++++++++++++++ 11 files changed, 541 insertions(+), 8 deletions(-) create mode 100644 internal/vouch/sarif.go diff --git a/README.md b/README.md index ab1c005..aba78e9 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,9 @@ vouch evidence import junit .vouch/artifacts/pytest.xml vouch gate ``` -JUnit covers required-test obligations only. Behavior, security, runtime, and -rollback obligations need their own evidence. +JUnit covers required-test obligations only. `security_check` artifacts can use +SARIF 2.1.0 when scanner rules or results reference exact obligation IDs. +Behavior, runtime, and rollback obligations need their own evidence. ## Install diff --git a/ROADMAP.md b/ROADMAP.md index 1297444..702935c 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -60,6 +60,7 @@ Implemented today: - Manifest creation from changed files and owned paths. - Artifact attachment with obligation inference. - JUnit test-map adapter for raw pytest-style JUnit evidence. +- SARIF 2.1.0 import for `security_check` evidence with exact obligation-ID mapping. - Machine-readable gate result artifact output for status checks. - Release policy files loaded from `.vouch/policy/release-policy.json`. - Policy simulation command with structured policy input/output. @@ -200,7 +201,6 @@ Planned work: - Typed API/signature obligation suggestions. - Coverage report import. - Static analysis import. -- SARIF import. - Secret scanning import. 
- Logging and PII scanner import. - Migration and external-effect detectors. diff --git a/docs/COMPILER.md b/docs/COMPILER.md index 90b0f72..69dbdde 100644 --- a/docs/COMPILER.md +++ b/docs/COMPILER.md @@ -165,6 +165,11 @@ vouch --repo DIR gate JUnit covers `required_test` obligations only. Missing behavior, security, runtime, or rollback evidence can still block. +`security_check` artifacts can also be SARIF 2.1.0 logs. Vouch treats SARIF as +scanner evidence only when rules or result properties reference exact compiled +obligation IDs. High or critical mapped SARIF results become blocking findings; +unmapped scanner output is not treated as contract evidence. + ## What Is Proven The repo-local benchmark is VouchBench: diff --git a/docs/site/compiler.md b/docs/site/compiler.md index c05478e..1f480fb 100644 --- a/docs/site/compiler.md +++ b/docs/site/compiler.md @@ -110,3 +110,11 @@ The current decisions are: - `auto_merge` `gate` exits non-zero only when the final decision is `block`. + +## Evidence Model + +Manifest-backed `security_check` artifacts can be generic exact-ID JSON or +SARIF 2.1.0 scanner logs. SARIF rules and result properties must reference exact +compiled obligation IDs. High or critical mapped SARIF results enter the normal +blocking finding and policy path; unmapped scanner output does not satisfy a +contract obligation. diff --git a/docs/site/index.md b/docs/site/index.md index 790f148..d304aa4 100644 --- a/docs/site/index.md +++ b/docs/site/index.md @@ -42,8 +42,9 @@ vouch evidence import junit .vouch/artifacts/pytest.xml vouch gate ``` -JUnit covers required-test obligations only. Behavior, security, runtime, and -rollback obligations need their own evidence. +JUnit covers required-test obligations only. `security_check` artifacts can use +SARIF 2.1.0 when scanner rules or results reference exact obligation IDs. +Behavior, runtime, and rollback obligations need their own evidence. 
## Status
 
diff --git a/internal/vouch/evidence.go b/internal/vouch/evidence.go
index 37bb764..6ec7af5 100644
--- a/internal/vouch/evidence.go
+++ b/internal/vouch/evidence.go
@@ -162,10 +162,9 @@ func artifactCoverageByKind(artifacts []ArtifactResult) map[EvidenceKind]map[str
 
 func importVerifierFindings(evidence *Evidence) {
 	for _, result := range evidence.ArtifactResults {
-		if result.VerifierOutput == nil {
-			continue
+		if result.VerifierOutput != nil {
+			evidence.VerifierOutputs = append(evidence.VerifierOutputs, *result.VerifierOutput)
 		}
-		evidence.VerifierOutputs = append(evidence.VerifierOutputs, *result.VerifierOutput)
 		evidence.Findings = append(evidence.Findings, result.VerifierFindings...)
 	}
 }
diff --git a/internal/vouch/evidence_artifacts.go b/internal/vouch/evidence_artifacts.go
index f5c0d80..21384cc 100644
--- a/internal/vouch/evidence_artifacts.go
+++ b/internal/vouch/evidence_artifacts.go
@@ -113,6 +113,13 @@ func LinkEvidenceArtifacts(repo string, manifest Manifest, artifacts []EvidenceA
 					result.addIssue("junit_import", issue)
 				}
 			}
+		} else if artifact.Kind == EvidenceSecurityCheck && len(data) > 0 && sarifLooksLike(data) {
+			covered, findings, issues := importSARIFEvidence(data, artifact.Obligations, index)
+			result.CoveredObligations = covered
+			result.VerifierFindings = findings
+			for _, issue := range issues {
+				result.addIssue("sarif_import", issue)
+			}
 		} else if len(data) > 0 {
 			covered, issues := importGenericEvidence(data, artifact.Obligations)
 			result.CoveredObligations = covered
diff --git a/internal/vouch/evidence_test.go b/internal/vouch/evidence_test.go
index de39690..b9bc7ca 100644
--- a/internal/vouch/evidence_test.go
+++ b/internal/vouch/evidence_test.go
@@ -1026,6 +1026,86 @@ func TestGenericArtifactRequiresExactObligationTokens(t *testing.T) {
 	}
 }
 
+func TestSARIFSecurityEvidenceCoversReferencedObligation(t *testing.T) {
+	repo, manifestPath, ids := writeFullyCoveredUIScenario(t, nil)
+	securityID := ids[ObligationSecurity]
+	manifest := mustLoadManifest(t, manifestPath)
+	setArtifactPath(t, &manifest, "security", "artifacts/security.sarif")
+	writeJSON(t, manifestPath, manifest)
+	writeSARIFArtifact(t, repo, "artifacts/security.sarif", sarifSecurityLog(securityID, nil))
+
+	evidence, err := CollectEvidence(repo, manifestPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if evidence.Decision != "auto_merge" {
+		t.Fatalf("expected SARIF security evidence to pass, got %s: findings=%#v invalid=%#v", evidence.Decision, evidence.Findings, evidence.InvalidEvidence)
+	}
+	if !artifactCovered(evidence, "security", securityID) {
+		t.Fatalf("expected SARIF artifact to cover security obligation: %#v", evidence.ArtifactResults)
+	}
+	if hasInvalidEvidence(evidence, "security", "sarif_import") {
+		t.Fatalf("expected SARIF artifact to import cleanly: %#v", evidence.InvalidEvidence)
+	}
+}
+
+func TestSARIFHighSecurityFindingBlocks(t *testing.T) {
+	repo, manifestPath, ids := writeFullyCoveredUIScenario(t, nil)
+	securityID := ids[ObligationSecurity]
+	manifest := mustLoadManifest(t, manifestPath)
+	setArtifactPath(t, &manifest, "security", "artifacts/security.sarif")
+	writeJSON(t, manifestPath, manifest)
+	writeSARIFArtifact(t, repo, "artifacts/security.sarif", sarifSecurityLog(
+		"rules.no-hardcoded-secret",
+		map[string]any{
+			"severity":          "warning",
+			"tags":              []string{securityID},
+			"security-severity": "8.2",
+		},
+		sarifFindingResult("rules.no-hardcoded-secret", "warning", "hardcoded secret in changed file"),
+	))
+
+	evidence, err := CollectEvidence(repo, manifestPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if evidence.Decision != "block" {
+		t.Fatalf("expected high SARIF finding to block, got %s", evidence.Decision)
+	}
+	if artifactCovered(evidence, "security", securityID) {
+		t.Fatalf("expected blocked SARIF obligation to remain uncovered: %#v", evidence.ArtifactResults)
+	}
+	if !hasFinding(evidence, "sarif", "semgrep reported high-severity finding rules.no-hardcoded-secret") {
+		t.Fatalf("expected imported SARIF finding: %#v", evidence.Findings)
+	}
+	if !contains(evidence.PolicyResult.RulesFired, "block_verifier_findings") {
+		t.Fatalf("expected policy to block on SARIF finding: %#v", evidence.PolicyResult)
+	}
+}
+
+func TestSARIFRequiresExactObligationIDs(t *testing.T) {
+	data, err := json.Marshal(sarifSecurityLog("rules.near-match", map[string]any{
+		"tags": []string{"obligation.one_extra"},
+	}))
+	if err != nil {
+		t.Fatal(err)
+	}
+	covered, findings, issues := importSARIFEvidence(data, []string{"obligation.one"}, ObligationIndex{
+		ByID: map[string]Obligation{
+			"obligation.one": {ID: "obligation.one"},
+		},
+	})
+	if len(covered) != 0 {
+		t.Fatalf("expected near-match SARIF reference not to cover, got %#v", covered)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no finding for unmapped SARIF result, got %#v", findings)
+	}
+	if !containsSubstring(issues, "SARIF does not reference obligation obligation.one") {
+		t.Fatalf("expected exact-ID issue, got %#v", issues)
+	}
+}
+
 func TestJUnitErrorsAndSkipsInvalidateArtifact(t *testing.T) {
 	covered, failed, issues := importJUnitEvidence([]byte(`
@@ -2291,6 +2371,44 @@ func writeVerifierOutputArtifact(t *testing.T, repo string, relPath string, outp
 	writeArtifact(t, repo, relPath, string(append(data, '\n')))
 }
 
+func writeSARIFArtifact(t *testing.T, repo string, relPath string, log map[string]any) {
+	t.Helper()
+	data, err := json.MarshalIndent(log, "", " ")
+	if err != nil {
+		t.Fatal(err)
+	}
+	writeArtifact(t, repo, relPath, string(append(data, '\n')))
+}
+
+func sarifSecurityLog(ruleID string, properties map[string]any, results ...map[string]any) map[string]any {
+	rule := map[string]any{"id": ruleID}
+	if len(properties) > 0 {
+		rule["properties"] = properties
+	}
+	return map[string]any{
+		"version": sarifVersion,
+		"runs": []any{map[string]any{
+			"tool": map[string]any{
+				"driver": map[string]any{
+					"name":  "semgrep",
+					"rules": []any{rule},
+				},
+			},
+			"results": results,
+		}},
+	}
+}
+
+func sarifFindingResult(ruleID string, level string, message string) map[string]any {
+	return map[string]any{
+		"ruleId": ruleID,
+		"level":  level,
+		"message": map[string]any{
+			"text": message,
+		},
+	}
+}
+
 func writeEvidenceBundle(t *testing.T, repo string, relPath string, manifest Manifest, artifact EvidenceArtifact, mutate func(EvidenceArtifact, *EvidenceBundle)) {
 	t.Helper()
 	artifactData := mustReadFile(t, filepath.Join(repo, artifact.Path))
diff --git a/internal/vouch/onboarding.go b/internal/vouch/onboarding.go
index ded9b7c..6733edc 100644
--- a/internal/vouch/onboarding.go
+++ b/internal/vouch/onboarding.go
@@ -1012,6 +1012,9 @@ func importArtifactCoverage(data []byte, kind EvidenceKind, candidateObligations
 		covered, _, issues := importJUnitEvidence(data, candidateObligations)
 		return covered, issues
 	}
+	if kind == EvidenceSecurityCheck && sarifLooksLike(data) {
+		return importSARIFReferences(data, candidateObligations)
+	}
 	return importGenericEvidence(data, candidateObligations)
 }
 
diff --git a/internal/vouch/onboarding_test.go b/internal/vouch/onboarding_test.go
index 4814844..181a296 100644
--- a/internal/vouch/onboarding_test.go
+++ b/internal/vouch/onboarding_test.go
@@ -235,6 +235,40 @@ func TestAttachArtifactInfersCoveredObligations(t *testing.T) {
 	}
 }
 
+func TestAttachArtifactInfersSARIFSecurityObligations(t *testing.T) {
+	repo := initializedRepo(t)
+	spec := createSampleContract(t, repo, RiskMedium)
+	_, err := CreateManifest(repo, ManifestCreateOptions{
+		TaskID:       "agent-1",
+		Summary:      "change app service",
+		Agent:        "codex",
+		RunID:        "run-1",
+		ChangedFiles: []string{"src/app/service.py"},
+		Out:          ".vouch/manifests/agent-1.json",
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	securityID := obligationID(t, spec, ObligationSecurity, "project paths stay inside repo")
+	writeSARIFArtifact(t, repo, ".vouch/artifacts/security.sarif", sarifSecurityLog(securityID, nil))
+
+	_, artifact, err := AttachArtifact(repo, AttachArtifactOptions{
+		ManifestPath: ".vouch/manifests/agent-1.json",
+		ID:           "security",
+		Kind:         EvidenceSecurityCheck,
+		Path:         ".vouch/artifacts/security.sarif",
+		Command:      "semgrep scan --sarif",
+		ExitCode:     0,
+		Out:          ".vouch/manifests/agent-1.with-security.json",
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !contains(artifact.Obligations, securityID) {
+		t.Fatalf("expected inferred SARIF obligation %s, got %#v", securityID, artifact.Obligations)
+	}
+}
+
 func TestAttachArtifactRejectsNonZeroExitAndPathEscape(t *testing.T) {
 	repo := initializedRepo(t)
 	createSampleContract(t, repo, RiskMedium)
diff --git a/internal/vouch/sarif.go b/internal/vouch/sarif.go
new file mode 100644
index 0000000..3fc413e
--- /dev/null
+++ b/internal/vouch/sarif.go
@@ -0,0 +1,440 @@
+package vouch
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+)
+
+// sarifVersion is the only SARIF version the importer accepts.
+const sarifVersion = "2.1.0"
+
+// sarifLog and the types below model only the SARIF members the importer reads.
+type sarifLog struct {
+	Version string     `json:"version"`
+	Runs    []sarifRun `json:"runs"`
+}
+
+type sarifRun struct {
+	Tool    sarifTool     `json:"tool"`
+	Results []sarifResult `json:"results"`
+}
+
+type sarifTool struct {
+	Driver sarifDriver `json:"driver"`
+}
+
+type sarifDriver struct {
+	Name  string      `json:"name"`
+	Rules []sarifRule `json:"rules"`
+}
+
+type sarifRule struct {
+	ID                   string          `json:"id"`
+	Name                 string          `json:"name"`
+	ShortDescription     sarifMessage    `json:"shortDescription"`
+	FullDescription      sarifMessage    `json:"fullDescription"`
+	DefaultConfiguration sarifDefault    `json:"defaultConfiguration"`
+	Properties           sarifProperties `json:"properties"`
+}
+
+type sarifDefault struct {
+	Level string `json:"level"`
+}
+
+// sarifRuleRef is the result-level "rule" reference. ToolComponent is kept raw
+// because only its presence matters: an index qualified by a tool component
+// may point into an extension's rule list rather than the driver's.
+type sarifRuleRef struct {
+	ID            string          `json:"id"`
+	Index         *int            `json:"index"`
+	ToolComponent json.RawMessage `json:"toolComponent"`
+}
+
+// sarifResult is one scanner result. RuleIndex is a pointer so that an absent
+// index is not mistaken for rule 0.
+type sarifResult struct {
+	RuleID     string          `json:"ruleId"`
+	RuleIndex  *int            `json:"ruleIndex"`
+	Rule       sarifRuleRef    `json:"rule"`
+	Level      string          `json:"level"`
+	Message    sarifMessage    `json:"message"`
+	Properties sarifProperties `json:"properties"`
+}
+
+type sarifMessage struct {
+	Text string `json:"text"`
+}
+
+type sarifProperties map[string]any
+
+// sarifLooksLike reports whether data is JSON carrying a non-empty top-level
+// "version" and a non-null "runs" array. It does not validate the version.
+func sarifLooksLike(data []byte) bool {
+	var header struct {
+		Version string `json:"version"`
+		Runs    []any  `json:"runs"`
+	}
+	if err := json.Unmarshal(data, &header); err != nil {
+		return false
+	}
+	return header.Version != "" && header.Runs != nil
+}
+
+// importSARIFReferences returns the obligation IDs, in input order, that the
+// SARIF log references, together with any parse or mapping issues.
+func importSARIFReferences(data []byte, obligationIDs []string) ([]string, []string) {
+	log, issues := parseSARIF(data)
+	if len(issues) > 0 {
+		return nil, issues
+	}
+	refs := sarifReferencedObligations(log, stringSet(obligationIDs))
+	var covered []string
+	for _, obligationID := range obligationIDs {
+		if refs[obligationID] {
+			covered = append(covered, obligationID)
+		}
+	}
+	if len(covered) == 0 {
+		issues = append(issues, "SARIF does not reference any expected obligation IDs")
+	}
+	return covered, issues
+}
+
+// importSARIFEvidence maps a SARIF log onto obligationIDs. It returns the
+// obligations that are referenced and not blocked, one blocking Finding per
+// mapped result of high or critical severity, and any import issues.
+func importSARIFEvidence(data []byte, obligationIDs []string, index ObligationIndex) ([]string, []Finding, []string) {
+	log, issues := parseSARIF(data)
+	if len(issues) > 0 {
+		return nil, nil, issues
+	}
+
+	obligations := make(map[string]bool, len(obligationIDs))
+	for _, obligationID := range obligationIDs {
+		if _, ok := index.ByID[obligationID]; !ok {
+			issues = append(issues, fmt.Sprintf("unknown obligation %q", obligationID))
+			continue
+		}
+		obligations[obligationID] = true
+	}
+	refs := sarifReferencedObligations(log, obligations)
+	blocked := map[string]bool{}
+	var findings []Finding
+
+	for _, run := range log.Runs {
+		rules := sarifRulesByID(run)
+		ruleRefs := sarifRunRuleRefs(run, obligations)
+		tool := run.Tool.Driver.Name
+		if strings.TrimSpace(tool) == "" {
+			tool = "sarif"
+		}
+		for _, result := range run.Results {
+			resultRefs := sarifResultRefs(result, ruleRefs, obligations)
+			if len(resultRefs) == 0 {
+				continue
+			}
+			rule := rules[result.RuleID]
+			severity := sarifSeverity(result, rule)
+			if sarifSeverityRank(severity) < sarifSeverityRank("high") {
+				continue
+			}
+			for _, obligationID := range resultRefs {
+				blocked[obligationID] = true
+			}
+			findings = append(findings, Finding{
+				Verifier:    "sarif",
+				Severity:    severity,
+				Decision:    "block",
+				Claim:       sarifClaim(tool, result),
+				Evidence:    sarifEvidence(result, resultRefs),
+				RequiredFix: "fix the SARIF finding or attach passing security evidence",
+				Obligations: resultRefs,
+			})
+		}
+	}
+
+	var covered []string
+	for _, obligationID := range obligationIDs {
+		if !obligations[obligationID] {
+			continue
+		}
+		if !refs[obligationID] {
+			issues = append(issues, fmt.Sprintf("SARIF does not reference obligation %s", obligationID))
+			continue
+		}
+		if !blocked[obligationID] {
+			covered = append(covered, obligationID)
+		}
+	}
+	return covered, findings, issues
+}
+
+// parseSARIF decodes exactly one SARIF 2.1.0 log with at least one run and
+// resolves each result's rule ID so later lookups can key on RuleID alone.
+func parseSARIF(data []byte) (sarifLog, []string) {
+	var log sarifLog
+	decoder := json.NewDecoder(bytes.NewReader(data))
+	if err := decoder.Decode(&log); err != nil {
+		return log, []string{fmt.Sprintf("cannot parse SARIF JSON: %v", err)}
+	}
+	if err := decoder.Decode(&struct{}{}); err != io.EOF {
+		return log, []string{"trailing JSON content after SARIF log"}
+	}
+	if log.Version != sarifVersion {
+		return log, []string{fmt.Sprintf("SARIF version must be %s", sarifVersion)}
+	}
+	if len(log.Runs) == 0 {
+		return log, []string{"SARIF log contains no runs"}
+	}
+	resolveSARIFRuleIDs(&log)
+	return log, nil
+}
+
+// resolveSARIFRuleIDs fills an empty result.RuleID from result.rule.id or,
+// failing that, from the driver rule selected by ruleIndex / rule.index.
+// Without this, a result that names its rule only by reference or index would
+// inherit neither the rule's obligation references nor its severity, so a
+// high-severity finding would be skipped while the obligation stayed covered.
+func resolveSARIFRuleIDs(log *sarifLog) {
+	for r := range log.Runs {
+		run := &log.Runs[r]
+		for i := range run.Results {
+			result := &run.Results[i]
+			if result.RuleID != "" {
+				continue
+			}
+			if result.Rule.ID != "" {
+				result.RuleID = result.Rule.ID
+				continue
+			}
+			// An index qualified by toolComponent may address an extension's rules.
+			if len(result.Rule.ToolComponent) > 0 {
+				continue
+			}
+			index := result.RuleIndex
+			if index == nil {
+				index = result.Rule.Index
+			}
+			// SARIF uses -1 for "no rule"; also ignore indexes past the rule list.
+			if index != nil && *index >= 0 && *index < len(run.Tool.Driver.Rules) {
+				result.RuleID = run.Tool.Driver.Rules[*index].ID
+			}
+		}
+	}
+}
+
+// sarifReferencedObligations returns the set of known obligation IDs that any
+// rule or result in the log references.
+func sarifReferencedObligations(log sarifLog, obligations map[string]bool) map[string]bool {
+	refs := map[string]bool{}
+	for _, run := range log.Runs {
+		ruleRefs := sarifRunRuleRefs(run, obligations)
+		for _, ids := range ruleRefs {
+			for id := range ids {
+				refs[id] = true
+			}
+		}
+		for _, result := range run.Results {
+			for _, id := range sarifResultRefs(result, ruleRefs, obligations) {
+				refs[id] = true
+			}
+		}
+	}
+	return refs
+}
+
+// sarifRunRuleRefs maps each driver rule ID to the obligations that rule references.
+func sarifRunRuleRefs(run sarifRun, obligations map[string]bool) map[string]map[string]bool {
+	refs := map[string]map[string]bool{}
+	for _, rule := range run.Tool.Driver.Rules {
+		ruleSet := sarifRuleRefs(rule, obligations)
+		if len(ruleSet) > 0 {
+			refs[rule.ID] = ruleSet
+		}
+	}
+	return refs
+}
+
+// sarifRulesByID indexes a run's driver rules by ID.
+func sarifRulesByID(run sarifRun) map[string]sarifRule {
+	rules := make(map[string]sarifRule, len(run.Tool.Driver.Rules))
+	for _, rule := range run.Tool.Driver.Rules {
+		rules[rule.ID] = rule
+	}
+	return rules
+}
+
+// sarifRuleRefs collects the obligations named by a rule's ID or properties.
+func sarifRuleRefs(rule sarifRule, obligations map[string]bool) map[string]bool {
+	refs := map[string]bool{}
+	if obligations[rule.ID] {
+		refs[rule.ID] = true
+	}
+	addSARIFPropertyRefs(refs, rule.Properties, obligations)
+	return refs
+}
+
+// sarifResultRefs returns, sorted, the obligations a result references through
+// its rule ID, its rule's references, or its own properties.
+func sarifResultRefs(result sarifResult, ruleRefs map[string]map[string]bool, obligations map[string]bool) []string {
+	refs := map[string]bool{}
+	if obligations[result.RuleID] {
+		refs[result.RuleID] = true
+	}
+	for id := range ruleRefs[result.RuleID] {
+		refs[id] = true
+	}
+	addSARIFPropertyRefs(refs, result.Properties, obligations)
+	return sortedStringKeys(refs)
+}
+
+// addSARIFPropertyRefs walks a decoded property value and records every string
+// that exactly equals a known obligation ID.
+func addSARIFPropertyRefs(refs map[string]bool, value any, obligations map[string]bool) {
+	switch typed := value.(type) {
+	case string:
+		if obligations[typed] {
+			refs[typed] = true
+		}
+	case []any:
+		for _, item := range typed {
+			addSARIFPropertyRefs(refs, item, obligations)
+		}
+	case map[string]any:
+		for _, item := range typed {
+			addSARIFPropertyRefs(refs, item, obligations)
+		}
+	case sarifProperties:
+		for _, item := range typed {
+			addSARIFPropertyRefs(refs, item, obligations)
+		}
+	}
+}
+
+// sarifSeverity picks a severity from, in order, result properties, rule
+// properties, the result level, and the rule's default level.
+func sarifSeverity(result sarifResult, rule sarifRule) string {
+	if severity := severityFromProperties(result.Properties); severity != "" {
+		return severity
+	}
+	if severity := severityFromProperties(rule.Properties); severity != "" {
+		return severity
+	}
+	level := strings.TrimSpace(result.Level)
+	if level == "" {
+		level = strings.TrimSpace(rule.DefaultConfiguration.Level)
+	}
+	switch level {
+	case "error":
+		return "high"
+	case "warning":
+		return "medium"
+	case "note", "none":
+		return "low"
+	default:
+		return "low"
+	}
+}
+
+// severityFromProperties reads "security-severity" first, then "severity" and
+// "problem.severity"; it returns "" when none yields a recognised value.
+func severityFromProperties(properties sarifProperties) string {
+	if severity := securitySeverity(properties["security-severity"]); severity != "" {
+		return severity
+	}
+	for _, key := range []string{"severity", "problem.severity"} {
+		if severity := normalizeSeverity(properties[key]); severity != "" {
+			return severity
+		}
+	}
+	return ""
+}
+
+// normalizeSeverity maps a severity or level word to critical/high/medium/low,
+// or "" when the value is not a recognised string.
+func normalizeSeverity(value any) string {
+	text, ok := value.(string)
+	if !ok {
+		return ""
+	}
+	switch strings.ToLower(strings.TrimSpace(text)) {
+	case "critical":
+		return "critical"
+	case "high", "error":
+		return "high"
+	case "medium", "warning":
+		return "medium"
+	case "low", "note", "none":
+		return "low"
+	default:
+		return ""
+	}
+}
+
+// securitySeverity buckets a numeric "security-severity" score. It returns ""
+// for NaN and negative values so that a malformed score cannot mask the other
+// severity signals by being read as "low".
+func securitySeverity(value any) string {
+	var score float64
+	switch typed := value.(type) {
+	case float64:
+		score = typed
+	case string:
+		parsed, err := strconv.ParseFloat(strings.TrimSpace(typed), 64)
+		if err != nil {
+			return ""
+		}
+		score = parsed
+	default:
+		return ""
+	}
+	// Negated comparison so that NaN is rejected along with negatives.
+	if !(score >= 0) {
+		return ""
+	}
+	switch {
+	case score >= 9:
+		return "critical"
+	case score >= 7:
+		return "high"
+	case score >= 4:
+		return "medium"
+	default:
+		return "low"
+	}
+}
+
+// sarifClaim builds the finding claim text for a blocking result.
+func sarifClaim(tool string, result sarifResult) string {
+	if strings.TrimSpace(result.RuleID) == "" {
+		return tool + " reported a high-severity security finding"
+	}
+	return fmt.Sprintf("%s reported high-severity finding %s", tool, result.RuleID)
+}
+
+// sarifEvidence joins the result message with the obligations it maps to.
+func sarifEvidence(result sarifResult, obligations []string) string {
+	message := strings.TrimSpace(result.Message.Text)
+	if message == "" {
+		message = "SARIF result did not include a message"
+	}
+	return fmt.Sprintf("%s; obligations: %s", message, strings.Join(obligations, ", "))
+}
+
+// sarifSeverityRank orders severities; unknown values rank with "low".
+func sarifSeverityRank(severity string) int {
+	switch severity {
+	case "critical":
+		return 3
+	case "high":
+		return 2
+	case "medium":
+		return 1
+	default:
+		return 0
+	}
+}