|
12 | 12 | from saar.models import CodebaseDNA |
13 | 13 | from saar.formatters._tribal import render_tribal_knowledge |
14 | 14 |
|
# Markers delimiting the auto-generated block in saar output. Everything
# between them (markers included) is removed from team_rules before embedding
# -- this prevents nested markers and the inception loop where saar reads its
# own output as input.
_SAAR_MARKER_START = "<!-- SAAR:AUTO-START -->"
_SAAR_MARKER_END = "<!-- SAAR:AUTO-END -->"

# Max chars to include from hand-written team rules.
# The auto-detected DNA above already covers conventions -- team rules should
# add what's MISSING (data flow, key services, commit rules), not repeat them.
_TEAM_RULES_MAX_CHARS = 3000


def _clean_team_rules(raw: str) -> str:
    """Remove saar-generated blocks from team rules and cap their length.

    Why: team_rules comes from reading CLAUDE.md or .cursorrules verbatim.
    If that file was previously generated by saar, it contains
    SAAR:AUTO-START / SAAR:AUTO-END markers; embedding it unchanged nests
    markers and duplicates the generated content on every re-run.

    Processing steps:
      1. Drop every complete START...END block, markers included, keeping
         only the text that lives outside the markers.
      2. Drop any leftover unmatched marker (a START without a following
         END, or a stray END). The text around an unmatched marker is kept,
         because there is no way to tell where a broken block was meant to
         end; only the marker string itself is removed so the result never
         contains a SAAR marker.
      3. Strip surrounding whitespace.
      4. If the result is longer than _TEAM_RULES_MAX_CHARS, cut it at that
         length and append a truncation note.

    Args:
        raw: Team rules text as read from the rules file.

    Returns:
        The cleaned text; an empty string when nothing remains.
    """
    # Single left-to-right pass: collect the text between complete blocks.
    kept = []
    pos = 0
    while True:
        start = raw.find(_SAAR_MARKER_START, pos)
        if start == -1:
            break
        end = raw.find(_SAAR_MARKER_END, start + len(_SAAR_MARKER_START))
        if end == -1:
            # unterminated block -- leave the text in place, handled below
            break
        kept.append(raw[pos:start])
        pos = end + len(_SAAR_MARKER_END)
    kept.append(raw[pos:])
    result = "".join(kept)

    # Orphan markers would still produce nested/stray markers once embedded,
    # so remove the marker strings themselves.
    for marker in (_SAAR_MARKER_START, _SAAR_MARKER_END):
        result = result.replace(marker, "")

    result = result.strip()

    # truncate if still very long -- human rules should be concise
    if len(result) > _TEAM_RULES_MAX_CHARS:
        result = result[:_TEAM_RULES_MAX_CHARS].rstrip()
        result += "\n\n*(truncated -- see full file for remaining rules)*"

    return result
| 54 | + |
15 | 55 |
|
16 | 56 | def render_agents_md(dna: CodebaseDNA) -> str: |
17 | 57 | """Render DNA as an AGENTS.md file.""" |
@@ -164,11 +204,13 @@ def render_agents_md(dna: CodebaseDNA) -> str: |
164 | 204 | if tribal: |
165 | 205 | lines.append(tribal) |
166 | 206 |
|
167 | | - # -- team rules verbatim -- |
| 207 | + # -- team rules: human-written sections only (auto-generated blocks stripped, long text truncated) -- |
168 | 208 | if dna.team_rules: |
169 | | - lines.append("\n## Project-Specific Rules\n") |
170 | | - if dna.team_rules_source: |
171 | | - lines.append(f"*From `{dna.team_rules_source}`*\n") |
172 | | - lines.append(dna.team_rules) |
| 209 | + cleaned = _clean_team_rules(dna.team_rules) |
| 210 | + if cleaned: |
| 211 | + lines.append("\n## Project-Specific Rules\n") |
| 212 | + if dna.team_rules_source: |
| 213 | + lines.append(f"*From `{dna.team_rules_source}`*\n") |
| 214 | + lines.append(cleaned) |
173 | 215 |
|
174 | 216 | return "\n".join(lines) + "\n" |
0 commit comments