Skip to content

Commit f2a787f

Browse files
committed
refactor(toctree matching): create new directive_regex.go and optional toctree flag
1 parent d82630b commit f2a787f

14 files changed

+167
-65
lines changed

audit-cli/README.md

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -294,22 +294,29 @@ The command searches all RST files (`.rst` and `.txt` extensions) and YAML files
294294

295295
**Use Cases:**
296296

297+
By default, this command searches for content inclusion directives (include, literalinclude,
298+
io-code-block) that transclude content into pages. Use `--include-toctree` to also search
299+
for toctree entries, which are navigation links rather than content transclusion.
300+
297301
This command helps writers:
298302
- Understand the impact of changes to a file (what pages will be affected)
299303
- Find all usages of an include file across the documentation
300304
- Track where code examples are referenced
301-
- Identify orphaned files (files with no references from include, literalinclude, io-code-block, or toctree directives)
305+
- Identify orphaned files (files with no references from content inclusion directives)
302306
- Plan refactoring by understanding file dependencies
303307

304308
**Basic Usage:**
305309

306310
```bash
307-
# Find what references an include file
311+
# Find what references an include file (content inclusion only)
308312
./audit-cli analyze file-references path/to/includes/fact.rst
309313

310314
# Find what references a code example
311315
./audit-cli analyze file-references path/to/code-examples/example.js
312316

317+
# Include toctree references (navigation links)
318+
./audit-cli analyze file-references path/to/file.rst --include-toctree
319+
313320
# Get JSON output for automation
314321
./audit-cli analyze file-references path/to/file.rst --format json
315322

@@ -323,7 +330,8 @@ This command helps writers:
323330
- `-v, --verbose` - Show detailed information including line numbers and reference paths
324331
- `-c, --count-only` - Only show the count of references (useful for quick checks and scripting)
325332
- `--paths-only` - Only show the file paths, one per line (useful for piping to other commands)
326-
- `-t, --directive-type <type>` - Filter by directive type: `include`, `literalinclude`, or `io-code-block`
333+
- `-t, --directive-type <type>` - Filter by directive type: `include`, `literalinclude`, `io-code-block`, or `toctree` (`toctree` requires `--include-toctree`)
334+
- `--include-toctree` - Include toctree entries (navigation links) in addition to content inclusion directives
327335

328336
**Understanding the Counts:**
329337

@@ -339,20 +347,20 @@ This helps identify both the impact scope (how many files) and duplicate include
339347

340348
**Supported Directive Types:**
341349

342-
The command tracks three types of RST directives:
350+
By default, the command tracks content inclusion directives:
343351

344-
1. **`.. include::`** - RST content includes
352+
1. **`.. include::`** - RST content includes (transcluded)
345353
```rst
346354
.. include:: /includes/intro.rst
347355
```
348356

349-
2. **`.. literalinclude::`** - Code file references
357+
2. **`.. literalinclude::`** - Code file references (transcluded)
350358
```rst
351359
.. literalinclude:: /code-examples/example.py
352360
:language: python
353361
```
354362

355-
3. **`.. io-code-block::`** - Input/output examples with file arguments
363+
3. **`.. io-code-block::`** - Input/output examples with file arguments (transcluded)
356364
```rst
357365
.. io-code-block::
358366
@@ -363,6 +371,17 @@ The command tracks three types of RST directives:
363371
:language: json
364372
```
365373

374+
With `--include-toctree`, the command also tracks:
375+
376+
4. **`.. toctree::`** - Table of contents entries (navigation links, not transcluded)
377+
```rst
378+
.. toctree::
379+
:maxdepth: 2
380+
381+
intro
382+
getting-started
383+
```
384+
366385
**Note:** Only file-based references are tracked. Inline content (e.g., `.. input::` with `:language:` but no file path) is not tracked.
367386

368387
**Output Formats:**

audit-cli/commands/analyze/file-references/analyzer.go

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,46 +5,31 @@ import (
55
"fmt"
66
"os"
77
"path/filepath"
8-
"regexp"
98
"sort"
109
"strings"
1110

1211
"github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver"
1312
"github.com/mongodb/code-example-tooling/audit-cli/internal/rst"
1413
)
1514

16-
// Regular expressions for matching directives
17-
var (
18-
// includeRegex matches: .. include:: /path/to/file.rst
19-
includeRegex = regexp.MustCompile(`^\.\.\s+include::\s+(.+)$`)
20-
21-
// literalIncludeRegex matches: .. literalinclude:: /path/to/file.ext
22-
literalIncludeRegex = regexp.MustCompile(`^\.\.\s+literalinclude::\s+(.+)$`)
23-
24-
// ioCodeBlockRegex matches: .. io-code-block::
25-
ioCodeBlockRegex = regexp.MustCompile(`^\.\.\s+io-code-block::`)
26-
27-
// inputRegex matches: .. input:: /path/to/file.ext (within io-code-block)
28-
inputRegex = regexp.MustCompile(`^\.\.\s+input::\s+(.+)$`)
29-
30-
// outputRegex matches: .. output:: /path/to/file.ext (within io-code-block)
31-
outputRegex = regexp.MustCompile(`^\.\.\s+output::\s+(.+)$`)
32-
)
33-
3415
// AnalyzeReferences finds all files that reference the target file.
3516
//
3617
// This function searches through all RST files (.rst, .txt) and YAML files (.yaml, .yml)
3718
// in the source directory to find files that reference the target file using include,
3819
// literalinclude, or io-code-block directives. YAML files are included because extract
3920
// and release files contain RST directives within their content blocks.
4021
//
22+
// By default, only content inclusion directives are searched. Set includeToctree to true
23+
// to also search for toctree entries (navigation links).
24+
//
4125
// Parameters:
4226
// - targetFile: Absolute path to the file to analyze
27+
// - includeToctree: If true, include toctree entries in the search
4328
//
4429
// Returns:
4530
// - *ReferenceAnalysis: The analysis results
4631
// - error: Any error encountered during analysis
47-
func AnalyzeReferences(targetFile string) (*ReferenceAnalysis, error) {
32+
func AnalyzeReferences(targetFile string, includeToctree bool) (*ReferenceAnalysis, error) {
4833
// Get absolute path
4934
absTargetFile, err := filepath.Abs(targetFile)
5035
if err != nil {
@@ -83,7 +68,7 @@ func AnalyzeReferences(targetFile string) (*ReferenceAnalysis, error) {
8368
}
8469

8570
// Search for references in this file
86-
refs, err := findReferencesInFile(path, absTargetFile, sourceDir)
71+
refs, err := findReferencesInFile(path, absTargetFile, sourceDir, includeToctree)
8772
if err != nil {
8873
// Log error but continue processing other files
8974
fmt.Fprintf(os.Stderr, "Warning: failed to process %s: %v\n", path, err)
@@ -110,17 +95,19 @@ func AnalyzeReferences(targetFile string) (*ReferenceAnalysis, error) {
11095
// findReferencesInFile searches a single file for references to the target file.
11196
//
11297
// This function scans through the file line by line looking for include,
113-
// literalinclude, io-code-block, and toctree directives that reference the target file.
98+
// literalinclude, and io-code-block directives that reference the target file.
99+
// If includeToctree is true, it also searches for toctree entries.
114100
//
115101
// Parameters:
116102
// - filePath: Path to the file to search
117103
// - targetFile: Absolute path to the target file
118104
// - sourceDir: Source directory (for resolving relative paths)
105+
// - includeToctree: If true, include toctree entries in the search
119106
//
120107
// Returns:
121108
// - []FileReference: List of references found in this file
122109
// - error: Any error encountered during processing
123-
func findReferencesInFile(filePath, targetFile, sourceDir string) ([]FileReference, error) {
110+
func findReferencesInFile(filePath, targetFile, sourceDir string, includeToctree bool) ([]FileReference, error) {
124111
file, err := os.Open(filePath)
125112
if err != nil {
126113
return nil, err
@@ -140,15 +127,15 @@ func findReferencesInFile(filePath, targetFile, sourceDir string) ([]FileReferen
140127
line := scanner.Text()
141128
trimmedLine := strings.TrimSpace(line)
142129

143-
// Check for toctree start (use shared regex from rst package)
144-
if rst.ToctreeDirectiveRegex.MatchString(trimmedLine) {
130+
// Check for toctree start (only if includeToctree is enabled)
131+
if includeToctree && rst.ToctreeDirectiveRegex.MatchString(trimmedLine) {
145132
inToctree = true
146133
toctreeStartLine = lineNum
147134
continue
148135
}
149136

150137
// Check for io-code-block start
151-
if ioCodeBlockRegex.MatchString(trimmedLine) {
138+
if rst.IOCodeBlockDirectiveRegex.MatchString(trimmedLine) {
152139
inIOCodeBlock = true
153140
ioCodeBlockStartLine = lineNum
154141
continue
@@ -165,7 +152,7 @@ func findReferencesInFile(filePath, targetFile, sourceDir string) ([]FileReferen
165152
}
166153

167154
// Check for include directive
168-
if matches := includeRegex.FindStringSubmatch(trimmedLine); matches != nil {
155+
if matches := rst.IncludeDirectiveRegex.FindStringSubmatch(trimmedLine); matches != nil {
169156
refPath := strings.TrimSpace(matches[1])
170157
if referencesTarget(refPath, targetFile, sourceDir, filePath) {
171158
references = append(references, FileReference{
@@ -179,7 +166,7 @@ func findReferencesInFile(filePath, targetFile, sourceDir string) ([]FileReferen
179166
}
180167

181168
// Check for literalinclude directive
182-
if matches := literalIncludeRegex.FindStringSubmatch(trimmedLine); matches != nil {
169+
if matches := rst.LiteralIncludeDirectiveRegex.FindStringSubmatch(trimmedLine); matches != nil {
183170
refPath := strings.TrimSpace(matches[1])
184171
if referencesTarget(refPath, targetFile, sourceDir, filePath) {
185172
references = append(references, FileReference{
@@ -195,7 +182,7 @@ func findReferencesInFile(filePath, targetFile, sourceDir string) ([]FileReferen
195182
// Check for input/output directives within io-code-block
196183
if inIOCodeBlock {
197184
// Check for input directive
198-
if matches := inputRegex.FindStringSubmatch(trimmedLine); matches != nil {
185+
if matches := rst.InputDirectiveRegex.FindStringSubmatch(trimmedLine); matches != nil {
199186
refPath := strings.TrimSpace(matches[1])
200187
if referencesTarget(refPath, targetFile, sourceDir, filePath) {
201188
references = append(references, FileReference{
@@ -209,7 +196,7 @@ func findReferencesInFile(filePath, targetFile, sourceDir string) ([]FileReferen
209196
}
210197

211198
// Check for output directive
212-
if matches := outputRegex.FindStringSubmatch(trimmedLine); matches != nil {
199+
if matches := rst.OutputDirectiveRegex.FindStringSubmatch(trimmedLine); matches != nil {
213200
refPath := strings.TrimSpace(matches[1])
214201
if referencesTarget(refPath, targetFile, sourceDir, filePath) {
215202
references = append(references, FileReference{

audit-cli/commands/analyze/file-references/file_references.go

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,12 @@ import (
3737
// - -t, --directive-type: Filter by directive type (include, literalinclude, io-code-block, toctree)
3838
func NewFileReferencesCommand() *cobra.Command {
3939
var (
40-
format string
41-
verbose bool
42-
countOnly bool
43-
pathsOnly bool
44-
directiveType string
40+
format string
41+
verbose bool
42+
countOnly bool
43+
pathsOnly bool
44+
directiveType string
45+
includeToctree bool
4546
)
4647

4748
cmd := &cobra.Command{
@@ -50,13 +51,15 @@ func NewFileReferencesCommand() *cobra.Command {
5051
Long: `Find all files that reference a target file through RST directives.
5152
5253
This command performs reverse dependency analysis, showing which files reference
53-
the target file through include, literalinclude, io-code-block, or toctree directives.
54+
the target file through content inclusion directives (include, literalinclude,
55+
io-code-block). Use --include-toctree to also search for toctree entries, which
56+
are navigation links rather than content transclusion.
5457
5558
Supported directive types:
56-
- .. include:: RST content includes
57-
- .. literalinclude:: Code file references
58-
- .. io-code-block:: Input/output examples with file arguments
59-
- .. toctree:: Table of contents entries
59+
- .. include:: RST content includes (transcluded)
60+
- .. literalinclude:: Code file references (transcluded)
61+
- .. io-code-block:: Input/output examples with file arguments (transcluded)
62+
- .. toctree:: Table of contents entries (navigation links, requires --include-toctree)
6063
6164
The command searches all RST files (.rst, .txt) and YAML files (.yaml, .yml) in
6265
the source directory tree. YAML files are included because extract and release
@@ -66,7 +69,7 @@ This is useful for:
6669
- Understanding the impact of changes to a file
6770
- Finding all usages of an include file
6871
- Tracking code example references
69-
- Identifying orphaned files (files with no references, including toctree entries)
72+
- Identifying orphaned files (files with no references from content inclusion directives)
7073
7174
Examples:
7275
# Find what references an include file
@@ -75,6 +78,9 @@ Examples:
7578
# Find what references a code example
7679
analyze file-references /path/to/code-examples/example.js
7780
81+
# Include toctree references (navigation links)
82+
analyze file-references /path/to/file.rst --include-toctree
83+
7884
# Get JSON output
7985
analyze file-references /path/to/file.rst --format json
8086
@@ -91,7 +97,7 @@ Examples:
9197
analyze file-references /path/to/file.rst --directive-type include`,
9298
Args: cobra.ExactArgs(1),
9399
RunE: func(cmd *cobra.Command, args []string) error {
94-
return runReferences(args[0], format, verbose, countOnly, pathsOnly, directiveType)
100+
return runReferences(args[0], format, verbose, countOnly, pathsOnly, directiveType, includeToctree)
95101
},
96102
}
97103

@@ -100,6 +106,7 @@ Examples:
100106
cmd.Flags().BoolVarP(&countOnly, "count-only", "c", false, "Only show the count of references")
101107
cmd.Flags().BoolVar(&pathsOnly, "paths-only", false, "Only show the file paths (one per line)")
102108
cmd.Flags().StringVarP(&directiveType, "directive-type", "t", "", "Filter by directive type (include, literalinclude, io-code-block, toctree)")
109+
cmd.Flags().BoolVar(&includeToctree, "include-toctree", false, "Include toctree entries (navigation links) in addition to content inclusion directives")
103110

104111
return cmd
105112
}
@@ -115,10 +122,11 @@ Examples:
115122
// - countOnly: If true, only show the count
116123
// - pathsOnly: If true, only show the file paths
117124
// - directiveType: Filter by directive type (empty string means all types)
125+
// - includeToctree: If true, include toctree entries in the search
118126
//
119127
// Returns:
120128
// - error: Any error encountered during analysis
121-
func runReferences(targetFile, format string, verbose, countOnly, pathsOnly bool, directiveType string) error {
129+
func runReferences(targetFile, format string, verbose, countOnly, pathsOnly bool, directiveType string, includeToctree bool) error {
122130
// Validate directive type if specified
123131
if directiveType != "" {
124132
validTypes := map[string]bool{
@@ -147,7 +155,7 @@ func runReferences(targetFile, format string, verbose, countOnly, pathsOnly bool
147155
}
148156

149157
// Perform analysis
150-
analysis, err := AnalyzeReferences(targetFile)
158+
analysis, err := AnalyzeReferences(targetFile, includeToctree)
151159
if err != nil {
152160
return fmt.Errorf("failed to analyze references: %w", err)
153161
}

0 commit comments

Comments
 (0)