Skip to content

Commit 887896d

Browse files
committed
More explicit string matching behavior, a few new flags, and tests
1 parent c1884d1 commit 887896d

File tree

9 files changed

+390
-15
lines changed

9 files changed

+390
-15
lines changed

audit-cli/README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,17 @@ After extraction, a report is displayed showing:
122122

123123
Search through files for a specific substring. Can search through extracted code example files or RST source files.
124124

125+
**Default Behavior:**
126+
- **Case-insensitive** search (matches "curl", "CURL", "Curl", etc.)
127+
- **Exact word matching** (excludes partial matches like "curl" in "libcurl")
128+
129+
Use `--case-sensitive` to make the search case-sensitive, or `--partial-match` to allow matching the substring as part of larger words.
130+
125131
**Basic Usage:**
126132

127133
```bash
128-
# Search in a single file
129-
./audit-cli search find-string path/to/file.js "substring"
134+
# Search in a single file (case-insensitive, exact word match)
135+
./audit-cli search find-string path/to/file.js "curl"
130136

131137
# Search in a directory (non-recursive)
132138
./audit-cli search find-string path/to/output "substring"
@@ -142,13 +148,24 @@ Search through files for a specific substring. Can search through extracted code
142148

143149
# Verbose output (show file paths and language breakdown)
144150
./audit-cli search find-string path/to/output "substring" -r -v
151+
152+
# Case-sensitive search (only matches exact case)
153+
./audit-cli search find-string path/to/output "CURL" --case-sensitive
154+
155+
# Partial match (includes "curl" in "libcurl")
156+
./audit-cli search find-string path/to/output "curl" --partial-match
157+
158+
# Combine flags for case-sensitive partial matching
159+
./audit-cli search find-string path/to/output "curl" --case-sensitive --partial-match
145160
```
146161

147162
**Flags:**
148163

149164
- `-r, --recursive` - Recursively search all files in subdirectories
150165
- `-f, --follow-includes` - Follow `.. include::` directives in RST files
151166
- `-v, --verbose` - Show file paths and language breakdown
167+
- `--case-sensitive` - Make search case-sensitive (default: case-insensitive)
168+
- `--partial-match` - Allow partial matches within words (default: exact word matching)
152169

153170
**Report:**
154171

audit-cli/commands/search/find-string/find_string.go

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,18 @@
33
// This package implements the "search find-string" subcommand, which searches through
44
// extracted code example files to find occurrences of a specific substring.
55
//
6-
// The search is case-sensitive and counts each file only once, even if the substring
7-
// appears multiple times in the same file.
6+
// By default, the search is case-insensitive and matches exact words only (not partial matches
7+
// within larger words). These behaviors can be changed with the --case-sensitive and
8+
// --partial-match flags. Each file is counted only once, even if the substring appears
9+
// multiple times in the same file.
810
//
911
// Supports:
1012
// - Recursive directory scanning
1113
// - Following include directives in RST files
1214
// - Verbose output with file paths and language breakdown
1315
// - Language detection based on file extension
16+
// - Case-insensitive search (default) or case-sensitive search (--case-sensitive flag)
17+
// - Exact word matching (default) or partial matching (--partial-match flag)
1418
package find_string
1519

1620
import (
@@ -32,29 +36,39 @@ import (
3236
// - -r, --recursive: Recursively search all files in subdirectories
3337
// - -f, --follow-includes: Follow .. include:: directives in RST files
3438
// - -v, --verbose: Show file paths and language breakdown
39+
// - --case-sensitive: Make search case-sensitive (default: case-insensitive)
40+
// - --partial-match: Allow partial matches within words (default: exact word matching)
3541
func NewFindStringCommand() *cobra.Command {
3642
var (
3743
recursive bool
3844
followIncludes bool
3945
verbose bool
46+
caseSensitive bool
47+
partialMatch bool
4048
)
4149

4250
cmd := &cobra.Command{
4351
Use: "find-string [filepath] [substring]",
4452
Short: "Search for a substring in extracted code example files",
4553
Long: `Search through extracted code example files to find occurrences of a specific substring.
46-
Reports the number of code examples containing the substring.`,
54+
Reports the number of code examples containing the substring.
55+
56+
By default, the search is case-insensitive and matches exact words only. Use --case-sensitive
57+
to make the search case-sensitive, or --partial-match to allow matching the substring as part
58+
of larger words (e.g., "curl" matching "libcurl").`,
4759
Args: cobra.ExactArgs(2),
4860
RunE: func(cmd *cobra.Command, args []string) error {
4961
filePath := args[0]
5062
substring := args[1]
51-
return runSearch(filePath, substring, recursive, followIncludes, verbose)
63+
return runSearch(filePath, substring, recursive, followIncludes, verbose, caseSensitive, partialMatch)
5264
},
5365
}
5466

5567
cmd.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively search all files in subdirectories")
5668
cmd.Flags().BoolVarP(&followIncludes, "follow-includes", "f", false, "Follow .. include:: directives in RST files")
5769
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Provide additional information during execution")
70+
cmd.Flags().BoolVar(&caseSensitive, "case-sensitive", false, "Make search case-sensitive (default: case-insensitive)")
71+
cmd.Flags().BoolVar(&partialMatch, "partial-match", false, "Allow partial matches within words (default: exact word matching)")
5872

5973
return cmd
6074
}
@@ -66,29 +80,31 @@ Reports the number of code examples containing the substring.`,
6680
//
6781
// Parameters:
6882
// - filePath: Path to file or directory to search
69-
// - substring: The substring to search for (case-sensitive)
83+
// - substring: The substring to search for
7084
// - recursive: If true, recursively search subdirectories
7185
// - followIncludes: If true, follow .. include:: directives
7286
// - verbose: If true, show detailed information during search
87+
// - caseSensitive: If true, search is case-sensitive; if false, case-insensitive
88+
// - partialMatch: If true, allow partial matches within words; if false, match exact words only
7389
//
7490
// Returns:
7591
// - *SearchReport: Statistics about the search operation
7692
// - error: Any error encountered during search
77-
func RunSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) (*SearchReport, error) {
78-
return runSearchInternal(filePath, substring, recursive, followIncludes, verbose)
93+
func RunSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool, caseSensitive bool, partialMatch bool) (*SearchReport, error) {
94+
return runSearchInternal(filePath, substring, recursive, followIncludes, verbose, caseSensitive, partialMatch)
7995
}
8096

8197
// runSearch executes the search operation (internal wrapper for CLI).
8298
//
8399
// This is a thin wrapper around runSearchInternal that discards the report
84100
// and only returns errors, suitable for use in the CLI command handler.
85-
func runSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) error {
86-
_, err := runSearchInternal(filePath, substring, recursive, followIncludes, verbose)
101+
func runSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool, caseSensitive bool, partialMatch bool) error {
102+
_, err := runSearchInternal(filePath, substring, recursive, followIncludes, verbose, caseSensitive, partialMatch)
87103
return err
88104
}
89105

90106
// runSearchInternal contains the core logic for the "search find-string" subcommand.
91-
func runSearchInternal(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) (*SearchReport, error) {
107+
func runSearchInternal(filePath string, substring string, recursive bool, followIncludes bool, verbose bool, caseSensitive bool, partialMatch bool) (*SearchReport, error) {
92108
fileInfo, err := os.Stat(filePath)
93109
if err != nil {
94110
return nil, fmt.Errorf("failed to access path %s: %w", filePath, err)
@@ -113,6 +129,8 @@ func runSearchInternal(filePath string, substring string, recursive bool, follow
113129
if verbose {
114130
fmt.Printf("Found %d files to search\n", len(filesToSearch))
115131
fmt.Printf("Searching for substring: %q\n", substring)
132+
fmt.Printf("Case sensitive: %v\n", caseSensitive)
133+
fmt.Printf("Partial match: %v\n", partialMatch)
116134
fmt.Printf("Follow includes: %v\n\n", followIncludes)
117135
}
118136

@@ -141,7 +159,7 @@ func runSearchInternal(filePath string, substring string, recursive bool, follow
141159

142160
// Search all collected files
143161
for _, fileToSearch := range filesToSearchWithIncludes {
144-
result, err := searchFile(fileToSearch, substring)
162+
result, err := searchFile(fileToSearch, substring, caseSensitive, partialMatch)
145163
if err != nil {
146164
fmt.Fprintf(os.Stderr, "Warning: failed to search %s: %v\n", fileToSearch, err)
147165
continue
@@ -211,7 +229,7 @@ func collectFilesWithIncludes(filePath string, visited map[string]bool, verbose
211229
}
212230

213231
// searchFile searches a single file for the substring
214-
func searchFile(filePath string, substring string) (SearchResult, error) {
232+
func searchFile(filePath string, substring string, caseSensitive bool, partialMatch bool) (SearchResult, error) {
215233
result := SearchResult{
216234
FilePath: filePath,
217235
Language: extractLanguageFromFilename(filePath),
@@ -223,11 +241,70 @@ func searchFile(filePath string, substring string) (SearchResult, error) {
223241
return result, err
224242
}
225243

226-
result.Contains = strings.Contains(string(content), substring)
244+
contentStr := string(content)
245+
searchStr := substring
246+
247+
// Handle case sensitivity
248+
if !caseSensitive {
249+
contentStr = strings.ToLower(contentStr)
250+
searchStr = strings.ToLower(searchStr)
251+
}
252+
253+
// Check if substring exists in content
254+
if !strings.Contains(contentStr, searchStr) {
255+
return result, nil
256+
}
257+
258+
// If partial match is allowed, we're done
259+
if partialMatch {
260+
result.Contains = true
261+
return result, nil
262+
}
263+
264+
// For exact word matching, check if the match is a whole word
265+
result.Contains = isExactWordMatch(contentStr, searchStr)
227266

228267
return result, nil
229268
}
230269

270+
// isExactWordMatch reports whether substring occurs in content as a complete word.
//
// An occurrence counts as a complete word when the byte immediately before it
// and the byte immediately after it are each either absent (start or end of
// content) or not a word character as defined by isWordChar, i.e. not an ASCII
// letter, ASCII digit, or underscore. The check is byte-based, so every byte of
// a non-ASCII UTF-8 sequence is treated as a boundary.
//
// An empty substring never matches: it is not a word, and scanning for it would
// otherwise advance past the end of content and panic on the slice expression.
//
// Parameters:
//   - content: The text to scan
//   - substring: The word to look for
//
// Returns:
//   - bool: true if at least one occurrence is bounded on both sides
func isExactWordMatch(content string, substring string) bool {
	if substring == "" {
		return false
	}

	// Examine each occurrence in turn, stopping at the first one that is
	// bounded on both sides.
	start := 0
	for {
		pos := strings.Index(content[start:], substring)
		if pos == -1 {
			return false
		}

		// strings.Index is relative to the slice, so translate back to an
		// offset into content.
		matchStart := start + pos
		matchEnd := matchStart + len(substring)

		beforeOK := matchStart == 0 || !isWordChar(rune(content[matchStart-1]))
		afterOK := matchEnd >= len(content) || !isWordChar(rune(content[matchEnd]))
		if beforeOK && afterOK {
			return true
		}

		// Advance by a single byte (not by len(substring)) so overlapping
		// occurrences are still considered. substring is non-empty here, so
		// matchStart+1 never exceeds len(content).
		start = matchStart + 1
	}
}

// isWordChar reports whether c is an ASCII letter, an ASCII digit, or an underscore.
// These are the characters considered part of a word when checking word boundaries.
func isWordChar(c rune) bool {
	switch {
	case c >= 'a' && c <= 'z':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	case c >= '0' && c <= '9':
		return true
	}
	return c == '_'
}
307+
231308
// extractLanguageFromFilename extracts the language from the file extension
232309
func extractLanguageFromFilename(filePath string) string {
233310
ext := filepath.Ext(filePath)

0 commit comments

Comments
 (0)