diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md deleted file mode 100644 index 0ed4079..0000000 --- a/.claude/CLAUDE.md +++ /dev/null @@ -1,205 +0,0 @@ -# Gone - Dead Link Detector - -## Project Overview - -Gone is a CLI tool that scans markdown files for dead links. It extracts all HTTP/HTTPS URLs and checks if they're still alive (return 200 status code). - -## Goals - -1. **Learn Go** - This project is a learning exercise for Go programming -2. **Build a useful tool** - A practical dead link detector for markdown documentation -3. **Explore Go patterns** - Concurrency (goroutines/channels), CLI frameworks, TUI development - -## Tech Stack - -- **Go** - Programming language -- **Cobra** - CLI framework for argument parsing and subcommands -- **Bubble Tea** - TUI framework (Model-View-Update architecture) -- **Lipgloss** - Terminal styling (colors, borders, etc.) -- **Bubbles** - Pre-built TUI components (spinners, etc.) - -## Commands - -```bash -gone check # Scan current dir, text output -gone check --format=json # JSON output for CI/scripts -gone check ./docs # Scan specific directory -gone interactive # Launch interactive TUI -``` - -## Project Structure - -``` -gone/ -├── main.go # Entry point -├── cmd/ -│ ├── root.go # Cobra root command -│ ├── check.go # CLI mode (text/JSON output) -│ └── interactive.go # TUI mode (Bubble Tea) -├── internal/ -│ ├── scanner/ # Find .md files in directories -│ ├── parser/ # Extract URLs from content -│ └── checker/ # HTTP checking with concurrency -``` - -## Development Commands - -```bash -go run . check # Run without building -go run . interactive # Run TUI mode -go build . # Build binary -go test ./... 
# Run tests (when added) -``` - -## Conventions - -- Keep packages in `internal/` for encapsulation -- Error handling: always check and return errors, don't panic -- Concurrency: use goroutines + channels for parallel work - -## Git Commit Guidelines - -**CRITICAL - READ THIS FIRST** - -NEVER commit without explicit user permission. NEVER use git reset on pushed commits. - -Rules: - -- DO NOT run git commit unless user explicitly says "commit" or "commit our changes" -- DO NOT run git reset on commits that have been pushed to remote -- DO NOT undo commits without explicit user instruction -- WAIT for user to ask before committing -- ASK user if unsure about whether to commit - -IMPORTANT: ONLY commit changes when explicitly asked by the user. - -### Pre-Commit Checklist - -1. Stage changes: ALWAYS use `git add -A` (not `git add .`) to stage all changes including deletions -2. Verify staged files: Run `git status` to review what will be committed -3. Review changes: Run `git diff --cached --stat` to see a summary of changes -4. 
Check authorship: Run `git log -1 --format='%an %ae'` to verify git user configuration - -### Commit Message Format - -Follow the Conventional Commits specification strictly: - -``` -<type>[optional scope]: <description> - -[optional body] - -[optional footer(s)] -``` - -### Structure Rules - -- **Type**: Required (feat, fix, chore, docs, style, refactor, perf, test, build, ci, revert) -- **Scope**: Optional, use package or feature name -- **Description**: Required, concise summary in imperative mood -- **Body**: Optional, detailed explanation separated by blank line -- **Footer**: Optional, for references or breaking changes - -### Writing Style - -Use imperative mood - write as commands: - -- "add feature" (correct) -- "added feature" or "adds feature" (incorrect) - -Think: "If applied, this commit will ___" - -Subject line format: - -- Start with lowercase after colon -- No period at end -- Keep under 72 characters -- Be specific and descriptive - -Body guidelines (when needed): - -- Explain what and why, not how -- Wrap lines at 72 characters -- Use present tense -- Separate from subject with blank line -- Can include bullet points for multiple changes - -Breaking changes: - -- Add `!` after type/scope: `feat(parser)!: change URL extraction` -- OR include footer: `BREAKING CHANGE: description` - -### Scope Names for Gone - -Packages: - -- `scanner` - markdown file discovery -- `parser` - URL extraction -- `checker` - HTTP link validation -- `cmd` - CLI commands - -Features: - -- `check` - CLI check command -- `interactive` - TUI mode -- `output` - text/JSON formatting - -Infrastructure: - -- `deps` - dependencies -- `ci` - continuous integration -- `docs` - documentation -- `config` - configuration files - -### Examples - -Simple feature: - -``` -feat(checker): add timeout configuration for HTTP requests -``` - -Bug fix: - -``` -fix(parser): handle URLs with trailing punctuation -``` - -With body: - -``` -feat(interactive): add progress bar during link checking - -Replace spinner 
with progress bar showing checked/total count. -Updates in real-time as each link check completes. -``` - -Breaking change: - -``` -feat(checker)!: change Result struct fields - -BREAKING CHANGE: Renamed StatusCode to Status and added -new fields for response headers. -``` - -Chore: - -``` -chore(deps): update bubble tea to v1.4 -``` - -Documentation: - -``` -docs: add usage examples to README -``` - -## Future Improvements - -- [ ] Recursive directory scanning flag -- [ ] Follow redirects option (treat 301/302 as alive) -- [ ] Timeout configuration -- [ ] Ignore patterns (skip certain domains) -- [ ] Unit tests for all packages -- [ ] Remove dead links from files (interactive mode feature) diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..0f906f5 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,282 @@ +# Gone - AI Agent Guidelines + +## Project Overview + +Gone is a fast, concurrent dead link detector for documentation files. It scans Markdown, JSON, YAML, TOML, and XML files for HTTP/HTTPS URLs and checks if they're still alive. 
+ +## Tech Stack + +- **Go 1.24+** - Programming language +- **Cobra** - CLI framework for argument parsing and subcommands +- **Bubble Tea** - TUI framework (Model-View-Update architecture) +- **Lipgloss** - Terminal styling +- **Bubbles** - Pre-built TUI components +- **Goldmark** - Markdown parser +- **gopkg.in/yaml.v3** - YAML parser +- **BurntSushi/toml** - TOML parser +- **gobwas/glob** - Glob pattern matching + +## Project Structure + +``` +gone/ +├── main.go # Entry point +├── cmd/ +│ ├── root.go # Cobra root command +│ ├── check.go # CLI check command +│ ├── check_output.go # Output formatting for check +│ ├── check_print.go # Print helpers for check +│ ├── fix.go # Auto-fix redirects command +│ ├── interactive.go # TUI mode (Bubble Tea) +│ └── helpers.go # Shared CLI helpers +├── internal/ +│ ├── checker/ # HTTP link validation with concurrency +│ │ ├── checker.go # Main checker logic +│ │ ├── options.go # Checker configuration +│ │ └── result.go # Result types +│ ├── config/ # Configuration file handling +│ │ └── config.go # .gonerc.yaml parsing +│ ├── filter/ # URL filtering (ignore rules) +│ │ └── filter.go # Domain, pattern, regex filters +│ ├── fixer/ # Auto-fix functionality +│ │ └── fixer.go # Replace URLs in files +│ ├── helpers/ # Shared utilities +│ │ └── helpers.go # Common helper functions +│ ├── output/ # Output formatting +│ │ ├── output.go # Report structure +│ │ ├── json.go # JSON output +│ │ ├── yaml.go # YAML output +│ │ ├── xml.go # XML output +│ │ ├── junit.go # JUnit XML output +│ │ └── markdown.go # Markdown output +│ ├── parser/ # URL extraction from files +│ │ ├── parser.go # Common parser utilities +│ │ ├── registry.go # Parser registry +│ │ ├── json/ # JSON parser +│ │ ├── markdown/ # Markdown parser +│ │ ├── toml/ # TOML parser +│ │ ├── xml/ # XML parser +│ │ └── yaml/ # YAML parser +│ ├── scanner/ # File discovery +│ │ └── scanner.go # Find files by type +│ ├── stats/ # Performance statistics +│ │ └── stats.go # Timing and 
metrics +│ └── ui/ # TUI components +│ ├── app.go # Main TUI model +│ ├── commands.go # TUI commands +│ ├── keys.go # Key bindings +│ ├── messages.go # TUI messages +│ └── styles.go # TUI styling +``` + +## Commands + +```bash +# Basic usage +gone check # Scan current dir for markdown files +gone check ./docs # Scan specific directory +gone check --types=md,json,yaml # Scan multiple file types + +# Output formats +gone check --format=json # JSON output +gone check --format=yaml # YAML output +gone check --output=report.xml # Write to file (format inferred) + +# Filtering +gone check --dead # Show only dead links +gone check --warnings # Show only warnings +gone check --all # Show all including alive + +# Performance +gone check --concurrency=100 # Increase concurrent workers +gone check --timeout=30 # Set timeout in seconds +gone check --retries=3 # Set retry attempts + +# Interactive mode +gone interactive # Launch TUI +gone interactive --types=md,json # TUI with multiple file types + +# Auto-fix redirects +gone fix # Fix redirect URLs +gone fix --dry-run # Preview changes +gone fix --yes # Apply without prompting +``` + +## Development Commands + +```bash +# Build and run +go run . check # Run without building +go build . # Build binary +./gone check # Run built binary + +# Testing +go test ./... # Run all tests +go test ./internal/parser/... # Run parser tests +go test -race ./... # Run with race detector +go test -bench=. ./... # Run benchmarks + +# Linting +golangci-lint run ./... # Run linter + +# All checks (before committing) +go build ./... && go test ./... && golangci-lint run ./... 
+``` + +## Configuration + +Create `.gonerc.yaml` in project root: + +```yaml +types: + - md + - json + - yaml + +scan: + include: + - "docs/**" + exclude: + - "node_modules/**" + - "vendor/**" + +check: + concurrency: 50 + timeout: 10 + retries: 2 + strict: false + +output: + format: "" + showAlive: false + showWarnings: true + showDead: true + showStats: false + +ignore: + domains: + - localhost + - example.com + patterns: + - "*.local/*" + regex: + - "192\\.168\\..*" +``` + +## Conventions + +### Code Style + +- Keep packages in `internal/` for encapsulation +- Error handling: always check and return errors, don't panic +- Concurrency: use goroutines + channels for parallel work +- Interfaces: define in the package that uses them, not implements them +- Tests: place in same package with `_test.go` suffix + +### Package Guidelines + +- `checker` - HTTP validation, concurrent checking +- `parser` - URL extraction, file format handling +- `scanner` - File discovery, glob patterns +- `filter` - URL filtering, ignore rules +- `config` - Configuration file parsing +- `output` - Report formatting +- `cmd` - CLI commands (thin layer over internal packages) + +## Git Commit Guidelines + +**CRITICAL**: NEVER commit without explicit user permission. + +### Rules + +- DO NOT run `git commit` unless user explicitly says "commit" +- DO NOT run `git reset` on commits that have been pushed +- DO NOT undo commits without explicit instruction +- WAIT for user to ask before committing +- ASK if unsure about whether to commit + +### Pre-Commit Checklist + +1. Run tests: `go test ./...` +2. Run linter: `golangci-lint run ./...` +3. Stage changes: `git add -A` +4. 
Review staged: `git status` and `git diff --cached --stat` + +### Commit Message Format + +Follow Conventional Commits: + +``` +<type>[optional scope]: <description> + +[optional body] + +[optional footer(s)] +``` + +### Types + +- `feat` - New feature +- `fix` - Bug fix +- `refactor` - Code change that neither fixes a bug nor adds a feature +- `perf` - Performance improvement +- `test` - Adding or updating tests +- `docs` - Documentation only +- `chore` - Maintenance tasks +- `build` - Build system or dependencies +- `ci` - CI configuration + +### Scopes + +Packages: `checker`, `parser`, `scanner`, `filter`, `config`, `output`, `fixer`, `stats`, `ui`, `cmd` + +Features: `check`, `fix`, `interactive` + +### Writing Style + +Use imperative mood ("add feature" not "added feature"): +- Think: "If applied, this commit will ___" +- Start with lowercase after colon +- No period at end +- Keep under 72 characters + +### Examples + +``` +feat(parser): add TOML file support + +fix(checker): handle timeout errors gracefully + +refactor(scanner): simplify file filtering logic + +Consolidate include/exclude pattern matching into single function. +Improves readability and reduces code duplication. + +perf(checker): reduce memory allocations in URL validation + +chore(deps): update goldmark to v1.7.0 + +test(parser): add edge case tests for malformed JSON +``` + +### Breaking Changes + +Add `!` after type/scope: + +``` +feat(parser)!: change FileParser interface + +BREAKING CHANGE: Removed Validate and Parse methods. +Use ValidateAndParse instead. +``` + +## Pull Request Guidelines + +1. Create feature branch from `master` +2. Make changes with clear, focused commits +3. Ensure all tests pass +4. Ensure linter passes +5. Push branch and create PR +6. Include summary of changes in PR description + +Never push directly to `master`. 
diff --git a/cmd/check.go b/cmd/check.go index cdf90f2..dca1f39 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -4,7 +4,6 @@ import ( "fmt" "os" "strings" - "time" "github.com/leonardomso/gone/internal/checker" "github.com/leonardomso/gone/internal/filter" @@ -180,7 +179,10 @@ func runCheck(_ *cobra.Command, args []string) { // Phase 4: Output results effectiveShowStats := loadedCfg.GetShowStats(showStats) - routeOutputWithConfig(files, results, summary, urlFilter, perf, useStructuredOutput, effectiveFormat, effectiveShowStats) + routeOutputWithConfig( + files, results, summary, urlFilter, perf, + useStructuredOutput, effectiveFormat, effectiveShowStats, + ) if summary.HasDeadLinks() { os.Exit(1) @@ -233,27 +235,6 @@ func scanFilesWithConfig(path string, cfg *LoadedConfig, perf *stats.Stats, useS return files } -// scanFiles scans for files with the specified types and returns the list. -// Deprecated: Use scanFilesWithConfig for config support. -func scanFiles(path string, perf *stats.Stats, useStructuredOutput bool) []string { - perf.StartScan() - - // Validate file types - if err := validateFileTypes(fileTypes); err != nil { - exitOnError(err, "Invalid file types") - } - - files, err := scanner.FindFilesByTypes(path, fileTypes) - exitOnError(err, "Error scanning directory") - perf.EndScan(len(files)) - - if !useStructuredOutput { - typeStr := strings.Join(fileTypes, ", ") - fmt.Printf("Found %d file(s) of type(s): %s\n", len(files), typeStr) - } - return files -} - // validateFileTypes checks if all specified file types are supported. func validateFileTypes(types []string) error { supportedTypes := parser.SupportedFileTypes() @@ -315,49 +296,6 @@ func parseAndFilterLinksWithConfig( return links, urlFilter, false } -// parseAndFilterLinks extracts links from files and applies filters. -// Returns the links, filter, and whether processing should stop (done=true). -// Deprecated: Use parseAndFilterLinksWithConfig for config support. 
-func parseAndFilterLinks( - files []string, perf *stats.Stats, useStructuredOutput bool, -) ([]checker.Link, *filter.Filter, bool) { - perf.StartParse() - parserLinks, err := parser.ExtractLinksFromMultipleFilesWithRegistry(files, strictMode) - exitOnError(err, "Error parsing files") - - if len(parserLinks) == 0 { - perf.EndParse(0, 0, 0, 0) - handleEmptyLinksWithStats(files, useStructuredOutput, perf) - return nil, nil, true - } - - urlFilter, err := CreateFilter(FilterOptions{ - Domains: ignoreDomains, - Patterns: ignorePatterns, - Regex: ignoreRegex, - NoConfig: noConfig, - }) - exitOnError(err, "Error creating filter") - - links := FilterParserLinks(parserLinks, urlFilter) - ignoredCount := getIgnoredCount(urlFilter) - uniqueURLs := CountUniqueURLs(links) - duplicates := len(links) - uniqueURLs - - perf.EndParse(len(parserLinks), uniqueURLs, duplicates, ignoredCount) - - if !useStructuredOutput { - printProgressMessage(len(parserLinks), len(links), uniqueURLs, duplicates, ignoredCount) - } - - if len(links) == 0 { - handleAllFilteredWithStats(files, useStructuredOutput, urlFilter, perf) - return nil, urlFilter, true - } - - return links, urlFilter, false -} - // getIgnoredCount returns the ignored count from filter, or 0 if filter is nil. func getIgnoredCount(urlFilter *filter.Filter) int { if urlFilter != nil { @@ -367,7 +305,9 @@ func getIgnoredCount(urlFilter *filter.Filter) int { } // checkLinksWithConfig checks all links using config values and returns results with summary. 
-func checkLinksWithConfig(links []checker.Link, cfg *LoadedConfig, perf *stats.Stats) ([]checker.Result, checker.Summary) { +func checkLinksWithConfig( + links []checker.Link, cfg *LoadedConfig, perf *stats.Stats, +) ([]checker.Result, checker.Summary) { perf.StartCheck() opts := cfg.BuildCheckerOptions(concurrency, timeout, retries) @@ -380,24 +320,6 @@ func checkLinksWithConfig(links []checker.Link, cfg *LoadedConfig, perf *stats.S return results, summary } -// checkLinks checks all links and returns results with summary. -// Deprecated: Use checkLinksWithConfig for config support. -func checkLinks(links []checker.Link, perf *stats.Stats) ([]checker.Result, checker.Summary) { - perf.StartCheck() - - opts := checker.DefaultOptions(). - WithConcurrency(concurrency). - WithTimeout(time.Duration(timeout) * time.Second). - WithMaxRetries(retries) - - c := checker.New(opts) - results := c.CheckAll(links) - summary := checker.Summarize(results) - - perf.EndCheck() - return results, summary -} - // routeOutputWithConfig handles output based on format flags and config. func routeOutputWithConfig( files []string, results []checker.Result, summary checker.Summary, @@ -417,25 +339,6 @@ func routeOutputWithConfig( } } -// routeOutput handles output based on format flags. -// Deprecated: Use routeOutputWithConfig for config support. -func routeOutput( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, useStructuredOutput bool, -) { - switch { - case useStructuredOutput: - handleStructuredOutputWithStats(files, results, summary, urlFilter, perf) - case outputFile != "": - handleFileOutputWithStats(files, results, summary, urlFilter, perf) - default: - outputText(results, summary, urlFilter) - if showStats { - fmt.Print(perf.String()) - } - } -} - // validateCheckFlags checks for invalid flag combinations. 
func validateCheckFlags() error { // Validate mutually exclusive flags @@ -468,27 +371,16 @@ func handleEmptyLinksWithStatsV2(files []string, useStructuredOutput bool, perf } } -// handleEmptyLinksWithStats handles the case when no links are found in the files. -// Deprecated: Use handleEmptyLinksWithStatsV2 for config support. -func handleEmptyLinksWithStats(files []string, useStructuredOutput bool, perf *stats.Stats) { - switch { - case useStructuredOutput: - handleStructuredOutputWithStats(files, nil, checker.Summary{}, nil, perf) - case outputFile != "": - handleFileOutputWithStats(files, nil, checker.Summary{}, nil, perf) - default: - fmt.Println("No links found.") - if showStats { - fmt.Print(perf.String()) - } - } -} - // handleAllFilteredWithStatsV2 handles the case when all links were filtered out, with config. -func handleAllFilteredWithStatsV2(files []string, useStructuredOutput bool, urlFilter *filter.Filter, perf *stats.Stats, effectiveShowStats bool) { +func handleAllFilteredWithStatsV2( + files []string, useStructuredOutput bool, urlFilter *filter.Filter, + perf *stats.Stats, effectiveShowStats bool, +) { switch { case useStructuredOutput: - handleStructuredOutputWithStatsV2(files, nil, checker.Summary{}, urlFilter, perf, outputFormat, effectiveShowStats) + handleStructuredOutputWithStatsV2( + files, nil, checker.Summary{}, urlFilter, perf, outputFormat, effectiveShowStats, + ) case outputFile != "": handleFileOutputWithStatsV2(files, nil, checker.Summary{}, urlFilter, perf, effectiveShowStats) default: @@ -502,25 +394,6 @@ func handleAllFilteredWithStatsV2(files []string, useStructuredOutput bool, urlF } } -// handleAllFilteredWithStats handles the case when all links were filtered out. -// Deprecated: Use handleAllFilteredWithStatsV2 for config support. 
-func handleAllFilteredWithStats(files []string, useStructuredOutput bool, urlFilter *filter.Filter, perf *stats.Stats) { - switch { - case useStructuredOutput: - handleStructuredOutputWithStats(files, nil, checker.Summary{}, urlFilter, perf) - case outputFile != "": - handleFileOutputWithStats(files, nil, checker.Summary{}, urlFilter, perf) - default: - fmt.Println("\nAll links were ignored by filter rules.") - if showIgnored && urlFilter != nil { - printIgnoredURLs(urlFilter) - } - if showStats { - fmt.Print(perf.String()) - } - } -} - // handleStructuredOutputWithStatsV2 outputs to stdout with optional stats, using config. func handleStructuredOutputWithStatsV2( files []string, results []checker.Result, summary checker.Summary, @@ -537,23 +410,6 @@ func handleStructuredOutputWithStatsV2( fmt.Print(string(data)) } -// handleStructuredOutputWithStats outputs to stdout with optional stats. -// Deprecated: Use handleStructuredOutputWithStatsV2 for config support. -func handleStructuredOutputWithStats( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, -) { - report := buildReportWithStats(files, results, summary, urlFilter, perf) - - data, err := output.FormatReport(report, output.Format(outputFormat)) - if err != nil { - fmt.Fprintf(os.Stderr, "Error formatting output: %v\n", err) - os.Exit(1) - } - - fmt.Print(string(data)) -} - // handleFileOutputWithStatsV2 writes to file with optional stats, using config. func handleFileOutputWithStatsV2( files []string, results []checker.Result, summary checker.Summary, @@ -581,34 +437,6 @@ func handleFileOutputWithStatsV2( } } -// handleFileOutputWithStats writes to file with optional stats. -// Deprecated: Use handleFileOutputWithStatsV2 for config support. 
-func handleFileOutputWithStats( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, -) { - report := buildReportWithStats(files, results, summary, urlFilter, perf) - - if err := output.WriteToFile(report, outputFile); err != nil { - fmt.Fprintf(os.Stderr, "Error writing file: %v\n", err) - os.Exit(1) - } - - fmt.Printf("Wrote report to %s\n", outputFile) - - // Also print summary to stdout - fmt.Printf("\nSummary: %d alive | %d warnings | %d dead | %d duplicates", - summary.Alive, summary.WarningsCount(), summary.Dead+summary.Errors, summary.Duplicates) - if urlFilter != nil && urlFilter.IgnoredCount() > 0 { - fmt.Printf(" | %d ignored", urlFilter.IgnoredCount()) - } - fmt.Println() - - if showStats { - fmt.Print(perf.String()) - } -} - // buildReportWithStatsV2 creates an output.Report with optional stats, using config. func buildReportWithStatsV2( files []string, results []checker.Result, summary checker.Summary, @@ -623,19 +451,3 @@ func buildReportWithStatsV2( return report } - -// buildReportWithStats creates an output.Report with optional stats. -// Deprecated: Use buildReportWithStatsV2 for config support. 
-func buildReportWithStats( - files []string, results []checker.Result, summary checker.Summary, - urlFilter *filter.Filter, perf *stats.Stats, -) *output.Report { - report := buildReport(files, results, summary, urlFilter) - - // Add stats if requested - if showStats && perf != nil { - report.Stats = perf.ToJSON() - } - - return report -} diff --git a/cmd/interactive.go b/cmd/interactive.go index 3c47c5b..0f94b62 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -116,7 +116,8 @@ func runInteractive(_ *cobra.Command, args []string) { // Get scan options for include/exclude patterns scanInclude, scanExclude := loadedCfg.GetScanOptions() - p := tea.NewProgram(ui.New(path, urlFilter, effectiveTypes, effectiveStrict, scanInclude, scanExclude), tea.WithAltScreen()) + model := ui.New(path, urlFilter, effectiveTypes, effectiveStrict, scanInclude, scanExclude) + p := tea.NewProgram(model, tea.WithAltScreen()) if _, err := p.Run(); err != nil { fmt.Printf("Error running interactive mode: %v\n", err) os.Exit(1) //nolint:revive // deep-exit is acceptable for CLI entry points diff --git a/internal/parser/json/json.go b/internal/parser/json/json.go index ebac5a2..d0e5d19 100644 --- a/internal/parser/json/json.go +++ b/internal/parser/json/json.go @@ -24,28 +24,7 @@ func (*Parser) Extensions() []string { return []string{".json"} } -// Validate checks if the content is valid JSON. -func (*Parser) Validate(content []byte) error { - if len(content) == 0 { - return nil // Empty file is valid (no links to extract) - } - - var v any - if err := json.Unmarshal(content, &v); err != nil { - return fmt.Errorf("invalid JSON: %w", err) - } - return nil -} - -// Parse extracts links from JSON content. -// It extracts URLs from both string values and object keys. -// Deprecated: Use ValidateAndParse for better performance. 
-func (p *Parser) Parse(filename string, content []byte) ([]parser.Link, error) { - return p.ValidateAndParse(filename, content) -} - // ValidateAndParse validates the content and extracts links in a single pass. -// This is more efficient than calling Validate and Parse separately. func (*Parser) ValidateAndParse(filename string, content []byte) ([]parser.Link, error) { if len(content) == 0 { return nil, nil diff --git a/internal/parser/json/json_bench_test.go b/internal/parser/json/json_bench_test.go index a6b9c89..bbfb89c 100644 --- a/internal/parser/json/json_bench_test.go +++ b/internal/parser/json/json_bench_test.go @@ -6,26 +6,14 @@ import ( "testing" ) -// BenchmarkParse measures JSON parsing performance. -func BenchmarkParse(b *testing.B) { - content := createJSONContent(50) - p := New() - - b.ResetTimer() - for b.Loop() { - _, _ = p.Parse("test.json", content) - } -} - -// BenchmarkValidateAndParse measures combined validation and parsing. +// BenchmarkValidateAndParse measures JSON parsing performance. 
func BenchmarkValidateAndParse(b *testing.B) { content := createJSONContent(50) p := New() b.ResetTimer() for b.Loop() { - _ = p.Validate(content) - _, _ = p.Parse("test.json", content) + _, _ = p.ValidateAndParse("test.json", content) } } diff --git a/internal/parser/json/json_test.go b/internal/parser/json/json_test.go index 63ad901..4033482 100644 --- a/internal/parser/json/json_test.go +++ b/internal/parser/json/json_test.go @@ -17,55 +17,14 @@ func TestParser_Extensions(t *testing.T) { assert.Contains(t, exts, ".json") } -func TestParser_Validate(t *testing.T) { - t.Parallel() - - p := New() - - t.Run("ValidJSON", func(t *testing.T) { - t.Parallel() - content := []byte(`{"key": "value"}`) - err := p.Validate(content) - assert.NoError(t, err) - }) - - t.Run("ValidJSONArray", func(t *testing.T) { - t.Parallel() - content := []byte(`["a", "b", "c"]`) - err := p.Validate(content) - assert.NoError(t, err) - }) - - t.Run("EmptyContent", func(t *testing.T) { - t.Parallel() - err := p.Validate([]byte{}) - assert.NoError(t, err) - }) - - t.Run("InvalidJSON", func(t *testing.T) { - t.Parallel() - content := []byte(`{"key": }`) - err := p.Validate(content) - assert.Error(t, err) - assert.Contains(t, err.Error(), "invalid JSON") - }) - - t.Run("TrailingComma", func(t *testing.T) { - t.Parallel() - content := []byte(`{"key": "value",}`) - err := p.Validate(content) - assert.Error(t, err) - }) -} - -func TestParser_Parse(t *testing.T) { +func TestParser_ValidateAndParse(t *testing.T) { t.Parallel() p := New() t.Run("SimpleObject", func(t *testing.T) { t.Parallel() content := []byte(`{"url": "https://example.com"}`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -78,7 +37,7 @@ func TestParser_Parse(t *testing.T) { "homepage": "https://example.com", "repo": "https://github.com/test/repo" }`) - links, err := 
p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -93,7 +52,7 @@ func TestParser_Parse(t *testing.T) { } } }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -101,7 +60,7 @@ func TestParser_Parse(t *testing.T) { t.Run("Array", func(t *testing.T) { t.Parallel() content := []byte(`["https://one.com", "https://two.com"]`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -114,7 +73,7 @@ func TestParser_Parse(t *testing.T) { {"url": "https://cdn.example.com"} ] }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -125,7 +84,7 @@ func TestParser_Parse(t *testing.T) { "https://example.com": "Example site", "https://github.com": "GitHub" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) @@ -142,7 +101,7 @@ func TestParser_Parse(t *testing.T) { content := []byte(`{ "description": "Check out https://example.com for more info" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -153,7 +112,7 @@ func TestParser_Parse(t *testing.T) { content := []byte(`{ "message": "Visit https://example.com and https://github.com" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -161,14 +120,14 @@ func TestParser_Parse(t *testing.T) { t.Run("NoURLs", func(t *testing.T) { t.Parallel() content := []byte(`{"name": 
"test", "value": 42}`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Empty(t, links) }) t.Run("EmptyContent", func(t *testing.T) { t.Parallel() - links, err := p.Parse("test.json", []byte{}) + links, err := p.ValidateAndParse("test.json", []byte{}) require.NoError(t, err) assert.Empty(t, links) }) @@ -181,7 +140,7 @@ func TestParser_Parse(t *testing.T) { "mailto": "mailto:test@example.com", "file": "file:///path/to/file" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -192,7 +151,7 @@ func TestParser_Parse(t *testing.T) { content := []byte(`{ "text": "Visit https://example.com. Or https://github.com, for code" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) @@ -282,7 +241,7 @@ func TestParser_LineNumbers(t *testing.T) { "url1": "https://line2.example.com", "url2": "https://line3.example.com" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) require.Len(t, links, 2) @@ -331,7 +290,7 @@ func TestParser_EdgeCases(t *testing.T) { "punycode": "https://xn--r8jz45g.jp/path", "emoji_path": "https://example.com/🎉" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.GreaterOrEqual(t, len(links), 2) }) @@ -339,7 +298,7 @@ func TestParser_EdgeCases(t *testing.T) { t.Run("EscapedSlashesInURL", func(t *testing.T) { t.Parallel() content := []byte(`{"url": "https:\/\/example.com\/path"}`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) // The JSON decoder handles escape sequences, so this 
should parse correctly assert.GreaterOrEqual(t, len(links), 1) @@ -360,7 +319,7 @@ func TestParser_EdgeCases(t *testing.T) { } } }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://deep.example.com", links[0].URL) @@ -373,7 +332,7 @@ func TestParser_EdgeCases(t *testing.T) { "nullable": null, "nested": {"value": null} }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -385,7 +344,7 @@ func TestParser_EdgeCases(t *testing.T) { "enabled": true, "disabled": false }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -399,7 +358,7 @@ func TestParser_EdgeCases(t *testing.T) { "negative": -100, "scientific": 1.23e10 }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -411,7 +370,7 @@ func TestParser_EdgeCases(t *testing.T) { "empty_array": [], "empty_object": {} }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -423,7 +382,7 @@ func TestParser_EdgeCases(t *testing.T) { "fragment": "https://example.com/page#section", "encoded": "https://example.com/path%20with%20spaces" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 3) }) @@ -434,7 +393,7 @@ func TestParser_EdgeCases(t *testing.T) { "local": "https://localhost:8080/api", "custom": "https://example.com:3000/path" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 2) 
}) @@ -442,7 +401,7 @@ func TestParser_EdgeCases(t *testing.T) { t.Run("URLWithBasicAuth", func(t *testing.T) { t.Parallel() content := []byte(`{"url": "https://user:pass@example.com/path"}`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -459,7 +418,7 @@ func TestParser_EdgeCases(t *testing.T) { } content = append(content, []byte(`]}`)...) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 100) }) @@ -473,7 +432,7 @@ func TestParser_EdgeCases(t *testing.T) { "partial": "example.com", "valid3": "https://final.io" }`) - links, err := p.Parse("test.json", content) + links, err := p.ValidateAndParse("test.json", content) require.NoError(t, err) assert.Len(t, links, 3) // Only http/https URLs }) @@ -481,7 +440,7 @@ func TestParser_EdgeCases(t *testing.T) { t.Run("WhitespaceOnlyContent", func(t *testing.T) { t.Parallel() content := []byte(" \n\t ") - _, err := p.Parse("test.json", content) + _, err := p.ValidateAndParse("test.json", content) // Whitespace-only content is invalid JSON assert.Error(t, err) }) diff --git a/internal/parser/markdown/markdown.go b/internal/parser/markdown/markdown.go index 109cba6..91ea1da 100644 --- a/internal/parser/markdown/markdown.go +++ b/internal/parser/markdown/markdown.go @@ -29,19 +29,6 @@ func (*Parser) Extensions() []string { return []string{".md", ".mdx", ".markdown"} } -// Validate checks if the content is valid markdown. -// Markdown is very permissive, so we just return nil. -// Any text content is valid markdown. -func (*Parser) Validate(_ []byte) error { - return nil -} - -// Parse extracts links from markdown content. -// Deprecated: Use ValidateAndParse for better performance. 
-func (p *Parser) Parse(filename string, content []byte) ([]parser.Link, error) { - return p.ValidateAndParse(filename, content) -} - // ValidateAndParse validates the content and extracts links in a single pass. // For markdown, validation always passes (any text is valid markdown). func (*Parser) ValidateAndParse(filename string, content []byte) ([]parser.Link, error) { diff --git a/internal/parser/markdown/markdown_bench_test.go b/internal/parser/markdown/markdown_bench_test.go index 2c179af..b8b432c 100644 --- a/internal/parser/markdown/markdown_bench_test.go +++ b/internal/parser/markdown/markdown_bench_test.go @@ -6,14 +6,14 @@ import ( "testing" ) -// BenchmarkParse measures Markdown parsing performance. -func BenchmarkParse(b *testing.B) { +// BenchmarkValidateAndParse measures Markdown parsing performance. +func BenchmarkValidateAndParse(b *testing.B) { content := createMarkdownContent(50) p := New() b.ResetTimer() for b.Loop() { - _, _ = p.Parse("test.md", content) + _, _ = p.ValidateAndParse("test.md", content) } } diff --git a/internal/parser/markdown/markdown_test.go b/internal/parser/markdown/markdown_test.go index 809783f..8661709 100644 --- a/internal/parser/markdown/markdown_test.go +++ b/internal/parser/markdown/markdown_test.go @@ -431,24 +431,10 @@ func TestParser_Validate(t *testing.T) { t.Parallel() p := New() - t.Run("AlwaysValid", func(t *testing.T) { - t.Parallel() - // Markdown is very permissive - assert.NoError(t, p.Validate([]byte("# Valid"))) - assert.NoError(t, p.Validate([]byte("random text"))) - assert.NoError(t, p.Validate([]byte{})) - assert.NoError(t, p.Validate(nil)) - }) -} - -func TestParser_Parse(t *testing.T) { - t.Parallel() - p := New() - t.Run("ParsesLinks", func(t *testing.T) { t.Parallel() content := []byte("[link](http://example.com)") - links, err := p.Parse("test.md", content) + links, err := p.ValidateAndParse("test.md", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "http://example.com", 
links[0].URL) diff --git a/internal/parser/parser.go b/internal/parser/parser.go index a179995..947f614 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -147,9 +147,7 @@ func OffsetToLineCol(lines []int, offset int) (lineNum, colNum int) { return lines[i] > offset }) - 1 - if lineIdx < 0 { - lineIdx = 0 - } + lineIdx = max(lineIdx, 0) lineNum = lineIdx + 1 // Convert to 1-indexed colNum = offset - lines[lineIdx] + 1 diff --git a/internal/parser/registry.go b/internal/parser/registry.go index 03123ee..cefe513 100644 --- a/internal/parser/registry.go +++ b/internal/parser/registry.go @@ -17,18 +17,7 @@ type FileParser interface { // Extensions should include the leading dot. Extensions() []string - // Validate checks if the content is valid for this file type. - // Returns an error if the content is malformed. - // Deprecated: Use ValidateAndParse for better performance. - Validate(content []byte) error - - // Parse extracts links from the file content. - // Returns a slice of Link structs with URL, file path, line number, etc. - // Deprecated: Use ValidateAndParse for better performance. - Parse(filename string, content []byte) ([]Link, error) - // ValidateAndParse validates the content and extracts links in a single pass. - // This is more efficient than calling Validate and Parse separately. // Returns an error if the content is malformed. ValidateAndParse(filename string, content []byte) ([]Link, error) } diff --git a/internal/parser/toml/toml.go b/internal/parser/toml/toml.go index 2094780..f53f804 100644 --- a/internal/parser/toml/toml.go +++ b/internal/parser/toml/toml.go @@ -24,28 +24,7 @@ func (*Parser) Extensions() []string { return []string{".toml"} } -// Validate checks if the content is valid TOML. 
-func (*Parser) Validate(content []byte) error { - if len(content) == 0 { - return nil // Empty file is valid (no links to extract) - } - - var v any - if _, err := toml.Decode(string(content), &v); err != nil { - return fmt.Errorf("invalid TOML: %w", err) - } - return nil -} - -// Parse extracts links from TOML content. -// It extracts URLs from both string values and table/key names. -// Deprecated: Use ValidateAndParse for better performance. -func (p *Parser) Parse(filename string, content []byte) ([]parser.Link, error) { - return p.ValidateAndParse(filename, content) -} - // ValidateAndParse validates the content and extracts links in a single pass. -// This is more efficient than calling Validate and Parse separately. func (*Parser) ValidateAndParse(filename string, content []byte) ([]parser.Link, error) { if len(content) == 0 { return nil, nil diff --git a/internal/parser/toml/toml_bench_test.go b/internal/parser/toml/toml_bench_test.go index 8382e76..ae1c2c1 100644 --- a/internal/parser/toml/toml_bench_test.go +++ b/internal/parser/toml/toml_bench_test.go @@ -6,26 +6,14 @@ import ( "testing" ) -// BenchmarkParse measures TOML parsing performance. -func BenchmarkParse(b *testing.B) { - content := createTOMLContent(50) - p := New() - - b.ResetTimer() - for b.Loop() { - _, _ = p.Parse("test.toml", content) - } -} - -// BenchmarkValidateAndParse measures combined validation and parsing. +// BenchmarkValidateAndParse measures TOML parsing performance. 
func BenchmarkValidateAndParse(b *testing.B) { content := createTOMLContent(50) p := New() b.ResetTimer() for b.Loop() { - _ = p.Validate(content) - _, _ = p.Parse("test.toml", content) + _, _ = p.ValidateAndParse("test.toml", content) } } diff --git a/internal/parser/toml/toml_test.go b/internal/parser/toml/toml_test.go index 2d60450..4fb1c19 100644 --- a/internal/parser/toml/toml_test.go +++ b/internal/parser/toml/toml_test.go @@ -17,50 +17,14 @@ func TestParser_Extensions(t *testing.T) { assert.Contains(t, exts, ".toml") } -func TestParser_Validate(t *testing.T) { - t.Parallel() - - p := New() - - t.Run("ValidTOML", func(t *testing.T) { - t.Parallel() - content := []byte(`key = "value"`) - err := p.Validate(content) - assert.NoError(t, err) - }) - - t.Run("ValidTOMLTable", func(t *testing.T) { - t.Parallel() - content := []byte(`[section] -key = "value" -`) - err := p.Validate(content) - assert.NoError(t, err) - }) - - t.Run("EmptyContent", func(t *testing.T) { - t.Parallel() - err := p.Validate([]byte{}) - assert.NoError(t, err) - }) - - t.Run("InvalidTOML", func(t *testing.T) { - t.Parallel() - content := []byte(`key = "unclosed`) - err := p.Validate(content) - assert.Error(t, err) - assert.Contains(t, err.Error(), "invalid TOML") - }) -} - -func TestParser_Parse(t *testing.T) { +func TestParser_ValidateAndParse(t *testing.T) { t.Parallel() p := New() t.Run("SimpleKeyValue", func(t *testing.T) { t.Parallel() content := []byte(`url = "https://example.com"`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -73,7 +37,7 @@ func TestParser_Parse(t *testing.T) { homepage = "https://example.com" repo = "https://github.com/test/repo" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -88,7 +52,7 @@ name = 
"test" homepage = "https://example.com" docs = "https://docs.example.com" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -96,7 +60,7 @@ docs = "https://docs.example.com" t.Run("Arrays", func(t *testing.T) { t.Parallel() content := []byte(`urls = ["https://one.com", "https://two.com"]`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -110,7 +74,7 @@ url = "https://server1.com" [[servers]] url = "https://server2.com" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -118,7 +82,7 @@ url = "https://server2.com" t.Run("InlineTables", func(t *testing.T) { t.Parallel() content := []byte(`link = { name = "Example", url = "https://example.com" }`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -126,7 +90,7 @@ url = "https://server2.com" t.Run("EmbeddedURLsInStrings", func(t *testing.T) { t.Parallel() content := []byte(`description = "Check out https://example.com for more info"`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -138,7 +102,7 @@ url = "https://server2.com" Visit https://example.com and https://docs.example.com """`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -146,7 +110,7 @@ and https://docs.example.com t.Run("LiteralStrings", func(t *testing.T) { t.Parallel() content := []byte(`path = 'https://literal.example.com'`) - links, err := p.Parse("test.toml", 
content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -155,14 +119,14 @@ and https://docs.example.com t.Parallel() content := []byte(`name = "test" value = 42`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Empty(t, links) }) t.Run("EmptyContent", func(t *testing.T) { t.Parallel() - links, err := p.Parse("test.toml", []byte{}) + links, err := p.ValidateAndParse("test.toml", []byte{}) require.NoError(t, err) assert.Empty(t, links) }) @@ -175,7 +139,7 @@ ftp = "ftp://files.example.com" mailto = "mailto:test@example.com" file = "file:///path/to/file" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -188,7 +152,7 @@ file = "file:///path/to/file" "https://example.com" = "Example site" "https://github.com" = "GitHub" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -277,7 +241,7 @@ func TestParser_LineNumbers(t *testing.T) { url1 = "https://line2.example.com" url2 = "https://line3.example.com" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) require.Len(t, links, 2) @@ -296,7 +260,7 @@ func TestParser_EdgeCases(t *testing.T) { t.Run("DottedKeys", func(t *testing.T) { t.Parallel() content := []byte(`project.homepage = "https://example.com"`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -307,7 +271,7 @@ func TestParser_EdgeCases(t *testing.T) { [level1.level2.level3.level4] url = "https://deep.example.com" `) - links, err := p.Parse("test.toml", content) + links, 
err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://deep.example.com", links[0].URL) @@ -322,7 +286,7 @@ float = 3.14 boolean = true datetime = 2024-01-01T00:00:00Z `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -334,7 +298,7 @@ query = "https://example.com/search?q=hello+world&lang=en" fragment = "https://example.com/page#section" encoded = "https://example.com/path%20with%20spaces" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 3) }) @@ -345,7 +309,7 @@ encoded = "https://example.com/path%20with%20spaces" local = "https://localhost:8080/api" custom = "https://example.com:3000/path" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -356,7 +320,7 @@ custom = "https://example.com:3000/path" # This is a comment with https://comment.example.com url = "https://example.com" # inline comment with https://inline.example.com `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) // Only the actual value URL should be found assert.Len(t, links, 1) @@ -366,7 +330,7 @@ url = "https://example.com" # inline comment with https://inline.example.com t.Run("EscapedCharactersInStrings", func(t *testing.T) { t.Parallel() content := []byte(`url = "https://example.com/path\"quoted\""`) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.GreaterOrEqual(t, len(links), 1) }) @@ -377,7 +341,7 @@ url = "https://example.com" # inline comment with https://inline.example.com japanese = "https://例え.jp" emoji = "https://example.com/🎉" `) - links, err 
:= p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.GreaterOrEqual(t, len(links), 1) }) @@ -388,7 +352,7 @@ emoji = "https://example.com/🎉" empty = "" url = "https://example.com" `) - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -396,7 +360,7 @@ url = "https://example.com" t.Run("WhitespaceOnlyContent", func(t *testing.T) { t.Parallel() content := []byte(" \n \n") - links, err := p.Parse("test.toml", content) + links, err := p.ValidateAndParse("test.toml", content) require.NoError(t, err) assert.Empty(t, links) }) diff --git a/internal/parser/xml/xml.go b/internal/parser/xml/xml.go index 0470ae3..f09d718 100644 --- a/internal/parser/xml/xml.go +++ b/internal/parser/xml/xml.go @@ -1,5 +1,5 @@ // Package xml implements a URL extractor for XML files. -package xml +package xml //nolint:revive // package name matches file type being parsed import ( "bytes" @@ -41,34 +41,7 @@ func (*Parser) Extensions() []string { return []string{".xml"} } -// Validate checks if the content is valid XML. -func (*Parser) Validate(content []byte) error { - if len(content) == 0 { - return nil // Empty file is valid (no links to extract) - } - - decoder := xml.NewDecoder(bytes.NewReader(content)) - for { - _, err := decoder.Token() - if errors.Is(err, io.EOF) { - break - } - if err != nil { - return fmt.Errorf("invalid XML: %w", err) - } - } - return nil -} - -// Parse extracts links from XML content. -// It extracts URLs from known URL attributes and text content. -// Deprecated: Use ValidateAndParse for better performance. -func (p *Parser) Parse(filename string, content []byte) ([]parser.Link, error) { - return p.ValidateAndParse(filename, content) -} - // ValidateAndParse validates the content and extracts links in a single pass. -// This is more efficient than calling Validate and Parse separately. 
func (*Parser) ValidateAndParse(filename string, content []byte) ([]parser.Link, error) { if len(content) == 0 { return nil, nil diff --git a/internal/parser/xml/xml_bench_test.go b/internal/parser/xml/xml_bench_test.go index 1ed9016..5160c7f 100644 --- a/internal/parser/xml/xml_bench_test.go +++ b/internal/parser/xml/xml_bench_test.go @@ -1,4 +1,4 @@ -package xml +package xml //nolint:revive // package name matches file type being parsed import ( "strconv" @@ -6,26 +6,14 @@ import ( "testing" ) -// BenchmarkParse measures XML parsing performance. -func BenchmarkParse(b *testing.B) { - content := createXMLContent(50) - p := New() - - b.ResetTimer() - for b.Loop() { - _, _ = p.Parse("test.xml", content) - } -} - -// BenchmarkValidateAndParse measures combined validation and parsing. +// BenchmarkValidateAndParse measures XML parsing performance. func BenchmarkValidateAndParse(b *testing.B) { content := createXMLContent(50) p := New() b.ResetTimer() for b.Loop() { - _ = p.Validate(content) - _, _ = p.Parse("test.xml", content) + _, _ = p.ValidateAndParse("test.xml", content) } } diff --git a/internal/parser/xml/xml_test.go b/internal/parser/xml/xml_test.go index fc6044d..695cc60 100644 --- a/internal/parser/xml/xml_test.go +++ b/internal/parser/xml/xml_test.go @@ -1,4 +1,4 @@ -package xml +package xml //nolint:revive // package name matches file type being parsed import ( "testing" @@ -17,48 +17,14 @@ func TestParser_Extensions(t *testing.T) { assert.Contains(t, exts, ".xml") } -func TestParser_Validate(t *testing.T) { - t.Parallel() - - p := New() - - t.Run("ValidXML", func(t *testing.T) { - t.Parallel() - content := []byte(``) - err := p.Validate(content) - assert.NoError(t, err) - }) - - t.Run("ValidXMLWithAttributes", func(t *testing.T) { - t.Parallel() - content := []byte(``) - err := p.Validate(content) - assert.NoError(t, err) - }) - - t.Run("EmptyContent", func(t *testing.T) { - t.Parallel() - err := p.Validate([]byte{}) - assert.NoError(t, err) - }) - - 
t.Run("InvalidXML", func(t *testing.T) { - t.Parallel() - content := []byte(``) - err := p.Validate(content) - assert.Error(t, err) - assert.Contains(t, err.Error(), "invalid XML") - }) -} - -func TestParser_Parse(t *testing.T) { +func TestParser_ValidateAndParse(t *testing.T) { t.Parallel() p := New() t.Run("SimpleHref", func(t *testing.T) { t.Parallel() content := []byte(`Link`) - links, err := p.Parse("test.xml", content) + links, err := p.ValidateAndParse("test.xml", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -73,7 +39,7 @@ func TestParser_Parse(t *testing.T) { `) - links, err := p.Parse("test.xml", content) + links, err := p.ValidateAndParse("test.xml", content) require.NoError(t, err) assert.Len(t, links, 3) }) @@ -81,7 +47,7 @@ func TestParser_Parse(t *testing.T) { t.Run("URLInTextContent", func(t *testing.T) { t.Parallel() content := []byte(`Visit https://example.com for info`) - links, err := p.Parse("test.xml", content) + links, err := p.ValidateAndParse("test.xml", content) require.NoError(t, err) require.Len(t, links, 1) assert.Equal(t, "https://example.com", links[0].URL) @@ -90,7 +56,7 @@ func TestParser_Parse(t *testing.T) { t.Run("MultipleURLsInText", func(t *testing.T) { t.Parallel() content := []byte(`Check https://one.com and https://two.com`) - links, err := p.Parse("test.xml", content) + links, err := p.ValidateAndParse("test.xml", content) require.NoError(t, err) assert.Len(t, links, 2) }) @@ -105,7 +71,7 @@ func TestParser_Parse(t *testing.T) { `) - links, err := p.Parse("test.xml", content) + links, err := p.ValidateAndParse("test.xml", content) require.NoError(t, err) assert.Len(t, links, 1) }) @@ -113,7 +79,7 @@ func TestParser_Parse(t *testing.T) { t.Run("SelfClosingElements", func(t *testing.T) { t.Parallel() content := []byte(``) - links, err := p.Parse("test.xml", content) + links, err := p.ValidateAndParse("test.xml", content) require.NoError(t, err) assert.Len(t, 
links, 1) }) @@ -128,7 +94,7 @@ func TestParser_Parse(t *testing.T) {