Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.StringSliceVarP(&options.ExtensionFilter, "extension-filter", "ef", nil, "filter output for given extension (eg, -ef png,css)", goflags.CommaSeparatedStringSliceOptions),
flagSet.StringVarP(&options.OutputMatchCondition, "match-condition", "mdc", "", "match response with dsl based condition"),
flagSet.StringVarP(&options.OutputFilterCondition, "filter-condition", "fdc", "", "filter response with dsl based condition"),
flagSet.StringSliceVarP(&options.CountPathDepth, "count-path-depth", "cpd", nil, "filter urls by path depth count (e.g., '>=3', '==2', '3-5')", goflags.CommaSeparatedStringSliceOptions),
flagSet.StringSliceVarP(&options.CountQueryParams, "count-query-params", "cqp", nil, "filter urls by query parameter count (e.g., '>=3', '==2', '1-3')", goflags.CommaSeparatedStringSliceOptions),
flagSet.StringSliceVarP(&options.CountSubdomainDepth, "count-subdomain-depth", "csd", nil, "filter urls by subdomain depth count (e.g., '>=2', '==1', '1-3')", goflags.CommaSeparatedStringSliceOptions),
flagSet.BoolVar(&options.DepthFilterOrLogic, "depth-filter-or", false, "use OR logic between different depth filter types (default: AND logic)"),
flagSet.BoolVarP(&options.DisableUniqueFilter, "disable-unique-filter", "duf", false, "disable duplicate content filtering"),
)

Expand Down
28 changes: 28 additions & 0 deletions internal/runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/projectdiscovery/gologger/formatter"
"github.com/projectdiscovery/katana/pkg/types"
"github.com/projectdiscovery/katana/pkg/utils"
"github.com/projectdiscovery/katana/pkg/utils/filters"
errorutil "github.com/projectdiscovery/utils/errors"
fileutil "github.com/projectdiscovery/utils/file"
"gopkg.in/yaml.v3"
Expand Down Expand Up @@ -58,6 +59,33 @@ func validateOptions(options *types.Options) error {
}
options.FilterRegex = append(options.FilterRegex, cr)
}

// Validate depth filter expressions
for _, filter := range options.CountPathDepth {
if filter == "" {
continue
}
if err := filters.ValidateAndSuggest("path depth", filter); err != nil {
return err
}
}
for _, filter := range options.CountQueryParams {
if filter == "" {
continue
}
if err := filters.ValidateAndSuggest("query parameter", filter); err != nil {
return err
}
}
for _, filter := range options.CountSubdomainDepth {
if filter == "" {
continue
}
if err := filters.ValidateAndSuggest("subdomain depth", filter); err != nil {
return err
}
}

if options.KnownFiles != "" && options.MaxDepth < 3 {
gologger.Info().Msgf("Depth automatically set to 3 to accommodate the `--known-files` option (originally set to %d).", options.MaxDepth)
options.MaxDepth = 3
Expand Down
4 changes: 4 additions & 0 deletions pkg/output/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,8 @@ type Options struct {
OutputTemplate string
OutputMatchCondition string
OutputFilterCondition string
CountPathDepth []string
CountQueryParams []string
CountSubdomainDepth []string
DepthFilterOrLogic bool
}
31 changes: 30 additions & 1 deletion pkg/output/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package output
import (
"errors"
"fmt"
"net/url"
"os"
"path/filepath"
"regexp"
Expand All @@ -17,6 +18,7 @@ import (
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/katana/pkg/navigation"
"github.com/projectdiscovery/katana/pkg/utils/extensions"
"github.com/projectdiscovery/katana/pkg/utils/filters"
errorutil "github.com/projectdiscovery/utils/errors"
fileutil "github.com/projectdiscovery/utils/file"
"github.com/stoewer/go-strcase"
Expand Down Expand Up @@ -63,6 +65,7 @@ type StandardWriter struct {
outputTemplate *fasttemplate.Template
outputMatchCondition string
outputFilterCondition string
depthValidator *filters.DepthFilterValidator
}

// New returns a new output writer instance
Expand All @@ -85,6 +88,20 @@ func New(options Options) (Writer, error) {
outputFilterCondition: options.OutputFilterCondition,
}

// Initialize depth filter validator if depth filters are configured
if len(options.CountPathDepth) > 0 || len(options.CountQueryParams) > 0 || len(options.CountSubdomainDepth) > 0 {
depthValidator, err := filters.NewDepthFilterValidator(
options.CountPathDepth,
options.CountQueryParams,
options.CountSubdomainDepth,
options.DepthFilterOrLogic,
)
if err != nil {
return nil, err
}
writer.depthValidator = depthValidator
}

if options.StoreFieldDir != "" {
storeFieldDir = options.StoreFieldDir
}
Expand Down Expand Up @@ -353,10 +370,22 @@ func (w *StandardWriter) matchOutput(event *Result) bool {

// filterOutput returns true if the event should be filtered out
func (w *StandardWriter) filterOutput(event *Result) bool {
if w.filterRegex == nil && w.outputFilterCondition == "" {
if w.filterRegex == nil && w.outputFilterCondition == "" && w.depthValidator == nil {
return false
}

// Apply depth filtering if configured
if w.depthValidator != nil {
parsedURL, err := url.Parse(event.Request.URL)
if err != nil {
// If URL parsing fails, filter out the result
return true
}
if !w.depthValidator.ValidateURL(parsedURL) {
return true
}
}

for _, regex := range w.filterRegex {
if regex.MatchString(event.Request.URL) {
return true
Expand Down
30 changes: 29 additions & 1 deletion pkg/types/crawler_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ type CrawlerOptions struct {
Dialer *fastdialer.Dialer
// Wappalyzer instance for technologies detection
Wappalyzer *wappalyzer.Wappalyze
// DepthValidator is a validator for URL depth filtering
DepthValidator *filters.DepthFilterValidator
}

// NewCrawlerOptions creates a new crawler options structure
Expand Down Expand Up @@ -94,6 +96,10 @@ func NewCrawlerOptions(options *Options) (*CrawlerOptions, error) {
OutputTemplate: options.OutputTemplate,
OutputMatchCondition: options.OutputMatchCondition,
OutputFilterCondition: options.OutputFilterCondition,
CountPathDepth: options.CountPathDepth,
CountQueryParams: options.CountQueryParams,
CountSubdomainDepth: options.CountSubdomainDepth,
DepthFilterOrLogic: options.DepthFilterOrLogic,
}
Comment on lines +99 to 103
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Compile-time type mismatch: convert goflags.StringSlice to []string

output.Options fields are []string, while options.Count* are goflags.StringSlice. Add explicit conversions to avoid build errors.

-        CountPathDepth:        options.CountPathDepth,
-        CountQueryParams:      options.CountQueryParams,
-        CountSubdomainDepth:   options.CountSubdomainDepth,
+        CountPathDepth:        []string(options.CountPathDepth),
+        CountQueryParams:      []string(options.CountQueryParams),
+        CountSubdomainDepth:   []string(options.CountSubdomainDepth),
         DepthFilterOrLogic:    options.DepthFilterOrLogic,
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
CountPathDepth: options.CountPathDepth,
CountQueryParams: options.CountQueryParams,
CountSubdomainDepth: options.CountSubdomainDepth,
DepthFilterOrLogic: options.DepthFilterOrLogic,
}
CountPathDepth: []string(options.CountPathDepth),
CountQueryParams: []string(options.CountQueryParams),
CountSubdomainDepth: []string(options.CountSubdomainDepth),
DepthFilterOrLogic: options.DepthFilterOrLogic,
🤖 Prompt for AI Agents
In pkg/types/crawler_options.go around lines 99 to 103, the output.Options fields
CountPathDepth, CountQueryParams and CountSubdomainDepth (DepthFilterOrLogic is a
bool and is unaffected) are
typed as []string but the assigned values are goflags.StringSlice, causing a
compile-time type mismatch; fix by converting each goflags.StringSlice to a
[]string before assignment (e.g., call a conversion helper or use
[]string(someStringSlice) / someStringSlice.ToSlice() as appropriate) so each
field receives a plain []string.


for _, mr := range options.OutputMatchRegex {
Expand All @@ -116,6 +122,20 @@ func NewCrawlerOptions(options *Options) (*CrawlerOptions, error) {
return nil, errorutil.NewWithErr(err).Msgf("could not create output writer")
}

// Initialize depth filter validator if depth filters are configured
var depthValidator *filters.DepthFilterValidator
if len(options.CountPathDepth) > 0 || len(options.CountQueryParams) > 0 || len(options.CountSubdomainDepth) > 0 {
depthValidator, err = filters.NewDepthFilterValidator(
options.CountPathDepth,
options.CountQueryParams,
options.CountSubdomainDepth,
options.DepthFilterOrLogic,
)
Comment on lines +128 to +133
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Also convert goflags.StringSlice when constructing the validator

filters.NewDepthFilterValidator takes []string slices; pass converted values.

-        depthValidator, err = filters.NewDepthFilterValidator(
-            options.CountPathDepth,
-            options.CountQueryParams,
-            options.CountSubdomainDepth,
-            options.DepthFilterOrLogic,
-        )
+        depthValidator, err = filters.NewDepthFilterValidator(
+            []string(options.CountPathDepth),
+            []string(options.CountQueryParams),
+            []string(options.CountSubdomainDepth),
+            options.DepthFilterOrLogic,
+        )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
depthValidator, err = filters.NewDepthFilterValidator(
options.CountPathDepth,
options.CountQueryParams,
options.CountSubdomainDepth,
options.DepthFilterOrLogic,
)
depthValidator, err = filters.NewDepthFilterValidator(
[]string(options.CountPathDepth),
[]string(options.CountQueryParams),
[]string(options.CountSubdomainDepth),
options.DepthFilterOrLogic,
)
🤖 Prompt for AI Agents
In pkg/types/crawler_options.go around lines 128-133, the call to
filters.NewDepthFilterValidator is passing goflags.StringSlice values directly;
convert those to plain []string slices before passing them. Replace the
arguments options.CountPathDepth, options.CountQueryParams and
options.CountSubdomainDepth with their []string equivalents (perform a simple
conversion or extract the underlying slice) so NewDepthFilterValidator receives
[]string types.

if err != nil {
return nil, errorutil.NewWithErr(err).Msgf("could not create depth filter validator")
}
}

crawlerOptions := &CrawlerOptions{
ExtensionsValidator: extensionsValidator,
Parser: responseParser,
Expand All @@ -124,6 +144,7 @@ func NewCrawlerOptions(options *Options) (*CrawlerOptions, error) {
Options: options,
Dialer: fastdialerInstance,
OutputWriter: outputWriter,
DepthValidator: depthValidator,
}

if options.RateLimit > 0 {
Expand All @@ -150,9 +171,16 @@ func (c *CrawlerOptions) Close() error {
}

// ValidatePath reports whether path passes the configured extensions
// validator. When no extensions validator is set, every path is accepted.
func (c *CrawlerOptions) ValidatePath(path string) bool {
	// First check extension validation. The stale unconditional return that
	// preceded this check made it unreachable; the result is unchanged.
	if c.ExtensionsValidator != nil && !c.ExtensionsValidator.ValidatePath(path) {
		return false
	}

	// Note: Depth validation is handled at output stage to allow crawling
	// but filter final results. This ensures we can discover URLs first.

	return true
}

Expand Down
8 changes: 8 additions & 0 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ type Options struct {
OutputMatchCondition string
// OutputFilterCondition is the condition to filter output
OutputFilterCondition string
// CountPathDepth filters URLs by path depth count
CountPathDepth goflags.StringSlice
// CountQueryParams filters URLs by query parameter count
CountQueryParams goflags.StringSlice
// CountSubdomainDepth filters URLs by subdomain depth count
CountSubdomainDepth goflags.StringSlice
// DepthFilterOrLogic uses OR logic between depth filter types
DepthFilterOrLogic bool
// MaxDepth is the maximum depth to crawl
MaxDepth int
// BodyReadSize is the maximum size of response body to read
Expand Down
Loading