Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,15 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.BoolVarP(&options.HeadlessNoIncognito, "no-incognito", "noi", false, "start headless chrome without incognito mode"),
flagSet.StringVarP(&options.ChromeWSUrl, "chrome-ws-url", "cwu", "", "use chrome browser instance launched elsewhere with the debugger listening at this URL"),
flagSet.BoolVarP(&options.XhrExtraction, "xhr-extraction", "xhr", false, "extract xhr request url,method in jsonl output"),
flagSet.BoolVarP(&options.JavaScriptInteractions, "js-interactions", "jsi", false, "enable clicking JavaScript-enabled elements to discover hidden URLs"),
)

flagSet.CreateGroup("scope", "Scope",
flagSet.StringSliceVarP(&options.Scope, "crawl-scope", "cs", nil, "in scope url regex to be followed by crawler", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.StringSliceVarP(&options.OutOfScope, "crawl-out-scope", "cos", nil, "out of scope url regex to be excluded by crawler", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.StringVarP(&options.FieldScope, "field-scope", "fs", "rdn", "pre-defined scope field (dn,rdn,fqdn) or custom regex (e.g., '(company-staging.io|company.com)')"),
flagSet.BoolVarP(&options.NoScope, "no-scope", "ns", false, "disables host based default scope"),
flagSet.BoolVarP(&options.AllInputScope, "all-input-scope", "ais", false, "treat all input targets as explicit scope roots for the entire crawling session"),
flagSet.BoolVarP(&options.DisplayOutScope, "display-out-scope", "do", false, "display external endpoint from scoped crawling"),
)

Expand Down
12 changes: 12 additions & 0 deletions internal/runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ func validateOptions(options *types.Options) error {
return errkit.New("no inputs specified for crawler")
}

// Validate all-input-scope flag conflicts and precedence.
// field-scope is considered explicitly set when it differs from its default value "rdn".
if options.AllInputScope && options.FieldScope != "rdn" {
return errkit.New("all-input-scope (-ais) and field-scope (-fs) flags cannot be used together")
}
if options.AllInputScope && options.NoScope {
gologger.Info().Msgf("all-input-scope (-ais) takes precedence over no-scope (-ns), ignoring -ns flag")
options.NoScope = false
}
if options.AllInputScope {
gologger.Info().Msgf("all-input-scope mode enabled: treating all input targets as explicit scope roots")
}

// Disabling automatic form fill (-aff) for headless navigation due to incorrect implementation.
// Form filling should be handled via headless actions within the page context
if options.Headless && options.AutomaticFormFill {
Expand Down
4 changes: 4 additions & 0 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ type Options struct {
OutOfScope goflags.StringSlice
// NoScope disables host based default scope
NoScope bool
// AllInputScope treats all input targets as explicit scope roots for the entire crawling session
AllInputScope bool
// DisplayOutScope displays out of scope items in results
DisplayOutScope bool
// ExtensionsMatch contains extensions to match explicitly
Expand Down Expand Up @@ -146,6 +148,8 @@ type Options struct {
HeadlessNoIncognito bool
// XhrExtraction extracts xhr requests
XhrExtraction bool
// JavaScriptInteractions enables clicking JavaScript-enabled elements to discover hidden URLs
JavaScriptInteractions bool
// HealthCheck determines if a self-healthcheck should be performed
HealthCheck bool
// PprofServer enables pprof server
Expand Down
Loading