Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added initial fuzz input url deduplication implementation #5594

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/nuclei/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ on extensive configurability, massive extensibility and ease of use.`)
flagSet.BoolVar(&fuzzFlag, "fuzz", false, "enable loading fuzzing templates (Deprecated: use -dast instead)"),
flagSet.BoolVar(&options.DAST, "dast", false, "enable / run dast (fuzz) nuclei templates"),
flagSet.BoolVarP(&options.DisplayFuzzPoints, "display-fuzz-points", "dfp", false, "display fuzz points in the output for debugging"),
flagSet.BoolVarP(&options.FuzzingDedupe, "fuzzing-dedupe", "fd", false, "deduplicate fuzzing url inputs"),
flagSet.IntVar(&options.FuzzParamFrequency, "fuzz-param-frequency", 10, "frequency of uninteresting parameters for fuzzing before skipping"),
flagSet.StringVarP(&options.FuzzAggressionLevel, "fuzz-aggression", "fa", "low", "fuzzing aggression level controls payload count for fuzz (low, medium, high)"),
)
Expand Down
3 changes: 3 additions & 0 deletions internal/runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ func ValidateOptions(options *types.Options) error {
if options.Verbose && options.Silent {
return errors.New("both verbose and silent mode specified")
}
if options.FuzzingDedupe && options.Stream {
return errors.New("both fuzzing dedupe and stream mode specified")
}

if (options.HeadlessOptionalArguments != nil || options.ShowBrowser || options.UseInstalledChrome) && !options.Headless {
return errors.New("headless mode (-headless) is required if -ho, -sb, -sc or -lha are set")
Expand Down
102 changes: 102 additions & 0 deletions pkg/input/provider/dedupe/dedupe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Package dedupe implements a URL deduplication mechanism
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// for Nuclei DAST or Fuzzing inputs.
//
// It is used to remove similar or non-relevant inputs from fuzzing
// or DAST scans to reduce the number of requests made.
package dedupe

import (
"fmt"
"net/url"
"regexp"
"slices"
"strings"

mapsutil "github.com/projectdiscovery/utils/maps"
)

// FuzzingDeduper is a deduper for fuzzing inputs.
//
// Every URL handed to Add is reduced to a structural pattern, and only the
// first URL that produces a given pattern is accepted.
//
// The normalization works as follows:
//
// - The scheme and host are kept as they are, so the http and https variants
// of the same URL are different patterns.
// - Numeric IDs in the path are replaced with {numeric_id}. The rest of the
// path is kept unchanged; in particular /page and /page/ are different
// patterns.
// - The query is normalized by templating the query parameters with their
// names (id=1 becomes id={id}) and emitting them in sorted order.
// TODO: Doesn't handle different values, everything is stripped. Maybe make it more flexible?
//
// This allows us to deduplicate URLs with different query parameter values
// or orders but the same structure or key names.
type FuzzingDeduper struct {
// items holds the patterns that have been accepted so far; it is used as a
// set, the values carry no information.
items *mapsutil.SyncLockMap[string, struct{}]
}

// NewFuzzingDeduper returns a FuzzingDeduper backed by a freshly created
// pattern map, ready to receive URLs through Add.
func NewFuzzingDeduper() *FuzzingDeduper {
	deduper := new(FuzzingDeduper)
	deduper.items = mapsutil.NewSyncLockMap[string, struct{}]()
	return deduper
}

// Add adds a new URL to the deduper
func (d *FuzzingDeduper) Add(URL string) bool {
generatedPattern, err := generatePattern(URL)
if err != nil {
return false
}

_, found := d.items.Get(generatedPattern)
if found {
return false
}
d.items.Set(generatedPattern, struct{}{})

Check failure on line 51 in pkg/input/provider/dedupe/dedupe.go

View workflow job for this annotation

GitHub Actions / Lint Test

Error return value of `d.items.Set` is not checked (errcheck)
return true
}

// generatePattern parses urlStr and returns its deduplication pattern:
// scheme://host followed by the normalized path and, when the URL has query
// parameters, a "?" and the normalized query.
//
// The parse error is returned unchanged when urlStr is not a valid request
// URI.
func generatePattern(urlStr string) (string, error) {
	parsed, err := url.ParseRequestURI(urlStr)
	if err != nil {
		return "", err
	}

	pattern := parsed.Scheme + "://" + parsed.Host + normalizePath(parsed.Path)
	if query := extractQuery(parsed.Query()); query != "" {
		pattern += "?" + query
	}
	return pattern, nil
}

var (
	// numericIDPathRegex matches a single path segment that consists only of
	// ASCII digits.
	numericIDPathRegex = regexp.MustCompile(`^\d+$`)
)

// normalizePath replaces every purely numeric path segment that follows a
// slash with the placeholder {numeric_id} and leaves everything else,
// including all slashes, untouched.
//
// The path is processed segment by segment rather than by textual
// replacement, so the separator after a numeric segment is preserved
// (/users/123/posts becomes /users/{numeric_id}/posts), consecutive numeric
// segments are all replaced, and segments that merely start with the same
// digits (/3abc) are not affected.
func normalizePath(path string) string {
	segments := strings.Split(path, "/")
	for i, segment := range segments {
		// Index 0 is the text before the first slash; only segments that are
		// actually preceded by a slash are candidates.
		if i > 0 && numericIDPathRegex.MatchString(segment) {
			segments[i] = "{numeric_id}"
		}
	}
	return strings.Join(segments, "/")
}

// extractQuery renders query as a value-independent string: each parameter
// becomes "name={name}" (or just "name" when it has no values at all), the
// rendered parameters are sorted and then joined with "&".
//
// An empty query yields an empty string.
func extractQuery(query url.Values) string {
	parts := make([]string, 0, len(query))

	for name, values := range query {
		part := name
		if len(values) > 0 {
			part = fmt.Sprintf("%s={%s}", name, name)
		}
		parts = append(parts, part)
	}

	// Map iteration order is random; sorting makes the output deterministic.
	slices.Sort(parts)
	return strings.Join(parts, "&")
}
137 changes: 137 additions & 0 deletions pkg/input/provider/dedupe/dedupe_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package dedupe

import (
"fmt"
"testing"

"github.com/stretchr/testify/require"
)

// TestFuzzingDeduper exercises FuzzingDeduper.Add: for each scenario it feeds
// URLs to a fresh deduper and checks which of them are accepted (true) and
// which are rejected (false).
func TestFuzzingDeduper(t *testing.T) {
// Table-driven cases: expected[i] is the result Add must return for urls[i]
// when the URLs are added in order to a new deduper.
t.Run("Basic URL Deduplication", func(t *testing.T) {
tests := []struct {
name string
urls []string
expected []bool
}{
{
name: "Simple unique URLs",
urls: []string{"http://example.com/page1", "http://example.com/page2"},
expected: []bool{true, true},
},
{
name: "Duplicate URLs",
urls: []string{"http://example.com/page1", "http://example.com/page1"},
expected: []bool{true, false},
},
{
// Query values are ignored, only the parameter names matter.
name: "URLs with different query param values",
urls: []string{"http://example.com/page?id=1", "http://example.com/page?id=2"},
expected: []bool{true, false},
},
{
// Parameter order is ignored.
name: "URLs with different query param orders",
urls: []string{"http://example.com/page?a=1&b=2", "http://example.com/page?b=2&a=1"},
expected: []bool{true, false},
},
{
// A trailing slash is significant: both URLs are accepted.
name: "URLs with and without trailing slash",
urls: []string{"http://example.com/page/", "http://example.com/page"},
expected: []bool{true, true},
},
{
// The scheme is part of the pattern.
name: "URLs with different schemes",
urls: []string{"http://example.com", "https://example.com"},
expected: []bool{true, true},
},
{
name: "URLs with query params and without",
urls: []string{"http://example.com/page", "http://example.com/page?param=value"},
expected: []bool{true, true},
},
{
// Input that cannot be parsed is rejected, i.e. Add returns false.
name: "Invalid URLs",
urls: []string{"http://example.com/page", "not a valid url"},
expected: []bool{true, false},
},
{
name: "URLs with empty query params",
urls: []string{"http://example.com/page?param1=&param2=", "http://example.com/page?param2=&param1="},
expected: []bool{true, false},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
deduper := NewFuzzingDeduper()
for i, url := range tt.urls {
result := deduper.Add(url)
require.Equal(t, tt.expected[i], result, "Add(%q) = %v, want %v", url, result, tt.expected[i])
}
})
}
})

// 1000 URLs that differ only in the value of "id" must collapse into a
// single stored pattern.
t.Run("Large Set Deduplication", func(t *testing.T) {
deduper := NewFuzzingDeduper()
baseURL := "http://example.com/page?id=%d&param=%s"

for i := 0; i < 1000; i++ {
url := fmt.Sprintf(baseURL, i, "value")
result := deduper.Add(url)
if i == 0 {
require.True(t, result, "First URL should be added")
} else {
require.False(t, result, "Duplicate URL pattern should not be added: %s", url)
}
}

allItems := deduper.items.GetAll()
require.Len(t, allItems, 1, "Expected 1 unique URL pattern, got %d", len(allItems))
})

// Numeric path segments are treated as IDs, so two URLs that differ only in
// such a segment share one pattern.
t.Run("Path Parameters", func(t *testing.T) {
deduper := NewFuzzingDeduper()

require.True(t, deduper.Add("https://example.com/page/1337"))
require.False(t, deduper.Add("https://example.com/page/1332"))
})

// A sample of crawled URLs with many near-duplicates.
t.Run("TestPHP Vulnweb URLs", func(t *testing.T) {
urls := []string{
"http://testphp.vulnweb.com/hpp/?pp=12",
"http://testphp.vulnweb.com/hpp/params.php?p=valid&pp=12",
"http://testphp.vulnweb.com/artists.php?artist=3",
"http://testphp.vulnweb.com/artists.php?artist=1",
"http://testphp.vulnweb.com/artists.php?artist=2",
"http://testphp.vulnweb.com/listproducts.php?artist=3",
"http://testphp.vulnweb.com/listproducts.php?cat=4",
"http://testphp.vulnweb.com/listproducts.php?cat=3",
"http://testphp.vulnweb.com/listproducts.php?cat=2",
"http://testphp.vulnweb.com/listproducts.php?artist=2",
"http://testphp.vulnweb.com/listproducts.php?artist=1",
"http://testphp.vulnweb.com/listproducts.php?cat=1",
"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg",
"http://testphp.vulnweb.com/product.php?pic=6",
"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg&size=160",
}

// Distinct path + parameter-name combinations in the list above:
// /hpp/ (pp), /hpp/params.php (p, pp), /artists.php (artist),
// /listproducts.php (artist), /listproducts.php (cat),
// /showimage.php (file), /product.php (pic), /showimage.php (file, size).
expectedUnique := 8

deduper := NewFuzzingDeduper()
uniqueCount := 0

for _, url := range urls {
if deduper.Add(url) {
uniqueCount++
}
}

require.Equal(t, expectedUnique, uniqueCount, "Expected %d unique URLs, but got %d", expectedUnique, uniqueCount)

// Test for duplicates
// Every pattern is already stored, so a second pass must reject all URLs.
for _, url := range urls {
require.False(t, deduper.Add(url), "URL should have been identified as duplicate: %s", url)
}
})
}
12 changes: 12 additions & 0 deletions pkg/input/provider/list/hmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/projectdiscovery/hmap/filekv"
"github.com/projectdiscovery/hmap/store/hybrid"
"github.com/projectdiscovery/mapcidr/asn"
"github.com/projectdiscovery/nuclei/v3/pkg/input/provider/dedupe"
providerTypes "github.com/projectdiscovery/nuclei/v3/pkg/input/types"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolstate"
Expand Down Expand Up @@ -48,6 +49,8 @@ type ListInputProvider struct {
hostMapStream *filekv.FileDB
hostMapStreamOnce sync.Once
sync.Once

fuzzDeduper *dedupe.FuzzingDeduper
}

// Options is a wrapper around types.Options structure
Expand Down Expand Up @@ -78,6 +81,9 @@ func New(opts *Options) (*ListInputProvider, error) {
},
excludedHosts: make(map[string]struct{}),
}
if options.FuzzingDedupe {
input.fuzzDeduper = dedupe.NewFuzzingDeduper()
}
if options.Stream {
fkvOptions := filekv.DefaultOptions
fkvOptions.MaxItems = DefaultMaxDedupeItemsCount
Expand Down Expand Up @@ -472,6 +478,12 @@ func (i *ListInputProvider) setItem(metaInput *contextargs.MetaInput) {
}

i.inputCount++ // tracks target count
if i.fuzzDeduper != nil {
if !i.fuzzDeduper.Add(metaInput.Target()) {
gologger.Verbose().Msgf("Ignoring duplicate fuzzing target: %s\n", metaInput.Target())
return
}
}
_ = i.hostMap.Set(key, nil)
if i.hostMapStream != nil {
i.setHostMapStream(key)
Expand Down
2 changes: 2 additions & 0 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ type Options struct {
StoreResponseDir string
// DisableRedirects disables following redirects for http request module
DisableRedirects bool
// FuzzingDedupe enables deduplication of input URLs for fuzzing
FuzzingDedupe bool
// SNI custom hostname
SNI string
// InputFileMode specifies the mode of input file (jsonl, burp, openapi, swagger, etc)
Expand Down
Loading