diff --git a/v2/pkg/passive/sources.go b/v2/pkg/passive/sources.go
index 374c5fdd7..b8ce15472 100644
--- a/v2/pkg/passive/sources.go
+++ b/v2/pkg/passive/sources.go
@@ -29,6 +29,7 @@ import (
 	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/fullhunt"
 	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/github"
 	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/hackertarget"
+	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/hudsonrock"
 	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/hunter"
 	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/intelx"
 	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/leakix"
@@ -92,6 +93,7 @@ var AllSources = [...]subscraping.Source{
 	// &threatminer.Source{}, // failing api
 	// &reconcloud.Source{}, // failing due to cloudflare bot protection
 	&builtwith.Source{},
+	&hudsonrock.Source{},
 }
 
 var sourceWarnings = mapsutil.NewSyncLockMap[string, string](
diff --git a/v2/pkg/passive/sources_test.go b/v2/pkg/passive/sources_test.go
index 19aa41579..c6b2c2304 100644
--- a/v2/pkg/passive/sources_test.go
+++ b/v2/pkg/passive/sources_test.go
@@ -53,6 +53,7 @@ var (
 		// "threatminer",
 		// "reconcloud",
 		"builtwith",
+		"hudsonrock",
 	}
 
 	expectedDefaultSources = []string{
diff --git a/v2/pkg/runner/enumerate.go b/v2/pkg/runner/enumerate.go
index 727359f50..8cdcbe31e 100644
--- a/v2/pkg/runner/enumerate.go
+++ b/v2/pkg/runner/enumerate.go
@@ -19,6 +19,8 @@ import (
 const maxNumCount = 2
 
 var replacer = strings.NewReplacer(
+	"•.", "",
+	"•", "",
 	"*.", "",
 	"http://", "",
 	"https://", "",
@@ -63,12 +65,13 @@ func (r *Runner) EnumerateSingleDomainWithCtx(ctx context.Context, domain string
 			case subscraping.Error:
 				gologger.Warning().Msgf("Encountered an error with source %s: %s\n", result.Source, result.Error)
 			case subscraping.Subdomain:
+				subdomain := replacer.Replace(result.Value)
+
 				// Validate the subdomain found and remove wildcards from
-				if !strings.HasSuffix(result.Value, "."+domain) {
+				if !strings.HasSuffix(subdomain, "."+domain) {
 					skippedCounts[result.Source]++
 					continue
 				}
-				subdomain := replacer.Replace(result.Value)
 
 				if matchSubdomain := r.filterAndMatchSubdomain(subdomain); matchSubdomain {
 					if _, ok := uniqueMap[subdomain]; !ok {
diff --git a/v2/pkg/runner/options.go b/v2/pkg/runner/options.go
index 6cad1a3e8..5e3d4b172 100644
--- a/v2/pkg/runner/options.go
+++ b/v2/pkg/runner/options.go
@@ -257,4 +257,5 @@ var defaultRateLimits = []string{
 	"netlas=1/s",
 	// "gitlab=2/s",
 	"github=83/m",
+	"hudsonrock=5/s",
 }
diff --git a/v2/pkg/subscraping/sources/hudsonrock/hudsonrock.go b/v2/pkg/subscraping/sources/hudsonrock/hudsonrock.go
new file mode 100644
index 000000000..b109a3192
--- /dev/null
+++ b/v2/pkg/subscraping/sources/hudsonrock/hudsonrock.go
@@ -0,0 +1,111 @@
+// Package hudsonrock logic
+package hudsonrock
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
+)
+
+// hudsonrockResponse holds the fields of the urls-by-domain JSON reply that
+// Run reads: the URL entries listed under employees_urls and clients_urls.
+type hudsonrockResponse struct {
+	Data struct {
+		EmployeesUrls []struct {
+			URL string `json:"url"`
+		} `json:"employees_urls"`
+		ClientsUrls []struct {
+			URL string `json:"url"`
+		} `json:"clients_urls"`
+	} `json:"data"`
+}
+
+// Source is the passive scraping agent
+type Source struct {
+	timeTaken time.Duration
+	errors    int
+	results   int
+}
+
+// Run requests the Hudson Rock urls-by-domain endpoint for domain and sends
+// every subdomain extracted from the returned URLs on the result channel.
+// Request and decode failures are sent as Error results. The channel is
+// closed when the goroutine finishes.
+func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
+	results := make(chan subscraping.Result)
+	s.errors = 0
+	s.results = 0
+
+	go func() {
+		// Record the elapsed time and close the channel on every exit path.
+		defer func(startTime time.Time) {
+			s.timeTaken = time.Since(startTime)
+			close(results)
+		}(time.Now())
+
+		resp, err := session.SimpleGet(ctx, fmt.Sprintf("https://cavalier.hudsonrock.com/api/json/v2/osint-tools/urls-by-domain?domain=%s", domain))
+		if err != nil {
+			results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
+			s.errors++
+			session.DiscardHTTPResponse(resp)
+			return
+		}
+		// The deferred Close covers every return below, so the decode-error
+		// branch must not close the body a second time.
+		defer resp.Body.Close()
+
+		var response hudsonrockResponse
+		err = json.NewDecoder(resp.Body).Decode(&response)
+		if err != nil {
+			results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
+			s.errors++
+			return
+		}
+
+		// Employee and client URLs are processed identically, so walk them as one list.
+		for _, record := range append(response.Data.EmployeesUrls, response.Data.ClientsUrls...) {
+			for _, subdomain := range session.Extractor.Extract(record.URL) {
+				results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
+				s.results++
+			}
+		}
+	}()
+
+	return results
+}
+
+// Name returns the name of the source
+func (s *Source) Name() string {
+	return "hudsonrock"
+}
+
+// IsDefault returns false: hudsonrock is not marked as a default source.
+func (s *Source) IsDefault() bool {
+	return false
+}
+
+// HasRecursiveSupport returns false for this source.
+func (s *Source) HasRecursiveSupport() bool {
+	return false
+}
+
+// NeedsKey returns false: Run queries the endpoint without an API key.
+func (s *Source) NeedsKey() bool {
+	return false
+}
+
+// AddApiKeys ignores its argument; this source does not use API keys.
+func (s *Source) AddApiKeys(_ []string) {
+	// no key needed
+}
+
+// Statistics returns the error count, result count and elapsed time recorded
+// by the most recent Run.
+func (s *Source) Statistics() subscraping.Statistics {
+	return subscraping.Statistics{
+		Errors:    s.errors,
+		Results:   s.results,
+		TimeTaken: s.timeTaken,
+	}
+}