Skip to content

Commit

Permalink
Merge branch 'dev' into fix_github_perf_issue
Browse files (browse the repository at this point in the history)
  • Loading branch information
ehsandeep authored Nov 4, 2024
2 parents 5ac4567 + df17c01 commit e82675f
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 122 deletions.
2 changes: 0 additions & 2 deletions v2/pkg/passive/sources.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/certspotter"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/chaos"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/chinaz"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/columbus"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/commoncrawl"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/crtsh"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/digitorus"
Expand Down Expand Up @@ -62,7 +61,6 @@ var AllSources = [...]subscraping.Source{
&certspotter.Source{},
&chaos.Source{},
&chinaz.Source{},
&columbus.Source{},
&commoncrawl.Source{},
&crtsh.Source{},
&digitorus.Source{},
Expand Down
2 changes: 0 additions & 2 deletions v2/pkg/passive/sources_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,7 +21,6 @@ var (
"certspotter",
"chaos",
"chinaz",
"columbus",
"commoncrawl",
"crtsh",
"digitorus",
Expand Down Expand Up @@ -67,7 +66,6 @@ var (
"censys",
"chaos",
"chinaz",
"columbus",
"crtsh",
"digitorus",
"dnsdumpster",
Expand Down
92 changes: 0 additions & 92 deletions v2/pkg/subscraping/sources/columbus/columbus.go

This file was deleted.

40 changes: 14 additions & 26 deletions v2/pkg/subscraping/sources/sitedossier/sitedossier.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,12 +18,6 @@ const SleepRandIntn = 5

var reNext = regexp.MustCompile(`<a href="([A-Za-z0-9/.]+)"><b>`)

type agent struct {
results chan subscraping.Result
errors int
session *subscraping.Session
}

// Source is the passive scraping agent
type Source struct {
timeTaken time.Duration
Expand All @@ -37,58 +31,52 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se
s.errors = 0
s.results = 0

a := agent{
session: session,
results: results,
}

go func() {
defer func(startTime time.Time) {
s.timeTaken = time.Since(startTime)
close(a.results)
close(results)
}(time.Now())

a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com/parentdomain/%s", domain))
s.errors = a.errors
s.results = len(a.results)
s.enumerate(ctx, session, fmt.Sprintf("http://www.sitedossier.com/parentdomain/%s", domain), results)
}()

return a.results
return results
}

func (a *agent) enumerate(ctx context.Context, baseURL string) {
func (s *Source) enumerate(ctx context.Context, session *subscraping.Session, baseURL string, results chan subscraping.Result) {
select {
case <-ctx.Done():
return
default:
}

resp, err := a.session.SimpleGet(ctx, baseURL)
resp, err := session.SimpleGet(ctx, baseURL)
isnotfound := resp != nil && resp.StatusCode == http.StatusNotFound
if err != nil && !isnotfound {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
a.session.DiscardHTTPResponse(resp)
results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
s.errors++
session.DiscardHTTPResponse(resp)
return
}

body, err := io.ReadAll(resp.Body)
if err != nil {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
a.errors++
results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
s.errors++
resp.Body.Close()
return
}
resp.Body.Close()

src := string(body)
for _, subdomain := range a.session.Extractor.Extract(src) {
a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: subdomain}
for _, subdomain := range session.Extractor.Extract(src) {
results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: subdomain}
s.results++
}

match := reNext.FindStringSubmatch(src)
if len(match) > 0 {
a.enumerate(ctx, fmt.Sprintf("http://www.sitedossier.com%s", match[1]))
s.enumerate(ctx, session, fmt.Sprintf("http://www.sitedossier.com%s", match[1]), results)
}
}

Expand Down

0 comments on commit e82675f

Please sign in to comment.