Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support ignoring URL params #90 #111

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ possible flags with default values:
show version
-workers int
number of workers (default - number of CPU cores)
-ignore-query
ignore query parameters in URL comparison
```

# flags autocompletion
Expand Down
10 changes: 8 additions & 2 deletions cmd/crawley/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ var (
fSkipSSL, fScanJS bool
fScanCSS, fScanALL bool
fSubdomains bool
fIgnoreQuery bool
fDirsPolicy, fProxyAuth string
fRobotsPolicy, fUA string
fDelay time.Duration
Expand Down Expand Up @@ -165,6 +166,7 @@ func parseFlags() (rv []crawler.Option, err error) {
crawler.WithProxyAuth(fProxyAuth),
crawler.WithTimeout(fTimeout),
crawler.WithSubdomains(fSubdomains),
crawler.WithIgnoreQueryParams(fIgnoreQuery),
}

return rv, nil
Expand Down Expand Up @@ -193,6 +195,7 @@ func setupFlags() {
flag.BoolVar(&fSkipSSL, "skip-ssl", false, "skip ssl verification")
flag.BoolVar(&fSilent, "silent", false, "suppress info and error messages in stderr")
flag.BoolVar(&fVersion, "version", false, "show version")
flag.BoolVar(&fIgnoreQuery, "ignore-query", false, "ignore query parameters in URL comparison")

flag.StringVar(&fDirsPolicy, "dirs", crawler.DefaultDirsPolicy,
"policy for non-resource urls: show / hide / only")
Expand Down Expand Up @@ -236,8 +239,11 @@ func main() {
if fSilent {
log.SetOutput(io.Discard)
}

if err := crawl(flag.Arg(0), opts...); err != nil {
uri := flag.Arg(0)
if fIgnoreQuery {
uri = crawler.NormalizeURL(uri)
}
if err := crawl(uri, opts...); err != nil {
// forcing back stderr in case of errors, otherwise, if 'silent' is on - no one will know what happened.
log.SetOutput(os.Stderr)
log.Fatal("[-] crawler:", err)
Expand Down
29 changes: 17 additions & 12 deletions internal/crawler/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,19 @@ const (
)

// config holds all crawler settings assembled from the functional
// Options (see options.go); validate() clamps/normalizes them before use.
type config struct {
	// AlowedTags lists HTML tags considered for link extraction.
	// NOTE(review): name is misspelled ("Alowed") in the original code;
	// kept as-is because other files reference this field.
	AlowedTags []string
	// Ignored lists URL substrings/patterns to skip while crawling.
	Ignored []string
	// Client carries the HTTP client configuration (proxy, TLS, UA, ...).
	Client client.Config
	// Delay is the pause between successive requests.
	Delay time.Duration
	// Depth limits how deep the crawl recurses from the seed URL.
	Depth int
	// Robots selects how robots.txt is honored.
	Robots RobotsPolicy
	// Dirs selects how non-resource (directory) URLs are reported.
	Dirs DirsPolicy
	// Brute enables brute-force discovery of hidden paths.
	Brute bool
	// NoHEAD disables preliminary HEAD requests.
	NoHEAD bool
	// ScanJS / ScanCSS enable link extraction from scripts and styles.
	ScanJS  bool
	ScanCSS bool
	// Subdomains allows the crawl to follow subdomains of the seed host.
	Subdomains bool
	// IgnoreQuery makes URL comparison/deduplication ignore query params.
	IgnoreQuery bool
}

func (c *config) validate() {
Expand Down Expand Up @@ -64,5 +65,9 @@ func (c *config) String() (rv string) {
sb.WriteString(" +subdomains")
}

if c.IgnoreQuery {
sb.WriteString(" +ignore-query")
}

return sb.String()
}
7 changes: 7 additions & 0 deletions internal/crawler/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,10 @@ func WithSubdomains(v bool) Option {
c.Subdomains = v
}
}

// WithIgnoreQueryParams toggles whether query parameters are ignored
// when the crawler compares/deduplicates URLs.
// NOTE(review): this option only sets the flag; the actual stripping
// happens wherever config.IgnoreQuery is consulted (e.g. NormalizeURL).
func WithIgnoreQueryParams(v bool) Option {
	return func(c *config) {
		c.IgnoreQuery = v
	}
}
10 changes: 10 additions & 0 deletions internal/crawler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,13 @@ func resolveRef(base, uri string) (rv string, ok bool) {

return rv, true
}

// NormalizeURL returns rawURL with its query string removed.
// URLs that cannot be parsed are returned unchanged (best-effort,
// matching the crawler's tolerant handling of malformed links).
func NormalizeURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil {
		return rawURL // not parseable - leave as-is
	}

	// Drop the query itself and the bare '?' marker: ForceQuery is set
	// by url.Parse when the input ends in "?", and URL.String would
	// otherwise re-emit that trailing "?" even with an empty RawQuery.
	u.RawQuery = ""
	u.ForceQuery = false

	return u.String()
}