Skip to content

Commit

Permalink
Merge branch 'dev' into pr/1343
Browse files Browse the repository at this point in the history
  • Loading branch information
Mzack9999 committed Nov 22, 2023
2 parents 0ada177 + c5a4b72 commit 3ddd03a
Show file tree
Hide file tree
Showing 24 changed files with 592 additions and 319 deletions.
52 changes: 27 additions & 25 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,40 @@
version: 2
updates:

# Maintain dependencies for GitHub Actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
target-branch: "dep"
commit-message:
prefix: "chore"
include: "scope"
labels:
- "Type: Maintenance"

# Maintain dependencies for go modules
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "daily"
target-branch: "dep"
interval: "weekly"
target-branch: "dev"
commit-message:
prefix: "chore"
include: "scope"
labels:
- "Type: Maintenance"
allow:
- dependency-name: "github.com/projectdiscovery/*"

# Maintain dependencies for docker
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "weekly"
target-branch: "dep"
commit-message:
prefix: "chore"
include: "scope"
labels:
- "Type: Maintenance"
# # Maintain dependencies for GitHub Actions
# - package-ecosystem: "github-actions"
# directory: "/"
# schedule:
# interval: "weekly"
# target-branch: "dev"
# commit-message:
# prefix: "chore"
# include: "scope"
# labels:
# - "Type: Maintenance"
#
# # Maintain dependencies for docker
# - package-ecosystem: "docker"
# directory: "/"
# schedule:
# interval: "weekly"
# target-branch: "dev"
# commit-message:
# prefix: "chore"
# include: "scope"
# labels:
# - "Type: Maintenance"
4 changes: 2 additions & 2 deletions .github/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ changelog:
authors:
- dependabot
categories:
- title: 🎉 Features
- title: 🎉 New Features
labels:
- "Type: Enhancement"
- title: 🐞 Bugs
- title: 🐞 Bug Fixes
labels:
- "Type: Bug"
- title: 🔨 Maintenance
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/dep-auto-merge.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: 🤖 dep-auto-merge
name: 🤖 dep auto merge

on:
pull_request:
branches:
- main
- dev
workflow_dispatch:

permissions:
Expand All @@ -14,6 +14,7 @@ permissions:
jobs:
automerge:
runs-on: ubuntu-latest
if: github.actor == 'dependabot[bot]'
steps:
- uses: actions/checkout@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-binary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
- name: "Create release on GitHub"
uses: goreleaser/goreleaser-action@v4
with:
args: "release --rm-dist"
args: "release --clean"
version: latest
workdir: .
env:
Expand Down
44 changes: 33 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,23 @@ PROBES:
-lc, -line-count display response body line count
-wc, -word-count display response body word count
-title display page title
-bp, -body-preview display first N characters of response body (default 100)
-server, -web-server display server name
-td, -tech-detect display technology in use based on wappalyzer dataset
-method display http request method
-websocket display server using websocket
-ip display host ip
-cname display host cname
-asn display host asn information
-cdn display cdn in use
-cdn display cdn/waf in use
-probe display probe status

HEADLESS:
-ss, -screenshot enable saving screenshot of the page using headless browser
-system-chrome enable using local installed chrome for screenshot
-ho, -headless-options string[] start headless chrome with additional options
-esb, -exclude-screenshot-bytes enable excluding screenshot bytes from json output
-ehb, -exclude-headless-body enable excluding headless body from json output

MATCHERS:
-mc, -match-code string match response with specified status code (-mc 200,302)
Expand All @@ -127,26 +130,27 @@ MATCHERS:
-mfc, -match-favicon string[] match response with specified favicon hash (-mfc 1494302000)
-ms, -match-string string match response with specified string (-ms admin)
-mr, -match-regex string match response with specified regex (-mr admin)
-mcdn, -match-cdn string[] match host with specified cdn provider (incapsula, oracle, google, azure, cloudflare, cloudfront, fastly, akamai, sucuri, leaseweb)
-mcdn, -match-cdn string[] match host with specified cdn provider (cloudfront, fastly, google, leaseweb, stackpath)
-mrt, -match-response-time string match response with specified response time in seconds (-mrt '< 1')
-mdc, -match-condition string match response with dsl expression condition

EXTRACTOR:
-er, -extract-regex string[] display response content with matched regex
-ep, -extract-preset string[] display response content matched by a pre-defined regex (url,ipv4,mail)
-ep, -extract-preset string[] display response content matched by a pre-defined regex (ipv4,mail,url)

FILTERS:
-fc, -filter-code string filter response with specified status code (-fc 403,401)
-fep, -filter-error-page filter response with ML based error page detection
-fl, -filter-length string filter response with specified content length (-fl 23,33)
-flc, -filter-line-count string filter response body with specified line count (-flc 423,532)
-fwc, -filter-word-count string filter response body with specified word count (-fwc 423,532)
-ffc, -filter-favicon string[] filter response with specified favicon hash (-mfc 1494302000)
-ffc, -filter-favicon string[] filter response with specified favicon hash (-ffc 1494302000)
-fs, -filter-string string filter response with specified string (-fs admin)
-fe, -filter-regex string filter response with specified regex (-fe admin)
-fcdn, -filter-cdn string[] filter host with specified cdn provider (google, leaseweb, stackpath, cloudfront, fastly)
-fcdn, -filter-cdn string[] filter host with specified cdn provider (cloudfront, fastly, google, leaseweb, stackpath)
-frt, -filter-response-time string filter response with specified response time in seconds (-frt '> 1')
-fdc, -filter-condition string filter response with dsl expression condition
-strip strips all tags in response. supported formats: html,xml (default html)

RATE-LIMIT:
-t, -threads int number of threads to use (default 50)
Expand Down Expand Up @@ -176,13 +180,16 @@ OUTPUT:
-srd, -store-response-dir string store http response to custom directory
-csv store output in csv format
-csvo, -csv-output-encoding string define output encoding
-json store output in JSONL(ines) format
-irr, -include-response include http request/response in JSON output (-json only)
-j, -json store output in JSONL(ines) format
-irh, -include-response-header include http response (headers) in JSON output (-json only)
-irr, -include-response include http request/response (headers + body) in JSON output (-json only)
-irrb, -include-response-base64 include base64 encoded http request/response in JSON output (-json only)
-include-chain include redirect http chain in JSON output (-json only)
-store-chain include http redirect chain in responses (-sr only)
-svrc, -store-vision-recon-cluster include visual recon clusters (-ss and -sr only)

CONFIGURATIONS:
-config string path to the httpx configuration file (default $HOME/.config/httpx/config.yaml)
-r, -resolvers string[] list of custom resolver (file or comma separated)
-allow string[] allowed list of IP/CIDR's to process (file or comma separated)
-deny string[] denied list of IP/CIDR's to process (file or comma separated)
Expand All @@ -195,6 +202,7 @@ CONFIGURATIONS:
-fr, -follow-redirects follow http redirects
-maxr, -max-redirects int max number of redirects to follow per host (default 10)
-fhr, -follow-host-redirects follow redirects on the same host
-rhsts, -respect-hsts respect HSTS response headers for redirect requests
-vhost-input get a list of vhosts as input
-x string request methods to probe, use 'all' to probe all HTTP methods
-body string post body to include in http request
Expand All @@ -203,7 +211,7 @@ CONFIGURATIONS:
-ldp, -leave-default-ports leave default http/https ports in host header (eg. http://host:80 - https://host:443)
-ztls use ztls library with autofallback to standard one for tls13
-no-decode avoid decoding body
-tlsi, -tls-impersonate enable random tls client (ja3) impersonation (experimental)
-tlsi, -tls-impersonate enable experimental client hello (ja3) tls randomization
-no-stdin Disable Stdin processing

DEBUG:
Expand All @@ -223,10 +231,11 @@ OPTIMIZATIONS:
-nf, -no-fallback display both probed protocol (HTTPS and HTTP)
-nfs, -no-fallback-scheme probe with protocol scheme specified in input
-maxhr, -max-host-error int max error count per host before skipping remaining path/s (default 30)
-ec, -exclude-cdn skip full port scans for CDNs (only checks for 80,443)
-ec, -exclude-cdn skip full port scans for CDN/WAF (only checks for 80,443)
-eph, -exclude-private-hosts skip any hosts which have a private ip address
-retries int number of retries
-timeout int timeout in seconds (default 5)
-delay duration duration between each http request (eg: 200ms, 1s) (default -1ns)
-timeout int timeout in seconds (default 10)
-delay value duration between each http request (eg: 200ms, 1s) (default -1ns)
-rsts, -response-size-to-save int max response size to save in bytes (default 2147483647)
-rstr, -response-size-to-read int max response size to read in bytes (default 2147483647)
```
Expand Down Expand Up @@ -557,6 +566,19 @@ Screenshots are stored in the output/screenshot directory by default. To specify
httpx -screenshot -srd /path/to/custom/directory -u https://example.com
```

### Body Preview
Body preview shows the first N characters of the response body and strips HTML tags from it.

```console
httpx -u https://example.com -silent -body-preview
https://example.com [Example Domain This domain is for use in illustrative examples in documents. You may use this domai]
```

```console
httpx -u https://example.com -silent -body-preview=200 -strip=html
https://example.com [Example Domain This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission. More information...]
```

#### ⏳ Performance Considerations

Please note that since screenshots are captured using a headless browser, httpx runs will be slower when using the `-screenshot` option.
Expand Down
3 changes: 2 additions & 1 deletion cmd/functional-test/testcases.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ scanme.sh {{binary}} -silent -ztls
scanme.sh {{binary}} -silent -jarm
https://scanme.sh?a=1*1 {{binary}} -silent
https://scanme.sh:443 {{binary}} -asn
scanme.sh {{binary}} -silent -tls-impersonate
scanme.sh {{binary}} -silent -tls-impersonate
example.com {{binary}} -silent -bp -strip
22 changes: 11 additions & 11 deletions cmd/integration-test/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ var httpTestcases = map[string]testutils.TestCase{
"Raw HTTP GET Request": &standardHttpGet{unsafe: true},
"Raw request with non standard rfc path via stdin": &standardHttpGet{unsafe: true, stdinPath: "/%invalid"},
"Raw request with non standard rfc path via cli flag": &standardHttpGet{unsafe: true, path: "/%invalid"},
"Regression test for: https://github.com/projectdiscovery/httpx/issues/363": &issue363{}, // infinite redirect
"Regression test for: https://github.com/projectdiscovery/httpx/issues/276": &issue276{}, // full path with port in output
"Regression test for: https://github.com/projectdiscovery/httpx/issues/277": &issue277{}, // scheme://host:port via stdin
"Regression test for: https://github.com/projectdiscovery/httpx/issues/303": &issue303{}, // misconfigured gzip header with uncompressed body
"Regression test for: https://github.com/projectdiscovery/httpx/issues/400": &issue400{}, // post operation with body
"Regression test for: https://github.com/projectdiscovery/httpx/issues/414": &issue414{}, // stream mode with path
"Regression test for: https://github.com/projectdiscovery/httpx/issues/433": &issue433{}, // new line scanning with title flag
"Request URI to existing file - https://github.com/projectdiscovery/httpx/issues/480": &issue480{}, // request uri pointing to existing file
"Regression test for: https://github.com/projectdiscovery/httpx/issues/363": &issue363{}, // infinite redirect
"Regression test for: https://github.com/projectdiscovery/httpx/issues/276": &issue276{}, // full path with port in output
"Regression test for: https://github.com/projectdiscovery/httpx/issues/277": &issue277{}, // scheme://host:port via stdin
"Regression test for: https://github.com/projectdiscovery/httpx/issues/303": &issue303{}, // misconfigured gzip header with uncompressed body
"Regression test for: https://github.com/projectdiscovery/httpx/issues/400": &issue400{}, // post operation with body
"Regression test for: https://github.com/projectdiscovery/httpx/issues/414": &issue414{}, // stream mode with path
"Regression test for unwanted chars": &titleUnwantedChars{}, // new line scanning with title flag, Regression test for: https://github.com/projectdiscovery/httpx/issues/433
"Request URI to existing file - https://github.com/projectdiscovery/httpx/issues/480": &issue480{}, // request uri pointing to existing file
"Standard HTTP GET Request with match response time": &standardHttpGet{mrt: true, inputValue: "\"<10s\""},
"Standard HTTP GET Request with filter response time": &standardHttpGet{frt: true, inputValue: "\">3ms\""},
"Multiple Custom Header": &customHeader{inputData: []string{"-debug-req", "-H", "'user-agent: test'", "-H", "'foo: bar'"}, expectedOutput: []string{"User-Agent: test", "Foo: bar"}},
Expand Down Expand Up @@ -257,14 +257,14 @@ func (h *issue414) Execute() error {
return nil
}

type issue433 struct{}
type titleUnwantedChars struct{}

func (h *issue433) Execute() error {
func (h *titleUnwantedChars) Execute() error {
var ts *httptest.Server
router := httprouter.New()
uriPath := "/index"
router.GET(uriPath, httprouter.Handle(func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
htmlResponse := "<html><head><title>Project\n\r Discovery\n - Httpx></title></head><body>test data</body></html>"
htmlResponse := "<html><head><title>\v\fProject\n\r Discovery\n - Httpx\t></title></head><body>test data</body></html>"
fmt.Fprint(w, htmlResponse)
}))
ts = httptest.NewServer(router)
Expand Down
11 changes: 11 additions & 0 deletions common/httputilz/normalize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package httputilz

import "regexp"

// normalizeSpacesRegex matches one or more consecutive whitespace characters.
var normalizeSpacesRegex = regexp.MustCompile(`\s+`)

// NormalizeSpaces returns data with every run of whitespace characters
// replaced by a single space.
func NormalizeSpaces(data string) string {
	const singleSpace = " "
	normalized := normalizeSpacesRegex.ReplaceAllString(data, singleSpace)
	return normalized
}
10 changes: 8 additions & 2 deletions common/httpx/cdn.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@ import (
"net"
)

// CdnCheck verifies if the given ip is part of Cdn/WAF ranges.
//
// It returns whether the ip matched, the value reported by the underlying
// check, and any error from that check. An error is returned when no cdn
// client is configured.
func (h *HTTPX) CdnCheck(ip string) (bool, string, error) {
	if h.cdn == nil {
		return false, "", fmt.Errorf("cdn client not configured")
	}

	// The goal is to check whether ip is part of a cdn/waf in order to decide
	// if the target should be scanned or not. The "cloud" item type does not
	// fit that logic, so such a target is reported as not part of a cdn/waf.
	matched, value, itemType, err := h.cdn.Check(net.ParseIP(ip))
	if itemType == "cloud" {
		return false, "", err
	}
	return matched, value, err
}
31 changes: 31 additions & 0 deletions common/httpx/httpx.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/projectdiscovery/cdncheck"
"github.com/projectdiscovery/fastdialer/fastdialer"
"github.com/projectdiscovery/fastdialer/fastdialer/ja3/impersonate"
"github.com/projectdiscovery/httpx/common/httputilz"
"github.com/projectdiscovery/rawhttp"
retryablehttp "github.com/projectdiscovery/retryablehttp-go"
"github.com/projectdiscovery/utils/generic"
Expand Down Expand Up @@ -65,6 +66,14 @@ func New(options *Options) (*HTTPX, error) {
retryablehttpOptions.Timeout = httpx.Options.Timeout
retryablehttpOptions.RetryMax = httpx.Options.RetryMax

handleHSTS := func(req *http.Request) {
if req.Response.Header.Get("Strict-Transport-Security") == "" {
return
}

req.URL.Scheme = "https"
}

var redirectFunc = func(_ *http.Request, _ []*http.Request) error {
// Tell the http client to not follow redirect
return http.ErrUseLastResponse
Expand All @@ -75,10 +84,16 @@ func New(options *Options) (*HTTPX, error) {
redirectFunc = func(redirectedRequest *http.Request, previousRequests []*http.Request) error {
// add custom cookies if necessary
httpx.setCustomCookies(redirectedRequest)

if len(previousRequests) >= options.MaxRedirects {
// https://github.com/golang/go/issues/10069
return http.ErrUseLastResponse
}

if options.RespectHSTS {
handleHSTS(redirectedRequest)
}

return nil
}
}
Expand All @@ -103,6 +118,11 @@ func New(options *Options) (*HTTPX, error) {
// https://github.com/golang/go/issues/10069
return http.ErrUseLastResponse
}

if options.RespectHSTS {
handleHSTS(redirectedRequest)
}

return nil
}
}
Expand Down Expand Up @@ -389,3 +409,14 @@ func (httpx *HTTPX) setCustomCookies(req *http.Request) {
}
}
}

// Sanitize passes respStr through the HTTPX html policy and then applies the
// optional clean-up steps: when trimLine is set every "\n" is removed, and
// when normalizeSpaces is set the result is run through
// httputilz.NormalizeSpaces.
func (httpx *HTTPX) Sanitize(respStr string, trimLine, normalizeSpaces bool) string {
	cleaned := httpx.htmlPolicy.Sanitize(respStr)
	if trimLine {
		cleaned = strings.ReplaceAll(cleaned, "\n", "")
	}
	if !normalizeSpaces {
		return cleaned
	}
	return httputilz.NormalizeSpaces(cleaned)
}
Loading

0 comments on commit 3ddd03a

Please sign in to comment.