diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f0d384b..87a7fe2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,10 @@ on: branches: - master +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + cancel-in-progress: true + jobs: lint: runs-on: ubuntu-latest @@ -22,7 +26,7 @@ jobs: with: go-version: ^1.22 - name: golangci-lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v5 - name: goreleaser-check uses: goreleaser/goreleaser-action@v5 with: @@ -43,8 +47,8 @@ jobs: if: ${{ github.event_name == 'pull_request' }} run: make test - name: test-coverage - if: ${{ false && github.event_name == 'push' }} - uses: paambaati/codeclimate-action@v5.0.0 + if: ${{ github.event_name == 'push' }} + uses: paambaati/codeclimate-action@v6.0.0 env: CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} with: diff --git a/.golangci.yml b/.golangci.yml index ae70df3..1e4787e 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -57,6 +57,7 @@ issues: - gosec - path: ._test\.go linters: + - canonicalheader - dupword - ifshort - cyclop diff --git a/cmd/crawley/main.go b/cmd/crawley/main.go index 26ebf9a..670a90f 100644 --- a/cmd/crawley/main.go +++ b/cmd/crawley/main.go @@ -41,7 +41,7 @@ var ( fBrute, fNoHeads bool fSkipSSL, fScanJS bool fScanCSS, fScanALL bool - fSubdomains bool + fSubdomains bool fDirsPolicy, fProxyAuth string fRobotsPolicy, fUA string fDelay time.Duration diff --git a/go.mod b/go.mod index 7c32fa1..589fe8c 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,6 @@ go 1.22 require ( github.com/s0rg/compflag v1.1.0 github.com/s0rg/set v1.2.0 - github.com/tdewolff/parse/v2 v2.7.12 - golang.org/x/net v0.24.0 + github.com/tdewolff/parse/v2 v2.7.14 + golang.org/x/net v0.26.0 ) diff --git a/go.sum b/go.sum index 3937fe5..1f80c0d 100644 --- a/go.sum +++ b/go.sum @@ -2,9 +2,9 @@ github.com/s0rg/compflag v1.1.0 h1:xhCUPLy+5Ue/Q9I/nIcLti2Ul6P42JYx4UvtYoDXmlQ= github.com/s0rg/compflag v1.1.0/go.mod h1:XMntVpc3+jpmBe0s8xo4w9swH8T9ARGkMC9HFiDRoUw= github.com/s0rg/set v1.2.0 h1:53b207YMktNQJXYei/oHuTR5oOO2e9+eieZOncYsh9g= github.com/s0rg/set v1.2.0/go.mod h1:xz3nDbjF4nyMLvAHvmE7rigXpNrKKTsi6iANznIB1/4= -github.com/tdewolff/parse/v2 v2.7.12 h1:tgavkHc2ZDEQVKy1oWxwIyh5bP4F5fEh/JmBwPP/3LQ= -github.com/tdewolff/parse/v2 v2.7.12/go.mod h1:3FbJWZp3XT9OWVN3Hmfp0p/a08v4h8J9W1aghka0soA= +github.com/tdewolff/parse/v2 v2.7.14 h1:100KJ+QAO3PpMb3uUjzEU/NpmCdbBYz6KPmCIAfWpR8= +github.com/tdewolff/parse/v2 v2.7.14/go.mod h1:3FbJWZp3XT9OWVN3Hmfp0p/a08v4h8J9W1aghka0soA= github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52 h1:gAQliwn+zJrkjAHVcBEYW/RFvd2St4yYimisvozAYlA= github.com/tdewolff/test v1.0.11-0.20231101010635-f1265d231d52/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= diff --git a/internal/crawler/config.go b/internal/crawler/config.go index 3c3793c..2465e85 100644 --- a/internal/crawler/config.go +++ b/internal/crawler/config.go @@ -60,7 +60,7 @@ func (c *config) String() (rv string) { sb.WriteString(" +css") } - if c.Subdomains{ + if c.Subdomains { sb.WriteString(" +subdomains") } diff --git a/internal/crawler/util.go b/internal/crawler/util.go index 9926a1e..47457da 100644 --- a/internal/crawler/util.go +++ b/internal/crawler/util.go @@ -73,24 +73,28 @@ func prepareFilter(tags []string) links.TokenFilter { func canCrawl(a, b *url.URL, d int, subdomains bool) (yes bool) { if a.Host != b.Host { - if subdomains{ - domainA := strings.Split(a.Host, ".") - domainB := strings.Split(b.Host, ".") - if len(domainA) >= len(domainB){ - // The base domain must be shorter than the found domain - return - } - j := len(domainB) - 1 - for i := len(domainA) - 1; i >= 0 && j >= 0; i-- { - // Traverse each domain from the end, to check if their top-level domain are the same - if domainA[i] != domainB[j] { - // not the same top-level host - return - } - j-- + if !subdomains { + return false + } + + domainA := strings.Split(a.Host, ".") + domainB := strings.Split(b.Host, ".") + + if len(domainA) >= len(domainB) { + // The base domain must be shorter than the found domain + return false + } + + j := len(domainB) - 1 + + for i := len(domainA) - 1; i >= 0 && j >= 0; i-- { + // Traverse each domain from the end, to check if their top-level domain are the same + if domainA[i] != domainB[j] { + // not the same top-level host + return false } - } else{ - return + + j-- } } @@ -106,11 +110,11 @@ func canCrawl(a, b *url.URL, d int, subdomains bool) (yes bool) { depth, found := relativeDepth(apath, bpath) if !found { - return + return false } if d >= 0 && depth > d { - return + return false } return true diff --git a/internal/crawler/util_test.go b/internal/crawler/util_test.go index 26b19e7..d421021 100644 --- a/internal/crawler/util_test.go +++ b/internal/crawler/util_test.go @@ -117,9 +117,9 @@ func TestCanCrawl(t *testing.T) { t.Parallel() type args struct { - b *url.URL - u *url.URL - d int + b *url.URL + u *url.URL + d int subdomains bool }