From 84ec6702281bad4b8b6aea11e2849457c8c5df10 Mon Sep 17 00:00:00 2001 From: Kashif Khan <70996046+kashifkhan0771@users.noreply.github.com> Date: Wed, 30 Oct 2024 21:39:07 +0500 Subject: [PATCH] standardize email pattern (#3524) * standardize email pattern * added email pattern test cases * added negative test cases for email pattern --- pkg/common/patterns.go | 2 +- pkg/common/patterns_test.go | 44 +++++- pkg/detectors/checkvist/checkvist.go | 22 +-- .../checkvist/checkvist_integration_test.go | 118 ++++++++++++++ pkg/detectors/checkvist/checkvist_test.go | 138 ++++++---------- .../cloudflareglobalapikey.go | 23 ++- ...cloudflareglobalapikey_integration_test.go | 124 +++++++++++++++ .../cloudflareglobalapikey_test.go | 144 ++++++----------- pkg/detectors/cloze/cloze.go | 15 +- pkg/detectors/cloze/cloze_integration_test.go | 118 ++++++++++++++ pkg/detectors/cloze/cloze_test.go | 138 ++++++---------- pkg/detectors/currencycloud/currencycloud.go | 22 +-- .../currencycloud_integration_test.go | 121 ++++++++++++++ .../currencycloud/currencycloud_test.go | 141 ++++++----------- pkg/detectors/gocanvas/gocanvas.go | 20 +-- .../gocanvas/gocanvas_integration_test.go | 118 ++++++++++++++ pkg/detectors/gocanvas/gocanvas_test.go | 138 ++++++---------- pkg/detectors/magicbell/magicbell.go | 22 +-- .../magicbell/magicbell_integration_test.go | 121 ++++++++++++++ pkg/detectors/magicbell/magicbell_test.go | 141 ++++++----------- pkg/detectors/mrticktock/mrticktock.go | 22 +-- pkg/detectors/mrticktock/mrticktock_test.go | 140 ++++++----------- .../mrticktock/mrticktok_integration_test.go | 121 ++++++++++++++ pkg/detectors/onedesk/onedesk.go | 18 +-- .../onedesk/onedesk_integration_test.go | 121 ++++++++++++++ pkg/detectors/onedesk/onedesk_test.go | 140 ++++++----------- .../satismeterprojectkey.go | 45 +++--- .../satismeterprojectkey_integration_test.go | 122 +++++++++++++++ .../satismeterprojectkey_test.go | 147 +++++++----------- pkg/detectors/verifier/verifier.go | 21 ++- .../verifier/verifier_integration_test.go | 121 ++++++++++++++ pkg/detectors/verifier/verifier_test.go | 143 ++++++----------- pkg/detectors/zipapi/zipapi.go | 43 +++-- .../zipapi/zipapi_integration_test.go | 122 +++++++++++++++ pkg/detectors/zipapi/zipapi_test.go | 147 +++++++----------- pkg/detectors/zipbooks/zipbooks.go | 23 +-- .../zipbooks/zipbooks_integration_test.go | 121 ++++++++++++++ pkg/detectors/zipbooks/zipbooks_test.go | 141 ++++++----------- 38 files changed, 2249 insertions(+), 1239 deletions(-) create mode 100644 pkg/detectors/checkvist/checkvist_integration_test.go create mode 100644 pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_integration_test.go create mode 100644 pkg/detectors/cloze/cloze_integration_test.go create mode 100644 pkg/detectors/currencycloud/currencycloud_integration_test.go create mode 100644 pkg/detectors/gocanvas/gocanvas_integration_test.go create mode 100644 pkg/detectors/magicbell/magicbell_integration_test.go create mode 100644 pkg/detectors/mrticktock/mrticktok_integration_test.go create mode 100644 pkg/detectors/onedesk/onedesk_integration_test.go create mode 100644 pkg/detectors/satismeterprojectkey/satismeterprojectkey_integration_test.go create mode 100644 pkg/detectors/verifier/verifier_integration_test.go create mode 100644 pkg/detectors/zipapi/zipapi_integration_test.go create mode 100644 pkg/detectors/zipbooks/zipbooks_integration_test.go diff --git a/pkg/common/patterns.go b/pkg/common/patterns.go index 774dbf3f6528..5202c8bd3fc4 100644 --- a/pkg/common/patterns.go +++ b/pkg/common/patterns.go @@ -7,7 +7,7 @@ import ( "strings" ) -const EmailPattern = `\b(?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])\b` +const EmailPattern = `\b((?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))\b` const SubDomainPattern = `\b([A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?)\b` const UUIDPattern = `\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b` const UUIDPatternUpperCase = `\b([0-9A-Z]{8}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{12})\b` diff --git a/pkg/common/patterns_test.go b/pkg/common/patterns_test.go index 8eaeb6abde2f..329f4c7f8d1e 100644 --- a/pkg/common/patterns_test.go +++ b/pkg/common/patterns_test.go @@ -1,9 +1,10 @@ package common import ( - "github.com/stretchr/testify/assert" "regexp" "testing" + + "github.com/stretchr/testify/assert" ) const ( @@ -13,6 +14,47 @@ const ( passwordRegex = `(?im)(?:pass|password)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:^<>;.*&|£\n\s]{4,40})` ) +func TestEmailRegexCheck(t *testing.T) { + testEmails := ` + // positive cases + standard email = john.doe@example.com + subdomain email = jane_doe123@sub.domain.co.us + organization email = alice.smith@test.org + test email = bob@test.name + with tag email = user.name+tag@domain.com + hyphen domain = info@my-site.net + service email = contact@web-service.io + underscore email = example_user@domain.info + departement email = first.last@department.company.edu + alphanumeric email = user1234@domain.co + local server email = admin@local-server.local + dot email = test.email@my-email-service.xyz + special char email = special@characters.com + support email = support@customer-service.org + + // negative cases + not an email = abc.123@z + looks like email = test@user <- no domain + email but not = user12@service.COM <- capital letters not supported for domain + random text = here's some information about local-user@edu user + ` + + expectedStr := []string{ + "john.doe@example.com", "jane_doe123@sub.domain.co.us", + "alice.smith@test.org", "bob@test.name", "user.name+tag@domain.com", + "info@my-site.net", "contact@web-service.io", "example_user@domain.info", + "first.last@department.company.edu", "user1234@domain.co", "admin@local-server.local", + "test.email@my-email-service.xyz", "special@characters.com", "support@customer-service.org", + } + + emailRegex := regexp.MustCompile(EmailPattern) + + emailMatches := emailRegex.FindAllString(testEmails, -1) + + assert.Exactly(t, emailMatches, expectedStr) + +} + func TestUsernameRegexCheck(t *testing.T) { usernameRegexPat := UsernameRegexCheck(usernamePattern) diff --git a/pkg/detectors/checkvist/checkvist.go b/pkg/detectors/checkvist/checkvist.go index 494bc817ef28..c9be36dc23ef 100644 --- a/pkg/detectors/checkvist/checkvist.go +++ b/pkg/detectors/checkvist/checkvist.go @@ -2,17 +2,18 @@ package checkvist import ( "context" - regexp "github.com/wasilibs/go-re2" "net/http" "net/url" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } @@ -24,7 +25,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + `\b([0-9a-zA-Z]{14})\b`) - emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + `\b([\w\.-]+@[\w-]+\.[\w\.-]{2,5})\b`) + emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -38,14 +39,13 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) matches := keyPat.FindAllStringSubmatch(dataStr, -1) - emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1) - for _, emailMatch := range emailMatches { - if len(emailMatch) != 2 { - continue - } - resEmailMatch := strings.TrimSpace(emailMatch[1]) + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } + for emailMatch := range uniqueEmailMatches { for _, match := range matches { if len(match) != 2 { continue @@ -55,12 +55,12 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_Checkvist, Raw: []byte(resMatch), - RawV2: []byte(resMatch + resEmailMatch), + RawV2: []byte(resMatch + emailMatch), } if verify { payload := url.Values{} - payload.Add("username", resEmailMatch) + payload.Add("username", emailMatch) payload.Add("remote_key", resMatch) req, err := http.NewRequestWithContext(ctx, "GET", "https://checkvist.com/auth/login.json?version=2", strings.NewReader(payload.Encode())) diff --git a/pkg/detectors/checkvist/checkvist_integration_test.go b/pkg/detectors/checkvist/checkvist_integration_test.go new file mode 100644 index 000000000000..92e6583ea15d --- /dev/null +++ b/pkg/detectors/checkvist/checkvist_integration_test.go @@ -0,0 +1,118 @@ +//go:build detectors +// +build detectors + +package checkvist + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestCheckvist_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + user := testSecrets.MustGetField("CHECKVIST_EMAIL") + secret := testSecrets.MustGetField("CHECKVIST") + inactiveSecret := testSecrets.MustGetField("CHECKVIST_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within", user, secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Checkvist, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within but not valid", user, inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Checkvist, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Checkvist.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Checkvist.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/checkvist/checkvist_test.go b/pkg/detectors/checkvist/checkvist_test.go index 92e6583ea15d..86525059cf5e 100644 --- a/pkg/detectors/checkvist/checkvist_test.go +++ b/pkg/detectors/checkvist/checkvist_test.go @@ -1,117 +1,81 @@ -//go:build detectors -// +build detectors - package checkvist import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestCheckvist_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - user := testSecrets.MustGetField("CHECKVIST_EMAIL") - secret := testSecrets.MustGetField("CHECKVIST") - inactiveSecret := testSecrets.MustGetField("CHECKVIST_INACTIVE") +var ( + validPattern = "wdvnusa87afxYn / testuser1005@example.com" + invalidPattern = "wdvn-usa87a-fxp9ioasQQsstestUsQQ@example" +) + +func TestCheckvist_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within", user, secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Checkvist, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("checkvist: %s", validPattern), + want: []string{"wdvnusa87afxYntestuser1005@example.com"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within but not valid", user, inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Checkvist, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("checkvist keyword is not close to the real key and id = %s", validPattern), + want: nil, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("checkvist: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Checkvist.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } - got[i].Raw = nil } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Checkvist.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey.go b/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey.go index 96d28389bb46..f0cfc2c522a4 100644 --- a/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey.go +++ b/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey.go @@ -12,7 +12,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } @@ -24,7 +24,7 @@ var ( apiKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloudflare"}) + `\b([A-Za-z0-9_-]{37})\b`) - emailPat = regexp.MustCompile(`\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}(\.[A-Za-z]{2})?)\b`) + emailPat = regexp.MustCompile(common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -38,7 +38,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) apiKeyMatches := apiKeyPat.FindAllStringSubmatch(dataStr, -1) - emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1) + + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } for _, apiKeyMatch := range apiKeyMatches { if len(apiKeyMatch) != 2 { @@ -46,17 +50,12 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result } apiKeyRes := strings.TrimSpace(apiKeyMatch[1]) - for _, emailMatch := range emailMatches { - if len(emailMatch) != 2 { - continue - } - emailRes := strings.TrimSpace(emailMatch[1]) - + for emailMatch := range uniqueEmailMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_CloudflareGlobalApiKey, - Redacted: emailRes, + Redacted: emailMatch, Raw: []byte(apiKeyRes), - RawV2: []byte(apiKeyRes + emailRes), + RawV2: []byte(apiKeyRes + emailMatch), } if verify { @@ -64,7 +63,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result if err != nil { continue } - req.Header.Add("X-Auth-Email", emailRes) + req.Header.Add("X-Auth-Email", emailMatch) req.Header.Add("X-Auth-Key", apiKeyRes) req.Header.Add("Content-Type", "application/json") diff --git a/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_integration_test.go b/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_integration_test.go new file mode 100644 index 000000000000..49a96a617fa4 --- /dev/null +++ b/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_integration_test.go @@ -0,0 +1,124 @@ +//go:build detectors +// +build detectors + +package cloudflareglobalapikey + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestCloudflareGlobalApiKey_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + + globalApiKey := testSecrets.MustGetField("CLOUDFLARE_GLOBAL_API_KEY") + globalApiKeyEmail := testSecrets.MustGetField("CLOUDFLARE_GLOBAL_API_KEY_EMAIL") + inactiveglobalApiKey := testSecrets.MustGetField("CLOUDFLARE_GLOBAL_API_KEY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a cloudflare globalapikey secret %s within with email %s", globalApiKey, globalApiKeyEmail)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_CloudflareGlobalApiKey, + Redacted: globalApiKeyEmail, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a cloudflare globalapikey secret %s with email %s within but unverified", inactiveglobalApiKey, globalApiKeyEmail)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_CloudflareGlobalApiKey, + Redacted: globalApiKeyEmail, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("CloudflareGlobalApiKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("CloudflareGlobalApiKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_test.go b/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_test.go index 49a96a617fa4..fef882a30b73 100644 --- a/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_test.go +++ b/pkg/detectors/cloudflareglobalapikey/cloudflareglobalapikey_test.go @@ -1,124 +1,82 @@ -//go:build detectors -// +build detectors - package cloudflareglobalapikey import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestCloudflareGlobalApiKey_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } +var ( + validPattern = "abcD123efg456HIJklmn789OPQ_rstUVWxYZ-012 / testuser1005@example.com" + invalidPattern = "abcD123efg456HIJklmn789OPQ_rstUVWxYZ-012/testing@go" +) - globalApiKey := testSecrets.MustGetField("CLOUDFLARE_GLOBAL_API_KEY") - globalApiKeyEmail := testSecrets.MustGetField("CLOUDFLARE_GLOBAL_API_KEY_EMAIL") - inactiveglobalApiKey := testSecrets.MustGetField("CLOUDFLARE_GLOBAL_API_KEY_INACTIVE") +func TestCloudFlareGlobalAPIKey_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a cloudflare globalapikey secret %s within with email %s", globalApiKey, globalApiKeyEmail)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_CloudflareGlobalApiKey, - Redacted: globalApiKeyEmail, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("cloudflare: %s", validPattern), + want: []string{"abcD123efg456HIJklmn789OPQ_rstUVWxYZ-testuser1005@example.com"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a cloudflare globalapikey secret %s with email %s within but unverified", inactiveglobalApiKey, globalApiKeyEmail)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_CloudflareGlobalApiKey, - Redacted: globalApiKeyEmail, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("cloudflare keyword is not close to the real key and id = %s", validPattern), + want: nil, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("cloudflare: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("CloudflareGlobalApiKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("CloudflareGlobalApiKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/cloze/cloze.go b/pkg/detectors/cloze/cloze.go index 90cac9e8870f..010474e7b01d 100644 --- a/pkg/detectors/cloze/cloze.go +++ b/pkg/detectors/cloze/cloze.go @@ -25,7 +25,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloze"}) + `\b([0-9a-f]{32})\b`) - emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloze"}) + `\b([\w\.-]+@[\w-]+\.[\w\.-]{2,5})\b`) + emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"cloze"}) + common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -39,14 +39,13 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) matches := keyPat.FindAllStringSubmatch(dataStr, -1) - emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1) - for _, emailMatch := range emailMatches { - if len(emailMatch) != 2 { - continue - } - resEmailMatch := strings.TrimSpace(emailMatch[1]) + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } + for emailMatch := range uniqueEmailMatches { for _, match := range matches { if len(match) != 2 { continue @@ -60,7 +59,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result if verify { payload := url.Values{} - payload.Add("user", resEmailMatch) + payload.Add("user", emailMatch) payload.Add("api_key", resMatch) req, err := http.NewRequestWithContext(ctx, "GET", "https://api.cloze.com/v1/profile?"+payload.Encode(), nil) diff --git a/pkg/detectors/cloze/cloze_integration_test.go b/pkg/detectors/cloze/cloze_integration_test.go new file mode 100644 index 000000000000..73733f5edd97 --- /dev/null +++ b/pkg/detectors/cloze/cloze_integration_test.go @@ -0,0 +1,118 @@ +//go:build detectors +// +build detectors + +package cloze + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestCloze_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + email := testSecrets.MustGetField("CLOZE_EMAIL") + secret := testSecrets.MustGetField("CLOZE") + inactiveSecret := testSecrets.MustGetField("CLOZE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a cloze user %s with cloze secret %s within", email, secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Cloze, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a cloze user %s with cloze secret %s within but not valid", email, inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Cloze, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Cloze.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Cloze.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/cloze/cloze_test.go b/pkg/detectors/cloze/cloze_test.go index 73733f5edd97..71ba2d172071 100644 --- a/pkg/detectors/cloze/cloze_test.go +++ b/pkg/detectors/cloze/cloze_test.go @@ -1,117 +1,81 @@ -//go:build detectors -// +build detectors - package cloze import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestCloze_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - email := testSecrets.MustGetField("CLOZE_EMAIL") - secret := testSecrets.MustGetField("CLOZE") - inactiveSecret := testSecrets.MustGetField("CLOZE_INACTIVE") +var ( + validPattern = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d / testuser1005@example.com" + invalidPattern = "abcD123efg456HIJklmn789OPQ_rstUVWxYZ-012/testing@go" +) + +func TestCloze_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a cloze user %s with cloze secret %s within", email, secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Cloze, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("cloze: %s", validPattern), + want: []string{"1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a cloze user %s with cloze secret %s within but not valid", email, inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Cloze, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("cloze keyword is not close to the real key and id = %s", validPattern), + want: nil, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("cloze: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Cloze.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } - got[i].Raw = nil } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Cloze.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/currencycloud/currencycloud.go b/pkg/detectors/currencycloud/currencycloud.go index 239fc232ded4..adf664b2ad0d 100644 --- a/pkg/detectors/currencycloud/currencycloud.go +++ b/pkg/detectors/currencycloud/currencycloud.go @@ -3,17 +3,18 @@ package currencycloud import ( "context" "fmt" - regexp "github.com/wasilibs/go-re2" "io" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } @@ -25,7 +26,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"currencycloud"}) + `\b([0-9a-z]{64})\b`) - emailPat = regexp.MustCompile(`\b([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-z]+)\b`) + emailPat = regexp.MustCompile(common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -39,7 +40,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) matches := keyPat.FindAllStringSubmatch(dataStr, -1) - emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1) + + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } for _, match := range matches { if len(match) != 2 { @@ -47,12 +52,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result } resMatch := strings.TrimSpace(match[1]) - for _, emailmatch := range emailMatches { - if len(emailmatch) != 2 { - continue - } - resEmailMatch := strings.TrimSpace(emailmatch[1]) - + for emailmatch := range uniqueEmailMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_CurrencyCloud, Raw: []byte(resMatch), @@ -61,7 +61,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result if verify { for _, env := range environments { // Get authentication token - payload := strings.NewReader(`{"login_id":"` + resEmailMatch + `","api_key":"` + resMatch + `"`) + payload := strings.NewReader(`{"login_id":"` + emailmatch + `","api_key":"` + resMatch + `"`) req, err := http.NewRequestWithContext(ctx, "POST", "https://"+env+".currencycloud.com/v2/authenticate/api", payload) if err != nil { continue diff --git a/pkg/detectors/currencycloud/currencycloud_integration_test.go b/pkg/detectors/currencycloud/currencycloud_integration_test.go new file mode 100644 index 000000000000..72d4ff92f288 --- /dev/null +++ b/pkg/detectors/currencycloud/currencycloud_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package currencycloud + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestCurrencycloud_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("CURRENCYCLOUD") + email := testSecrets.MustGetField("SCANNERS_EMAIL") + inactiveSecret := testSecrets.MustGetField("CURRENCYCLOUD_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a currencycloud secret %s within %s", secret, email)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_CurrencyCloud, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a currencycloud secret %s within %s but not valid", inactiveSecret, email)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_CurrencyCloud, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Currencycloud.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Currencycloud.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/currencycloud/currencycloud_test.go b/pkg/detectors/currencycloud/currencycloud_test.go index 72d4ff92f288..29641af47874 100644 --- a/pkg/detectors/currencycloud/currencycloud_test.go +++ b/pkg/detectors/currencycloud/currencycloud_test.go @@ -1,121 +1,82 @@ -//go:build detectors -// +build detectors - package currencycloud import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestCurrencycloud_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("CURRENCYCLOUD") - email := testSecrets.MustGetField("SCANNERS_EMAIL") - inactiveSecret := testSecrets.MustGetField("CURRENCYCLOUD_INACTIVE") +var ( + validPattern = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b / testuser1005@example.com" + invalidPattern = "abcD123efg456HIJklmn789OPQ_rstUVWxYZ-012/testing@go" +) + +func TestCurrencyCloud_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a currencycloud secret %s within %s", secret, email)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_CurrencyCloud, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("currencycloud: %s", validPattern), + want: []string{"1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a currencycloud secret %s within %s but not valid", inactiveSecret, email)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_CurrencyCloud, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("currencycloud keyword is not close to the real key and id = %s", validPattern), + want: nil, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("currencycloud: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Currencycloud.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Currencycloud.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/gocanvas/gocanvas.go b/pkg/detectors/gocanvas/gocanvas.go index 69068c48e601..6fdb201a31c2 100644 --- a/pkg/detectors/gocanvas/gocanvas.go +++ b/pkg/detectors/gocanvas/gocanvas.go @@ -4,18 +4,19 @@ import ( "context" "encoding/xml" "fmt" - regexp "github.com/wasilibs/go-re2" "io" "net/http" "net/url" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } @@ -27,7 +28,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gocanvas"}) + `\b([0-9A-Za-z/+]{43}=[ \r\n]{1})`) - emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gocanvas"}) + `\b([\w\.-]+@[\w-]+\.[\w\.-]{2,5})\b`) + emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gocanvas"}) + common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -41,14 +42,13 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) matches := keyPat.FindAllStringSubmatch(dataStr, -1) - emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1) - for _, emailMatch := range emailMatches { - if len(emailMatch) != 2 { - continue - } - resEmailMatch := strings.TrimSpace(emailMatch[1]) + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } + for emailMatch := range uniqueEmailMatches { for _, match := range matches { if len(match) != 2 { continue @@ -62,7 +62,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result if verify { payload := url.Values{} - payload.Add("username", resEmailMatch) + payload.Add("username", emailMatch) req, err := http.NewRequestWithContext(ctx, "GET", "https://www.gocanvas.com/apiv2/forms.xml", strings.NewReader(payload.Encode())) if err != nil { diff --git a/pkg/detectors/gocanvas/gocanvas_integration_test.go b/pkg/detectors/gocanvas/gocanvas_integration_test.go new file mode 100644 index 000000000000..d6bf551feb6e --- /dev/null +++ b/pkg/detectors/gocanvas/gocanvas_integration_test.go @@ -0,0 +1,118 @@ +//go:build detectors +// +build detectors + +package gocanvas + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestGoCanvas_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + username := testSecrets.MustGetField("SCANNERS_EMAIL") + secret := testSecrets.MustGetField("GOCANVAS") + inactiveSecret := testSecrets.MustGetField("GOCANVAS_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a gocanvas username %s with gocanvas secret %s within", username, secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_GoCanvas, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a gocanvas username %s with gocanvas secret %s within but not valid", username, inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_GoCanvas, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("GoCanvas.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("GoCanvas.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/gocanvas/gocanvas_test.go b/pkg/detectors/gocanvas/gocanvas_test.go index d6bf551feb6e..d711c2186d6b 100644 --- a/pkg/detectors/gocanvas/gocanvas_test.go +++ b/pkg/detectors/gocanvas/gocanvas_test.go @@ -1,117 +1,81 @@ -//go:build detectors -// +build detectors - package gocanvas import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestGoCanvas_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - username := testSecrets.MustGetField("SCANNERS_EMAIL") - secret := testSecrets.MustGetField("GOCANVAS") - inactiveSecret := testSecrets.MustGetField("GOCANVAS_INACTIVE") +var ( + validPattern = "Abc123+/Xyz456mnopQRStuvw89YZ12345678ABad6C= / gocanvasemail = testuser1005@example.com" + invalidPattern = "abcD123efg456HIJklmn789OPQ_rstUVWxYZ-012/testing@go" +) + +func TestGoCanvas_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a gocanvas username %s with gocanvas secret %s within", username, secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_GoCanvas, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("gocanvas: %s", validPattern), + want: []string{"Abc123+/Xyz456mnopQRStuvw89YZ12345678ABad6C="}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a gocanvas username %s with gocanvas secret %s within but not valid", username, inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_GoCanvas, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("gocanvas keyword is not close to the real key and id = %s", validPattern), + want: nil, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("gocanvas: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("GoCanvas.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } - got[i].Raw = nil } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("GoCanvas.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/magicbell/magicbell.go b/pkg/detectors/magicbell/magicbell.go index cd007c9a7bab..99cc7ac4c7a2 100644 --- a/pkg/detectors/magicbell/magicbell.go +++ b/pkg/detectors/magicbell/magicbell.go @@ -2,16 +2,17 @@ package magicbell import ( "context" - regexp "github.com/wasilibs/go-re2" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } @@ -23,7 +24,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"magicbell"}) + `\b([a-zA-Z-0-9]{40})\b`) - emailPat = regexp.MustCompile(`\b([a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)\b`) + emailPat = regexp.MustCompile(common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -37,7 +38,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) apiKeyMatches := keyPat.FindAllStringSubmatch(dataStr, -1) - emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1) + + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } for _, keyMatch := range apiKeyMatches { if len(keyMatch) != 2 { @@ -45,12 +50,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result } apiKeyRes := strings.TrimSpace(keyMatch[1]) - for _, emailMatch := range emailMatches { - if len(emailMatch) != 2 { - continue - } - emailRes := strings.TrimSpace(emailMatch[1]) - + for emailMatch := range uniqueEmailMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_MagicBell, Raw: []byte(apiKeyRes), @@ -62,7 +62,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result continue } req.Header.Add("X-MAGICBELL-API-KEY", apiKeyRes) - req.Header.Add("X-MAGICBELL-USER-EMAIL", emailRes) + req.Header.Add("X-MAGICBELL-USER-EMAIL", emailMatch) res, err := client.Do(req) if err == nil { defer res.Body.Close() diff --git a/pkg/detectors/magicbell/magicbell_integration_test.go b/pkg/detectors/magicbell/magicbell_integration_test.go new file mode 100644 index 000000000000..ad8b34a18a6b --- /dev/null +++ b/pkg/detectors/magicbell/magicbell_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package magicbell + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestMagicBell_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("MAGICBELL") + inactiveSecret := testSecrets.MustGetField("MAGICBELL_INACTIVE") + userEmail := testSecrets.MustGetField("MAGICBELL_USER_EMAIL") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a magicbell secret %s with email %s within", secret, userEmail)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_MagicBell, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a magicbell secret %s with email %s within but not valid", inactiveSecret, userEmail)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_MagicBell, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("MagicBell.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("MagicBell.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/magicbell/magicbell_test.go b/pkg/detectors/magicbell/magicbell_test.go index ad8b34a18a6b..0476897ff40d 100644 --- a/pkg/detectors/magicbell/magicbell_test.go +++ b/pkg/detectors/magicbell/magicbell_test.go @@ -1,121 +1,82 @@ -//go:build detectors -// +build detectors - package magicbell import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +var ( + validPattern = "abcde12345-67890fghijklmnopqrs-tuvwxyzYu/ magicbell_email = testuser1005@example.com" + invalidPattern = "abcde12345-67890fghijklmnopqrs#tuvwxyz/testing@go" ) -func TestMagicBell_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("MAGICBELL") - inactiveSecret := testSecrets.MustGetField("MAGICBELL_INACTIVE") - userEmail := testSecrets.MustGetField("MAGICBELL_USER_EMAIL") +func TestMagicBell_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a magicbell secret %s with email %s within", secret, userEmail)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_MagicBell, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("magicbell: %s", validPattern), + want: []string{"abcde12345-67890fghijklmnopqrs-tuvwxyzYu"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a magicbell secret %s with email %s within but not valid", inactiveSecret, userEmail)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_MagicBell, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("magicbell keyword is not close to the real key and id = %s", validPattern), + want: nil, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("magicbell: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("MagicBell.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("MagicBell.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/mrticktock/mrticktock.go b/pkg/detectors/mrticktock/mrticktock.go index 2d43c3a937f1..f1e117abafab 100644 --- a/pkg/detectors/mrticktock/mrticktock.go +++ b/pkg/detectors/mrticktock/mrticktock.go @@ -3,17 +3,18 @@ package mrticktock import ( "context" "fmt" - regexp "github.com/wasilibs/go-re2" "io" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } @@ -24,7 +25,7 @@ var ( client = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - emailPat = regexp.MustCompile(`\b([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-z]+)\b`) + emailPat = regexp.MustCompile(common.EmailPattern) pwordPat = regexp.MustCompile(detectors.PrefixRegex([]string{"mrticktock"}) + `\b([a-zA-Z0-9!=@#$%()_^]{1,50})`) ) @@ -38,15 +39,14 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := emailPat.FindAllStringSubmatch(dataStr, -1) passwordMatches := pwordPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } + for emailMatch := range uniqueEmailMatches { for _, passwordMatch := range passwordMatches { if len(passwordMatch) != 2 { continue @@ -55,11 +55,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_Mrticktock, - Raw: []byte(resMatch), + Raw: []byte(emailMatch), } if verify { - payload := strings.NewReader(fmt.Sprintf(`email=%s&password=%s`, resMatch, resPassword)) + payload := strings.NewReader(fmt.Sprintf(`email=%s&password=%s`, emailMatch, resPassword)) req, err := http.NewRequestWithContext(ctx, "POST", "https://mrticktock.com/app/api/is_timer_active", payload) if err != nil { continue diff --git a/pkg/detectors/mrticktock/mrticktock_test.go b/pkg/detectors/mrticktock/mrticktock_test.go index 0a9e09075f90..a04ef63de03a 100644 --- a/pkg/detectors/mrticktock/mrticktock_test.go +++ b/pkg/detectors/mrticktock/mrticktock_test.go @@ -1,121 +1,77 @@ -//go:build detectors -// +build detectors - package mrticktock import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +var ( + validPattern = "abc123!@#^()def456GHijk$% / testuser1005@example.com" + invalidPattern = "abcde12345-67890fghijklmnopqrs#tuvwxyz/testing@go" ) -func TestMrticktock_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - email := testSecrets.MustGetField("SCANNERS_EMAIL") - pword := testSecrets.MustGetField("SCANNERS_PASS") - inactiveSecret := testSecrets.MustGetField("SCANNERS_INACTIVE") +func TestMrTickTock_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a mrticktock secret %s within %s", email, pword)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Mrticktock, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("mrticktock: %s", validPattern), + want: []string{"testuser1005@example.com"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a mrticktock secret %s within %s but not valid", email, inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Mrticktock, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("mrticktock: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Mrticktock.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Mrticktock.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/mrticktock/mrticktok_integration_test.go b/pkg/detectors/mrticktock/mrticktok_integration_test.go new file mode 100644 index 000000000000..0a9e09075f90 --- /dev/null +++ b/pkg/detectors/mrticktock/mrticktok_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package mrticktock + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestMrticktock_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + email := testSecrets.MustGetField("SCANNERS_EMAIL") + pword := testSecrets.MustGetField("SCANNERS_PASS") + inactiveSecret := testSecrets.MustGetField("SCANNERS_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a mrticktock secret %s within %s", email, pword)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Mrticktock, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a mrticktock secret %s within %s but not valid", email, inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Mrticktock, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Mrticktock.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Mrticktock.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/onedesk/onedesk.go b/pkg/detectors/onedesk/onedesk.go index 0afc88963034..b339f2861df6 100644 --- a/pkg/detectors/onedesk/onedesk.go +++ b/pkg/detectors/onedesk/onedesk.go @@ -25,7 +25,7 @@ var ( client = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - emailPat = regexp.MustCompile(`\b([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-z]+)\b`) + emailPat = regexp.MustCompile(common.EmailPattern) pwordPat = regexp.MustCompile(detectors.PrefixRegex([]string{"onedesk"}) + `\b([a-zA-Z0-9!=@#$%^]{8,64})`) ) @@ -39,14 +39,14 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := emailPat.FindAllStringSubmatch(dataStr, -1) pwordMatches := pwordPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } + + for emailMatch := range uniqueEmailMatches { for _, pwordMatch := range pwordMatches { if len(pwordMatch) != 2 { continue @@ -55,11 +55,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_Onedesk, - Raw: []byte(resMatch), + Raw: []byte(emailMatch), } if verify { - payload := strings.NewReader(fmt.Sprintf(`{"email": "%s", "password": "%s"}`, resMatch, resPword)) + payload := strings.NewReader(fmt.Sprintf(`{"email": "%s", "password": "%s"}`, emailMatch, resPword)) req, err := http.NewRequestWithContext(ctx, "POST", "https://app.onedesk.com/rest/2.0/login/loginUser", payload) if err != nil { continue diff --git a/pkg/detectors/onedesk/onedesk_integration_test.go b/pkg/detectors/onedesk/onedesk_integration_test.go new file mode 100644 index 000000000000..1fdd5e72385a --- /dev/null +++ b/pkg/detectors/onedesk/onedesk_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package onedesk + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestOnedesk_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + email := testSecrets.MustGetField("SCANNERS_EMAIL") + pword := testSecrets.MustGetField("SCANNERS_PASS") + inactivePword := testSecrets.MustGetField("SCANNERS_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a onedesk email %s within onedesk password %s", email, pword)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Onedesk, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a onedesk secret %s within onedesk password %s but not valid", email, inactivePword)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Onedesk, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Onedesk.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Onedesk.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/onedesk/onedesk_test.go b/pkg/detectors/onedesk/onedesk_test.go index 1fdd5e72385a..53be64becc5c 100644 --- a/pkg/detectors/onedesk/onedesk_test.go +++ b/pkg/detectors/onedesk/onedesk_test.go @@ -1,121 +1,77 @@ -//go:build detectors -// +build detectors - package onedesk import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +var ( + validPattern = "abc123!@#^()def456GHijk$% / testuser1005@example.com" + invalidPattern = "abcde/testing@go" ) -func TestOnedesk_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - email := testSecrets.MustGetField("SCANNERS_EMAIL") - pword := testSecrets.MustGetField("SCANNERS_PASS") - inactivePword := testSecrets.MustGetField("SCANNERS_INACTIVE") +func TestOneDesk_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a onedesk email %s within onedesk password %s", email, pword)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Onedesk, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: fmt.Sprintf("onedesk: %s", validPattern), + want: []string{"testuser1005@example.com"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a onedesk secret %s within onedesk password %s but not valid", email, inactivePword)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Onedesk, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("onedesk: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Onedesk.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Onedesk.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/satismeterprojectkey/satismeterprojectkey.go b/pkg/detectors/satismeterprojectkey/satismeterprojectkey.go index 59d1cf341ef0..e94f57bc60ba 100644 --- a/pkg/detectors/satismeterprojectkey/satismeterprojectkey.go +++ b/pkg/detectors/satismeterprojectkey/satismeterprojectkey.go @@ -4,10 +4,11 @@ import ( "context" b64 "encoding/base64" "fmt" - regexp "github.com/wasilibs/go-re2" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -25,7 +26,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"satismeter"}) + `\b([a-zA-Z0-9]{24})\b`) - emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"satismeter"}) + `\b([a-zA-Z0-9]{4,20}@[a-zA-Z0-9]{2,12}.[a-zA-Z0-9]{2,12})\b`) + emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"satismeter"}) + common.EmailPattern) passPat = regexp.MustCompile(detectors.PrefixRegex([]string{"satismeter"}) + `\b([a-zA-Z0-9!=@#$%^]{6,32})`) ) @@ -39,40 +40,34 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - emailmatches := emailPat.FindAllStringSubmatch(dataStr, -1) - passmatches := passPat.FindAllStringSubmatch(dataStr, -1) - - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) + uniqueEmailMatches, uniqueKeyMatches, uniquePassMatches := make(map[string]struct{}), make(map[string]struct{}), make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } - for _, emailmatch := range emailmatches { - if len(emailmatch) != 2 { - continue - } - resEmailMatch := strings.TrimSpace(emailmatch[1]) + for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) { + uniqueKeyMatches[strings.TrimSpace(match[1])] = struct{}{} + } - for _, passmatch := range passmatches { - if len(passmatch) != 2 { - continue - } - resPassMatch := strings.TrimSpace(passmatch[1]) + for _, match := range passPat.FindAllStringSubmatch(dataStr, -1) { + uniquePassMatches[strings.TrimSpace(match[1])] = struct{}{} + } + for keyMatch := range uniqueKeyMatches { + for emailMatch := range uniqueEmailMatches { + for passMatch := range uniquePassMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_SatismeterProjectkey, - Raw: []byte(resMatch), - RawV2: []byte(resMatch + resPassMatch), + Raw: []byte(keyMatch), + RawV2: []byte(keyMatch + passMatch), } if verify { - data := fmt.Sprintf("%s:%s", resEmailMatch, resPassMatch) + data := fmt.Sprintf("%s:%s", emailMatch, passMatch) sEnc := b64.StdEncoding.EncodeToString([]byte(data)) - req, err := http.NewRequestWithContext(ctx, "GET", "https://app.satismeter.com/api/users?project="+resMatch, nil) + req, err := http.NewRequestWithContext(ctx, "GET", "https://app.satismeter.com/api/users?project="+keyMatch, nil) if err != nil { continue } diff --git a/pkg/detectors/satismeterprojectkey/satismeterprojectkey_integration_test.go b/pkg/detectors/satismeterprojectkey/satismeterprojectkey_integration_test.go new file mode 100644 index 000000000000..8b0a3bfcd249 --- /dev/null +++ b/pkg/detectors/satismeterprojectkey/satismeterprojectkey_integration_test.go @@ -0,0 +1,122 @@ +//go:build detectors +// +build detectors + +package satismeterprojectkey + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestSatismeterProjectkey_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("SATISMETERPROJECTKEY_TOKEN") + inactiveSecret := testSecrets.MustGetField("SATISMETERPROJECTKEY_INACTIVE") + email := testSecrets.MustGetField("SATISMETERPROJECTKEY_EMAIL") + password := testSecrets.MustGetField("SATISMETERPROJECTKEY_PASSWORD") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a satismeterprojectkey secret %s within satismeterprojectkeyemail %s satismeterprojectkeypassword %s", secret, email, password)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SatismeterProjectkey, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a satismeterprojectkey secret %s within but not valid satismeterprojectkeyemail %s satismeterprojectkeypassword %s", inactiveSecret, email, password)), // the secret would satisfy the regex but not pass validation), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_SatismeterProjectkey, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("SatismeterProjectkey.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("SatismeterProjectkey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/satismeterprojectkey/satismeterprojectkey_test.go b/pkg/detectors/satismeterprojectkey/satismeterprojectkey_test.go index 8b0a3bfcd249..1e2c6414cb2c 100644 --- a/pkg/detectors/satismeterprojectkey/satismeterprojectkey_test.go +++ b/pkg/detectors/satismeterprojectkey/satismeterprojectkey_test.go @@ -1,122 +1,85 @@ -//go:build detectors -// +build detectors - package satismeterprojectkey import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +var ( + validPattern = ` + satismeter_key = satismeter12345678901234 + satismeter_email = satismeter@example.com + satismeter_pass = satismeterSecureP@ss123 + ` + invalidPattern = "abcde/testing@go" ) -func TestSatismeterProjectkey_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("SATISMETERPROJECTKEY_TOKEN") - inactiveSecret := testSecrets.MustGetField("SATISMETERPROJECTKEY_INACTIVE") - email := testSecrets.MustGetField("SATISMETERPROJECTKEY_EMAIL") - password := testSecrets.MustGetField("SATISMETERPROJECTKEY_PASSWORD") +func TestSatisMeterProjectKey_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a satismeterprojectkey secret %s within satismeterprojectkeyemail %s satismeterprojectkeypassword %s", secret, email, password)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_SatismeterProjectkey, - Verified: true, - }, - }, - wantErr: false, - }, - { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a satismeterprojectkey secret %s within but not valid satismeterprojectkeyemail %s satismeterprojectkeypassword %s", inactiveSecret, email, password)), // the secret would satisfy the regex but not pass validation), - verify: true, + name: "valid pattern", + input: validPattern, + want: []string{ + "satismeter12345678901234satismeter12345678901234", + "satismeter12345678901234satismeter@example", + "satismeter12345678901234satismeterSecureP@ss123", }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_SatismeterProjectkey, - Verified: false, - }, - }, - wantErr: false, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("satismeter: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("SatismeterProjectkey.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("SatismeterProjectkey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/verifier/verifier.go b/pkg/detectors/verifier/verifier.go index e6388a7a8026..854364051b77 100644 --- a/pkg/detectors/verifier/verifier.go +++ b/pkg/detectors/verifier/verifier.go @@ -3,10 +3,11 @@ package verifier import ( "context" "fmt" - regexp "github.com/wasilibs/go-re2" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -24,7 +25,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"verifier"}) + `\b([a-z0-9]{96})\b`) - emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"verifier"}) + `\b([a-zA-Z-0-9-]{5,16}\@[a-zA-Z-0-9]{4,16}\.[a-zA-Z-0-9]{3,6})\b`) + emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"verifier"}) + common.EmailPattern) ) // Keywords are used for efficiently pre-filtering chunks. @@ -38,7 +39,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result dataStr := string(data) matches := keyPat.FindAllStringSubmatch(dataStr, -1) - idMatches := emailPat.FindAllStringSubmatch(dataStr, -1) + + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } for _, match := range matches { if len(match) != 2 { @@ -46,19 +51,13 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result } resMatch := strings.TrimSpace(match[1]) - for _, idMatch := range idMatches { - if len(idMatch) != 2 { - continue - } - - userPatMatch := strings.TrimSpace(idMatch[1]) - + for emailMatch := range uniqueEmailMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_Verifier, Raw: []byte(resMatch), } if verify { - req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://verifier.meetchopra.com/verify/%s?token=%s", userPatMatch, resMatch), nil) + req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://verifier.meetchopra.com/verify/%s?token=%s", emailMatch, resMatch), nil) if err != nil { continue } diff --git a/pkg/detectors/verifier/verifier_integration_test.go b/pkg/detectors/verifier/verifier_integration_test.go new file mode 100644 index 000000000000..d6e55a1861e3 --- /dev/null +++ b/pkg/detectors/verifier/verifier_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package verifier + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVerifier_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VERIFIER") + user := testSecrets.MustGetField("ACCOUNT_USER") + inactiveSecret := testSecrets.MustGetField("VERIFIER_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a verifier secret %s within verifier %s", secret, user)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Verifier, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a verifier secret %s within verifier %s but not valid", inactiveSecret, user)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Verifier, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Verifier.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Verifier.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/verifier/verifier_test.go b/pkg/detectors/verifier/verifier_test.go index d6e55a1861e3..22a78b4852d3 100644 --- a/pkg/detectors/verifier/verifier_test.go +++ b/pkg/detectors/verifier/verifier_test.go @@ -1,121 +1,80 @@ -//go:build detectors -// +build detectors - package verifier import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +var ( + validPattern = ` + verifier_key = abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890 + verifier_email = verifiertest@example.com + ` + invalidPattern = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890" ) -func TestVerifier_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VERIFIER") - user := testSecrets.MustGetField("ACCOUNT_USER") - inactiveSecret := testSecrets.MustGetField("VERIFIER_INACTIVE") +func TestVerifier_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a verifier secret %s within verifier %s", secret, user)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Verifier, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern", + input: validPattern, + want: []string{"abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a verifier secret %s within verifier %s but not valid", inactiveSecret, user)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Verifier, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("verifier: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Verifier.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Verifier.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/zipapi/zipapi.go b/pkg/detectors/zipapi/zipapi.go index b25e7cabaa3d..e95dce05a134 100644 --- a/pkg/detectors/zipapi/zipapi.go +++ b/pkg/detectors/zipapi/zipapi.go @@ -4,10 +4,11 @@ import ( "context" b64 "encoding/base64" "fmt" - regexp "github.com/wasilibs/go-re2" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -25,7 +26,7 @@ var ( // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zipapi"}) + `\b([0-9a-z]{32})\b`) - emailPat = regexp.MustCompile(`\b([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-z]+)\b`) + emailPat = regexp.MustCompile(common.EmailPattern) pwordPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zipapi"}) + `\b([a-zA-Z0-9!=@#$%^]{7,})`) ) @@ -39,35 +40,31 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1) - pwordMatches := pwordPat.FindAllStringSubmatch(dataStr, -1) + uniqueEmailMatches, uniqueKeyMatches, uniquePassMatches := make(map[string]struct{}), make(map[string]struct{}), make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) - for _, emailMatch := range emailMatches { - if len(emailMatch) != 2 { - continue - } - resEmail := strings.TrimSpace(emailMatch[1]) - for _, pwordMatch := range pwordMatches { - if len(pwordMatch) != 2 { - continue - } - resPword := strings.TrimSpace(pwordMatch[1]) + for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) { + uniqueKeyMatches[strings.TrimSpace(match[1])] = struct{}{} + } + + for _, match := range pwordPat.FindAllStringSubmatch(dataStr, -1) { + uniquePassMatches[strings.TrimSpace(match[1])] = struct{}{} + } + for keyMatch := range uniqueKeyMatches { + for emailMatch := range uniqueEmailMatches { + for passMatch := range uniquePassMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_ZipAPI, - Raw: []byte(resMatch), + Raw: []byte(keyMatch), } if verify { - data := fmt.Sprintf("%s:%s", resEmail, resPword) + data := fmt.Sprintf("%s:%s", emailMatch, passMatch) sEnc := b64.StdEncoding.EncodeToString([]byte(data)) - req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://service.zipapi.us/zipcode/90210/?X-API-KEY=%s", resMatch), nil) + req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://service.zipapi.us/zipcode/90210/?X-API-KEY=%s", keyMatch), nil) if err != nil { continue } diff --git a/pkg/detectors/zipapi/zipapi_integration_test.go b/pkg/detectors/zipapi/zipapi_integration_test.go new file mode 100644 index 000000000000..8c5c73a1e847 --- /dev/null +++ b/pkg/detectors/zipapi/zipapi_integration_test.go @@ -0,0 +1,122 @@ +//go:build detectors +// +build detectors + +package zipapi + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestZipapi_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("ZIPAPI") + email := testSecrets.MustGetField("SCANNERS_EMAIL") + pword := testSecrets.MustGetField("SCANNERS_PASS") + inactiveSecret := testSecrets.MustGetField("ZIPAPI_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a zipapi secret %s within zipapi email %s and zipapi password %s", secret, email, pword)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_ZipAPI, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a zipapi secret %s within zipapi email %s and zipapi password %s but not valid", inactiveSecret, email, pword)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_ZipAPI, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Zipapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Zipapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/zipapi/zipapi_test.go b/pkg/detectors/zipapi/zipapi_test.go index 8c5c73a1e847..58c6b242d9b6 100644 --- a/pkg/detectors/zipapi/zipapi_test.go +++ b/pkg/detectors/zipapi/zipapi_test.go @@ -1,122 +1,85 @@ -//go:build detectors -// +build detectors - package zipapi import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +var ( + validPattern = ` + zipapi_key = zipapiabcdef1234567890abcdef1234 + zipapi_email = zipapi_user@example.com + zipapi_pass = zipapiSecurePass123! + ` + invalidPattern = "abcde/testing@go" ) -func TestZipapi_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("ZIPAPI") - email := testSecrets.MustGetField("SCANNERS_EMAIL") - pword := testSecrets.MustGetField("SCANNERS_PASS") - inactiveSecret := testSecrets.MustGetField("ZIPAPI_INACTIVE") +func TestZipapi_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a zipapi secret %s within zipapi email %s and zipapi password %s", secret, email, pword)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_ZipAPI, - Verified: true, - }, - }, - wantErr: false, - }, - { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a zipapi secret %s within zipapi email %s and zipapi password %s but not valid", inactiveSecret, email, pword)), // the secret would satisfy the regex but not pass validation - verify: true, + name: "valid pattern", + input: validPattern, + want: []string{ + "zipapiabcdef1234567890abcdef1234", + "zipapiabcdef1234567890abcdef1234", + "zipapiabcdef1234567890abcdef1234", }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_ZipAPI, - Verified: false, - }, - }, - wantErr: false, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("zipapi: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Zipapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Zipapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/zipbooks/zipbooks.go b/pkg/detectors/zipbooks/zipbooks.go index cf0c54be8ada..cfde9e4589bb 100644 --- a/pkg/detectors/zipbooks/zipbooks.go +++ b/pkg/detectors/zipbooks/zipbooks.go @@ -3,16 +3,17 @@ package zipbooks import ( "context" "fmt" - regexp "github.com/wasilibs/go-re2" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{ +type Scanner struct { detectors.DefaultMultiPartCredentialProvider } @@ -23,7 +24,7 @@ var ( client = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - emailPat = regexp.MustCompile(`\b([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-z]+)\b`) + emailPat = regexp.MustCompile(common.EmailPattern) pwordPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zipbooks", "password"}) + `\b([a-zA-Z0-9!=@#$%^]{8,})`) ) @@ -37,14 +38,14 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := emailPat.FindAllStringSubmatch(dataStr, -1) pwordMatches := pwordPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) + uniqueEmailMatches := make(map[string]struct{}) + for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) { + uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{} + } + + for emailMatch := range uniqueEmailMatches { for _, pwordMatch := range pwordMatches { if len(pwordMatch) != 2 { continue @@ -53,11 +54,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_ZipBooks, - Raw: []byte(resMatch), + Raw: []byte(emailMatch), } if verify { - payload := strings.NewReader(fmt.Sprintf(`{"email": "%s", "password": "%s"}`, resMatch, resPword)) + payload := strings.NewReader(fmt.Sprintf(`{"email": "%s", "password": "%s"}`, emailMatch, resPword)) req, err := http.NewRequestWithContext(ctx, "POST", "https://api.zipbooks.com/v2/auth/login", payload) if err != nil { continue diff --git a/pkg/detectors/zipbooks/zipbooks_integration_test.go b/pkg/detectors/zipbooks/zipbooks_integration_test.go new file mode 100644 index 000000000000..1e5e4532738a --- /dev/null +++ b/pkg/detectors/zipbooks/zipbooks_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package zipbooks + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestZipbooks_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + email := testSecrets.MustGetField("SCANNERS_EMAIL") + pword := testSecrets.MustGetField("SCANNERS_PASS") + inactivePass := testSecrets.MustGetField("SCANNERS_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a zipbooks email %s within zipbooks password %s", email, pword)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_ZipBooks, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a zipbooks email %s within zipbooks password %s but not valid", email, inactivePass)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_ZipBooks, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Zipbooks.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Zipbooks.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/zipbooks/zipbooks_test.go b/pkg/detectors/zipbooks/zipbooks_test.go index 1e5e4532738a..83a04f3eae57 100644 --- a/pkg/detectors/zipbooks/zipbooks_test.go +++ b/pkg/detectors/zipbooks/zipbooks_test.go @@ -1,121 +1,82 @@ -//go:build detectors -// +build detectors - package zipbooks import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestZipbooks_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - email := testSecrets.MustGetField("SCANNERS_EMAIL") - pword := testSecrets.MustGetField("SCANNERS_PASS") - inactivePass := testSecrets.MustGetField("SCANNERS_INACTIVE") +var ( + validPattern = "SecureP@ss123 / admin@secure.com" + invalidPattern = "abcde/testing@go" +) + +func TestZipBooks_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) - type args struct { - ctx context.Context - data []byte - verify bool - } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a zipbooks email %s within zipbooks password %s", email, pword)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_ZipBooks, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - zipbooks keyword", + input: fmt.Sprintf("zipbooks = %s", validPattern), + want: []string{"admin@secure.com"}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a zipbooks email %s within zipbooks password %s but not valid", email, inactivePass)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_ZipBooks, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - password keyword", + input: fmt.Sprintf("zipbooks-password: %s", validPattern), + want: []string{"admin@secure.com"}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("zipbooks: %s", invalidPattern), + want: nil, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Zipbooks.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 && test.want != nil { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Zipbooks.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + t.Errorf("expected %d results, got %d", len(test.want), len(results)) + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } }