diff --git a/pkg/detectors/falsepositives.go b/pkg/detectors/falsepositives.go index 3d7a576be60e..7fb1a51d0f2b 100644 --- a/pkg/detectors/falsepositives.go +++ b/pkg/detectors/falsepositives.go @@ -13,7 +13,12 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/context" ) -var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "*****"} +var ( + DefaultFalsePositives = map[FalsePositive]struct{}{ + "example": {}, "xxxxxx": {}, "aaaaaa": {}, "abcde": {}, "00000": {}, "sample": {}, "*****": {}, + } + UuidFalsePositives map[FalsePositive]struct{} +) type FalsePositive string @@ -24,18 +29,21 @@ type CustomFalsePositiveChecker interface { IsFalsePositive(result Result) (bool, string) } -//go:embed "badlist.txt" -var badList []byte - -//go:embed "words.txt" -var wordList []byte - -//go:embed "programmingbooks.txt" -var programmingBookWords []byte - -var filter *ahocorasick.Trie +var ( + filter *ahocorasick.Trie + + //go:embed "fp_badlist.txt" + badList []byte + //go:embed "fp_words.txt" + wordList []byte + //go:embed "fp_programmingbooks.txt" + programmingBookWords []byte + //go:embed "fp_uuids.txt" + uuidList []byte +) func init() { + // Populate trie. builder := ahocorasick.NewTrieBuilder() wordList := bytesToCleanWordList(wordList) @@ -47,7 +55,16 @@ func init() { programmingBookWords := bytesToCleanWordList(programmingBookWords) builder.AddStrings(programmingBookWords) + uuidList := bytesToCleanWordList(uuidList) + builder.AddStrings(uuidList) + filter = builder.Build() + + // Populate custom FalsePositive list + UuidFalsePositives = make(map[FalsePositive]struct{}, len(uuidList)) + for _, uuid := range uuidList { + UuidFalsePositives[FalsePositive(uuid)] = struct{}{} + } } func GetFalsePositiveCheck(detector Detector) func(Result) (bool, string) { @@ -65,15 +82,20 @@ func GetFalsePositiveCheck(detector Detector) func(Result) (bool, string) { // // Currently, this includes: english word in key or matches common example patterns. // Only the secret key material should be passed into this function -func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordCheck bool) (bool, string) { +func IsKnownFalsePositive(match string, falsePositives map[FalsePositive]struct{}, wordCheck bool) (bool, string) { if !utf8.ValidString(match) { return true, "invalid utf8" } lower := strings.ToLower(match) - for _, fp := range falsePositives { + + if _, exists := falsePositives[FalsePositive(lower)]; exists { + return true, "matches term: " + lower + } + + for fp := range falsePositives { fps := string(fp) if strings.Contains(lower, fps) { - return true, "matches term: " + fps + return true, "contains term: " + fps } } diff --git a/pkg/detectors/falsepositives_test.go b/pkg/detectors/falsepositives_test.go index e022a539f124..a90672892042 100644 --- a/pkg/detectors/falsepositives_test.go +++ b/pkg/detectors/falsepositives_test.go @@ -1,6 +1,3 @@ -//go:build detectors -// +build detectors - package detectors import ( @@ -32,17 +29,23 @@ func (d fakeDetector) Type() detectorspb.DetectorType { func (f fakeDetector) Description() string { return "" } func (d customFalsePositiveChecker) IsFalsePositive(result Result) (bool, string) { - return IsKnownFalsePositive(string(result.Raw), []FalsePositive{"a specific magic string"}, false) + return IsKnownFalsePositive(string(result.Raw), map[FalsePositive]struct{}{"a specific magic string": {}}, false) } func TestFilterKnownFalsePositives_DefaultLogic(t *testing.T) { results := []Result{ - {Raw: []byte("00000")}, // "default" false positive list - {Raw: []byte("number")}, // from wordlist - {Raw: []byte("hga8adshla3434g")}, // real secret + {Raw: []byte("00000")}, // "default" false positive list + {Raw: []byte("number")}, // from wordlist + // from uuid list + {Raw: []byte("00000000-0000-0000-0000-000000000000")}, + {Raw: []byte("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")}, + // real secrets + {Raw: []byte("hga8adshla3434g")}, + {Raw: []byte("f795f7db-2dfe-4095-96f3-8f8370c735f9")}, } expected := []Result{ {Raw: []byte("hga8adshla3434g")}, + {Raw: []byte("f795f7db-2dfe-4095-96f3-8f8370c735f9")}, } filtered := FilterKnownFalsePositives(logContext.Background(), fakeDetector{}, results) assert.ElementsMatch(t, expected, filtered) @@ -67,7 +70,7 @@ func TestFilterKnownFalsePositives_CustomLogic(t *testing.T) { func TestIsFalsePositive(t *testing.T) { type args struct { match string - falsePositives []FalsePositive + falsePositives map[FalsePositive]struct{} useWordlist bool } tests := []struct { diff --git a/pkg/detectors/fetchrss/fetchrss.go b/pkg/detectors/fetchrss/fetchrss.go index 64f9af85bbea..ff6d319ba0ea 100644 --- a/pkg/detectors/fetchrss/fetchrss.go +++ b/pkg/detectors/fetchrss/fetchrss.go @@ -2,26 +2,30 @@ package fetchrss import ( "context" - regexp "github.com/wasilibs/go-re2" + "encoding/json" + "fmt" "io" "net/http" - "strings" + + regexp "github.com/wasilibs/go-re2" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{} +type Scanner struct { + client *http.Client +} // Ensure the Scanner satisfies the interface at compile time. var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClient() + defaultClient = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"fetchrss"}) + `\b([0-9A-Za-z.]{40})\b`) + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"fetchrss"}) + `\b([a-zA-Z0-9.]{40})\b`) ) // Keywords are used for efficiently pre-filtering chunks. @@ -34,37 +38,26 @@ func (s Scanner) Keywords() []string { func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) + uniqueMatches := make(map[string]struct{}) + for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) { + uniqueMatches[match[1]] = struct{}{} + } + for token := range uniqueMatches { s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_Fetchrss, - Raw: []byte(resMatch), + Raw: []byte(token), } if verify { - req, err := http.NewRequestWithContext(ctx, "GET", "https://fetchrss.com/api/v1/feed/list?auth="+resMatch, nil) - if err != nil { - continue - } - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - bodyBytes, err := io.ReadAll(res.Body) - if err != nil { - continue - } - body := string(bodyBytes) - - if !strings.Contains(body, "Not authorised") { - s1.Verified = true - } + client := s.client + if client == nil { + client = defaultClient } + + verified, verificationErr := verifyToken(ctx, client, token) + s1.Verified = verified + s1.SetVerificationError(verificationErr) } results = append(results, s1) @@ -73,6 +66,51 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result return results, nil } +func verifyToken(ctx context.Context, client *http.Client, token string) (bool, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://fetchrss.com/api/v1/feed/list?auth="+token, nil) + if err != nil { + return false, err + } + + res, err := client.Do(req) + if err != nil { + return false, err + } + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + // The API seems to always return a 200 status code. + // See: https://fetchrss.com/developers + if res.StatusCode != http.StatusOK { + return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) + } + + var apiRes response + if err := json.NewDecoder(res.Body).Decode(&apiRes); err != nil { + return false, err + } + + if apiRes.Success { + // The key is valid. + return true, nil + } else if apiRes.Error.Code == 401 { + // The key is invalid. + return false, nil + } else { + return false, fmt.Errorf("unexpected error: [code=%d, message=%s]", apiRes.Error.Code, apiRes.Error.Message) + } +} + +type response struct { + Success bool `json:"success"` + Error struct { + Message string `json:"message"` + Code int `json:"code"` + } `json:"error"` +} + func (s Scanner) Type() detectorspb.DetectorType { return detectorspb.DetectorType_Fetchrss } diff --git a/pkg/detectors/badlist.txt b/pkg/detectors/fp_badlist.txt similarity index 100% rename from pkg/detectors/badlist.txt rename to pkg/detectors/fp_badlist.txt diff --git a/pkg/detectors/programmingbooks.txt b/pkg/detectors/fp_programmingbooks.txt similarity index 100% rename from pkg/detectors/programmingbooks.txt rename to pkg/detectors/fp_programmingbooks.txt diff --git a/pkg/detectors/fp_uuids.txt b/pkg/detectors/fp_uuids.txt new file mode 100644 index 000000000000..89a42a2efc36 --- /dev/null +++ b/pkg/detectors/fp_uuids.txt @@ -0,0 +1,37 @@ +00000000-0000-0000-0000-000000000000 +11111111-1111-1111-1111-111111111111 +22222222-2222-2222-2222-222222222222 +33333333-3333-3333-3333-333333333333 +44444444-4444-4444-4444-444444444444 +55555555-5555-5555-5555-555555555555 +66666666-6666-6666-6666-666666666666 +77777777-7777-7777-7777-777777777777 +88888888-8888-8888-8888-888888888888 +99999999-9999-9999-9999-999999999999 +12345678-1234-1234-1234-123456789abc +23456789-2345-2345-2345-23456789abcd +34567890-3456-3456-3456-34567890bcde +45678901-4567-4567-4567-45678901cdef +56789012-5678-5678-5678-56789012def0 +aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa +bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb +cccccccc-cccc-cccc-cccc-cccccccccccc +dddddddd-dddd-dddd-dddd-dddddddddddd +eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee +ffffffff-ffff-ffff-ffff-ffffffffffff +deadbeef-dead-beef-dead-beefdeadbeef +cafebabe-cafe-babe-cafe-babecafebabe +badc0ffee-badc-0ffe-badc-0ffeebadc0f +deadface-dead-face-dead-facedeadface +feedface-feed-face-feed-facefeedface +a1b2c3d4-a1b2-c3d4-a1b2-c3d4a1b2c3d4 +98765432-9876-5432-9876-543298765432 +abcdefab-cdef-abcd-efab-cdefabcdefab +a0a0a0a0-a0a0-a0a0-a0a0-a0a0a0a0a0a0 +b0b0b0b0-b0b0-b0b0-b0b0-b0b0b0b0b0b0 +c0c0c0c0-c0c0-c0c0-c0c0-c0c0c0c0c0c0 +d0d0d0d0-d0d0-d0d0-d0d0-d0d0d0d0d0d0 +e0e0e0e0-e0e0-e0e0-e0e0-e0e0e0e0e0e0 +f0f0f0f0-f0f0-f0f0-f0f0-f0f0f0f0f0f0 +xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx +-xxxx-xxxx-xxxx-xxxxxxxxxxxx diff --git a/pkg/detectors/words.txt b/pkg/detectors/fp_words.txt similarity index 100% rename from pkg/detectors/words.txt rename to pkg/detectors/fp_words.txt diff --git a/pkg/detectors/ftp/ftp.go b/pkg/detectors/ftp/ftp.go index 02d99cc41389..fc4a30382a4c 100644 --- a/pkg/detectors/ftp/ftp.go +++ b/pkg/detectors/ftp/ftp.go @@ -103,8 +103,12 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result return results, nil } +var ftpFalsePositives = map[detectors.FalsePositive]struct{}{ + detectors.FalsePositive("@ftp.freebsd.org"): {}, +} + func (s Scanner) IsFalsePositive(result detectors.Result) (bool, string) { - return detectors.IsKnownFalsePositive(string(result.Raw), []detectors.FalsePositive{"@ftp.freebsd.org"}, false) + return detectors.IsKnownFalsePositive(string(result.Raw), ftpFalsePositives, false) } func isErrDeterminate(e error) bool { diff --git a/pkg/detectors/github/v1/github_old.go b/pkg/detectors/github/v1/github_old.go index 53a41478be4f..2ec2a8a564df 100644 --- a/pkg/detectors/github/v1/github_old.go +++ b/pkg/detectors/github/v1/github_old.go @@ -58,6 +58,10 @@ func (s Scanner) Keywords() []string { return []string{"github", "gh", "pat", "token"} } +var ghFalsePositives = map[detectors.FalsePositive]struct{}{ + detectors.FalsePositive("github commit"): {}, +} + // FromData will find and optionally verify GitHub secrets in a given set of bytes. func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) @@ -74,8 +78,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result // Note that this false positive check happens **before** verification! I don't know why it's written this way // but that's why this logic wasn't moved into a CustomFalsePositiveChecker implementation. - specificFPs := []detectors.FalsePositive{"github commit"} - if isFp, _ := detectors.IsKnownFalsePositive(token, specificFPs, false); isFp { + if isFp, _ := detectors.IsKnownFalsePositive(token, ghFalsePositives, false); isFp { continue } diff --git a/pkg/detectors/mailgun/mailgun.go b/pkg/detectors/mailgun/mailgun.go index c9d5ca3598e5..b9d29ecef81d 100644 --- a/pkg/detectors/mailgun/mailgun.go +++ b/pkg/detectors/mailgun/mailgun.go @@ -2,7 +2,9 @@ package mailgun import ( "context" + "encoding/json" "fmt" + "io" "net/http" "strings" @@ -15,16 +17,17 @@ import ( type Scanner struct { detectors.DefaultMultiPartCredentialProvider + client *http.Client } // Ensure the Scanner satisfies the interface at compile time. var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClient() + defaultClient = common.SaneHttpClient() tokenPats = map[string]*regexp.Regexp{ - "Original MailGun Token": regexp.MustCompile(detectors.PrefixRegex([]string{"mailgun"}) + `\b([a-zA-Z-0-9]{72})\b`), + "Original MailGun Token": regexp.MustCompile(detectors.PrefixRegex([]string{"mailgun"}) + `\b([a-zA-Z0-9-]{72})\b`), "Key-MailGun Token": regexp.MustCompile(`\b(key-[a-z0-9]{32})\b`), "Hex MailGun Token": regexp.MustCompile(`\b([a-f0-9]{32}-[a-f0-9]{8}-[a-f0-9]{8})\b`), } @@ -33,55 +36,120 @@ var ( // Keywords are used for efficiently pre-filtering chunks. // Use identifiers in the secret preferably, or the provider name. func (s Scanner) Keywords() []string { - return []string{"mailgun"} + return []string{"mailgun", "key-"} +} + +func (s Scanner) getClient() *http.Client { + if s.client != nil { + return s.client + } + return defaultClient } // FromData will find and optionally verify Mailgun secrets in a given set of bytes. func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) + uniqueMatches := make(map[string]struct{}) for _, tokenPat := range tokenPats { - matches := tokenPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) + for _, match := range tokenPat.FindAllStringSubmatch(dataStr, -1) { + uniqueMatches[match[1]] = struct{}{} + } + } - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_Mailgun, - Raw: []byte(resMatch), - } + for match := range uniqueMatches { + s1 := detectors.Result{ + DetectorType: s.Type(), + Raw: []byte(match), + AnalysisInfo: map[string]string{"key": match}, + } - if verify { - req, err := http.NewRequestWithContext(ctx, "GET", "https://api.mailgun.net/v3/domains", nil) - if err != nil { - continue - } + if verify { + client := s.getClient() + isVerified, extraData, verificationErr := verifyMatch(ctx, client, match) + s1.Verified = isVerified + s1.ExtraData = extraData + s1.SetVerificationError(verificationErr) + } - // If resMatch has "key" prefix, use it as the username for basic auth. - if strings.HasPrefix(resMatch, "key-") { - req.SetBasicAuth("api", resMatch) - } else { - req.Header.Add("Authorization", fmt.Sprintf("Basic %s", resMatch)) - } + results = append(results, s1) + } - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - if res.StatusCode >= 200 && res.StatusCode < 300 { - s1.Verified = true - } - } - s1.AnalysisInfo = map[string]string{"key": resMatch} + return +} - } +func verifyMatch(ctx context.Context, client *http.Client, token string) (bool, map[string]string, error) { + // https://documentation.mailgun.com/docs/mailgun/api-reference/openapi-final/tag/Domains/ + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.mailgun.net/v3/domains", nil) + if err != nil { + return false, nil, err + } + + if len(token) == 72 { + // This matches prior logic, but may not be correct. + req.Header.Add("Authorization", fmt.Sprintf("Basic %s", token)) + } else { + // https://documentation.mailgun.com/docs/mailgun/api-reference/authentication/ + req.SetBasicAuth("api", token) + } + req.Header.Add("Content-Type", "application/json") - results = append(results, s1) + res, err := client.Do(req) + if err != nil { + return false, nil, err + } + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + if res.StatusCode == http.StatusOK { + var domains domainResponse + if err := json.NewDecoder(res.Body).Decode(&domains); err != nil { + return false, nil, fmt.Errorf("error decoding response body: %w", err) + } + + var extraData map[string]string + if len(domains.Items) > 0 { + sb := strings.Builder{} + for i, item := range domains.Items { + if i != 0 { + sb.WriteString(", ") + } + sb.WriteString(item.Name) + sb.WriteString(" (") + sb.WriteString(item.State) + sb.WriteString(",") + sb.WriteString(item.Type) + if item.IsDisabled { + sb.WriteString(",disabled") + } + sb.WriteString(")") + } + extraData = map[string]string{ + "Domains": sb.String(), + } } + + return true, extraData, nil + } else if res.StatusCode == http.StatusUnauthorized { + return false, nil, nil + } else { + return false, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) } +} + +type domainResponse struct { + TotalCount int `json:"total_count"` + Items []item `json:"items"` +} - return results, nil +type item struct { + ID string `json:"id"` + IsDisabled bool `json:"is_disabled"` + Name string `json:"name"` + State string `json:"state"` + Type string `json:"type"` } func (s Scanner) Type() detectorspb.DetectorType { diff --git a/pkg/detectors/mailgun/mailgun_test.go b/pkg/detectors/mailgun/mailgun_test.go index 0b887ce2c37e..f5fd7fd01d2f 100644 --- a/pkg/detectors/mailgun/mailgun_test.go +++ b/pkg/detectors/mailgun/mailgun_test.go @@ -9,13 +9,111 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) +func TestMailgun_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + tests := []struct { + name string + input string + want []string + }{ + // TODO: Confirm that this is actually an "original token". + // It's just a hex token encoded as basic auth. + { + name: "original token", + input: `- request: + method: get + uri: https://api.mailgun.net/v3/integration-test.domain.invalid/templates/test.template + body: + encoding: US-ASCII + string: '' + headers: + Accept: + - "*/*" + User-Agent: + - rest-client/2.1.0 (darwin21.6.0 x86_64) ruby/2.5.1p57 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Host: + - api.mailgun.net + Authorization: + - Basic YXBpOmFjZWM0YzA1YjFmMmZjZWJjZmE4ZGE2NDVkYTEwMjMxLTQxM2UzNzNjLTBhYWQzYzM3`, + want: []string{"YXBpOmFjZWM0YzA1YjFmMmZjZWJjZmE4ZGE2NDVkYTEwMjMxLTQxM2UzNzNjLTBhYWQzYzM3"}, + }, + { + name: "key- token", + input: `public static ClientResponse GetBounce() { + Client client = new Client(); + client.addFilter(new HTTPBasicAuthFilter("api", + "key-3ax63njp29jz6fds4gc373sgvjxteol1")); + WebResource webResource = + client.resource("https://api.mailgun.net/v2/samples.mailgun.org/" + + "bounces/foo@bar.com"); + return webResource.get(ClientResponse.class); +}`, + want: []string{"key-3ax63njp29jz6fds4gc373sgvjxteol1"}, + }, + { + name: "hex token", + input: `curl -X POST https://api.mailgun.net/v3/DOMAIN.TEST/messages -u "api:e915b5cdb9a582685d8f3fb1bea0f20f-07bc7b05-f14816a1"`, + want: []string{"e915b5cdb9a582685d8f3fb1bea0f20f-07bc7b05-f14816a1"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + chunkSpecificDetectors := make(map[ahocorasick.DetectorKey]detectors.Detector, 2) + ahoCorasickCore.PopulateMatchingDetectors(test.input, chunkSpecificDetectors) + if len(chunkSpecificDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } + }) + } +} + func TestMailgun_FromChunk(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel()