Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into feature/detector/zo…
Browse files Browse the repository at this point in the history
…ho-crm
  • Loading branch information
nabeelalam committed Oct 30, 2024
2 parents 5149f81 + e81ff76 commit 1a36281
Show file tree
Hide file tree
Showing 19 changed files with 372 additions and 39 deletions.
24 changes: 0 additions & 24 deletions .github/workflows/snifftest.yml

This file was deleted.

3 changes: 0 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,5 @@ release-protos-image:
docker buildx build --push --platform=linux/amd64,linux/arm64 \
-t ${PROTOS_IMAGE} -f hack/Dockerfile.protos .

snifftest:
./hack/snifftest/snifftest.sh

test-release:
goreleaser release --clean --skip-publish --snapshot
2 changes: 1 addition & 1 deletion pkg/detectors/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,5 +458,5 @@ func (s scanner) Type() detectorspb.DetectorType {
}

func (s scanner) Description() string {
return "AWS is a cloud service used offering over 200 API's to transact data and compute. AWS API keys can be used to access and modify this data and compute."
return "AWS (Amazon Web Services) is a comprehensive cloud computing platform offering a wide range of on-demand services like computing power, storage, databases. API keys for AWS can have varying amount of access to these services depending on the IAM policy attached."
}
2 changes: 1 addition & 1 deletion pkg/detectors/awssessionkeys/awssessionkey.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,5 +344,5 @@ func (s scanner) Type() detectorspb.DetectorType {
}

func (s scanner) Description() string {
return "AWS is a cloud service used offering over 200 API's to transact data and compute. AWS API keys can be used to access and modify this data and compute."
return "AWS (Amazon Web Services) is a comprehensive cloud computing platform offering a wide range of on-demand services like computing power, storage, databases. API keys for AWS can have varying amount of access to these services depending on the IAM policy attached. AWS Session Tokens are short-lived keys."
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,25 @@ package captaindata

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct{
type Scanner struct {
detectors.DefaultMultiPartCredentialProvider
}

func (s Scanner) Version() int { return 1 }

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.Versioner = (*Scanner)(nil)

var (
client = common.SaneHttpClient()
Expand Down
113 changes: 113 additions & 0 deletions pkg/detectors/captaindata/v2/captaindata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package captaindata

import (
"context"
"fmt"
"io"
"net/http"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// Scanner is the version 2 CaptainData detector.
// The zero value is usable: when client is nil, FromData falls back to the
// package-level defaultClient for verification requests.
type Scanner struct {
// client is an optional HTTP client used for verification requests.
client *http.Client
}

// Compile-time checks that *Scanner implements the detector interfaces.
var (
	_ detectors.Detector  = (*Scanner)(nil)
	_ detectors.Versioner = (*Scanner)(nil)
)

// Version reports the version number of this detector implementation.
func (Scanner) Version() int {
	const version = 2
	return version
}

var (
// defaultClient is the HTTP client FromData uses when the Scanner has no
// client of its own.
defaultClient = common.SaneHttpClient()
// keyPat captures a 64-character lowercase hexadecimal API key. The capture
// group is wrapped in \b word boundaries to reduce false positives and is
// preceded by the expression detectors.PrefixRegex builds for the keyword
// "captaindata" (presumably a nearby-keyword prefix; confirm against that helper).
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"captaindata"}) + `\b([0-9a-f]{64})\b`)
// projIdPat captures a project ID written as five hyphen-separated lowercase
// hexadecimal groups of 8, 4, 4, 4 and 12 characters, using the same prefix
// expression and \b word boundaries as keyPat.
projIdPat = regexp.MustCompile(detectors.PrefixRegex([]string{"captaindata"}) + `\b([0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12})\b`)
)

// Keywords returns the strings used to efficiently pre-filter chunks.
// The provider name is the only keyword for this detector.
func (s Scanner) Keywords() []string {
	keywords := []string{"captaindata"}
	return keywords
}

// FromData scans data for CaptainData credentials and, when verify is true,
// checks each candidate against the CaptainData API.
//
// Every distinct project ID found is paired with every distinct API key found,
// and one result is produced per pair. Raw holds the API key and RawV2 holds
// the project ID followed by the API key. Candidates are collected in maps, so
// the order of the returned results is not deterministic. The returned error
// is always nil; verification failures are attached to the individual results.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	text := string(data)

	// Deduplicate API key candidates.
	apiKeys := map[string]struct{}{}
	for _, groups := range keyPat.FindAllStringSubmatch(text, -1) {
		apiKeys[groups[1]] = struct{}{}
	}

	// Deduplicate project ID candidates.
	projectIDs := map[string]struct{}{}
	for _, groups := range projIdPat.FindAllStringSubmatch(text, -1) {
		projectIDs[groups[1]] = struct{}{}
	}

	// Pick the verification client once; a Scanner without its own client
	// uses the shared default.
	httpClient := s.client
	if httpClient == nil {
		httpClient = defaultClient
	}

	for projectID := range projectIDs {
		for apiKey := range apiKeys {
			result := detectors.Result{
				DetectorType: detectorspb.DetectorType_CaptainData,
				Raw:          []byte(apiKey),
				RawV2:        []byte(projectID + apiKey),
			}

			if verify {
				verified, extra, verifyErr := verifyMatch(ctx, httpClient, projectID, apiKey)
				result.Verified = verified
				result.ExtraData = extra
				result.SetVerificationError(verifyErr, apiKey)
			}

			results = append(results, result)
		}
	}

	return results, err
}

// verifyMatch checks a project ID / API key pair against the CaptainData
// workspace endpoint.
//
// It sends a GET request to https://api.captaindata.co/v3/workspace with the
// API key in the Authorization header (as "x-api-key <key>") and the project
// ID in the x-project-id header.
//
// Returns:
//   - (true, nil, nil) when the API answers 200 OK;
//   - (false, nil, nil) when the API answers 401 Unauthorized;
//   - (false, nil, err) when the request cannot be built or sent, or when the
//     API answers with any other status code.
//
// The extra-data map is currently always nil.
func verifyMatch(ctx context.Context, client *http.Client, projId, apiKey string) (bool, map[string]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.captaindata.co/v3/workspace", nil)
	if err != nil {
		// Surface the failure: no request was sent, so the result is
		// indeterminate rather than a clean "not verified".
		return false, nil, err
	}
	req.Header.Set("Authorization", "x-api-key "+apiKey)
	req.Header.Set("x-project-id", projId)

	res, err := client.Do(req)
	if err != nil {
		return false, nil, err
	}
	// Drain and close the body so the transport can reuse the connection.
	defer func() {
		_, _ = io.Copy(io.Discard, res.Body)
		_ = res.Body.Close()
	}()

	switch res.StatusCode {
	case http.StatusOK:
		return true, nil, nil
	case http.StatusUnauthorized:
		return false, nil, nil
	default:
		return false, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
	}
}

// Type returns the detector type identifier for CaptainData.
func (s Scanner) Type() detectorspb.DetectorType {
	detectorType := detectorspb.DetectorType_CaptainData
	return detectorType
}

// Description returns a short human-readable summary of CaptainData and of
// what its API keys can be used for.
func (s Scanner) Description() string {
	const description = "CaptainData is a service for automating data extraction and processing. The API keys can be used to access and control these automation processes."
	return description
}
129 changes: 129 additions & 0 deletions pkg/detectors/captaindata/v2/captaindata_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
//go:build detectors
// +build detectors

package captaindata

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// TestCaptainData_FromChunk runs the detector against text containing live
// test credentials fetched from GCP and checks the verification outcome.
// It performs real network requests and only builds with the "detectors" tag.
func TestCaptainData_FromChunk(t *testing.T) {
	// The timeout bounds the secret lookup only; each case uses its own context.
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
	if err != nil {
		t.Fatalf("could not get test secrets from GCP: %s", err)
	}
	projId := testSecrets.MustGetField("CAPTAINDATA_PROJID")
	secret := testSecrets.MustGetField("CAPTAINDATA")
	inactiveSecret := testSecrets.MustGetField("CAPTAINDATA_INACTIVE")

	type args struct {
		ctx    context.Context
		data   []byte
		verify bool
	}
	tests := []struct {
		name                string
		s                   Scanner
		args                args
		want                []detectors.Result
		wantErr             bool
		wantVerificationErr bool
	}{
		{
			name: "found, verified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a captaindata project %s with captaindata secret %s within", projId, secret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_CaptainData,
					Verified:     true,
				},
			},
			wantErr:             false,
			wantVerificationErr: false,
		},
		{
			// NOTE(review): a verification error is expected here, which
			// implies the API answers the inactive secret with a status other
			// than 200 or 401 (verifyMatch reports no error for those) —
			// confirm against the live API.
			name: "found, unverified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a captaindata project %s with captaindata secret %s within but not valid", projId, inactiveSecret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_CaptainData,
					Verified:     false,
				},
			},
			wantErr:             false,
			wantVerificationErr: true,
		},
		{
			name: "not found",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte("You cannot find the secret within"),
				verify: true,
			},
			want:                nil,
			wantErr:             false,
			wantVerificationErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Use the scanner configured by the test case rather than a
			// fresh one, so per-case scanner settings actually take effect.
			got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
			if (err != nil) != tt.wantErr {
				t.Errorf("CaptainData.FromData() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			for i := range got {
				if len(got[i].Raw) == 0 {
					t.Fatalf("no raw secret present: \n %+v", got[i])
				}
				if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
					t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
				}
			}
			// Secret material and error details vary between runs, so only
			// the detector type and verification flag are compared.
			ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "ExtraData", "verificationError")
			if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
				t.Errorf("CaptainData.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
			}
		})
	}
}

// BenchmarkFromData measures FromData with verification disabled, running one
// sub-benchmark per entry returned by detectors.MustGetBenchmarkData.
func BenchmarkFromData(benchmark *testing.B) {
	scanner := Scanner{}
	background := context.Background()
	for name, data := range detectors.MustGetBenchmarkData() {
		benchmark.Run(name, func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if _, err := scanner.FromData(background, false, data); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
Loading

0 comments on commit 1a36281

Please sign in to comment.