Skip to content

Commit

Permalink
Merge branch 'main' into impl-data-model-gitlab
Browse files Browse the repository at this point in the history
  • Loading branch information
abmussani authored Oct 30, 2024
2 parents 58bba02 + f4670aa commit 58fb84f
Show file tree
Hide file tree
Showing 7 changed files with 701 additions and 502 deletions.
12 changes: 8 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ var (
gitlabScanToken = gitlabScan.Flag("token", "GitLab token. Can be provided with environment variable GITLAB_TOKEN.").Envar("GITLAB_TOKEN").Required().String()
gitlabScanIncludePaths = gitlabScan.Flag("include-paths", "Path to file with newline separated regexes for files to include in scan.").Short('i').String()
gitlabScanExcludePaths = gitlabScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()
gitlabScanIncludeRepos = gitlabScan.Flag("include-repos", `Repositories to include in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/trufflehog", "trufflesecurity/t*"`).Strings()
gitlabScanExcludeRepos = gitlabScan.Flag("exclude-repos", `Repositories to exclude in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/driftwood", "trufflesecurity/d*"`).Strings()

filesystemScan = cli.Command("filesystem", "Find credentials in a filesystem.")
filesystemPaths = filesystemScan.Arg("path", "Path to file or directory to scan.").Strings()
Expand Down Expand Up @@ -674,10 +676,12 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
}

cfg := sources.GitlabConfig{
Endpoint: *gitlabScanEndpoint,
Token: *gitlabScanToken,
Repos: *gitlabScanRepos,
Filter: filter,
Endpoint: *gitlabScanEndpoint,
Token: *gitlabScanToken,
Repos: *gitlabScanRepos,
IncludeRepos: *gitlabScanIncludeRepos,
ExcludeRepos: *gitlabScanExcludeRepos,
Filter: filter,
}
if err := eng.ScanGitLab(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan GitLab: %v", err)
Expand Down
8 changes: 8 additions & 0 deletions pkg/engine/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) error {
connection.Repositories = c.Repos
}

if len(c.IncludeRepos) > 0 {
connection.IncludeRepos = c.IncludeRepos
}

if len(c.ExcludeRepos) > 0 {
connection.IgnoreRepos = c.ExcludeRepos
}

var conn anypb.Any
err := anypb.MarshalFrom(&conn, connection, proto.MarshalOptions{})
if err != nil {
Expand Down
946 changes: 478 additions & 468 deletions pkg/pb/sourcespb/sources.pb.go

Large diffs are not rendered by default.

105 changes: 75 additions & 30 deletions pkg/sources/gitlab/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ type Source struct {
jobID sources.JobID
verify bool

authMethod string
user string
password string
token string
url string
repos []string
ignoreRepos []string
authMethod string
user string
password string
token string
url string
repos []string
ignoreRepos []string
includeRepos []string

useCustomContentWriter bool
git *git.Git
Expand Down Expand Up @@ -82,6 +83,56 @@ func (s *Source) JobID() sources.JobID {
return s.jobID
}

// globRepoFilter holds the compiled include and exclude glob patterns that
// are matched against repository names to decide which repos get scanned.
type globRepoFilter struct {
	// include lists the globs a repo may match to be included.
	include []glob.Glob
	// exclude lists the globs that cause a matching repo to be ignored.
	exclude []glob.Glob
}

// newGlobRepoFilter compiles the include and exclude patterns into a
// globRepoFilter. A pattern that fails to compile is reported, together with
// its compile error, through onCompileErr and is left out of the filter.
// Include patterns are compiled before exclude patterns.
func newGlobRepoFilter(include, exclude []string, onCompileErr func(err error, pattern string)) *globRepoFilter {
	// compileAll turns every valid pattern into a glob, reporting the rest.
	compileAll := func(patterns []string) []glob.Glob {
		compiled := make([]glob.Glob, 0, len(patterns))
		for _, pattern := range patterns {
			g, err := glob.Compile(pattern)
			if err != nil {
				onCompileErr(err, pattern)
				continue
			}
			compiled = append(compiled, g)
		}
		return compiled
	}

	filter := &globRepoFilter{}
	filter.include = compileAll(include)
	filter.exclude = compileAll(exclude)
	return filter
}

// ignoreRepo reports whether s matches at least one of the exclude globs.
// With no exclude globs it always returns false.
func (c *globRepoFilter) ignoreRepo(s string) bool {
	excluded := false
	// Stop scanning as soon as one exclude glob matches.
	for i := 0; i < len(c.exclude) && !excluded; i++ {
		excluded = c.exclude[i].Match(s)
	}
	return excluded
}

// includeRepo reports whether s passes the include globs: it returns true
// when no include globs are set, or when s matches at least one of them.
func (c *globRepoFilter) includeRepo(s string) bool {
	// An empty include list means every repo is included.
	matched := len(c.include) == 0
	for i := 0; !matched && i < len(c.include); i++ {
		matched = c.include[i].Match(s)
	}
	return matched
}

// Init returns an initialized Gitlab source.
func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sourceId sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
s.name = name
Expand All @@ -101,9 +152,12 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
return fmt.Errorf("error unmarshalling connection: %w", err)
}

s.repos = conn.Repositories
s.ignoreRepos = conn.IgnoreRepos
s.repos = conn.GetRepositories()
s.ignoreRepos = conn.GetIgnoreRepos()
s.includeRepos = conn.GetIncludeRepos()

ctx.Logger().V(3).Info("setting ignore repos patterns", "patterns", s.ignoreRepos)
ctx.Logger().V(3).Info("setting include repos patterns", "patterns", s.includeRepos)

switch cred := conn.GetCredential().(type) {
case *sourcespb.GitLab_Token:
Expand Down Expand Up @@ -192,8 +246,8 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
// Get all repos if not specified.
if len(repos) == 0 {
ctx.Logger().Info("no repositories configured, enumerating")
ignoreRepo := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile ignore repo glob", "glob", pattern)
ignoreRepo := buildIgnorer(s.includeRepos, s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile include/exclude repo glob", "glob", pattern)
})
reporter := sources.VisitorReporter{
VisitUnit: func(ctx context.Context, unit sources.SourceUnit) error {
Expand Down Expand Up @@ -316,8 +370,8 @@ func (s *Source) Validate(ctx context.Context) []error {
return errs
}

ignoreProject := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
errs = append(errs, fmt.Errorf("could not compile ignore repo pattern %q: %w", pattern, err))
ignoreProject := buildIgnorer(s.includeRepos, s.ignoreRepos, func(err error, pattern string) {
errs = append(errs, fmt.Errorf("could not compile include/exclude repo pattern %q: %w", pattern, err))
})

// Query GitLab for the list of configured repos.
Expand Down Expand Up @@ -646,23 +700,14 @@ func (s *Source) WithScanOptions(scanOptions *git.ScanOptions) {
s.scanOptions = scanOptions
}

func buildIgnorer(patterns []string, onCompileErr func(err error, pattern string)) func(repo string) bool {
var globs []glob.Glob
func buildIgnorer(include, exclude []string, onCompile func(err error, pattern string)) func(repo string) bool {

for _, pattern := range patterns {
g, err := glob.Compile(pattern)
if err != nil {
onCompileErr(err, pattern)
continue
}
globs = append(globs, g)
}
// compile and load globRepoFilter
globRepoFilter := newGlobRepoFilter(include, exclude, onCompile)

f := func(repo string) bool {
for _, g := range globs {
if g.Match(repo) {
return true
}
if !globRepoFilter.includeRepo(repo) || globRepoFilter.ignoreRepo(repo) {
return true
}
return false
}
Expand Down Expand Up @@ -765,10 +810,10 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e
}

// Otherwise, enumerate all repos.
ignoreRepo := buildIgnorer(s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile ignore repo glob", "glob", pattern)
ignoreRepo := buildIgnorer(s.includeRepos, s.ignoreRepos, func(err error, pattern string) {
ctx.Logger().Error(err, "could not compile include/exclude repo glob", "glob", pattern)
// TODO: Handle error returned from UnitErr.
_ = reporter.UnitErr(ctx, fmt.Errorf("could not compile ignore repo glob: %w", err))
_ = reporter.UnitErr(ctx, fmt.Errorf("could not compile include/exclude repo glob: %w", err))
})
return s.getAllProjectRepos(ctx, apiClient, ignoreRepo, reporter)
}
Expand Down
127 changes: 127 additions & 0 deletions pkg/sources/gitlab/gitlab_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,24 @@ func TestSource_Validate(t *testing.T) {
},
wantErrCount: 2,
},

{
name: "could not compile include glob(s)",
connection: &sourcespb.GitLab{
Credential: &sourcespb.GitLab_Token{
Token: token,
},
IncludeRepos: []string{
"tes1188/*-gitlab",
"[", // glob doesn't compile
"[a-]", // glob doesn't compile
},
IgnoreRepos: []string{
"[",
},
},
wantErrCount: 3,
},
{
name: "repositories do not exist or are not accessible",
connection: &sourcespb.GitLab{
Expand Down Expand Up @@ -444,3 +462,112 @@ func TestSource_Chunks_TargetedScan(t *testing.T) {
})
}
}

// TestSource_InclusionGlobbing enumerates the projects visible to the test
// token and checks that the include/ignore glob patterns select the expected
// number of repos, and that every pattern that fails to compile is reported
// through the buildIgnorer callback.
func TestSource_InclusionGlobbing(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()

	secret, err := common.GetTestSecret(ctx)
	if err != nil {
		t.Fatal(fmt.Errorf("failed to access secret: %v", err))
	}

	token := secret.MustGetField("GITLAB_TOKEN")

	tests := []struct {
		name             string
		connection       *sourcespb.GitLab
		wantReposScanned int
		wantErrCount     int
	}{
		{
			name: "Get all Repos",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: []string{"*"},
				IgnoreRepos:  nil,
			},
			wantReposScanned: 6,
			wantErrCount:     0,
		},
		{
			name: "Ignore testy repo, include all others",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: []string{"*"},
				IgnoreRepos:  []string{"*testy*"},
			},
			wantReposScanned: 5,
			wantErrCount:     0,
		},
		{
			name: "Ignore all repos",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: nil,
				IgnoreRepos:  []string{"*"},
			},
			wantReposScanned: 0,
			wantErrCount:     0,
		},
		{
			name: "Ignore all repos, but glob doesn't compile",
			connection: &sourcespb.GitLab{
				Credential: &sourcespb.GitLab_Token{
					Token: token,
				},
				IncludeRepos: []string{
					"[",    // glob doesn't compile
					"[a-]", // glob doesn't compile
				},
				IgnoreRepos: []string{
					"*", // ignore all repos
					"[", // glob doesn't compile
				},
			},
			wantReposScanned: 0,
			wantErrCount:     3,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			src := &Source{}
			conn, err := anypb.New(tt.connection)
			assert.NoError(t, err)

			err = src.Init(ctx, tt.name, 0, 0, false, conn, 1)
			assert.NoError(t, err)

			// Query GitLab for the list of configured repos.
			var repos []string
			visitor := sources.VisitorReporter{
				VisitUnit: func(ctx context.Context, unit sources.SourceUnit) error {
					id, _ := unit.SourceUnitID()
					repos = append(repos, id)
					return nil
				},
			}
			apiClient, err := src.newClient()
			assert.NoError(t, err)

			// Collect one error per pattern that fails to compile.
			// buildIgnorer takes (include, exclude, onCompile); it has no
			// context parameter, matching its other call sites.
			var errs []error
			ignoreRepo := buildIgnorer(src.includeRepos, src.ignoreRepos, func(err error, pattern string) {
				errs = append(errs, err)
			})
			err = src.getAllProjectRepos(ctx, apiClient, ignoreRepo, visitor)
			assert.NoError(t, err)

			assert.Equal(t, tt.wantErrCount, len(errs))
			assert.Equal(t, tt.wantReposScanned, len(repos))
		})
	}
}
4 changes: 4 additions & 0 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ type GitlabConfig struct {
Filter *common.Filter
// SkipBinaries allows skipping binary files from the scan.
SkipBinaries bool
// IncludeRepos is a list of repositories to include in the scan.
IncludeRepos []string
// ExcludeRepos is a list of repositories to exclude from the scan.
ExcludeRepos []string
}

// FilesystemConfig defines the optional configuration for a filesystem source.
Expand Down
1 change: 1 addition & 0 deletions proto/sources.proto
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ message GitLab {
repeated string ignore_repos = 6;
bool skip_binaries = 7;
bool skip_archives = 8;
repeated string include_repos = 9;
}

message GitHub {
Expand Down

0 comments on commit 58fb84f

Please sign in to comment.