Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dependency Extraction Data Source Driver #5094

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions docs/docs/ref/proto.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ require (
github.com/open-policy-agent/opa v0.70.0
github.com/openfga/go-sdk v0.6.3
github.com/openfga/openfga v1.8.0
github.com/package-url/packageurl-go v0.1.3
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
github.com/prometheus/client_golang v1.20.5
github.com/protobom/protobom v0.5.0
Expand Down Expand Up @@ -223,7 +224,6 @@ require (
github.com/opencontainers/selinux v1.11.1 // indirect
github.com/openfga/api/proto v0.0.0-20241107182745-c14fb4b3d4b4 // indirect
github.com/openfga/language/pkg/go v0.2.0-beta.2.0.20240926131254-992b301a003f // indirect
github.com/package-url/packageurl-go v0.1.3 // indirect
github.com/pressly/goose/v3 v3.22.1 // indirect
github.com/puzpuzpuz/xsync v1.5.2 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
Expand Down
48 changes: 48 additions & 0 deletions internal/datasources/deps/deps.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package deps implements a data source that extracts dependencies from
// a filesystem or file.
package deps

import (
"errors"

minderv1 "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
v1datasources "github.com/mindersec/minder/pkg/datasources/v1"
)

// depsDataSource is the dependency-extraction data source. It only holds
// the table of callable functions built by NewDepsDataSource.
type depsDataSource struct {
// handlers maps each function key to its handler; GetFuncs returns it as is.
handlers map[v1datasources.DataSourceFuncKey]v1datasources.DataSourceFuncDef
}

// GetFuncs implements the v1datasources.DataSource interface. It hands back
// the handler table built at construction time; the map is returned directly,
// not copied.
func (d *depsDataSource) GetFuncs() map[v1datasources.DataSourceFuncKey]v1datasources.DataSourceFuncDef {
	funcs := d.handlers
	return funcs
}

// NewDepsDataSource returns a new dependencies data source built from its
// protobuf description.
//
// Every entry of the definition map is turned into a handler through
// newHandlerFromDef and registered under the entry's key. An error is
// returned when ds is nil, when it carries no definition map, or when any
// single function definition fails to build; no partially populated data
// source is returned in that case.
func NewDepsDataSource(ds *minderv1.DepsDataSource) (v1datasources.DataSource, error) {
	if ds == nil {
		return nil, errors.New("deps data source is nil")
	}

	defs := ds.GetDef()
	if defs == nil {
		return nil, errors.New("deps data source definition is nil")
	}

	out := &depsDataSource{
		handlers: make(map[v1datasources.DataSourceFuncKey]v1datasources.DataSourceFuncDef, len(defs)),
	}

	for key, handlerCfg := range defs {
		handler, err := newHandlerFromDef(handlerCfg)
		if err != nil {
			return nil, err
		}

		out.handlers[v1datasources.DataSourceFuncKey(key)] = handler
	}

	return out, nil
}
118 changes: 118 additions & 0 deletions internal/datasources/deps/handler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package deps implements a data source that extracts dependencies from
// a filesystem or file.
package deps

import (
"context"
"errors"
"fmt"

"github.com/go-git/go-billy/v5/helper/iofs"
purl "github.com/package-url/packageurl-go"
"github.com/protobom/protobom/pkg/sbom"
"github.com/rs/zerolog/log"

mdeps "github.com/mindersec/minder/internal/deps"
"github.com/mindersec/minder/internal/deps/scalibr"
minderv1 "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
v1datasources "github.com/mindersec/minder/pkg/datasources/v1"
)

// depsDataSourceHandler is a single callable function of the dependencies
// data source. Instances are created by newHandlerFromDef.
type depsDataSourceHandler struct {
// def is the function definition the handler was built from.
def *minderv1.DepsDataSource_Def
// extractor scans a filesystem for dependencies; Call invokes it.
extractor mdeps.Extractor
}

// newHandlerFromDef builds a handler for one function definition of the
// dependencies data source. The definition's ecosystems and path are run
// through ValidateArgs so that a bad definition is rejected at construction
// time. A nil definition is an error.
func newHandlerFromDef(def *minderv1.DepsDataSource_Def) (*depsDataSourceHandler, error) {
	if def == nil {
		return nil, errors.New("function definition not found")
	}

	// TODO(puerco): Get extractor from type when we have other backends
	h := &depsDataSourceHandler{
		def:       def,
		extractor: scalibr.NewExtractor(),
	}

	// The initialization parameters use the same keys as call-time arguments.
	initArgs := map[string]any{
		"ecosystems": def.Ecosystems,
		"path":       def.Path,
	}
	err := h.ValidateArgs(initArgs)
	if err != nil {
		return nil, fmt.Errorf("error in function definition: %w", err)
	}

	return h, nil
}

// ValidateArgs checks the arguments of a dependency extraction function.
//
// nil arguments are accepted. Anything else must be a map[string]any, in
// which two keys are inspected when present:
//
//   - "ecosystems": checked by validateEcosystems.
//   - "path": must be a string.
//
// Unknown keys are ignored. All problems found are combined with
// errors.Join; the result is nil when there are none.
func (*depsDataSourceHandler) ValidateArgs(args any) error {
	if args == nil {
		return nil
	}
	mapobj, ok := args.(map[string]any)
	if !ok {
		return errors.New("args is not a map")
	}

	var errs []error

	// Look the known arguments up directly instead of ranging over the map:
	// map iteration order is random, which would make the order of the
	// joined error messages differ from run to run.
	if v, found := mapobj["ecosystems"]; found {
		errs = append(errs, validateEcosystems(v)...)
	}
	if v, found := mapobj["path"]; found {
		if _, isString := v.(string); !isString {
			errs = append(errs, errors.New("path must be a string"))
		}
	}

	return errors.Join(errs...)
}

// validateEcosystems checks that the defined ecosystems are valid.
//
// raw may be nil (nothing to validate) or a []string; any other type yields
// a single "must be a list of strings" error. Each entry has to be a key of
// purl.KnownTypes. One error is returned per unknown entry, in input order;
// the result is empty when every entry is known.
func validateEcosystems(raw any) []error {
	if raw == nil {
		return nil
	}
	ecosystems, ok := raw.([]string)
	if !ok {
		return []error{errors.New("ecosystems must be a list of strings")}
	}

	var errs []error
	for _, es := range ecosystems {
		if _, known := purl.KnownTypes[es]; !known {
			errs = append(errs, fmt.Errorf("unknown ecosystem: %q", es))
		}
	}
	return errs
}

// ValidateUpdate accepts every update: it performs no checks and always
// returns nil.
func (*depsDataSourceHandler) ValidateUpdate(any) error {
	return nil
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: I guess we could verify something here, like the cardinality of the functions, but might be impractical. Any ideas?

// GetArgsSchema reports the argument schema of the function. No schema is
// defined for this handler, so the result is always nil.
func (*depsDataSourceHandler) GetArgsSchema() any {
	return nil
}
// Call scans the ingested filesystem for dependencies and returns the node
// list produced by the handler's extractor.
//
// The filesystem is read from the v1datasources.Context stored in ctx under
// v1datasources.ContextKey{}. The call arguments are ignored. An error is
// returned when the execution context is missing from ctx, when it carries
// no filesystem, or when the scan itself fails.
func (h *depsDataSourceHandler) Call(ctx context.Context, _ any) (any, error) {
	// The ingestion results travel inside the context.
	ctxData, ok := ctx.Value(v1datasources.ContextKey{}).(v1datasources.Context)
	if !ok {
		return nil, fmt.Errorf("unable to read execution context")
	}

	fsys := ctxData.Ingest.Fs
	if fsys == nil {
		return nil, fmt.Errorf("filesystem not found in execution context")
	}

	nodeList, err := h.extractor.ScanFilesystem(ctx, iofs.New(fsys))
	if err != nil {
		return nil, fmt.Errorf("scanning filesystem for dependencies: %w", err)
	}

	log.Debug().Msgf("dependency extractor returned %d package nodes", len(nodeList.Nodes))

	// NOTE(review): a freshly created, empty node is appended to the result
	// after the count above was logged. This looks like a debugging
	// leftover; confirm whether callers rely on it before removing.
	nodeList.Nodes = append(nodeList.Nodes, sbom.NewNode())

	return nodeList, nil
}
68 changes: 68 additions & 0 deletions internal/datasources/deps/handler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package deps implements a data source that extracts dependencies from
// a filesystem or file.
package deps

import (
"errors"
"testing"

"github.com/stretchr/testify/require"
)

// TestValidateArgs exercises depsDataSourceHandler.ValidateArgs with both
// accepted and rejected argument sets. Besides the container-level checks
// (nil args, non-map args) it covers the per-key validations: a path that is
// not a string, ecosystems of the wrong type and an unknown ecosystem name.
func TestValidateArgs(t *testing.T) {
	t.Parallel()
	h := depsDataSourceHandler{}
	for _, tc := range []struct {
		name    string
		args    any
		mustErr bool
	}{
		{name: "no-args", args: nil, mustErr: false},
		{name: "wrong-type", args: struct{}{}, mustErr: true},
		{name: "no-path", args: map[string]any{"ecosystems": []string{"npm"}}, mustErr: false},
		{name: "blank-path", args: map[string]any{"path": "", "ecosystems": []string{"npm"}}, mustErr: false},
		{name: "path-set", args: map[string]any{"path": "directory/", "ecosystems": []string{"npm"}}, mustErr: false},
		{name: "path-wrong-type", args: map[string]any{"path": 42}, mustErr: true},
		{name: "no-ecosystems", args: map[string]any{"path": "directory/"}, mustErr: false},
		{name: "ecosystems-empty", args: map[string]any{"path": "directory/", "ecosystems": []string{}}, mustErr: false},
		{name: "ecosystems-nil", args: map[string]any{"path": "directory/", "ecosystems": nil}, mustErr: false},
		{name: "ecosystems-wrong-type", args: map[string]any{"path": "directory/", "ecosystems": "npm"}, mustErr: true},
		{name: "ecosystems-unknown", args: map[string]any{"path": "directory/", "ecosystems": []string{"not-a-real-ecosystem"}}, mustErr: true},
	} {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			res := h.ValidateArgs(tc.args)
			if tc.mustErr {
				require.Error(t, res)
				return
			}
			require.NoError(t, res)
		})
	}
}

// TestValidateEcosystems runs validateEcosystems over a table of inputs and
// checks only whether errors are reported, not their text.
func TestValidateEcosystems(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name    string
		list    any
		mustErr bool
	}
	cases := []testCase{
		{name: "empty-list", list: nil, mustErr: false},
		{name: "valid-list-0", list: []string{}, mustErr: false},
		{name: "valid-list-1", list: []string{"npm"}, mustErr: false},
		{name: "valid-list-1+", list: []string{"npm", "pypi", "cargo"}, mustErr: false},
		{name: "invalid-type", list: []string{"npm", "Hello!", "cargo"}, mustErr: true},
		{name: "other-something", list: []struct{}{}, mustErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			errs := validateEcosystems(tc.list)
			if !tc.mustErr {
				require.Len(t, errs, 0)
				return
			}
			require.Error(t, errors.Join(errs...))
		})
	}
}
3 changes: 3 additions & 0 deletions internal/datasources/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package datasources
import (
"fmt"

"github.com/mindersec/minder/internal/datasources/deps"
"github.com/mindersec/minder/internal/datasources/rest"
minderv1 "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
v1datasources "github.com/mindersec/minder/pkg/datasources/v1"
Expand All @@ -26,6 +27,8 @@ func BuildFromProtobuf(ds *minderv1.DataSource) (v1datasources.DataSource, error
switch ds.GetDriver().(type) {
case *minderv1.DataSource_Rest:
return rest.NewRestDataSource(ds.GetRest())
case *minderv1.DataSource_Deps:
return deps.NewDepsDataSource(ds.GetDeps())
default:
return nil, fmt.Errorf("unknown data source type: %T", ds)
}
Expand Down
22 changes: 22 additions & 0 deletions internal/datasources/service/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ func dataSourceDBToProtobuf(ds db.DataSource, dsfuncs []db.DataSourcesFunction)
switch dsfType {
case v1datasources.DataSourceDriverRest:
return dataSourceRestDBToProtobuf(outds, dsfuncs)
case v1datasources.DataSourceDriverDeps:
return dataSourceDepsDBToProtobuf(outds, dsfuncs)
default:
return nil, fmt.Errorf("unknown data source type: %s", dsfType)
}
Expand All @@ -60,3 +62,23 @@ func dataSourceRestDBToProtobuf(ds *minderv1.DataSource, dsfuncs []db.DataSource

return ds, nil
}

// dataSourceDepsDBToProtobuf sets the deps driver on ds and fills it with
// the function definitions stored in the database rows.
//
// Each row's Definition is decoded with protojson into a
// minderv1.DepsDataSource_Def and stored under the row's Name. The first
// row that fails to decode aborts the conversion with an error; ds.Driver
// has already been replaced by then.
func dataSourceDepsDBToProtobuf(ds *minderv1.DataSource, dsfuncs []db.DataSourcesFunction) (*minderv1.DataSource, error) {
	defs := make(map[string]*minderv1.DepsDataSource_Def, len(dsfuncs))

	// Install the driver before decoding; defs is shared with it, so the
	// entries added below show up on ds.
	ds.Driver = &minderv1.DataSource_Deps{
		Deps: &minderv1.DepsDataSource{Def: defs},
	}

	for i := range dsfuncs {
		name := dsfuncs[i].Name
		parsed := &minderv1.DepsDataSource_Def{}
		err := protojson.Unmarshal(dsfuncs[i].Definition, parsed)
		if err != nil {
			return nil, fmt.Errorf("failed to unmarshal data source definition for %s: %w", name, err)
		}

		defs[name] = parsed
	}

	return ds, nil
}
17 changes: 17 additions & 0 deletions internal/datasources/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,23 @@ func addDataSourceFunctions(
return fmt.Errorf("failed to create data source function: %w", err)
}
}
case *minderv1.DataSource_Deps:
for name, def := range drv.Deps.GetDef() {
defBytes, err := protojson.Marshal(def)
if err != nil {
return fmt.Errorf("failed to marshal REST definition: %w", err)
}

if _, err := tx.AddDataSourceFunction(ctx, db.AddDataSourceFunctionParams{
DataSourceID: dsID,
ProjectID: projectID,
Name: name,
Type: v1datasources.DataSourceDriverDeps,
Definition: defBytes,
}); err != nil {
return fmt.Errorf("failed to create data source function: %w", err)
}
}
Comment on lines +451 to +467
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: is there a way to share this code path regardless of the type?

default:
return fmt.Errorf("unsupported data source driver type: %T", drv)
}
Expand Down
Loading
Loading