From 7192206258d2ed72b5318e1ea3ba862c43fc0e55 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 24 Jul 2024 09:06:27 -0400 Subject: [PATCH] chore: only check for db update if local db is old enough Previously, grype would check for a database update on every invocation. However, this was causing slow downs in the CDN. Since new databases are typically made available every 24 hours, change grype to, by default, only check if a new database is available if the local database is more than 12 hours old. Signed-off-by: Will Murphy --- cmd/grype/cli/commands/db_check.go | 2 + cmd/grype/cli/commands/db_update.go | 2 + cmd/grype/cli/options/database.go | 10 ++- go.mod | 6 +- grype/db/curator.go | 23 ++++-- grype/db/curator_test.go | 109 +++++++++++++++++++++++++++- grype/match/matches_test.go | 2 +- 7 files changed, 140 insertions(+), 14 deletions(-) diff --git a/cmd/grype/cli/commands/db_check.go b/cmd/grype/cli/commands/db_check.go index 5f34b8312b6..e2737ce128a 100644 --- a/cmd/grype/cli/commands/db_check.go +++ b/cmd/grype/cli/commands/db_check.go @@ -30,6 +30,8 @@ func DBCheck(app clio.Application) *cobra.Command { } func runDBCheck(opts options.Database) error { + // `grype db check` should _always_ check for updates, regardless of config + opts.MinAgeToCheckForUpdate = 0 dbCurator, err := db.NewCurator(opts.ToCuratorConfig()) if err != nil { return err diff --git a/cmd/grype/cli/commands/db_update.go b/cmd/grype/cli/commands/db_update.go index 4b8110ba9b0..56e37dacd9f 100644 --- a/cmd/grype/cli/commands/db_update.go +++ b/cmd/grype/cli/commands/db_update.go @@ -26,6 +26,8 @@ func DBUpdate(app clio.Application) *cobra.Command { } func runDBUpdate(opts options.Database) error { + // `grype db update` should _always_ check for updates, regardless of config + opts.MinAgeToCheckForUpdate = 0 dbCurator, err := db.NewCurator(opts.ToCuratorConfig()) if err != nil { return err diff --git a/cmd/grype/cli/options/database.go b/cmd/grype/cli/options/database.go index 83faff302a7..d098c72b9be 100644 --- a/cmd/grype/cli/options/database.go +++ b/cmd/grype/cli/options/database.go @@ -18,6 +18,7 @@ type Database struct { AutoUpdate bool `yaml:"auto-update" json:"auto-update" mapstructure:"auto-update"` ValidateByHashOnStart bool `yaml:"validate-by-hash-on-start" json:"validate-by-hash-on-start" mapstructure:"validate-by-hash-on-start"` ValidateAge bool `yaml:"validate-age" json:"validate-age" mapstructure:"validate-age"` + MinAgeToCheckForUpdate time.Duration `yaml:"min-age-to-check-for-update" json:"min-age-to-check-for-update" mapstructure:"min-age-to-check-for-update"` MaxAllowedBuiltAge time.Duration `yaml:"max-allowed-built-age" json:"max-allowed-built-age" mapstructure:"max-allowed-built-age"` UpdateAvailableTimeout time.Duration `yaml:"update-available-timeout" json:"update-available-timeout" mapstructure:"update-available-timeout"` UpdateDownloadTimeout time.Duration `yaml:"update-download-timeout" json:"update-download-timeout" mapstructure:"update-download-timeout"` @@ -28,9 +29,10 @@ var _ interface { } = (*Database)(nil) const ( - defaultMaxDBAge time.Duration = time.Hour * 24 * 5 - defaultUpdateAvailableTimeout = time.Second * 30 - defaultUpdateDownloadTimeout = time.Second * 120 + defaultMinBuiltAgeToCheckForUpdate = time.Hour * 12 + defaultMaxDBAge time.Duration = time.Hour * 24 * 5 + defaultUpdateAvailableTimeout = time.Second * 30 + defaultUpdateDownloadTimeout = time.Second * 120 ) func DefaultDatabase(id clio.Identification) Database { @@ -40,6 +42,7 @@ func DefaultDatabase(id clio.Identification) Database { AutoUpdate: true, ValidateAge: true, // After this period (5 days) the db data is considered stale + MinAgeToCheckForUpdate: defaultMinBuiltAgeToCheckForUpdate, MaxAllowedBuiltAge: defaultMaxDBAge, UpdateAvailableTimeout: defaultUpdateAvailableTimeout, UpdateDownloadTimeout: defaultUpdateDownloadTimeout, @@ -53,6 +56,7 @@ func (cfg Database) ToCuratorConfig() db.Config { CACert: cfg.CACert, ValidateByHashOnGet: cfg.ValidateByHashOnStart, ValidateAge: cfg.ValidateAge, + MinBuiltAgeToCheck: cfg.MinAgeToCheckForUpdate, MaxAllowedBuiltAge: cfg.MaxAllowedBuiltAge, ListingFileTimeout: cfg.UpdateAvailableTimeout, UpdateTimeout: cfg.UpdateDownloadTimeout, diff --git a/go.mod b/go.mod index cf7c8ea07bc..d04e5a508fb 100644 --- a/go.mod +++ b/go.mod @@ -60,7 +60,10 @@ require ( gorm.io/gorm v1.25.11 ) -require github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 +require ( + github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537 + github.com/magiconair/properties v1.8.7 +) require ( cloud.google.com/go v0.110.10 // indirect @@ -164,7 +167,6 @@ require ( github.com/kr/text v0.2.0 // indirect github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect - github.com/magiconair/properties v1.8.7 // indirect github.com/maruel/natural v1.1.1 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect diff --git a/grype/db/curator.go b/grype/db/curator.go index 34d2cd92c40..94c8caa3a65 100644 --- a/grype/db/curator.go +++ b/grype/db/curator.go @@ -36,6 +36,7 @@ type Config struct { CACert string ValidateByHashOnGet bool ValidateAge bool + MinBuiltAgeToCheck time.Duration MaxAllowedBuiltAge time.Duration ListingFileTimeout time.Duration UpdateTimeout time.Duration @@ -52,6 +53,7 @@ type Curator struct { validateByHashOnGet bool validateAge bool maxAllowedBuiltAge time.Duration + minBuildAgeToCheck time.Duration } func NewCurator(cfg Config) (Curator, error) { @@ -81,6 +83,7 @@ func NewCurator(cfg Config) (Curator, error) { validateByHashOnGet: cfg.ValidateByHashOnGet, validateAge: cfg.ValidateAge, maxAllowedBuiltAge: cfg.MaxAllowedBuiltAge, + minBuildAgeToCheck: cfg.MinBuiltAgeToCheck, }, nil } @@ -189,6 +192,20 @@ func (c *Curator) Update() (bool, error) { func (c *Curator) IsUpdateAvailable() (bool, *Metadata, *ListingEntry, error) { log.Debugf("checking for available database updates") + // compare created data to current db date + current, err := NewMetadataFromDir(c.fs, c.dbDir) + if err != nil { + return false, nil, nil, fmt.Errorf("current metadata corrupt: %w", err) + } + + // There is very little chance a new DB is available so soon + // Do nothing to reduce traffic on CDN that serves listing file. + if current != nil && time.Since(current.Built) < c.minBuildAgeToCheck { + log.Debugf("skipping db update check because current db is less than %0.2f hours old", c.minBuildAgeToCheck.Hours()) + log.Debugf("run 'grype db update' to force a check") + return false, nil, nil, nil + } + listing, err := c.ListingFromURL() if err != nil { return false, nil, nil, err @@ -200,12 +217,6 @@ func (c *Curator) IsUpdateAvailable() (bool, *Metadata, *ListingEntry, error) { } log.Debugf("found database update candidate: %s", updateEntry) - // compare created data to current db date - current, err := NewMetadataFromDir(c.fs, c.dbDir) - if err != nil { - return false, nil, nil, fmt.Errorf("current metadata corrupt: %w", err) - } - if current.IsSupersededBy(updateEntry) { log.Debugf("database update available: %s", updateEntry) return true, current, updateEntry, nil diff --git a/grype/db/curator_test.go b/grype/db/curator_test.go index c652b1ef78a..5e0c72ae01e 100644 --- a/grype/db/curator_test.go +++ b/grype/db/curator_test.go @@ -2,6 +2,7 @@ package db import ( "bufio" + "encoding/json" "errors" "fmt" "io" @@ -62,10 +63,10 @@ func (g *testGetter) GetToDir(dst, src string, _ ...*progress.Manual) error { return afero.WriteFile(g.fs, dst, []byte(g.dir[src]), 0755) } -func newTestCurator(tb testing.TB, fs afero.Fs, getter file.Getter, dbDir, metadataUrl string, validateDbHash bool) Curator { +func newTestCurator(tb testing.TB, fs afero.Fs, getter file.Getter, dbDir, listingFileURL string, validateDbHash bool) Curator { c, err := NewCurator(Config{ DBRootDir: dbDir, - ListingURL: metadataUrl, + ListingURL: listingFileURL, ValidateByHashOnGet: validateDbHash, }) @@ -447,3 +448,107 @@ func TestCuratorTimeoutBehavior(t *testing.T) { t.Fatalf("timeout exceeded (%v)", failAfter) } } + +func TestCuratorIsUpdateAvailable(t *testing.T) { + testDir := "/tmp/dbDir" + listingURL := "http://example.com/metadata/listing.json" + tests := []struct { + name string + existingMetadata Metadata + listing Listing + forceListingCheck bool + wantListingCheck bool + wantUpdate bool + wantErr bool + }{ + { + name: "skip update check; too soon", + existingMetadata: metadataBuiltAt(time.Now().Add(-10 * time.Minute)), + wantListingCheck: false, + wantUpdate: false, + wantErr: false, + }, + { + name: "force causes check even if local metadata is very new", + existingMetadata: metadataBuiltAt(time.Now().Add(-10 * time.Minute)), + listing: listingWithEntryBuiltAt(time.Time{}, "http://example.com/payload.tar.gz"), + forceListingCheck: true, + wantListingCheck: true, + wantUpdate: false, + wantErr: false, + }, + { + name: "check for update but not needed", + existingMetadata: metadataBuiltAt(time.Now().Add(-50 * time.Hour)), // old listing file + listing: listingWithEntryBuiltAt(time.Time{}, "http://example.com/payload.tar.gz"), + wantListingCheck: true, + wantUpdate: false, + wantErr: false, + }, + { + name: "update needed", + wantListingCheck: true, + wantUpdate: true, + wantErr: false, + existingMetadata: metadataBuiltAt(time.Now().Add(-50 * time.Hour)), // old listing file + listing: listingWithEntryBuiltAt(time.Now().Add(-1*time.Hour), "http://example.com/payload.tar.gz"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + files := map[string]string{} + b, err := json.Marshal(tt.listing) + require.NoError(t, err) + files[listingURL] = string(b) + dirs := map[string]string{} + fs := afero.NewMemMapFs() + fs.MkdirAll(path.Join(testDir, "5"), 0755) + f, err := fs.Create(path.Join(testDir, "5", "metadata.json")) + require.NoError(t, err) + defer f.Close() + err = json.NewEncoder(f).Encode(tt.existingMetadata) + require.NoError(t, err) + getter := newTestGetter(fs, files, dirs) + cur := newTestCurator(t, fs, getter, testDir, listingURL, false) + cur.minBuildAgeToCheck = 12 * time.Hour + if tt.forceListingCheck { + cur.minBuildAgeToCheck = 0 + } + gotUpdate, _, _, gotErr := cur.IsUpdateAvailable() + if tt.wantErr { + require.Error(t, gotErr) + return + } + if !tt.wantListingCheck { + assert.Empty(t, getter.calls) + } else { + assert.True(t, getter.calls.Contains(listingURL)) + } + require.NoError(t, gotErr) + assert.Equal(t, tt.wantUpdate, gotUpdate) + }) + } +} + +func metadataBuiltAt(t time.Time) Metadata { + return Metadata{ + Built: t, + Version: 5, + Checksum: "sha256:972d0f51e180d424f3fff2637a1559e06940f35eda6bd03593be2a3db8f28971", + } +} + +func listingWithEntryBuiltAt(t time.Time, u string) Listing { + return Listing{Available: map[int][]ListingEntry{ + 5: { + ListingEntry{ + Built: t, + URL: mustUrl(url.Parse(u)), + Version: 5, + Checksum: "sha256:0123456789abcdef", + }, + }, + }} + +} diff --git a/grype/match/matches_test.go b/grype/match/matches_test.go index 3a16905fbb2..8ea1b9f504e 100644 --- a/grype/match/matches_test.go +++ b/grype/match/matches_test.go @@ -3,13 +3,13 @@ package match import ( "testing" - "github.com/anchore/syft/syft/file" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/anchore/grype/grype/pkg" "github.com/anchore/grype/grype/vulnerability" + "github.com/anchore/syft/syft/file" syftPkg "github.com/anchore/syft/syft/pkg" )