Skip to content

Commit

Permalink
Allow ScheduledFeed.Latest to return a cutoff for future invocations (#…
Browse files Browse the repository at this point in the history
…383)

* Allow ScheduledFeed.Latest to return a cutoff for future invocations

This fixes #382 by ensuring each feed uses a cutoff value that is based
on the data observed in the feed.

Signed-off-by: Caleb Brown <calebbrown@google.com>

* Up threshold on pypi tests.

Signed-off-by: Caleb Brown <calebbrown@google.com>

* Fix lint errors

Signed-off-by: Caleb Brown <calebbrown@google.com>

* Add some comments and remove some dead code.

Signed-off-by: Caleb Brown <calebbrown@google.com>

* Fix missed comment in crates tests.

Signed-off-by: Caleb Brown <calebbrown@google.com>

---------

Signed-off-by: Caleb Brown <calebbrown@google.com>
  • Loading branch information
calebbrown authored Aug 17, 2023
1 parent 1f4b797 commit 0d8e23e
Show file tree
Hide file tree
Showing 23 changed files with 185 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ linters:
- asciicheck
- bodyclose
- deadcode
- depguard
#- depguard
- dogsled
- dupl
- errcheck
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
GO ?= go
BIN := bin
GOOS ?= $(shell uname | tr A-Z a-z)
GOLANGCI_LINT_VERSION = v1.51.2
GOLANGCI_LINT_VERSION = v1.53.3
PROJECT := package-feeds

.PHONY: help
Expand Down
7 changes: 4 additions & 3 deletions pkg/feeds/crates/crates.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,20 +81,21 @@ func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, er
}, nil
}

func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) {
pkgs := []*feeds.Package{}
packages, err := fetchPackages(feed.baseURL)
if err != nil {
return pkgs, []error{err}
return pkgs, cutoff, []error{err}
}
for _, pkg := range packages {
pkg := feeds.NewPackage(pkg.UpdatedAt, pkg.Name, pkg.NewestVersion, FeedName)
pkgs = append(pkgs, pkg)
}
feed.lossyFeedAlerter.ProcessPackages(FeedName, pkgs)

newCutoff := feeds.FindCutoff(cutoff, pkgs)
pkgs = feeds.ApplyCutoff(pkgs, cutoff)
return pkgs, []error{}
return pkgs, newCutoff, []error{}
}

func (feed Feed) GetName() string {
Expand Down
13 changes: 11 additions & 2 deletions pkg/feeds/crates/crates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,17 @@ func TestCratesLatest(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, errs := feed.Latest(cutoff)
pkgs, gotCutoff, errs := feed.Latest(cutoff)
if len(errs) != 0 {
t.Fatalf("feed.Latest returned error: %v", err)
}

// Returned cutoff should match the newest package creation time of packages retrieved.
wantCutoff := time.Date(2021, 3, 19, 13, 36, 33, 0, time.UTC)
if gotCutoff.Sub(wantCutoff).Abs() > time.Second {
t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff)
}

if pkgs[0].Name != "FooPackage" {
t.Errorf("Unexpected package `%s` found in place of expected `FooPackage`", pkgs[0].Name)
}
Expand Down Expand Up @@ -67,7 +73,10 @@ func TestCratesNotFound(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
_, errs := feed.Latest(cutoff)
_, gotCutoff, errs := feed.Latest(cutoff)
if cutoff != gotCutoff {
t.Error("feed.Latest() cutoff should be unchanged if an error is returned")
}
if len(errs) == 0 {
t.Fatalf("feed.Latest() was successful when an error was expected")
}
Expand Down
13 changes: 11 additions & 2 deletions pkg/feeds/feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ type UnsupportedOptionError struct {
}

type ScheduledFeed interface {
Latest(cutoff time.Time) ([]*Package, []error)
Latest(cutoff time.Time) ([]*Package, time.Time, []error)
GetFeedOptions() FeedOptions
GetName() string
}
Expand Down Expand Up @@ -70,13 +70,22 @@ func NewArtifact(created time.Time, name, version, artifactID, feed string) *Pac
func ApplyCutoff(pkgs []*Package, cutoff time.Time) []*Package {
filteredPackages := []*Package{}
for _, pkg := range pkgs {
if !pkg.CreatedDate.Before(cutoff) {
if pkg.CreatedDate.After(cutoff) {
filteredPackages = append(filteredPackages, pkg)
}
}
return filteredPackages
}

func FindCutoff(cutoff time.Time, pkgs []*Package) time.Time {
for _, pkg := range pkgs {
if pkg.CreatedDate.After(cutoff) {
cutoff = pkg.CreatedDate
}
}
return cutoff
}

func (err UnsupportedOptionError) Error() string {
return fmt.Sprintf("unsupported option `%v` supplied to %v feed", err.Option, err.Feed)
}
7 changes: 4 additions & 3 deletions pkg/feeds/goproxy/goproxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,19 @@ func New(feedOptions feeds.FeedOptions) (*Feed, error) {
}, nil
}

func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) {
pkgs := []*feeds.Package{}
packages, err := fetchPackages(feed.baseURL, cutoff)
if err != nil {
return pkgs, []error{err}
return pkgs, cutoff, []error{err}
}
for _, pkg := range packages {
pkg := feeds.NewPackage(pkg.ModifiedDate, pkg.Title, pkg.Version, FeedName)
pkgs = append(pkgs, pkg)
}
newCutoff := feeds.FindCutoff(cutoff, pkgs)
pkgs = feeds.ApplyCutoff(pkgs, cutoff)
return pkgs, []error{}
return pkgs, newCutoff, []error{}
}

func (feed Feed) GetName() string {
Expand Down
12 changes: 10 additions & 2 deletions pkg/feeds/goproxy/goproxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@ func TestGoProxyLatest(t *testing.T) {
feed.baseURL = srv.URL

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, errs := feed.Latest(cutoff)
pkgs, gotCutoff, errs := feed.Latest(cutoff)
if len(errs) != 0 {
t.Fatalf("feed.Latest returned error: %v", err)
}

// Returned cutoff should match the newest package creation time of packages retrieved.
wantCutoff := time.Date(2019, 4, 10, 20, 30, 2, 0, time.UTC)
if gotCutoff.Sub(wantCutoff).Abs() > time.Second {
t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff)
}
if pkgs[0].Name != "golang.org/x/foo" {
t.Errorf("Unexpected package `%s` found in place of expected `golang.org/x/foo`", pkgs[0].Name)
}
Expand Down Expand Up @@ -66,7 +71,10 @@ func TestGoproxyNotFound(t *testing.T) {
feed.baseURL = srv.URL

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
_, errs := feed.Latest(cutoff)
_, gotCutoff, errs := feed.Latest(cutoff)
if cutoff != gotCutoff {
t.Error("feed.Latest() cutoff should be unchanged if an error is returned")
}
if len(errs) == 0 {
t.Fatalf("feed.Latest() was successful when an error was expected")
}
Expand Down
9 changes: 6 additions & 3 deletions pkg/feeds/npm/npm.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ func New(feedOptions feeds.FeedOptions, eventHandler *events.Handler) (*Feed, er
}, nil
}

func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) {
var pkgs []*feeds.Package
var errs []error

Expand All @@ -402,7 +402,7 @@ func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {

if len(pkgs) == 0 {
// If none of the packages were successfully polled for, return early.
return nil, append(errs, feeds.ErrNoPackagesPolled)
return nil, cutoff, append(errs, feeds.ErrNoPackagesPolled)
}

// Ensure packages are sorted by CreatedDate in order of most recent, as goroutine
Expand All @@ -411,6 +411,9 @@ func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
return pkgs[j].CreatedDate.Before(pkgs[i].CreatedDate)
})

// After sorting the first entry should be the largest.
newCutoff := pkgs[0].CreatedDate

// TODO: Add an event for checking if the previous package list contains entries
// that do not exist in the latest package list when polling for critical packages.
// This can highlight cases where specific versions have been unpublished.
Expand All @@ -419,7 +422,7 @@ func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
}

pkgs = feeds.ApplyCutoff(pkgs, cutoff)
return pkgs, errs
return pkgs, newCutoff, errs
}

func (feed Feed) GetName() string {
Expand Down
27 changes: 21 additions & 6 deletions pkg/feeds/npm/npm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,17 @@ func TestNpmLatest(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, errs := feed.Latest(cutoff)
pkgs, gotCutoff, errs := feed.Latest(cutoff)
if len(errs) != 0 {
t.Fatalf("feed.Latest returned error: %v", errs[len(errs)-1])
}

// Returned cutoff should match the newest package creation time of packages retrieved.
wantCutoff := time.Date(2021, 5, 11, 18, 32, 1, 0, time.UTC)
if gotCutoff != wantCutoff {
t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff)
}

if pkgs[0].Name != "FooPackage" {
t.Errorf("Unexpected package `%s` found in place of expected `FooPackage`", pkgs[0].Name)
}
Expand Down Expand Up @@ -136,7 +142,7 @@ func TestNpmCritical(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, errs := feed.Latest(cutoff)
pkgs, gotCutoff, errs := feed.Latest(cutoff)
if len(errs) != 0 {
t.Fatalf("Failed to call Latest() with err: %v", errs[len(errs)-1])
}
Expand All @@ -145,6 +151,12 @@ func TestNpmCritical(t *testing.T) {
t.Fatalf("Latest() produced %v packages instead of the expected 7", len(pkgs))
}

// Returned cutoff should match the newest package creation time of packages retrieved.
wantCutoff := time.Date(2021, 5, 11, 18, 32, 1, 0, time.UTC)
if gotCutoff != wantCutoff {
t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff)
}

pkgMap := map[string]map[string]*feeds.Package{}
pkgMap["FooPackage"] = map[string]*feeds.Package{}
pkgMap["BarPackage"] = map[string]*feeds.Package{}
Expand Down Expand Up @@ -192,7 +204,7 @@ func TestNpmCriticalUnpublished(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, errs := feed.Latest(cutoff)
pkgs, _, errs := feed.Latest(cutoff)

if len(errs) != 1 {
t.Fatalf("feed.Latest() returned %v errors when 1 was expected", len(errs))
Expand Down Expand Up @@ -274,7 +286,10 @@ func TestNpmNotFound(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
_, errs := feed.Latest(cutoff)
_, gotCutoff, errs := feed.Latest(cutoff)
if cutoff != gotCutoff {
t.Error("feed.Latest() cutoff should be unchanged if an error is returned")
}
if len(errs) != 2 {
t.Fatalf("feed.Latest() returned %v errors when 2 were expected", len(errs))
}
Expand Down Expand Up @@ -304,7 +319,7 @@ func TestNpmPartialNotFound(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, errs := feed.Latest(cutoff)
pkgs, _, errs := feed.Latest(cutoff)
if len(errs) != 1 {
t.Fatalf("feed.Latest() returned %v errors when 1 was expected", len(errs))
}
Expand Down Expand Up @@ -343,7 +358,7 @@ func TestNpmCriticalPartialNotFound(t *testing.T) {
}

cutoff := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
pkgs, errs := feed.Latest(cutoff)
pkgs, _, errs := feed.Latest(cutoff)
if len(errs) != 1 {
t.Fatalf("feed.Latest() returned %v errors when 1 was expected", len(errs))
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/feeds/nuget/nuget.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,18 +176,18 @@ func New(feedOptions feeds.FeedOptions) (*Feed, error) {
// Latest will parse all creation events for packages in the nuget.org catalog feed
// for packages that have been published since the cutoff
// https://docs.microsoft.com/en-us/nuget/api/catalog-resource
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) {
pkgs := []*feeds.Package{}
var errs []error

catalogService, err := fetchCatalogService(feed.baseURL)
if err != nil {
return nil, append(errs, err)
return nil, cutoff, append(errs, err)
}

catalogPages, err := fetchCatalogPages(catalogService.URI)
if err != nil {
return nil, append(errs, err)
return nil, cutoff, append(errs, err)
}

for _, catalogPage := range catalogPages {
Expand Down Expand Up @@ -220,9 +220,10 @@ func (feed Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
pkgs = append(pkgs, pkg)
}
}
newCutoff := feeds.FindCutoff(cutoff, pkgs)
pkgs = feeds.ApplyCutoff(pkgs, cutoff)

return pkgs, errs
return pkgs, newCutoff, errs
}

func (feed Feed) GetName() string {
Expand Down
10 changes: 9 additions & 1 deletion pkg/feeds/nuget/nuget_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,19 @@ func TestCanParseFeed(t *testing.T) {

cutoff := time.Now().Add(-5 * time.Minute)

results, errs := sut.Latest(cutoff)
results, gotCutoff, errs := sut.Latest(cutoff)
if len(errs) != 0 {
t.Fatal(errs[len(errs)-1])
}

// Returned cutoff should match the newest package creation time of packages retrieved.
// This time is automatically generated at approx 1 minutes ago. The threshold of 70s
// ensures this test doesn't fail with the multiple mocked API requests.
wantCutoff := time.Now().UTC()
if gotCutoff == cutoff || gotCutoff.Sub(wantCutoff).Abs() > (70*time.Second) {
t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff)
}

if len(results) != 1 {
t.Fatalf("1 result expected but %d retrieved", len(results))
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/feeds/packagist/packagist.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,12 @@ func fetchVersionInformation(versionHost string, action actions) ([]*feeds.Packa
}

// Latest returns all package updates of packagist packages since cutoff.
func (f Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
func (f Feed) Latest(cutoff time.Time) ([]*feeds.Package, time.Time, []error) {
pkgs := []*feeds.Package{}
var errs []error
packages, err := fetchPackages(f.updateHost, cutoff)
if err != nil {
return nil, append(errs, err)
return nil, cutoff, append(errs, err)
}
for _, pkg := range packages {
if time.Unix(pkg.Time, 0).Before(cutoff) {
Expand All @@ -136,8 +136,9 @@ func (f Feed) Latest(cutoff time.Time) ([]*feeds.Package, []error) {
}
pkgs = append(pkgs, updates...)
}
newCutoff := feeds.FindCutoff(cutoff, pkgs)
pkgs = feeds.ApplyCutoff(pkgs, cutoff)
return pkgs, errs
return pkgs, newCutoff, errs
}

func (f Feed) GetName() string {
Expand Down
13 changes: 11 additions & 2 deletions pkg/feeds/packagist/packagist_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,19 @@ func TestFetch(t *testing.T) {
feed.versionHost = srv.URL

cutoff := time.Unix(1614513658, 0)
latest, errs := feed.Latest(cutoff)
latest, gotCutoff, errs := feed.Latest(cutoff)
if len(errs) != 0 {
t.Fatalf("got error: %v", errs[len(errs)-1])
}
if len(latest) == 0 {
t.Fatalf("did not get any updates")
}

// Returned cutoff should match the newest package creation time of packages retrieved.
wantCutoff := time.Date(2021, 2, 28, 12, 20, 3, 0, time.UTC)
if gotCutoff.Sub(wantCutoff).Abs() > time.Second {
t.Errorf("Latest() cutoff %v, want %v", gotCutoff, wantCutoff)
}
for _, pkg := range latest {
if pkg.CreatedDate.Before(cutoff) {
t.Fatalf("package returned that was updated before cutoff %f", pkg.CreatedDate.Sub(cutoff).Minutes())
Expand Down Expand Up @@ -64,7 +70,10 @@ func TestPackagistNotFound(t *testing.T) {
feed.versionHost = srv.URL

cutoff := time.Unix(1614513658, 0)
_, errs := feed.Latest(cutoff)
_, gotCutoff, errs := feed.Latest(cutoff)
if cutoff != gotCutoff {
t.Error("feed.Latest() cutoff should be unchanged if an error is returned")
}
if len(errs) != 1 {
t.Fatalf("feed.Latest() returned %v errors when 1 was expected", len(errs))
}
Expand Down
Loading

0 comments on commit 0d8e23e

Please sign in to comment.