From 84ad58d6325d2ba509238a58d5bea4b18ccef296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5rten=20Svantesson?= Date: Tue, 19 Sep 2023 17:14:26 +0200 Subject: [PATCH] feat: remove unused artifacts --- .lighthouse/jenkins-x/release.yaml | 2 -- README.md | 18 ++++++++++++ go.mod | 2 ++ go.sum | 2 ++ internal/chart_indexer.go | 6 ++-- internal/cloud_storage.go | 4 +++ internal/config.go | 8 +++++- internal/fs_storage.go | 45 ++++++++++++++++++++++++++++++ internal/main.go | 14 ++++++++++ internal/storage.go | 2 ++ 10 files changed, 97 insertions(+), 6 deletions(-) diff --git a/.lighthouse/jenkins-x/release.yaml b/.lighthouse/jenkins-x/release.yaml index 87b7980..837fa90 100755 --- a/.lighthouse/jenkins-x/release.yaml +++ b/.lighthouse/jenkins-x/release.yaml @@ -36,8 +36,6 @@ spec: resources: {} - name: changelog resources: {} - - name: upload-binaries - resources: {} - name: promote-helm-release resources: {} image: uses:jenkins-x/jx3-pipeline-catalog/tasks/helm/release.yaml@versionStream diff --git a/README.md b/README.md index 044d314..fbc7922 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,24 @@ http: ``` Note that the basic authentication is turned off when HTTPS is disabled. +Artifacts that have not been used for a while can be removed automatically from disk storage (cloud storage is not touched): + +```YAML +cache: + base_dir: "/tmp/bucketrepo" + clean_interval: "24h" + cache_time: "720h" +``` + +The clean interval defaults to 24 hours, while the cache time has no default. This means +that cleaning of the cache is not enabled by default. + +> [!NOTE] +> For this to work, access times need to be recorded by the file system used for caching. Typically they are. + +If you do want cloud storage to be cleaned, you can, for example, add a lifecycle policy to +the bucket in the case of S3. + ### Supported Artifacts This repository has been tested with `maven` and `helm` tools, but it can also store other artifacts. 
diff --git a/go.mod b/go.mod index abc58e6..df8e59c 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,8 @@ require ( rsc.io/letsencrypt v0.0.3 // indirect ) +require github.com/djherbis/atime v1.1.0 + require ( cloud.google.com/go v0.72.0 // indirect cloud.google.com/go/storage v1.12.0 // indirect diff --git a/go.sum b/go.sum index 2f547f2..ed362fd 100644 --- a/go.sum +++ b/go.sum @@ -224,6 +224,8 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8 github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8= github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U= github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE= +github.com/djherbis/atime v1.1.0 h1:rgwVbP/5by8BvvjBNrbh64Qz33idKT3pSnMSJsxhi0g= +github.com/djherbis/atime v1.1.0/go.mod h1:28OF6Y8s3NQWwacXc5eZTsEsiMzp7LF8MbXE+XJPdBE= github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= github.com/docker/cli v0.0.0-20200130152716-5d0cf8839492 h1:FwssHbCDJD025h+BchanCwE1Q8fyMgqDr2mOQAWOLGw= github.com/docker/cli v0.0.0-20200130152716-5d0cf8839492/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= diff --git a/internal/chart_indexer.go b/internal/chart_indexer.go index ec56c96..5a9e5ea 100644 --- a/internal/chart_indexer.go +++ b/internal/chart_indexer.go @@ -3,7 +3,7 @@ package main import ( "bytes" "fmt" - "io/ioutil" + "io" "os" "path/filepath" @@ -53,13 +53,13 @@ func (ci *ChartIndexer) Reindex(dir string, out string, cache Storage, cloud Sto logrus.Debugf("writing updated chart index at %s", relativePath) - err = cache.WriteFile(relativePath, ioutil.NopCloser(bytes.NewReader(data))) + err = cache.WriteFile(relativePath, io.NopCloser(bytes.NewReader(data))) if err != nil { return errors.Wrap(err, "failed to write helm index to cache") } if cloud != nil { - err = cloud.WriteFile(relativePath, ioutil.NopCloser(bytes.NewReader(data))) + err = 
cloud.WriteFile(relativePath, io.NopCloser(bytes.NewReader(data))) if err != nil { return errors.Wrap(err, "failed to write helm index to cloud") } diff --git a/internal/cloud_storage.go b/internal/cloud_storage.go index af360c8..31faa74 100644 --- a/internal/cloud_storage.go +++ b/internal/cloud_storage.go @@ -16,6 +16,10 @@ type CloudStorage struct { config StorageConfig } +func (s *CloudStorage) RemoveUnusedArtifacts(*FileController) { + // No-op: only the local disk cache is cleaned, cloud storage is not touched; use e.g. a bucket lifecycle policy instead +} + // NewCloudStorage creates a new cloud storage instance func NewCloudStorage(config StorageConfig) *CloudStorage { return &CloudStorage{ diff --git a/internal/config.go b/internal/config.go index 36bb02b..c0dc6ab 100644 --- a/internal/config.go +++ b/internal/config.go @@ -39,7 +39,9 @@ type StorageConfig struct { // CacheConfig keeps the configuration for local file system cache type CacheConfig struct { - BaseDir string `mapstructure:"base_dir"` + BaseDir string `mapstructure:"base_dir"` + CacheTime time.Duration `mapstructure:"cache_time"` + CleanInterval time.Duration `mapstructure:"clean_interval"` } // RepositoryConfig keeps the configuration for remote artifacts repository @@ -75,6 +77,10 @@ func NewConfig(configPath string) Config { config.Cache.BaseDir = "./.bucketrepo" } + if config.Cache.CleanInterval == 0 { + config.Cache.CleanInterval = 24 * time.Hour + } + if len(config.Repositories) == 0 { config.Repositories = []RepositoryConfig{{"https://repo1.maven.org/maven2", 1 * time.Minute, nil}} } diff --git a/internal/fs_storage.go b/internal/fs_storage.go index e9401cf..d95787d 100644 --- a/internal/fs_storage.go +++ b/internal/fs_storage.go @@ -1,10 +1,17 @@ package main import ( + "fmt" "io" "os" "path" + "path/filepath" + "regexp" "strings" + "time" + + "github.com/djherbis/atime" + "github.com/sirupsen/logrus" ) // FileSystemStorage file storage backend @@ -26,6 +33,7 @@ func (fs *FileSystemStorage) ReadFile(path string) (io.ReadCloser, error) { } // WriteFile writes a 
file into the local file system
+// TODO: If writing fails because the disk is full, the files with the oldest access timestamps should be removed
 func (fs *FileSystemStorage) WriteFile(path string, file io.ReadCloser) error {
 	fullPath := resolvePath(fs.config.BaseDir, path)
 	directoryPath, _ := parseFilepath(fullPath)
@@ -42,6 +50,75 @@ func (fs *FileSystemStorage) WriteFile(path string, file io.ReadCloser) error {
 	return err
 }
 
+// RemoveUnusedArtifacts deletes cached files that have not been accessed within
+// the configured cache time. It does nothing when the cache time is not positive.
+//
+// Only files are deleted: a directory's access time says nothing about how
+// recently the files below it were read, so directories (including the cache
+// base directory) are always kept and merely descended into.
+func (fs *FileSystemStorage) RemoveUnusedArtifacts(ctrl *FileController) {
+	cacheTime := fs.config.CacheTime
+	if cacheTime <= 0 {
+		// A non-positive cache time would put the cut-off at or after "now"
+		// and wipe the whole cache.
+		return
+	}
+	logrus.Info("starting removal of unused artifacts")
+	maxAccessTime := time.Now().Add(-cacheTime)
+
+	// Don't remove charts directory or index
+	// NOTE(review): the pattern is unanchored, so every path that contains
+	// chartsDir is kept and the optional index.yaml suffix has no effect.
+	// Confirm whether the charts themselves are meant to be exempt from cleaning.
+	var keepRegExp *regexp.Regexp
+	if ctrl != nil && ctrl.chartsDir != "" {
+		// Quote the separator too: on Windows it is a backslash, which would
+		// otherwise be parsed as the start of a regexp escape sequence.
+		separator := regexp.QuoteMeta(string(os.PathSeparator))
+		keepString := fmt.Sprintf("%s(?:%sindex.yaml)?", regexp.QuoteMeta(ctrl.chartsDir), separator)
+		var err error
+		keepRegExp, err = regexp.Compile(keepString)
+		if err != nil {
+			// Cleaning without the pattern would leave the charts unprotected.
+			logrus.WithError(err).Errorf("can't compile string %s to regexp", keepString)
+			return
+		}
+	}
+
+	err := filepath.Walk(fs.config.BaseDir, func(entryPath string, info os.FileInfo, err error) error {
+		if err != nil {
+			// info may be nil when err is set, so it must not be inspected here.
+			if entryPath == fs.config.BaseDir {
+				return err
+			}
+			if !os.IsNotExist(err) {
+				logrus.WithError(err).Warnf("skipping %s during removal of unused artifacts", entryPath)
+			}
+			return nil
+		}
+		if info.IsDir() {
+			return nil
+		}
+		if keepRegExp != nil && keepRegExp.MatchString(entryPath) {
+			return nil
+		}
+		aTime := atime.Get(info)
+		if !aTime.Before(maxAccessTime) {
+			return nil
+		}
+		logrus.Debugf("removing %s that has not been accessed since %s", entryPath, aTime)
+		// A file that is already gone needs no cleaning; any other failure is
+		// logged and the walk carries on with the remaining files.
+		if removeErr := os.Remove(entryPath); removeErr != nil && !os.IsNotExist(removeErr) {
+			logrus.WithError(removeErr).Warnf("failed to remove %s", entryPath)
+		}
+		return nil
+	})
+	if err != nil {
+		logrus.WithError(err).Errorf("failed to remove unused artifacts")
+	}
+}
+
 func resolvePath(basedir string, filepath string) string {
 	return path.Join(basedir, filepath)
 }
diff --git a/internal/main.go b/internal/main.go
index 9389f17..b7aff61 100644
--- a/internal/main.go
+++ b/internal/main.go
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"time"
 
 	"github.com/sirupsen/logrus"
 )
@@ -39,6 +40,19 @@ func main() {
logrus.Fatalf("failed to initialise controller: %s", err.Error()) } + if config.Cache.CacheTime != 0 { + ticker := time.NewTicker(config.Cache.CleanInterval) + go func(ticker *time.Ticker) { + cache.RemoveUnusedArtifacts(controller) + for { + select { + case <-ticker.C: + cache.RemoveUnusedArtifacts(controller) + } + } + }(ticker) + } + logrus.Infof("serving http") InitHTTP(config.HTTP, controller) } diff --git a/internal/storage.go b/internal/storage.go index 8730440..bf0b8bc 100644 --- a/internal/storage.go +++ b/internal/storage.go @@ -10,6 +10,8 @@ type Storage interface { ReadFile(path string) (io.ReadCloser, error) // WriteFile wrietes a file into the storage WriteFile(path string, file io.ReadCloser) error + // RemoveUnusedArtifacts cleans away artifacts that have not been used for configurable amount of time + RemoveUnusedArtifacts(*FileController) } // NewStorage creates a new storage