From 4b06ad460ea0d0086de5ace2bc1f6d9902eac81c Mon Sep 17 00:00:00 2001 From: Curtis Myzie <36005462+curtis-fugue@users.noreply.github.com> Date: Thu, 23 Jun 2022 10:08:03 -0400 Subject: [PATCH] Support for a local filesystem cache (#41) * Draft support for a local filesystem cache * Checkpoint on hash interface * Remove unused function * Adjust gitignore so that test fixture is versioned * Use SetEnvKeyReplacer so that ZIM_CACHE_PATH is usable Co-authored-by: Curtis Myzie --- .gitignore | 1 + cache/cache.go | 220 ++++++---------------------- cache/cache_test.go | 10 +- cache/entry.go | 32 ++++ cache/key.go | 39 +++++ cache/middleware.go | 61 ++++++++ cache/write.go | 22 +++ cmd/addToken.go | 6 +- cmd/helpers.go | 36 ++--- cmd/listComponents.go | 5 +- cmd/listEnv.go | 7 +- cmd/listInputs.go | 5 +- cmd/listOutputs.go | 5 +- cmd/listRules.go | 5 +- cmd/root.go | 17 ++- cmd/run.go | 41 +++++- cmd/showKey.go | 17 ++- hash/hash.go | 15 ++ hash/sha1.go | 46 ++++++ hash/sha1_test.go | 35 +++++ hash/sha256.go | 46 ++++++ hash/sha256_test.go | 37 +++++ store/filesystem/filesystem.go | 145 ++++++++++++++++++ store/filesystem/filesystem_test.go | 80 ++++++++++ store/filesystem/test_fixture.txt | 2 + store/{ => http}/http.go | 31 ++-- store/store.go | 45 +----- 27 files changed, 730 insertions(+), 281 deletions(-) create mode 100644 cache/entry.go create mode 100644 cache/key.go create mode 100644 cache/middleware.go create mode 100644 cache/write.go create mode 100644 hash/hash.go create mode 100644 hash/sha1.go create mode 100644 hash/sha1_test.go create mode 100644 hash/sha256.go create mode 100644 hash/sha256_test.go create mode 100644 store/filesystem/filesystem.go create mode 100644 store/filesystem/filesystem_test.go create mode 100644 store/filesystem/test_fixture.txt rename store/{ => http}/http.go (87%) diff --git a/.gitignore b/.gitignore index 79cfac9..73ef454 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ auth.zip samconfig.toml venv coverage.out +store/filesystem/tmp*.txt diff --git a/cache/cache.go b/cache/cache.go index b959a0c..0f557ab 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + package cache import ( @@ -19,12 +20,11 @@ import ( "encoding/hex" "encoding/json" "fmt" - "io" - "io/ioutil" "os" "path/filepath" "sort" + "github.com/fugue/zim/hash" "github.com/fugue/zim/project" "github.com/fugue/zim/store" ) @@ -48,108 +48,36 @@ func (e Error) Error() string { return string(e) } // CacheMiss indicates the cache did not contain a match const CacheMiss = Error("Item not found in cache") -// Entry carries the name and hash for one item within a Key -type Entry struct { - Name string `json:"name"` - Hash string `json:"hash"` -} - -// command is the contribution to a cache key from a rule command -type command struct { - Kind string `json:"kind"` - Argument string `json:"argument,omitempty"` - Attributes map[string]interface{} `json:"attributes,omitempty"` -} - -func newEntry(name, hash string) *Entry { - return &Entry{Name: name, Hash: hash} +// Opts defines options for initializing a Cache +type Opts struct { + Store store.Store + Hasher hash.Hasher + User string + Mode string } -// Key contains information used to build a key -type Key struct { - Project string `json:"project"` - Component string `json:"component"` - Rule string `json:"rule"` - Image string `json:"image"` - OutputCount int `json:"output_count"` - Inputs []*Entry `json:"inputs"` - Deps []*Entry `json:"deps"` - Env []*Entry `json:"env"` - Toolchain []*Entry `json:"toolchain"` - Version string `json:"version"` - Commands []string `json:"commands"` - Native bool `json:"native,omitempty"` - hex string +// Cache for rule outputs +type Cache struct { + store store.Store + hasher hash.Hasher + user string + mode string } -// String returns the key as a hexadecimal string -func (k *Key) String() string { - return k.hex -} +// New returns a Cache +func New(opts Opts) *Cache { -// Compute determines the hash for this key -func (k *Key) Compute() error { - h := sha1.New() - if err := json.NewEncoder(h).Encode(k); err != nil { - return err + if opts.Hasher == nil { + opts.Hasher = hash.SHA1() } - k.hex = hex.EncodeToString(h.Sum(nil)) - return nil -} - -// NewMiddleware returns caching middleware -func NewMiddleware(s store.Store, user, mode string) project.RunnerBuilder { - - c := New(s) - c.user = user - c.mode = mode - - return project.RunnerBuilder(func(runner project.Runner) project.Runner { - return project.RunnerFunc(func(ctx context.Context, r *project.Rule, opts project.RunOpts) (project.Code, error) { - - // Caching is only applicable for rules that have cacheable - // outputs. If this is not the case, run the rule normally. - outputs := r.Outputs() - if len(outputs) == 0 || !outputs[0].Cacheable() { - return runner.Run(ctx, r, opts) - } - - if mode != WriteOnly { - // Download matching outputs from the cache if they exist - _, err := c.Read(ctx, r) - if err == nil { - return project.Cached, nil // Cache hit - } - if err != CacheMiss { - return project.Error, err // Cache error - } - } - - // At this point, the outputs were not cached so build the rule - code, err := runner.Run(ctx, r, opts) - - // Code "OK" indicates the rule was built which means we can - // store its outputs in the cache - if code == project.OK { - if _, err := c.Write(ctx, r); err != nil { - return project.Error, err - } - } - return code, err - }) - }) -} -// Cache used to determine rule keys -type Cache struct { - store store.Store - user string - mode string -} - -// New returns a cache -func New(s store.Store) *Cache { - return &Cache{store: s} + c := &Cache{ + store: opts.Store, + hasher: opts.Hasher, + user: opts.User, + mode: opts.Mode, + } + return c } // Write rule outputs to the cache @@ -159,7 +87,7 @@ func (c *Cache) Write(ctx context.Context, r *project.Rule) ([]string, error) { // If the rule has no outputs then there is nothing to cache if len(outputs) == 0 { - return nil, fmt.Errorf("Rule has no outputs: %s", r.NodeID()) + return nil, fmt.Errorf("rule has no outputs: %s", r.NodeID()) } key, err := c.Key(ctx, r) @@ -207,7 +135,7 @@ func (c *Cache) Read(ctx context.Context, r *project.Rule) ([]string, error) { // If the rule has no outputs then there is nothing to read from the cache if len(outputs) == 0 { - return nil, fmt.Errorf("Rule has no outputs: %s", r.NodeID()) + return nil, fmt.Errorf("rule has no outputs: %s", r.NodeID()) } key, err := c.Key(ctx, r) @@ -237,7 +165,7 @@ func (c *Cache) Read(ctx context.Context, r *project.Rule) ([]string, error) { func (c *Cache) put(ctx context.Context, key, src string) error { // The file hash will be added to the cache item metadata - hash, err := HashFile(src) + hash, err := c.hasher.File(src) if err != nil { return err } @@ -264,11 +192,12 @@ func (c *Cache) get(ctx context.Context, key, dst string) error { // If a local file exists that is identical to the one in the cache, // then there is nothing to do - if localHash, err := HashFile(dst); err == nil { + if localHash, err := c.hasher.File(dst); err == nil { if remoteHash == localHash { return nil } } + // Download the file from the cache return c.store.Get(ctx, key, dst) } @@ -328,7 +257,7 @@ func (c *Cache) buildKey(ctx context.Context, r *project.Rule) (*Key, error) { // Include the hash of every input file in the key for _, input := range inputs.Paths() { - hash, err := c.hashFile(input) + hash, err := c.hasher.File(input) if err != nil { return nil, err } @@ -342,7 +271,7 @@ func (c *Cache) buildKey(ctx context.Context, r *project.Rule) (*Key, error) { // Include rule environment variables in the key for _, k := range MapKeys(env) { - hash, err := c.hashString(env[k]) + hash, err := c.hasher.String(env[k]) if err != nil { return nil, err } @@ -374,7 +303,7 @@ func (c *Cache) buildKey(ctx context.Context, r *project.Rule) (*Key, error) { // For new built-in commands, reduce the command to a hash. hashStr, err := HashCommand(cmd) if err != nil { - return nil, fmt.Errorf("Failed to hash command: %s", err) + return nil, fmt.Errorf("failed to hash command: %s", err) } key.Commands = append(key.Commands, hashStr) } @@ -387,55 +316,6 @@ func (c *Cache) buildKey(ctx context.Context, r *project.Rule) (*Key, error) { return key, nil } -// hashFile returns the SHA1 hash of a given file -func (c *Cache) hashFile(p string) (string, error) { - // No caching for now - return HashFile(p) -} - -// hashString returns the SHA1 hash of a given string -func (c *Cache) hashString(s string) (string, error) { - // No caching for now - return HashString(s) -} - -// HashCommand returns a SHA1 hash of the command configuration -func HashCommand(cmd *project.Command) (string, error) { - entry := &command{ - Kind: cmd.Kind, - Argument: cmd.Argument, - Attributes: cmd.Attributes, - } - h := sha1.New() - if err := json.NewEncoder(h).Encode(entry); err != nil { - return "", err - } - return hex.EncodeToString(h.Sum(nil)), nil -} - -// HashFile returns the SHA1 hash of file contents -func HashFile(filePath string) (string, error) { - file, err := os.Open(filePath) - if err != nil { - return "", err - } - defer file.Close() - h := sha1.New() - if _, err := io.Copy(h, file); err != nil { - return "", err - } - return hex.EncodeToString(h.Sum(nil)), nil -} - -// HashString returns the SHA1 hash of a string -func HashString(s string) (string, error) { - h := sha1.New() - if _, err := h.Write([]byte(s)); err != nil { - return "", err - } - return hex.EncodeToString(h.Sum(nil)), nil -} - // MapKeys returns a sorted slice containing all keys from the given map func MapKeys(m map[string]string) (result []string) { result = make([]string, 0, len(m)) @@ -446,32 +326,16 @@ func MapKeys(m map[string]string) (result []string) { return } -func writeJSON(key *Key) (string, error) { - js, err := json.Marshal(key) - if err != nil { - return "", err - } - f, err := ioutil.TempFile("", "zim-key-") - if err != nil { - return "", err +// Command returns a SHA1 hash of the command configuration +func HashCommand(cmd *project.Command) (string, error) { + entry := &command{ + Kind: cmd.Kind, + Argument: cmd.Argument, + Attributes: cmd.Attributes, } - defer f.Close() - if _, err := f.Write(js); err != nil { + h := sha1.New() + if err := json.NewEncoder(h).Encode(entry); err != nil { return "", err } - return f.Name(), nil -} - -func writeKey(path string, key *Key) error { - js, err := json.Marshal(key) - if err != nil { - return err - } - f, err := os.Create(path) - if err != nil { - return err - } - defer f.Close() - _, err = f.Write(js) - return err + return hex.EncodeToString(h.Sum(nil)), nil } diff --git a/cache/cache_test.go b/cache/cache_test.go index 350adc6..50d2b65 100644 --- a/cache/cache_test.go +++ b/cache/cache_test.go @@ -65,14 +65,14 @@ func TestCacheKey(t *testing.T) { Path: cPath("component.yaml"), Docker: definitions.Docker{Image: "repo/img:1.2.3"}, Rules: map[string]definitions.Rule{ - "test": definitions.Rule{ + "test": { Description: "test it!", Inputs: []string{"${NAME}_test.go", "go.mod"}, Ignore: []string{"exclude_me.go"}, Outputs: []string{"test_results"}, Command: "go test -v", }, - "build": definitions.Rule{ + "build": { Description: "build it!", Inputs: []string{"${NAME}.go", "go.mod"}, Ignore: []string{"exclude_me.go"}, @@ -110,7 +110,7 @@ func TestCacheKey(t *testing.T) { buildDeps := buildRule.Dependencies() require.Len(t, buildDeps, 1) - cache := New(nil) + cache := New(Opts{}) key1, err := cache.Key(ctx, testRule) require.Nil(t, err, "Error getting cache key") @@ -149,7 +149,7 @@ func TestCacheKeyNonDocker(t *testing.T) { Name: "my-component", Path: path.Join(cDir, "component.yaml"), Rules: map[string]definitions.Rule{ - "test": definitions.Rule{ + "test": { Inputs: []string{"main.go"}, Outputs: []string{"my-exe"}, Command: "touch my-exe", @@ -170,7 +170,7 @@ func TestCacheKeyNonDocker(t *testing.T) { require.NotNil(t, c) testRule := c.MustRule("test") - cache := New(nil) + cache := New(Opts{}) key, err := cache.Key(ctx, testRule) require.Nil(t, err, "Error getting cache key") diff --git a/cache/entry.go b/cache/entry.go new file mode 100644 index 0000000..e222302 --- /dev/null +++ b/cache/entry.go @@ -0,0 +1,32 @@ +// Copyright 2020 Fugue, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cache + +// Entry carries the name and hash for one item within a Key +type Entry struct { + Name string `json:"name"` + Hash string `json:"hash"` +} + +// command is the contribution to a cache key from a rule command +type command struct { + Kind string `json:"kind"` + Argument string `json:"argument,omitempty"` + Attributes map[string]interface{} `json:"attributes,omitempty"` +} + +func newEntry(name, hash string) *Entry { + return &Entry{Name: name, Hash: hash} +} diff --git a/cache/key.go b/cache/key.go new file mode 100644 index 0000000..9603b99 --- /dev/null +++ b/cache/key.go @@ -0,0 +1,39 @@ +package cache + +import ( + "crypto/sha1" + "encoding/hex" + "encoding/json" +) + +// Key contains information used to build a key +type Key struct { + Project string `json:"project"` + Component string `json:"component"` + Rule string `json:"rule"` + Image string `json:"image"` + OutputCount int `json:"output_count"` + Inputs []*Entry `json:"inputs"` + Deps []*Entry `json:"deps"` + Env []*Entry `json:"env"` + Toolchain []*Entry `json:"toolchain"` + Version string `json:"version"` + Commands []string `json:"commands"` + Native bool `json:"native,omitempty"` + hex string +} + +// String returns the key as a hexadecimal string +func (k *Key) String() string { + return k.hex +} + +// Compute determines the hash for this key +func (k *Key) Compute() error { + h := sha1.New() + if err := json.NewEncoder(h).Encode(k); err != nil { + return err + } + k.hex = hex.EncodeToString(h.Sum(nil)) + return nil +} diff --git a/cache/middleware.go b/cache/middleware.go new file mode 100644 index 0000000..ddcfde2 --- /dev/null +++ b/cache/middleware.go @@ -0,0 +1,61 @@ +// Copyright 2020 Fugue, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cache + +import ( + "context" + + "github.com/fugue/zim/project" +) + +// NewMiddleware returns caching middleware +func NewMiddleware(c *Cache) project.RunnerBuilder { + + return project.RunnerBuilder(func(runner project.Runner) project.Runner { + + return project.RunnerFunc(func(ctx context.Context, r *project.Rule, opts project.RunOpts) (project.Code, error) { + + // Caching is only applicable for rules that have cacheable + // outputs. If this is not the case, run the rule normally. + outputs := r.Outputs() + if len(outputs) == 0 || !outputs[0].Cacheable() { + return runner.Run(ctx, r, opts) + } + + if c.mode != WriteOnly { + // Download matching outputs from the cache if they exist + _, err := c.Read(ctx, r) + if err == nil { + return project.Cached, nil // Cache hit + } + if err != CacheMiss { + return project.Error, err // Cache error + } + } + + // At this point, the outputs were not cached so build the rule + code, err := runner.Run(ctx, r, opts) + + // Code "OK" indicates the rule was built which means we can + // store its outputs in the cache + if code == project.OK { + if _, err := c.Write(ctx, r); err != nil { + return project.Error, err + } + } + return code, err + }) + }) +} diff --git a/cache/write.go b/cache/write.go new file mode 100644 index 0000000..d54f30b --- /dev/null +++ b/cache/write.go @@ -0,0 +1,22 @@ +package cache + +import ( + "encoding/json" + "io/ioutil" +) + +func writeJSON(key *Key) (string, error) { + js, err := json.Marshal(key) + if err != nil { + return "", err + } + f, err := ioutil.TempFile("", "zim-key-") + if err != nil { + return "", err + } + defer f.Close() + if _, err := f.Write(js); err != nil { + return "", err + } + return f.Name(), nil +} diff --git a/cmd/addToken.go b/cmd/addToken.go index ca3fc5c..a22e2a8 100644 --- a/cmd/addToken.go +++ b/cmd/addToken.go @@ -34,7 +34,11 @@ func NewAddTokenCommand() *cobra.Command { Short: "Add a cache token", Run: func(cmd *cobra.Command, args []string) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } + name := viper.GetString("name") email := viper.GetString("email") diff --git a/cmd/helpers.go b/cmd/helpers.go index 2afc435..702393f 100644 --- a/cmd/helpers.go +++ b/cmd/helpers.go @@ -18,7 +18,6 @@ import ( "fmt" "os" "os/exec" - "path" "path/filepath" "strings" @@ -73,9 +72,10 @@ type zimOptions struct { CacheMode string Token string Platform string + CachePath string } -func getZimOptions(cmd *cobra.Command, args []string) zimOptions { +func getZimOptions(cmd *cobra.Command, args []string) (zimOptions, error) { opts := zimOptions{ Directory: viper.GetString("dir"), URL: viper.GetString("url"), @@ -91,10 +91,17 @@ func getZimOptions(cmd *cobra.Command, args []string) zimOptions { CacheMode: viper.GetString("cache"), Token: viper.GetString("token"), Platform: viper.GetString("platform"), + CachePath: viper.GetString("cache-path"), } - if opts.Cache == "" { - opts.Cache = XDGCache() + if opts.CachePath == "" { + opts.CachePath = LocalCacheDirectory() } + absCachePath, err := filepath.Abs(opts.CachePath) + if err != nil { + return zimOptions{}, fmt.Errorf("unable to make cache path absolute: %w", err) + } + opts.CachePath = absCachePath + // Strip paths to components if provided, e.g. src/foo -> foo for i, c := range opts.Components { opts.Components[i] = filepath.Base(c) @@ -104,28 +111,21 @@ func getZimOptions(cmd *cobra.Command, args []string) zimOptions { if cmd.Name() == "run" && len(opts.Rules) == 0 && len(args) > 0 { opts.Rules = args } - - return opts + return opts, nil } -// XDGCache returns the local cache directory -func XDGCache() string { +// LocalCacheDirectory returns the directory in the local filesystem +// to be used for caching +func LocalCacheDirectory() string { value := os.Getenv("XDG_CACHE_HOME") if value != "" { - return value + return filepath.Join(value, "zim") } home, err := os.UserHomeDir() if err != nil { panic(err) } - return path.Join(home, ".cache") -} - -func fileExists(p string) bool { - if _, err := os.Stat(p); err == nil { - return true - } - return false + return filepath.Join(home, ".cache", "zim") } // repoRoot returns the root directory of the Git repository, given any @@ -140,7 +140,7 @@ func repoRoot(dir string) (string, error) { command.Stderr = &b if err := command.Run(); err != nil { - return "", fmt.Errorf("Failed to run git rev-parse: %s", err) + return "", fmt.Errorf("failed to run git rev-parse: %s", err) } output := strings.TrimSpace(b.String()) if output == ".git" { diff --git a/cmd/listComponents.go b/cmd/listComponents.go index fe9ed61..18bb33b 100644 --- a/cmd/listComponents.go +++ b/cmd/listComponents.go @@ -41,7 +41,10 @@ func NewListComponentsCommand() *cobra.Command { Aliases: []string{"c"}, Run: func(cmd *cobra.Command, args []string) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) diff --git a/cmd/listEnv.go b/cmd/listEnv.go index 1c55481..99aee87 100644 --- a/cmd/listEnv.go +++ b/cmd/listEnv.go @@ -32,12 +32,17 @@ var listEnvCmd = &cobra.Command{ Short: "List zim environment and configuration", Run: func(cmd *cobra.Command, args []string) { + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } + defaultCols := []string{ "Key", "Value", } - fields := structs.Map(getZimOptions(cmd, args)) + fields := structs.Map(opts) var rows []interface{} for _, k := range []string{"URL", "Region", "Debug", "Jobs", "UseDocker"} { diff --git a/cmd/listInputs.go b/cmd/listInputs.go index 7c528de..44ebde2 100644 --- a/cmd/listInputs.go +++ b/cmd/listInputs.go @@ -41,7 +41,10 @@ func NewListInputsCommand() *cobra.Command { Aliases: []string{"in", "ins", "inputs"}, Run: func(cmd *cobra.Command, args []string) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) diff --git a/cmd/listOutputs.go b/cmd/listOutputs.go index d4290b8..af320c0 100644 --- a/cmd/listOutputs.go +++ b/cmd/listOutputs.go @@ -44,7 +44,10 @@ func NewListArtifactsCommand() *cobra.Command { Aliases: []string{"out", "outs", "outputs"}, Run: func(cmd *cobra.Command, args []string) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) diff --git a/cmd/listRules.go b/cmd/listRules.go index 39c8b3a..41b872e 100644 --- a/cmd/listRules.go +++ b/cmd/listRules.go @@ -39,7 +39,10 @@ func NewListRulesCommand() *cobra.Command { Aliases: []string{"r", "rule", "rules"}, Run: func(cmd *cobra.Command, args []string) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) diff --git a/cmd/root.go b/cmd/root.go index ed6cbd2..1b90bcc 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -16,6 +16,7 @@ package cmd import ( "fmt" "os" + "strings" "github.com/spf13/cobra" "github.com/spf13/viper" @@ -67,7 +68,10 @@ func init() { // Flag completions rootCmd.RegisterFlagCompletionFunc("components", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) @@ -85,7 +89,10 @@ func init() { }) rootCmd.RegisterFlagCompletionFunc("kinds", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) @@ -99,7 +106,10 @@ func init() { }) rootCmd.RegisterFlagCompletionFunc("rules", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) @@ -128,5 +138,6 @@ func initConfig() { viper.SetConfigName(".zim") viper.AutomaticEnv() + viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) viper.ReadInConfig() } diff --git a/cmd/run.go b/cmd/run.go index 751dc6a..aebe2fb 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + package cmd import ( @@ -25,9 +26,11 @@ import ( "github.com/fugue/zim/cache" "github.com/fugue/zim/exec" + "github.com/fugue/zim/hash" "github.com/fugue/zim/project" "github.com/fugue/zim/sched" - "github.com/fugue/zim/store" + fsStore "github.com/fugue/zim/store/filesystem" + httpStore "github.com/fugue/zim/store/http" "github.com/spf13/cobra" "github.com/spf13/viper" ) @@ -54,7 +57,10 @@ func NewRunCommand() *cobra.Command { defer cancel() closeHandler(cancel) - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } // If inside a git repo pick the root as the project directory if repo, err := getRepository(opts.Directory); err == nil { @@ -111,17 +117,33 @@ func NewRunCommand() *cobra.Command { // Add caching middleware depending on configuration if opts.CacheMode == cache.Disabled { - fmt.Fprintf(os.Stdout, project.Yellow("Caching is disabled.\n")) + fmt.Fprint(os.Stdout, project.Yellow("Caching is disabled.\n")) } else if opts.URL != "" { - objStore := store.NewHTTP(opts.URL, opts.Token) + objStore := httpStore.New(opts.URL, opts.Token) self, err := user.Current() if err != nil { fatal(err) } - builders = append(builders, - cache.NewMiddleware(objStore, self.Name, opts.CacheMode)) + cacheInterface := cache.New(cache.Opts{ + Store: objStore, + Hasher: hash.SHA1(), + User: self.Name, + }) + builders = append(builders, cache.NewMiddleware(cacheInterface)) + } else if opts.CachePath != "" { + objStore := fsStore.New(opts.CachePath) + self, err := user.Current() + if err != nil { + fatal(err) + } + cacheInterface := cache.New(cache.Opts{ + Store: objStore, + Hasher: hash.SHA1(), + User: self.Name, + }) + builders = append(builders, cache.NewMiddleware(cacheInterface)) } else { - fmt.Fprintf(os.Stderr, + fmt.Fprint(os.Stderr, project.Yellow("Cache URL is not set. See the docs!\n")) } @@ -161,7 +183,10 @@ func NewRunCommand() *cobra.Command { } }, ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } proj, err := getProject(opts.Directory) if err != nil { fatal(err) diff --git a/cmd/showKey.go b/cmd/showKey.go index d671270..f9b5ef5 100644 --- a/cmd/showKey.go +++ b/cmd/showKey.go @@ -18,10 +18,12 @@ import ( "encoding/json" "errors" "fmt" + "os/user" "strings" "github.com/fugue/zim/cache" "github.com/fugue/zim/exec" + "github.com/fugue/zim/hash" "github.com/fugue/zim/project" "github.com/spf13/cobra" "github.com/spf13/viper" @@ -36,7 +38,10 @@ func NewShowKeyCommand() *cobra.Command { Run: func(cmd *cobra.Command, args []string) { var ruleName, componentName string - opts := getZimOptions(cmd, args) + opts, err := getZimOptions(cmd, args) + if err != nil { + fatal(err) + } if gitDir, err := gitRoot(opts.Directory); err == nil { opts.Directory = gitDir @@ -88,9 +93,17 @@ func NewShowKeyCommand() *cobra.Command { if !found { fatal(fmt.Errorf("Unknown rule: %s.%s", componentName, ruleName)) } + self, err := user.Current() + if err != nil { + fatal(err) + } ctx := context.Background() - zimCache := cache.New(nil) + zimCache := cache.New(cache.Opts{ + Store: nil, + Hasher: hash.SHA1(), + User: self.Name, + }) key, err := zimCache.Key(ctx, r) if err != nil { fatal(err) diff --git a/hash/hash.go b/hash/hash.go new file mode 100644 index 0000000..80fa5ca --- /dev/null +++ b/hash/hash.go @@ -0,0 +1,15 @@ +package hash + +// Hasher is an interface for hashing objects, files, or strings. +// Different implementations may exist for SHA1, SHA256, etc. +type Hasher interface { + + // Object returns the hash of a given object + Object(obj interface{}) (string, error) + + // File returns the hash of a given file on disk + File(path string) (string, error) + + // String returns the hash of a given string + String(s string) (string, error) +} diff --git a/hash/sha1.go b/hash/sha1.go new file mode 100644 index 0000000..841b7a5 --- /dev/null +++ b/hash/sha1.go @@ -0,0 +1,46 @@ +package hash + +import ( + "crypto/sha1" + "encoding/hex" + "encoding/json" + "io" + "os" +) + +type sha1Hasher struct{} + +func SHA1() Hasher { + return &sha1Hasher{} +} + +func (hasher *sha1Hasher) Object(obj interface{}) (string, error) { + data, err := json.Marshal(obj) + if err != nil { + return "", err + } + h := sha1.New() + h.Write(data) + return hex.EncodeToString(h.Sum(nil)), nil +} + +func (hasher *sha1Hasher) File(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + h := sha1.New() + if _, err := io.Copy(h, file); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} + +func (hasher *sha1Hasher) String(s string) (string, error) { + h := sha1.New() + if _, err := h.Write([]byte(s)); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} diff --git a/hash/sha1_test.go b/hash/sha1_test.go new file mode 100644 index 0000000..f6bacc6 --- /dev/null +++ b/hash/sha1_test.go @@ -0,0 +1,35 @@ +package hash + +import ( + "io/ioutil" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSha1(t *testing.T) { + // echo -n "1234" | shasum + // 7110eda4d09e062aa5e4a390b0a572ac0d2c0220 - + + var err error + var value string + + h := SHA1() + + value, err = h.String("1234") + require.Nil(t, err) + require.Equal(t, "7110eda4d09e062aa5e4a390b0a572ac0d2c0220", value) + + value, err = h.Object(1234) + require.Nil(t, err) + require.Equal(t, "7110eda4d09e062aa5e4a390b0a572ac0d2c0220", value) + + f, err := ioutil.TempFile("", "zim-test-") + require.Nil(t, err) + f.Write([]byte("1234")) + f.Close() + + value, err = h.File(f.Name()) + require.Nil(t, err) + require.Equal(t, "7110eda4d09e062aa5e4a390b0a572ac0d2c0220", value) +} diff --git a/hash/sha256.go b/hash/sha256.go new file mode 100644 index 0000000..8dd583b --- /dev/null +++ b/hash/sha256.go @@ -0,0 +1,46 @@ +package hash + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "io" + "os" +) + +type sha256Hasher struct{} + +func SHA256() Hasher { + return &sha256Hasher{} +} + +func (hasher *sha256Hasher) Object(obj interface{}) (string, error) { + data, err := json.Marshal(obj) + if err != nil { + return "", err + } + h := sha256.New() + h.Write(data) + return hex.EncodeToString(h.Sum(nil)), nil +} + +func (hasher *sha256Hasher) File(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + h := sha256.New() + if _, err := io.Copy(h, file); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} + +func (hasher *sha256Hasher) String(s string) (string, error) { + h := sha256.New() + if _, err := h.Write([]byte(s)); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} diff --git a/hash/sha256_test.go b/hash/sha256_test.go new file mode 100644 index 0000000..903aae9 --- /dev/null +++ b/hash/sha256_test.go @@ -0,0 +1,37 @@ +package hash + +import ( + "io/ioutil" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSha256(t *testing.T) { + // echo -n "1234" | shasum -a 256 (docker-desktop/default) + // 03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4 - + + var err error + var value string + + expected := "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" + + h := SHA256() + + value, err = h.String("1234") + require.Nil(t, err) + require.Equal(t, expected, value) + + value, err = h.Object(1234) + require.Nil(t, err) + require.Equal(t, expected, value) + + f, err := ioutil.TempFile("", "zim-test-") + require.Nil(t, err) + f.Write([]byte("1234")) + f.Close() + + value, err = h.File(f.Name()) + require.Nil(t, err) + require.Equal(t, expected, value) +} diff --git a/store/filesystem/filesystem.go b/store/filesystem/filesystem.go new file mode 100644 index 0000000..e697a86 --- /dev/null +++ b/store/filesystem/filesystem.go @@ -0,0 +1,145 @@ +// Copyright 2020 Fugue, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filesystem + +import ( + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + + "github.com/fugue/zim/store" +) + +type fileStore struct { + rootDirectory string +} + +// New returns a store.Store interface backed by the local filesystem +func New(rootDirectory string) store.Store { + return &fileStore{rootDirectory: rootDirectory} +} + +func (s *fileStore) path(key string) string { + + // Use a nested subdirectory tree for the cache to help avoid + // having one folder with many 10s of thousands of files in it, since + // that can be difficult to navigate in file explorers and similar. + + keyLen := len(key) + + // Two levels of nesting, e.g. "abcd" is stored at "/ab/abcd" + if keyLen >= 4 { + prefix1 := key[:2] + prefix2 := key[2:4] + return filepath.Join(s.rootDirectory, prefix1, prefix2, key) + } + + // One level of nesting, e.g. "abc" is stored at "/ab/abc" + if keyLen >= 2 { + prefix := key[:2] + return filepath.Join(s.rootDirectory, prefix, key) + } + + // No nesting for short keys (uncommon) + return filepath.Join(s.rootDirectory, key) +} + +func (s *fileStore) Get(ctx context.Context, key, dst string) error { + + path := s.path(key) + srcFile, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("not found: %s", key) + } + return fmt.Errorf("failed to open file %s: %w", path, err) + } + defer srcFile.Close() + + return copyFile(srcFile, dst) +} + +func (s *fileStore) Put(ctx context.Context, key, src string, meta map[string]string) error { + + path := s.path(key) + pathDir := filepath.Dir(path) + + if err := os.MkdirAll(pathDir, 0755); err != nil { + return err + } + + f, err := os.Open(src) + if err != nil { + return fmt.Errorf("failed to open file %s: %w", src, err) + } + defer f.Close() + + if err := copyFile(f, path); err != nil { + return err + } + + metaPath := fmt.Sprintf("%s.meta", path) + metaBytes, err := json.Marshal(store.ItemMeta{Meta: meta}) + if err != nil { + return fmt.Errorf("failed to marshal metadata; key: %s err: %w", key, err) + } + metaFile, err := os.Create(metaPath) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", metaPath, err) + } + defer metaFile.Close() + + if _, err := metaFile.Write(metaBytes); err != nil { + return fmt.Errorf("failed to write file %s: %w", metaPath, err) + } + return nil +} + +func copyFile(f *os.File, dstPath string) error { + + dstFile, err := os.Create(dstPath) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", dstPath, err) + } + defer dstFile.Close() + + if _, err := io.Copy(dstFile, f); err != nil { + return fmt.Errorf("failed to write file %s: %w", dstPath, err) + } + return nil +} + +// Head checks if the item exists in the store +func (s *fileStore) Head(ctx context.Context, key string) (store.ItemMeta, error) { + + metaPath := fmt.Sprintf("%s.meta", s.path(key)) + metaBytes, err := ioutil.ReadFile(metaPath) + if err != nil { + if os.IsNotExist(err) { + return store.ItemMeta{}, store.NotFound(fmt.Sprintf("not found: %s", key)) + } + return store.ItemMeta{}, err + } + + var itemMeta store.ItemMeta + if err := json.Unmarshal(metaBytes, &itemMeta); err != nil { + return store.ItemMeta{}, fmt.Errorf("failed to parse metadata: %w", err) + } + return itemMeta, nil +} diff --git a/store/filesystem/filesystem_test.go b/store/filesystem/filesystem_test.go new file mode 100644 index 0000000..f5aeafc --- /dev/null +++ b/store/filesystem/filesystem_test.go @@ -0,0 +1,80 @@ +package filesystem + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/fugue/zim/store" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Confirm Head and Get requests on a unknown key result in a +// "not found" error +func TestMissingKey(t *testing.T) { + + var err error + var meta store.ItemMeta + ctx := context.Background() + + cacheDir, err := ioutil.TempDir("", "zim-test-") + require.Nil(t, err) + defer os.RemoveAll(cacheDir) + + f, err := ioutil.TempFile("", "zim-tmp-file-") + require.Nil(t, err) + f.Close() + defer os.RemoveAll(f.Name()) + + fs := New(cacheDir) + + meta, err = fs.Head(ctx, "/missing/key") + assert.Equal(t, "not found: /missing/key", err.Error()) + assert.Equal(t, 0, len(meta.Meta)) + + err = fs.Get(ctx, "/missing/key", f.Name()) + assert.Equal(t, "not found: /missing/key", err.Error()) + assert.Equal(t, 0, len(meta.Meta)) +} + +// Confirm that a Get works on a file we put in the store manually +func TestPresentKey(t *testing.T) { + + inputFile := "test_fixture.txt" + outputFile := "tmp_output.txt" + key := "abcdef" + + ctx := context.Background() + + cacheDir, err := ioutil.TempDir("", "zim-test-") + require.Nil(t, err) + defer os.RemoveAll(cacheDir) + + // Remove output file in case a previous test run created it + os.RemoveAll(outputFile) + + fs := New(cacheDir) + + // Add a test fixture file to the store + require.Nil(t, fs.Put(ctx, key, inputFile, map[string]string{"foo": "bar"})) + + // Key abcdef should be nested at /ab/cd/abcdef + _, err = os.Stat(filepath.Join(cacheDir, "ab", "cd", key)) + require.Nil(t, err) + + // Confirm Head succeeds and returns the item metadata + item, err := fs.Head(ctx, key) + require.Nil(t, err) + require.Equal(t, map[string]string{"foo": "bar"}, item.Meta) + + // Retrieve the item and store it in the local directory as test_get.txt + require.Nil(t, fs.Get(ctx, key, outputFile)) + + // Confirm the resulting file exists and has the expected contents + bytes, err := ioutil.ReadFile(outputFile) + require.Nil(t, err) + require.Equal(t, "The quick brown fox\njumps over the lazy dog", string(bytes)) +} diff --git a/store/filesystem/test_fixture.txt b/store/filesystem/test_fixture.txt new file mode 100644 index 0000000..50ea977 --- /dev/null +++ b/store/filesystem/test_fixture.txt @@ -0,0 +1,2 @@ +The quick brown fox +jumps over the lazy dog \ No newline at end of file diff --git a/store/http.go b/store/http/http.go similarity index 87% rename from store/http.go rename to store/http/http.go index 123b212..57e9a7d 100644 --- a/store/http.go +++ b/store/http/http.go @@ -11,13 +11,13 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -package store + +package http import ( "bytes" "context" "encoding/json" - "errors" "fmt" "io" "io/ioutil" @@ -28,6 +28,7 @@ import ( "strings" "github.com/fugue/zim/sign" + "github.com/fugue/zim/store" "github.com/hashicorp/go-retryablehttp" ) @@ -37,8 +38,8 @@ type httpStore struct { client *retryablehttp.Client } -// NewHTTP returns an HTTP storage interface -func NewHTTP(signingURL, authToken string) Store { +// New returns an HTTP storage interface +func New(signingURL, authToken string) store.Store { client := retryablehttp.NewClient() client.RetryMax = 4 client.Logger = nil @@ -175,27 +176,15 @@ func (s *httpStore) Put(ctx context.Context, key, src string, meta map[string]st return nil } -// List contents of the Store -func (s *httpStore) List(ctx context.Context, prefix string) ([]string, error) { - return []string{}, errors.New("Unimplemented") -} - // Head checks if the item exists in the store -func (s *httpStore) Head(ctx context.Context, key string) (Item, error) { +func (s *httpStore) Head(ctx context.Context, key string) (store.ItemMeta, error) { input := sign.Input{Name: key} output, err := s.requestHead(ctx, &input) if err != nil { - return Item{}, err + return store.ItemMeta{}, err } if output.ETag == "" { - return Item{}, NotFound(fmt.Sprintf("Not found: %s", key)) - } - return Item{ - Key: output.Key, - Version: output.Version, - ETag: output.ETag, - Size: output.Size, - LastModified: output.LastModified, - Meta: output.Metadata, - }, nil + return store.ItemMeta{}, store.NotFound(fmt.Sprintf("Not found: %s", key)) + } + return store.ItemMeta{Meta: output.Metadata}, nil } diff --git a/store/store.go b/store/store.go index 49945be..43c854c 100644 --- a/store/store.go +++ b/store/store.go @@ -11,14 +11,11 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + package store import ( "context" - "os" - "time" - - "github.com/aws/aws-sdk-go/aws/awserr" ) // NotFound indicates an object does not exist @@ -26,19 +23,9 @@ type NotFound string func (e NotFound) Error() string { return string(e) } -// Item in storage -type Item struct { - Key string - ETag string - Version string - Size int64 - LastModified time.Time - Meta map[string]string -} - -// Exists returns true if it is a valid Item -func (item Item) Exists() bool { - return item.Key != "" && item.ETag != "" && item.Size > 0 +// ItemMeta contains metadata for an item in storage +type ItemMeta struct { + Meta map[string]string `json:"meta"` } // Store is an interface to Get and Put items into storage @@ -50,28 +37,6 @@ type Store interface { // Put an item in the Store Put(ctx context.Context, key, src string, meta map[string]string) error - // List contents of the Store - List(ctx context.Context, prefix string) ([]string, error) - // Head checks if the item exists in the store - Head(ctx context.Context, key string) (Item, error) -} - -// Returns the size and last modified timestamp for the file at the given path -func fileStat(name string) (int64, time.Time) { - info, err := os.Stat(name) - if err != nil { - return 0, time.Time{} - } - return info.Size(), info.ModTime() -} - -func isNotFound(err error) bool { - if aerr, ok := err.(awserr.Error); ok { - switch aerr.Code() { - case "NotFound": - return true - } - } - return false + Head(ctx context.Context, key string) (ItemMeta, error) }