From d86099f9cf5287194eaa8288596411d58253451e Mon Sep 17 00:00:00 2001 From: John McBride Date: Tue, 29 Aug 2023 15:10:09 -0600 Subject: [PATCH] feat: Normalizes and validates git URLs before accepting for processing (#38) Signed-off-by: John McBride --- pkg/common/validation.go | 60 ++++++++++++++++++++++++++++ pkg/common/validation_test.go | 73 +++++++++++++++++++++++++++++++++++ pkg/server/server.go | 40 ++++++++++++++++--- 3 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 pkg/common/validation.go create mode 100644 pkg/common/validation_test.go diff --git a/pkg/common/validation.go b/pkg/common/validation.go new file mode 100644 index 0000000..fe7fbc2 --- /dev/null +++ b/pkg/common/validation.go @@ -0,0 +1,60 @@ +package common + +import ( + "fmt" + "net/url" + "strings" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" + "github.com/go-git/go-git/v5/storage/memory" +) + +// IsValidGitRepo returns true if the provided git repo URL is a valid and reachable +// git repository. This is equivalent to running "git ls-remote" on the provided +// URL string. This may result in some unexpected "authentication required" or +// "repository not found" errors which is standard for git to return in these +// situations. +func IsValidGitRepo(repoURL string) (bool, error) { + remoteConfig := &config.RemoteConfig{ + Name: "source", + URLs: []string{ + repoURL, + }, + } + + remote := git.NewRemote(memory.NewStorage(), remoteConfig) + + _, err := remote.List(&git.ListOptions{}) + if err != nil { + return false, fmt.Errorf("could not list remote repository: %s", err.Error()) + } + + return true, nil +} + +// NormalizeGitURL attempts to take a raw git repo URL and ensure it is normalized +// before being validated or entered into the database +func NormalizeGitURL(repoURL string) (string, error) { + parsedURL, err := url.Parse(repoURL) + if err != nil { + return "", err + } + + // Check if it has a valid protocol specified (e.g., https, ssh, git) + if parsedURL.Scheme != "git" && parsedURL.Scheme != "https" && parsedURL.Scheme != "file" { + return "", fmt.Errorf("repo URL missing valid protocol scheme (https, git, file): %s", repoURL) + } + + // Trim trailing slashes + // Example: https://github.com/open-sauced/pizza/ to https://github.com/open-sauced/pizza + trimmedPath := strings.TrimSuffix(parsedURL.Path, "/") + + // Remove .git suffix if present + // Example: https://github.com/open-sauced/pizza.git to https://github.com/open-sauced/pizza + trimmedPath = strings.TrimSuffix(trimmedPath, ".git") + + parsedURL.Path = trimmedPath + + return parsedURL.String(), nil +} diff --git a/pkg/common/validation_test.go b/pkg/common/validation_test.go new file mode 100644 index 0000000..2e75deb --- /dev/null +++ b/pkg/common/validation_test.go @@ -0,0 +1,73 @@ +package common + +import "testing" + +func TestNormalizeGitURL(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + url string + expected string + }{ + { + name: "Fully normalizes", + url: "https://github.com/user/repo.git/", + expected: "https://github.com/user/repo", + }, + { + name: "Removes trailing .git", + url: "https://github.com/user/repo.git", + expected: "https://github.com/user/repo", + }, + { + name: "Removes trailing slash", + url: "https://github.com/user/repo/", + expected: "https://github.com/user/repo", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + normalizedURL, err := NormalizeGitURL(tt.url) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if normalizedURL != tt.expected { + t.Fatalf("normalized URL: %s is not expected: %s", normalizedURL, tt.expected) + } + }) + } +} + +func TestNormalizeGitURLError(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + url string + }{ + { + name: "Missing protocol fails", + url: "github.com/user/repo", + }, + { + name: "Malformed protocol fails", + url: "ht:/github.com/user/repo", + }, + { + name: "Unusable protocol fails", + url: "ssh://github.com/user/repo", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + normalizedURL, err := NormalizeGitURL(tt.url) + if err == nil { + t.Fatalf("expected error, got none: %s", normalizedURL) + } + }) + } +} diff --git a/pkg/server/server.go b/pkg/server/server.go index 9021314..d5ff703 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -12,8 +12,10 @@ import ( "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/transport" "go.uber.org/zap" + "github.com/open-sauced/pizza/oven/pkg/common" "github.com/open-sauced/pizza/oven/pkg/database" "github.com/open-sauced/pizza/oven/pkg/insights" "github.com/open-sauced/pizza/oven/pkg/providers" @@ -73,9 +75,37 @@ func (p PizzaOvenServer) handleRequest(w http.ResponseWriter, r *http.Request) { return } + p.Logger.Debugf("Validating and normalizing repository URL: %s", data.URL) + normalizedRepoURL, err := common.NormalizeGitURL(data.URL) + if err != nil { + p.Logger.Debugf("Could not normalize repo URL %s: %s", data.URL, err.Error()) + http.Error(w, fmt.Sprintf("Could not normalize provided repo URL: %s", err.Error()), http.StatusBadRequest) + return + } + + repoURLendpoint, err := transport.NewEndpoint(normalizedRepoURL) + if err != nil { + p.Logger.Errorf("Could not create git transport endpoint with repo URL %s: %s", data.URL, err.Error()) + http.Error(w, fmt.Sprintf("Could not create git transport endpoint from provided repo URL: %s", err.Error()), http.StatusBadRequest) + return + } + + ok, err := common.IsValidGitRepo(repoURLendpoint.String()) + if !ok { + if err != nil { + p.Logger.Errorf("Error validating repo URL %s: %s", data.URL, err.Error()) + http.Error(w, fmt.Sprintf("Error validating remote git repo URL: %s", err.Error()), http.StatusBadRequest) + return + } + + p.Logger.Debug("Could not validate repo URL %s: %s", data.URL, err.Error()) + http.Error(w, fmt.Sprintf("not valid git repo URL. Expected format protocol://address but got: %s", err.Error()), http.StatusBadRequest) + return + } + w.WriteHeader(http.StatusAccepted) if data.Wait { - err = p.processRepository(data.URL) + err = p.processRepository(repoURLendpoint.String()) if err != nil { p.Logger.Errorf("Could not process repository input: %v with error: %v", r.Body, err) http.Error(w, "Could not process input", http.StatusInternalServerError) @@ -83,7 +113,7 @@ func (p PizzaOvenServer) handleRequest(w http.ResponseWriter, r *http.Request) { } } else { go func() { - err = p.processRepository(data.URL) + err = p.processRepository(repoURLendpoint.String()) if err != nil { p.Logger.Errorf("Could not process repository input: %v with error: %v", r.Body, err) http.Error(w, "Could not process input", http.StatusInternalServerError) @@ -128,7 +158,7 @@ func (p PizzaOvenServer) processRepository(repoURL string) error { p.Logger.Debugf("Getting repo via configured git provider: %s", insight.RepoURLSource) // Use the configured git provider to get the repo - providedRepo, err := p.PizzaGitProvider.FetchRepo(repoURL) + providedRepo, err := p.PizzaGitProvider.FetchRepo(insight.RepoURLSource) if err != nil { return err } @@ -162,7 +192,7 @@ func (p PizzaOvenServer) processRepository(repoURL string) error { return err } - p.Logger.Debugf("Iterating commits in repository: %s", repoURL) + p.Logger.Debugf("Iterating commits in repository: %s", insight.RepoURLSource) err = commitIter.ForEach(func(c *object.Commit) error { // TODO - if the committer and author are not the same, handle both // those users. This is the case where there is a separate committer for @@ -205,6 +235,6 @@ func (p PizzaOvenServer) processRepository(repoURL string) error { return nil }) - p.Logger.Debugf("Finished processing: %s", repoURL) + p.Logger.Debugf("Finished processing: %s", insight.RepoURLSource) return err }