From 78060556a2612542737a0587fb3691033db00b4f Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 9 Feb 2022 16:06:44 +0000 Subject: [PATCH 1/9] Serve a tarball containing the contents of a given directory. This will make it easier to get all logs for a given bug; preventing users needing to run scripts to download all files. - we cannot make the link exist in the directory listing as there are scripts that automate downloads which would pick this up. - Unsure if "?format=tar.gz" is the right option to enable this; I couldn't think of something easy to do but hard to not get picked up by existing automation, and wouldn't conflict with existing filenames. --- logserver.go | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/logserver.go b/logserver.go index 5e18803..7c81ca1 100644 --- a/logserver.go +++ b/logserver.go @@ -17,6 +17,7 @@ limitations under the License. package main import ( + "archive/tar" "compress/gzip" "io" "log" @@ -69,6 +70,7 @@ func (f *logServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { } func serveFile(w http.ResponseWriter, r *http.Request, path string) { + d, err := os.Stat(path) if err != nil { msg, code := toHTTPError(err) @@ -79,9 +81,18 @@ func serveFile(w http.ResponseWriter, r *http.Request, path string) { // for anti-XSS belt-and-braces, set a very restrictive CSP w.Header().Set("Content-Security-Policy", "default-src: none") - // if it's a directory, serve a listing + // if it's a directory, serve a listing or a tarball if d.IsDir() { - log.Println("Serving", path) + format, _ := r.URL.Query()["format"] + if len(format) == 1 && format[0] == "tar.gz" { + log.Println("Serving tarball of", path) + err := serveTarball(w, r, path) + if err != nil { + log.Println("Error", err) + } + return + } + log.Println("Serving directory listing of", path) http.ServeFile(w, r, path) return } @@ -125,6 +136,78 @@ func 
extensionToMimeType(path string) string { return "application/octet-stream" } +// Streams a dynamically created tar.gz file with the contents of the given directory +// Will serve a partial, corrupted response if there is a error partway through the +// operation as we stream the response. +// +// The resultant tarball will contain a single directory containing all the files +// so it can unpack cleanly without overwriting other files. +func serveTarball(w http.ResponseWriter, r *http.Request, dir string) error { + directory, err := os.Open(dir) + if err != nil { + return err + } + // "disposition filename" + dfilename := strings.Trim(r.URL.Path,"/") + dfilename = strings.Replace(dfile, "/","_",-1) + + // There is no application/tgz or similar; return a gzip file as best option. + // This tends to trigger archive type tools, which will then use the filename to + // identify the contents correctly. + w.Header().Set("Content-Type", "application/gzip") + w.Header().Set("Content-Disposition", "attachment; filename=" + dfilename+".tar.gz") + + filenames, err := directory.Readdirnames(-1) + if err != nil { + return err + } + + gzip := gzip.NewWriter(w) + defer gzip.Close() + targz := tar.NewWriter(gzip) + defer targz.Close() + + for _, filename := range filenames { + path := dir + "/" + filename + err := addToArchive(targz, dfilename, path) + if err != nil { + return err + } + } + return nil +} + +// Add a single file into the archive. 
+func addToArchive(targz *tar.Writer, dfilename string, filename string) error { + file, err := os.Open(filename) + if err != nil { + return err + } + defer file.Close() + + info, err := file.Stat() + if err != nil { + return err + } + + header, err := tar.FileInfoHeader(info, info.Name()) + if err != nil { + return err + } + header.Name = dfilename + "/" + info.Name() + + err = targz.WriteHeader(header) + if err != nil { + return err + } + + _, err = io.Copy(targz, file) + if err != nil { + return err + } + return nil +} + func serveGzippedFile(w http.ResponseWriter, r *http.Request, path string, size int64) { w.Header().Set("Content-Type", "text/plain; charset=utf-8") @@ -208,3 +291,5 @@ func containsDotDot(v string) bool { return false } func isSlashRune(r rune) bool { return r == '/' || r == '\\' } + + From 2a4434281c2a2e3a5e294aa004ba4ab6f491e79f Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 9 Feb 2022 16:10:55 +0000 Subject: [PATCH 2/9] Fix whitespace --- logserver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logserver.go b/logserver.go index 7c81ca1..4b3cacd 100644 --- a/logserver.go +++ b/logserver.go @@ -155,7 +155,7 @@ func serveTarball(w http.ResponseWriter, r *http.Request, dir string) error { // This tends to trigger archive type tools, which will then use the filename to // identify the contents correctly. 
w.Header().Set("Content-Type", "application/gzip") - w.Header().Set("Content-Disposition", "attachment; filename=" + dfilename+".tar.gz") + w.Header().Set("Content-Disposition", "attachment; filename=" + dfilename + ".tar.gz") filenames, err := directory.Readdirnames(-1) if err != nil { From f318399536ea7c4ce0db41376b42c61b1da21173 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 9 Feb 2022 16:11:37 +0000 Subject: [PATCH 3/9] changelog.d --- changelog.d/53.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/53.feature diff --git a/changelog.d/53.feature b/changelog.d/53.feature new file mode 100644 index 0000000..83f1025 --- /dev/null +++ b/changelog.d/53.feature @@ -0,0 +1 @@ +Provide ?format=tar.gz option on directory listings to download tarball. From a2caf1c5469fcd3eed0a4c3406bb6d1c3c063c3d Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 9 Feb 2022 16:12:05 +0000 Subject: [PATCH 4/9] More whitespace --- logserver.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/logserver.go b/logserver.go index 4b3cacd..cd645f3 100644 --- a/logserver.go +++ b/logserver.go @@ -291,5 +291,3 @@ func containsDotDot(v string) bool { return false } func isSlashRune(r rune) bool { return r == '/' || r == '\\' } - - From 7b2d70a3c93a4b3f174d60eddf1033619017aed4 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Wed, 9 Feb 2022 16:31:56 +0000 Subject: [PATCH 5/9] Fix last minute refactor fail. 
--- logserver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logserver.go b/logserver.go index cd645f3..b35de40 100644 --- a/logserver.go +++ b/logserver.go @@ -149,7 +149,7 @@ func serveTarball(w http.ResponseWriter, r *http.Request, dir string) error { } // "disposition filename" dfilename := strings.Trim(r.URL.Path,"/") - dfilename = strings.Replace(dfile, "/","_",-1) + dfilename = strings.Replace(dfilename, "/","_",-1) // There is no application/tgz or similar; return a gzip file as best option. // This tends to trigger archive type tools, which will then use the filename to From 714cc448077ea7035a1e0dec9e2b6c4e708dcf2a Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Thu, 17 Feb 2022 11:37:05 +0000 Subject: [PATCH 6/9] Update documentation --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a4112bc..4c63c41 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ Serves submitted bug reports. Protected by basic HTTP auth using the username/password provided in the environment. A browsable list, collated by report submission date and time. +A whole directory can be downloaded as a tarball by appending the parameter `?format=tar.gz` to the end of the URL path + ### POST `/api/submit` Submission endpoint: this is where applications should send their reports. From 77e66be90f5f052ee30386bcb52b41d5eb63326a Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Thu, 17 Feb 2022 11:38:36 +0000 Subject: [PATCH 7/9] Guard against including directories in tarball, handle http errors better, add documentation. 
--- logserver.go | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/logserver.go b/logserver.go index b35de40..e92aac8 100644 --- a/logserver.go +++ b/logserver.go @@ -70,7 +70,6 @@ func (f *logServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { } func serveFile(w http.ResponseWriter, r *http.Request, path string) { - d, err := os.Stat(path) if err != nil { msg, code := toHTTPError(err) @@ -88,6 +87,8 @@ func serveFile(w http.ResponseWriter, r *http.Request, path string) { log.Println("Serving tarball of", path) err := serveTarball(w, r, path) if err != nil { + msg, code := toHTTPError(err) + http.Error(w, msg, code) log.Println("Error", err) } return @@ -142,12 +143,20 @@ func extensionToMimeType(path string) string { // // The resultant tarball will contain a single directory containing all the files // so it can unpack cleanly without overwriting other files. +// +// Errors are only returned if generated before the tarball has started being +// written to the ResponseWriter func serveTarball(w http.ResponseWriter, r *http.Request, dir string) error { directory, err := os.Open(dir) if err != nil { return err } - // "disposition filename" + + // Creates a "disposition filename" + // Take a URL.path like `/2022-01-10/184843-BZZXEGYH/` + // and removes leading and trailing `/` and replaces internal `/` with `_` + // to form a suitable filename for use in the content-disposition header + // dfilename would turn into `2022-01-10_184843-BZZXEGYH` dfilename := strings.Trim(r.URL.Path,"/") dfilename = strings.Replace(dfilename, "/","_",-1) @@ -157,7 +166,7 @@ func serveTarball(w http.ResponseWriter, r *http.Request, dir string) error { w.Header().Set("Content-Type", "application/gzip") w.Header().Set("Content-Disposition", "attachment; filename=" + dfilename + ".tar.gz") - filenames, err := directory.Readdirnames(-1) + files, err := directory.Readdir(-1) if err != nil { return err } @@ -167,11 +176,25 @@ func 
serveTarball(w http.ResponseWriter, r *http.Request, dir string) error { targz := tar.NewWriter(gzip) defer targz.Close() - for _, filename := range filenames { - path := dir + "/" + filename + + for _, file := range files { + if file.IsDir() { + // We avoid including nested directories + // This will result in requests for directories with only directories in + // to return an empty tarball instead of recursively including directories. + // This helps the server remain performant as a download of 'everything' would be slow + continue + } + path := dir + "/" + file.Name() + // We use the existing disposition filename to create a base directory structure for the files + // so when they are unpacked, they are grouped in a unique folder on disk err := addToArchive(targz, dfilename, path) if err != nil { - return err + // From this point we assume that data may have been sent to the client already. + // We therefore do not http.Error() after this point, instead closing the stream and + // allowing the client to deal with a partial file as if there was a network issue. 
+ log.Println("Error streaming tarball", err) + return nil } } return nil From ba8725a3aa5f3d53f36e2222bc99ab15fedca4f6 Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Thu, 17 Feb 2022 11:47:21 +0000 Subject: [PATCH 8/9] Factor out serveDirectory into its own method --- logserver.go | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/logserver.go b/logserver.go index e92aac8..4261151 100644 --- a/logserver.go +++ b/logserver.go @@ -69,6 +69,7 @@ func (f *logServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { serveFile(w, r, upath) } + func serveFile(w http.ResponseWriter, r *http.Request, path string) { d, err := os.Stat(path) if err != nil { @@ -82,20 +83,7 @@ func serveFile(w http.ResponseWriter, r *http.Request, path string) { // if it's a directory, serve a listing or a tarball if d.IsDir() { - format, _ := r.URL.Query()["format"] - if len(format) == 1 && format[0] == "tar.gz" { - log.Println("Serving tarball of", path) - err := serveTarball(w, r, path) - if err != nil { - msg, code := toHTTPError(err) - http.Error(w, msg, code) - log.Println("Error", err) - } - return - } - log.Println("Serving directory listing of", path) - http.ServeFile(w, r, path) - return + serveDirectory(w, r, path) } // if it's a gzipped log file, serve it as text @@ -137,6 +125,24 @@ func extensionToMimeType(path string) string { return "application/octet-stream" } +// Chooses to serve either a directory listing or tarball based on the 'format' parameter. 
+func serveDirectory(w http.ResponseWriter, r *http.Request, path string) { + format, _ := r.URL.Query()["format"] + if len(format) == 1 && format[0] == "tar.gz" { + log.Println("Serving tarball of", path) + err := serveTarball(w, r, path) + if err != nil { + msg, code := toHTTPError(err) + http.Error(w, msg, code) + log.Println("Error", err) + } + return + } + log.Println("Serving directory listing of", path) + http.ServeFile(w, r, path) + return +} + // Streams a dynamically created tar.gz file with the contents of the given directory // Will serve a partial, corrupted response if there is a error partway through the // operation as we stream the response. From 1137cb2c04c5bfbcb0e1848c84586ec2ed33197a Mon Sep 17 00:00:00 2001 From: Michael Kaye <1917473+michaelkaye@users.noreply.github.com> Date: Tue, 22 Feb 2022 15:40:46 +0000 Subject: [PATCH 9/9] Correct the use of return --- logserver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logserver.go b/logserver.go index 4261151..5f10d32 100644 --- a/logserver.go +++ b/logserver.go @@ -84,6 +84,7 @@ func serveFile(w http.ResponseWriter, r *http.Request, path string) { // if it's a directory, serve a listing or a tarball if d.IsDir() { serveDirectory(w, r, path) + return } // if it's a gzipped log file, serve it as text @@ -140,7 +141,6 @@ func serveDirectory(w http.ResponseWriter, r *http.Request, path string) { } log.Println("Serving directory listing of", path) http.ServeFile(w, r, path) - return } // Streams a dynamically created tar.gz file with the contents of the given directory