From ac8735907af63882480bc72225716fcd670aca26 Mon Sep 17 00:00:00 2001 From: Brandon Duffany Date: Mon, 25 Nov 2024 22:00:25 -0500 Subject: [PATCH] Extract only output paths from the action workspace --- .../containers/firecracker/firecracker.go | 29 +++- .../firecracker/firecracker_test.go | 24 ++- .../overlayfs/overlayfs_test.go | 3 +- .../remote_execution/platform/platform.go | 4 +- enterprise/server/util/ext4/ext4.go | 24 ++- enterprise/server/util/ext4/ext4_test.go | 159 +++++++++++++++++- server/testutil/testfs/BUILD | 1 + server/testutil/testfs/testfs.go | 52 +++++- 8 files changed, 268 insertions(+), 28 deletions(-) diff --git a/enterprise/server/remote_execution/containers/firecracker/firecracker.go b/enterprise/server/remote_execution/containers/firecracker/firecracker.go index f9cd4d310350..63d9d9dfc842 100644 --- a/enterprise/server/remote_execution/containers/firecracker/firecracker.go +++ b/enterprise/server/remote_execution/containers/firecracker/firecracker.go @@ -1690,7 +1690,8 @@ func (c *FirecrackerContainer) copyOutputsToWorkspace(ctx context.Context) error } defer m.Unmount() } else { - if err := ext4.ImageToDirectory(ctx, workspaceExt4Path, wsDir); err != nil { + outputPaths := workspacePathsToExtract(c.task) + if err := ext4.ImageToDirectory(ctx, workspaceExt4Path, wsDir, outputPaths); err != nil { return err } } @@ -1699,10 +1700,6 @@ func (c *FirecrackerContainer) copyOutputsToWorkspace(ctx context.Context) error if err != nil { return err } - // Skip filesystem layerfs write-layer files. - if strings.HasPrefix(path, "bbvmroot/") || strings.HasPrefix(path, "bbvmwork/") { - return nil - } targetLocation := filepath.Join(c.actionWorkingDir, path) alreadyExists, err := disk.FileExists(ctx, targetLocation) if err != nil { @@ -2870,3 +2867,25 @@ func getRandomNUMANode() (int, error) { return nodes[rand.IntN(len(nodes))], nil } + +// Returns the paths relative to the workspace root that should be copied back +// to the action workspace directory after execution has completed. +// +// For performance reasons, we only extract the action's declared outputs, +// unless the action is running with preserve-workspace=true. +func workspacePathsToExtract(task *repb.ExecutionTask) []string { + if platform.IsTrue(platform.FindEffectiveValue(task, platform.PreserveWorkspacePropertyName)) { + return []string{"/"} + } + + // Special files + // TODO: declare this list as a constant somewhere? + paths := []string{".BUILDBUDDY_DO_NOT_RECYCLE"} + + // Declared paths + paths = append(paths, task.GetCommand().GetOutputDirectories()...) + paths = append(paths, task.GetCommand().GetOutputFiles()...) + paths = append(paths, task.GetCommand().GetOutputPaths()...) + + return paths +} diff --git a/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go b/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go index 52affbfeb2c0..66c008161677 100644 --- a/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go +++ b/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go @@ -1404,7 +1404,11 @@ func TestFirecrackerRun_ReapOrphanedZombieProcess(t *testing.T) { }, ExecutorConfig: getExecutorConfig(t), } - c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{}, opts) + c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{ + Command: &repb.Command{ + OutputPaths: []string{"sh.pid", "sleep.pid"}, + }, + }, opts) if err != nil { t.Fatal(err) } @@ -1815,7 +1819,11 @@ func TestFirecrackerExecWithRecycledWorkspaceWithDocker(t *testing.T) { }, ExecutorConfig: getExecutorConfig(t), } - c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{}, opts) + c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{ + Command: &repb.Command{ + OutputPaths: []string{"preserves.txt"}, + }, + }, opts) require.NoError(t, err) err = container.PullImageIfNecessary(ctx, env, c, oci.Credentials{}, opts.ContainerImage) require.NoError(t, err) @@ -1988,7 +1996,11 @@ func TestFirecrackerRun_Timeout_DebugOutputIsAvailable(t *testing.T) { }, ExecutorConfig: getExecutorConfig(t), } - c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{}, opts) + c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{ + Command: &repb.Command{ + OutputPaths: []string{"output.txt"}, + }, + }, opts) require.NoError(t, err) cmd := &repb.Command{Arguments: []string{"sh", "-c", ` @@ -2032,7 +2044,11 @@ func TestFirecrackerExec_Timeout_DebugOutputIsAvailable(t *testing.T) { }, ExecutorConfig: getExecutorConfig(t), } - c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{}, opts) + c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{ + Command: &repb.Command{ + OutputPaths: []string{"output.txt"}, + }, + }, opts) require.NoError(t, err) err = container.PullImageIfNecessary(ctx, env, c, oci.Credentials{}, opts.ContainerImage) require.NoError(t, err) diff --git a/enterprise/server/remote_execution/overlayfs/overlayfs_test.go b/enterprise/server/remote_execution/overlayfs/overlayfs_test.go index c527fe86574c..cd96f8345f3b 100644 --- a/enterprise/server/remote_execution/overlayfs/overlayfs_test.go +++ b/enterprise/server/remote_execution/overlayfs/overlayfs_test.go @@ -62,7 +62,7 @@ func TestOverlayWorkspace(t *testing.T) { // removed in t.Cleanup() above, otherwise we get 'device or resource busy' // when attempting to clean up the workspace. t.Cleanup(func() { fileAOverlay.Close() }) - // Remove dir1/b.txt and dir1/child1/. + // Remove dir1's children (b.txt and child1/), but keep dir1 itself. err = os.RemoveAll(filepath.Join(ws, "dir1/b.txt")) require.NoError(t, err) err = os.RemoveAll(filepath.Join(ws, "dir1/child1/")) @@ -101,6 +101,7 @@ func TestOverlayWorkspace(t *testing.T) { lowerdir := ws + ".lower" testfs.AssertExactFileContents(t, lowerdir, map[string]string{ "a.txt": "A-MODIFIED", + "dir1": testfs.EmptyDir, "dir2/d.txt": "D", "dir3/e.txt/child.txt": "child-contents", }) diff --git a/enterprise/server/remote_execution/platform/platform.go b/enterprise/server/remote_execution/platform/platform.go index 4fc19ba49aa4..8eb84f37b820 100644 --- a/enterprise/server/remote_execution/platform/platform.go +++ b/enterprise/server/remote_execution/platform/platform.go @@ -74,7 +74,7 @@ const ( RecycleRunnerPropertyName = "recycle-runner" AffinityRoutingPropertyName = "affinity-routing" RunnerRecyclingMaxWaitPropertyName = "runner-recycling-max-wait" - preserveWorkspacePropertyName = "preserve-workspace" + PreserveWorkspacePropertyName = "preserve-workspace" nonrootWorkspacePropertyName = "nonroot-workspace" overlayfsWorkspacePropertyName = "overlayfs-workspace" cleanWorkspaceInputsPropertyName = "clean-workspace-inputs" @@ -351,7 +351,7 @@ func ParseProperties(task *repb.ExecutionTask) (*Properties, error) { RunnerRecyclingMaxWait: runnerRecyclingMaxWait, EnableVFS: vfsEnabled, IncludeSecrets: boolProp(m, IncludeSecretsPropertyName, false), - PreserveWorkspace: boolProp(m, preserveWorkspacePropertyName, false), + PreserveWorkspace: boolProp(m, PreserveWorkspacePropertyName, false), OverlayfsWorkspace: boolProp(m, overlayfsWorkspacePropertyName, false), NonrootWorkspace: boolProp(m, nonrootWorkspacePropertyName, false), CleanWorkspaceInputs: stringProp(m, cleanWorkspaceInputsPropertyName, ""), diff --git a/enterprise/server/util/ext4/ext4.go b/enterprise/server/util/ext4/ext4.go index c5235de5c45e..539a93464ac1 100644 --- a/enterprise/server/util/ext4/ext4.go +++ b/enterprise/server/util/ext4/ext4.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "syscall" "github.com/buildbuddy-io/buildbuddy/server/util/log" @@ -166,7 +167,10 @@ func isDirEmpty(dir string) (bool, error) { } // ImageToDirectory unpacks an ext4 image into outputDir, which must be empty. -func ImageToDirectory(ctx context.Context, inputFile, outputDir string) error { +// Only the given paths are unpacked. Paths are unpacked recursively, which +// means that they can reference either directories or files. Non-existent +// paths are silently ignored. +func ImageToDirectory(ctx context.Context, inputFile, outputDir string, paths []string) error { empty, err := isDirEmpty(outputDir) if err != nil { return err @@ -174,13 +178,19 @@ func ImageToDirectory(ctx context.Context, inputFile, outputDir string) error { if !empty { return status.FailedPreconditionError("Unpacking image in non-empty directory is unsupported.") } - args := []string{ - "/sbin/debugfs", - inputFile, - "-R", - fmt.Sprintf("rdump \"/\" \"%s\"", outputDir), + requests := make([]string, 0, len(paths)) + for _, p := range paths { + p = filepath.Clean(filepath.Join(outputDir, p)) + p = strings.TrimPrefix(p, filepath.Clean(outputDir)) + parent := filepath.Dir(p) + if err := os.MkdirAll(filepath.Join(outputDir, parent), 0755); err != nil { + return status.InternalErrorf("make parent dir %q: %s", parent, err) + } + requests = append(requests, fmt.Sprintf("rdump %q %q", p, filepath.Join(outputDir, parent))) } - if out, err := exec.CommandContext(ctx, args[0], args[1:]...).CombinedOutput(); err != nil { + cmd := exec.CommandContext(ctx, "/sbin/debugfs", inputFile) + cmd.Stdin = strings.NewReader(strings.Join(requests, "\n")) + if out, err := cmd.CombinedOutput(); err != nil { return status.InternalErrorf("%s: %s", err, out) } return nil diff --git a/enterprise/server/util/ext4/ext4_test.go b/enterprise/server/util/ext4/ext4_test.go index 5d7c7afbcc7f..c5bbf9eec748 100644 --- a/enterprise/server/util/ext4/ext4_test.go +++ b/enterprise/server/util/ext4/ext4_test.go @@ -61,7 +61,7 @@ func TestE2E(t *testing.T) { // Create a new (empty) directory and unpack the image file to the new directory. newRoot := testfs.MakeTempDir(t) - if err := ext4.ImageToDirectory(ctx, imageFile, newRoot); err != nil { + if err := ext4.ImageToDirectory(ctx, imageFile, newRoot, []string{"/"}); err != nil { t.Fatal(err) } @@ -103,3 +103,160 @@ func TestDirectoryToImageAutoSize_DanglingSymlink(t *testing.T) { require.NoError(t, err) } + +func TestImageToDirectory(t *testing.T) { + for _, test := range []struct { + name string + image map[string]string + paths []string + expected map[string]string + }{ + { + name: "passing the root path extracts the whole image", + image: map[string]string{ + "foo/bar/hello.txt": "hello", + "world.txt": "world", + }, + paths: []string{"/"}, + expected: map[string]string{ + "foo/bar/hello.txt": "hello", + "world.txt": "world", + // ext4 recovery dir + "lost+found": testfs.EmptyDir, + }, + }, + { + name: "file paths are allowed", + image: map[string]string{ + "ignore.txt": "IGNORE_ME", + "foo/hello.txt": "hello", + "foo/ignore.txt": "hello", + "foo/bar/ignore.txt": "IGNORE_ME", + }, + paths: []string{"/foo/hello.txt"}, + expected: map[string]string{ + "foo/hello.txt": "hello", + }, + }, + { + name: "directory paths are allowed", + image: map[string]string{ + "parent1/child1/hello.txt": "hello", + "parent1/child2/ignore.txt": "IGNORE_ME", + "parent2/world.txt": "world", + }, + paths: []string{"/parent1/child1", "/parent2/"}, + expected: map[string]string{ + "parent1/child1/hello.txt": "hello", + "parent2/world.txt": "world", + }, + }, + { + name: "nonexistent paths are silently ignored", + image: map[string]string{ + "parent1/child1/hello.txt": "hello", + "world.txt": "world", + }, + paths: []string{ + "/does_not_exist", + "/parent1/does_not_exist", + "/parent1/child1/hello.txt", + }, + expected: map[string]string{ + "parent1/child1/hello.txt": "hello", + }, + }, + { + name: "empty directories are copied", + image: map[string]string{ + "parent1/emptydir1": testfs.EmptyDir, + "parent2/emptydir2": testfs.EmptyDir, + "emptydir3": testfs.EmptyDir, + }, + paths: []string{"/parent1/emptydir1", "/parent2", "/emptydir3"}, + expected: map[string]string{ + "parent1/emptydir1": testfs.EmptyDir, + "parent2/emptydir2": testfs.EmptyDir, + "emptydir3": testfs.EmptyDir, + }, + }, + { + name: "duplicate paths are allowed", + image: map[string]string{ + "parent1/hello.txt": "hello", + "parent2/world.txt": "world", + }, + paths: []string{"/parent1", "/parent1/hello.txt", "/parent2", "/parent2"}, + expected: map[string]string{ + "parent1/hello.txt": "hello", + "parent2/world.txt": "world", + }, + }, + { + name: "paths not beginning with slash are allowed", + image: map[string]string{ + "parent1/hello.txt": "hello", + "parent2/world.txt": "world", + }, + paths: []string{"parent1"}, + expected: map[string]string{ + "parent1/hello.txt": "hello", + }, + }, + { + name: "paths beginning with dotslash are allowed", + image: map[string]string{ + "parent1/hello.txt": "hello", + "parent2/world.txt": "world", + }, + paths: []string{"./parent1"}, + expected: map[string]string{ + "parent1/hello.txt": "hello", + }, + }, + { + name: "empty path is the same as root path", + image: map[string]string{ + "foo/bar/hello.txt": "hello", + "world.txt": "world", + }, + paths: []string{""}, + expected: map[string]string{ + "foo/bar/hello.txt": "hello", + "world.txt": "world", + // ext4 recovery dir + "lost+found": testfs.EmptyDir, + }, + }, + { + name: "dot is the same as root path", + image: map[string]string{ + "foo/bar/hello.txt": "hello", + "world.txt": "world", + }, + paths: []string{"."}, + expected: map[string]string{ + "foo/bar/hello.txt": "hello", + "world.txt": "world", + // ext4 recovery dir + "lost+found": testfs.EmptyDir, + }, + }, + } { + t.Run(test.name, func(t *testing.T) { + ctx := context.Background() + inputDir := testfs.MakeTempDir(t) + testfs.WriteAllFileContents(t, inputDir, test.image) + tmp := testfs.MakeTempDir(t) + imagePath := filepath.Join(tmp, "image.ext4") + err := ext4.DirectoryToImageAutoSize(ctx, inputDir, imagePath) + require.NoError(t, err) + outputDir := testfs.MakeTempDir(t) + + err = ext4.ImageToDirectory(ctx, imagePath, outputDir, test.paths) + require.NoError(t, err) + + testfs.AssertExactFileContents(t, outputDir, test.expected) + }) + } +} diff --git a/server/testutil/testfs/BUILD b/server/testutil/testfs/BUILD index b3c404196d2a..dd1683e1021f 100644 --- a/server/testutil/testfs/BUILD +++ b/server/testutil/testfs/BUILD @@ -7,6 +7,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//server/util/random", + "@com_github_google_go_containerregistry//pkg/v1/empty", "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", "@io_bazel_rules_go//go/runfiles:go_default_library", diff --git a/server/testutil/testfs/testfs.go b/server/testutil/testfs/testfs.go index 096467dfedfe..437ee45cb366 100644 --- a/server/testutil/testfs/testfs.go +++ b/server/testutil/testfs/testfs.go @@ -15,6 +15,18 @@ import ( "github.com/stretchr/testify/require" ) +const ( + // EmptyDir is a representation of an empty directory for use in functions + // that represent filesystem contents as a map. + // + // Example: + // testfs.WriteAllFileContents(t, dest, map[string]string{ + // "greeting.txt": "Hello world", + // "emptydir": testfs.EmptyDir, + // }) + EmptyDir = "" +) + // RunfilePath returns the path to the given bazel runfile. func RunfilePath(t testing.TB, path string) string { path, err := runfiles.Rlocation(path) @@ -130,7 +142,13 @@ func MakeExecutable(t testing.TB, rootDir string, path string) { func WriteAllFileContents(t testing.TB, rootDir string, contents map[string]string) { for relPath, content := range contents { path := filepath.Join(rootDir, relPath) - if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { + if content == EmptyDir { + if err := os.MkdirAll(path, 0755); err != nil { + assert.FailNow(t, "failed to create empty dir", err) + } + continue + } + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { assert.FailNow(t, "failed to create parent dir for file", err) } if err := os.WriteFile(path, []byte(content), 0644); err != nil { @@ -175,9 +193,9 @@ func Exists(t testing.TB, rootDir, path string) bool { } // AssertExactFileContents checks that the given mapping exactly represents the -// files in rootDir. The mapping is keyed by path relative to rootDir. -// Empty dirs and non-regular files (e.g. symlinks) are ignored in the -// comparison. +// contents of rootDir. The mapping is keyed by path relative to rootDir. +// Symlinks are ignored. Empty directories must be listed in the given map +// using EmptyDir as the map value. func AssertExactFileContents(t testing.TB, rootDir string, contents map[string]string) { expectedFilePaths := []string{} for k := range contents { @@ -185,16 +203,34 @@ func AssertExactFileContents(t testing.TB, rootDir string, contents map[string]s } actualFilePaths := []string{} err := filepath.WalkDir(rootDir, func(path string, entry fs.DirEntry, err error) error { - require.NoError(t, err) - if !entry.Type().IsRegular() { + if !entry.Type().IsRegular() && !entry.IsDir() { + // Ignore symlinks and special files for now. return nil } + + var actualContent string + + if entry.IsDir() { + children, err := os.ReadDir(path) + require.NoError(t, err) + // Skip nonempty dirs - they don't need explicit entries + // since they have children. + if len(children) > 0 { + return nil + } + actualContent = EmptyDir + } else { + b, err := os.ReadFile(path) + require.NoError(t, err) + actualContent = string(b) + } + + require.NoError(t, err) relPath := strings.TrimPrefix(path, rootDir+string(os.PathSeparator)) actualFilePaths = append(actualFilePaths, relPath) if content, ok := contents[relPath]; ok { - actualContent, err := os.ReadFile(path) require.NoError(t, err) - assert.Equalf(t, content, string(actualContent), "unexpected contents in %s", relPath) + assert.Equalf(t, content, string(actualContent), "unexpected contents at %s", relPath) } return nil })