From f333d497669f5063206033ca406435b724280dd9 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Wed, 10 Jan 2018 22:26:39 +1100
Subject: [PATCH 1/5] oci: tar generate: disallow .wh. prefix files

The OCI specification makes it clear that we MUST NOT allow users to
have a rootfs that contains '.wh.'-prefixed files. While we cannot
prevent them from creating such a rootfs, we can abort when we see it.
Also add a test to ensure that this doesn't regress.

In addition we error out if we see opaque whiteouts. This is a temporary
patch in this series, and will be replaced with full opaque whiteout
support in a future commit.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 oci/layer/tar_extract.go  | 15 ++++++++++++---
 oci/layer/tar_generate.go | 20 ++++++++++++++++++++
 test/repack.bats          | 30 ++++++++++++++++++++++++------
 3 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/oci/layer/tar_extract.go b/oci/layer/tar_extract.go
index 96027afff..e018f6fa4 100644
--- a/oci/layer/tar_extract.go
+++ b/oci/layer/tar_extract.go
@@ -231,9 +231,17 @@ func (te *tarExtractor) unpackEntry(root string, hdr *tar.Header, r io.Reader) (
 	// Typeflag, expecting that the path is the only thing that matters in a
 	// whiteout entry.
 	if strings.HasPrefix(file, whPrefix) {
+		isOpaque := file == whOpaque
 		file = strings.TrimPrefix(file, whPrefix)
 		path = filepath.Join(dir, file)
 
+		// XXX: We currently don't have any way of handling opaque files if
+		//      they occur after other entries inside the path. So we have to
+		//      error out (for now).
+		if isOpaque {
+			return errors.Errorf("unsupported whiteout type: %s", hdr.Name)
+		}
+
 		// Unfortunately we can't just stat the file here, because if we hit a
 		// parent directory whiteout earlier than this one then stating here
 		// would fail. The best solution would be to keep a list of whiteouts
@@ -245,6 +253,7 @@ func (te *tarExtractor) unpackEntry(root string, hdr *tar.Header, r io.Reader) (
 		if err := te.fsEval.RemoveAll(path); err != nil {
 			return errors.Wrap(err, "whiteout remove all")
 		}
+
 		return nil
 	}
 
@@ -284,9 +293,9 @@ func (te *tarExtractor) unpackEntry(root string, hdr *tar.Header, r io.Reader) (
 	// In order to avoid potential permission or clobbering issues, just do the
 	// noble thing and massacre whatever path points to (*unless* it's a
 	// directory). This is necessary to avoid permission errors with os.Create
-	// and similar issues. Note that this technically might break (the
-	// currently undefined) hardlink semantics for replaced inodes (a new layer
-	// will cause old hard-links to no longer be hard-links to the new inode).
+	// and similar issues, as well as to remain fully spec-compliant (note that
+	// this will cause hard-links in the "lower" layer to not be able to point
+	// to "upper" layer inodes).
 	//
 	// Ideally we would also handle the case where a TarLink entry exists
 	// before its target entry in the same layer (as this logic would
diff --git a/oci/layer/tar_generate.go b/oci/layer/tar_generate.go
index 387947314..dc479f06a 100644
--- a/oci/layer/tar_generate.go
+++ b/oci/layer/tar_generate.go
@@ -22,6 +22,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 
 	"github.com/apex/log"
@@ -133,6 +134,13 @@ func (tg *tarGenerator) AddFile(name, path string) error {
 	}
 	hdr.Name = name
 
+	// Make sure that we don't include any files with the name ".wh.". This
+	// will almost certainly confuse some users (unfortunately) but there's
+	// nothing we can do to store such files on-disk.
+	if strings.HasPrefix(filepath.Base(name), whPrefix) {
+		return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name)
+	}
+
 	// FIXME: Do we need to ensure that the parent paths have all been added to
 	//        the archive? I haven't found any tar specification that makes
 	//        this mandatory, but I have a feeling that some people might rely
@@ -225,8 +233,15 @@ func (tg *tarGenerator) AddFile(name, path string) error {
 	return nil
 }
 
+// whPrefix is the whiteout prefix, which is used to signify "special" files in
+// an OCI image layer archive. An expanded filesystem image cannot contain
+// files that have a basename starting with this prefix.
 const whPrefix = ".wh."
 
+// whOpaque is the *full* basename of a special file which indicates that all
+// siblings in a directory are to be dropped in the "lower" layer.
+const whOpaque = whPrefix + whPrefix + ".opq"
+
 // AddWhiteout adds a whiteout file for the given name inside the tar archive.
 // It's not recommended to add a file with AddFile and then white it out.
 func (tg *tarGenerator) AddWhiteout(name string) error {
@@ -240,6 +255,11 @@ func (tg *tarGenerator) AddWhiteout(name string) error {
 	whiteout := filepath.Join(dir, whPrefix+file)
 	timestamp := time.Now()
 
+	// Disallow having a whiteout of a whiteout, purely for our own sanity.
+	if strings.HasPrefix(file, whPrefix) {
+		return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name)
+	}
+
 	// Add a dummy header for the whiteout file.
 	if err := tg.tw.WriteHeader(&tar.Header{
 		Name:       whiteout,
diff --git a/test/repack.bats b/test/repack.bats
index 8de302ffe..fc72de996 100644
--- a/test/repack.bats
+++ b/test/repack.bats
@@ -151,14 +151,32 @@ function teardown() {
 	[ -z "$output" ]
 
 	# Just for sanity, check that everything looks okay.
-	! [ -e "$BUNDLE_A/rootfs/etc" ]
-	! [ -e "$BUNDLE_A/rootfs/bin/sh" ]
-	! [ -e "$BUNDLE_A/rootfs/usr/bin/env" ]
+	! [ -e "$BUNDLE_B/rootfs/etc" ]
+	! [ -e "$BUNDLE_B/rootfs/bin/sh" ]
+	! [ -e "$BUNDLE_B/rootfs/usr/bin/env" ]
 
 	# Make sure that the new layer is a non-empty_layer.
 	umoci stat --image "${IMAGE}:${TAG}-new" --json
 	[ "$status" -eq 0 ]
 	[[ "$(echo "$output" | jq -SM '.history[-1].empty_layer')" == "null" ]]
+
+	# Try to create a layer containing a ".wh." file.
+	mkdir -p "$BUNDLE_B/rootfs/whiteout_test"
+	echo "some data" > "$BUNDLE_B/rootfs/whiteout_test/.wh. THIS IS A TEST "
+
+	# Repacking a rootfs with a '.wh.' file *must* fail.
+	umoci repack --image "${IMAGE}:${TAG}-new2" "$BUNDLE_B"
+	[ "$status" -ne 0 ]
+	image-verify "${IMAGE}"
+
+	# Try to create a layer containing a ".wh." directory.
+	rm -rf "$BUNDLE_B/rootfs/whiteout_test"
+	mkdir -p "$BUNDLE_B/rootfs/.wh.another_whiteout"
+
+	# Repacking a rootfs with a '.wh.' directory *must* fail.
+	umoci repack --image "${IMAGE}:${TAG}-new3" "$BUNDLE_B"
+	[ "$status" -ne 0 ]
+	image-verify "${IMAGE}"
 }
 
 @test "umoci repack [replace]" {
@@ -202,9 +220,9 @@ function teardown() {
 	[ -z "$output" ]
 
 	# Just for sanity, check that everything looks okay.
-	[ -f "$BUNDLE_A/rootfs/etc" ]
-	[ -d "$BUNDLE_A/rootfs/bin/sh" ]
-	[ -L "$BUNDLE_A/rootfs/usr/bin/env" ]
+	[ -f "$BUNDLE_B/rootfs/etc" ]
+	[ -d "$BUNDLE_B/rootfs/bin/sh" ]
+	[ -L "$BUNDLE_B/rootfs/usr/bin/env" ]
 
 	# Make sure that the new layer is a non-empty_layer.
 	umoci stat --image "${IMAGE}:${TAG}" --json

From 56be14a72defd94283f23430134d64d989806e56 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Tue, 6 Mar 2018 00:35:00 +1100
Subject: [PATCH 2/5] oci: tar extract: simplify clobbering code

The previous clobbering code was overly complicated and duplicated.
While this is technically a no-op change, it makes it clearer what the
clobbering semantics actually are (everything gets clobbered except
previously-directories that will remain a directory after the
extraction).

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 oci/layer/tar_extract.go | 38 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/oci/layer/tar_extract.go b/oci/layer/tar_extract.go
index e018f6fa4..76bfe248c 100644
--- a/oci/layer/tar_extract.go
+++ b/oci/layer/tar_extract.go
@@ -260,24 +260,12 @@ func (te *tarExtractor) unpackEntry(root string, hdr *tar.Header, r io.Reader) (
 	// Get information about the path. This has to be done after we've dealt
 	// with whiteouts because it turns out that lstat(2) will return EPERM if
 	// you try to stat a whiteout on AUFS.
-	hdrFi := hdr.FileInfo()
 	fi, err := te.fsEval.Lstat(path)
 	if err != nil {
 		// File doesn't exist, just switch fi to the file header.
 		fi = hdr.FileInfo()
 	}
 
-	// If the type of the file has changed, there's nothing we can do other
-	// than just remove the old path and replace it.
-	// XXX: Is this actually valid according to the spec? Do you need to have a
-	//      whiteout in this case, or can we just assume that a change in the
-	//      type is reason enough to purge the old type.
-	if hdrFi.Mode()&os.ModeType != fi.Mode()&os.ModeType {
-		if err := te.fsEval.RemoveAll(path); err != nil {
-			return errors.Wrap(err, "replace removeall")
-		}
-	}
-
 	// Attempt to create the parent directory of the path we're unpacking.
 	// We do a MkdirAll here because even though you need to have a tar entry
 	// for every component of a new path, applyMetadata will correct any
@@ -289,21 +277,21 @@ func (te *tarExtractor) unpackEntry(root string, hdr *tar.Header, r io.Reader) (
 		return errors.Wrap(err, "mkdir parent")
 	}
 
-	// At this moment, we have sorted out all other special cases of the path.
-	// In order to avoid potential permission or clobbering issues, just do the
-	// noble thing and massacre whatever path points to (*unless* it's a
-	// directory). This is necessary to avoid permission errors with os.Create
-	// and similar issues, as well as to remain fully spec-compliant (note that
-	// this will cause hard-links in the "lower" layer to not be able to point
-	// to "upper" layer inodes).
+	// We remove whatever existed at the old path to clobber it so that
+	// creating a new path will not break. The only exception is if the path is
+	// a directory in both the layer and the current filesystem, in which case
+	// we don't delete it for obvious reasons. In all other cases we clobber.
 	//
-	// Ideally we would also handle the case where a TarLink entry exists
-	// before its target entry in the same layer (as this logic would
-	// technically break that), but it's not clear whether such an archive
-	// would be technically considered a valid tar archive.
-	if hdr.Typeflag != tar.TypeDir {
+	// Note that this will cause hard-links in the "lower" layer to not be able
+	// to point to "upper" layer inodes even if the extracted type is the same
+	// as the old one, however it is not clear whether this is something a user
+	// would expect anyway. In addition, this will incorrectly deal with a
+	// TarLink that is present before the "upper" entry in the layer but the
+	// "lower" file still exists (so the hard-link would point to the old
+	// inode). It's not clear if such an archive is actually valid though.
+	if !fi.IsDir() || hdr.Typeflag != tar.TypeDir {
 		if err := te.fsEval.RemoveAll(path); err != nil {
-			return errors.Wrap(err, "clobber old")
+			return errors.Wrap(err, "clobber old path")
 		}
 	}
 

From a150698fa1da37458cb4092130b79f56da2ba5d3 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Sun, 25 Feb 2018 23:02:41 +1100
Subject: [PATCH 3/5] pkg: unpriv: simplify RemoveAll

Rather than looping over Readdirnames, just get them all in the first
place. The Go stdlib appears to use both the "get all" and "get some"
interfaces of Readdirnames for similar operations (RemoveAll and Walk
respectively).

It doesn't make sense to use different operations, as they also make the
implementation seem more complicated than necessary. Also refactor out
the Readdirnames code so that it can be used by Walk in a future patch.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 pkg/unpriv/unpriv.go | 104 ++++++++++++++++++++++++++-----------------
 1 file changed, 62 insertions(+), 42 deletions(-)

diff --git a/pkg/unpriv/unpriv.go b/pkg/unpriv/unpriv.go
index 5213d963d..ec04b5faa 100644
--- a/pkg/unpriv/unpriv.go
+++ b/pkg/unpriv/unpriv.go
@@ -19,7 +19,6 @@ package unpriv
 
 import (
 	"archive/tar"
-	"io"
 	"os"
 	"path/filepath"
 	"strings"
@@ -56,6 +55,11 @@ func splitpath(path string) []string {
 	return parts
 }
 
+// WrapFunc is a function that can be passed to Wrap. It takes a path (and
+// presumably operates on it -- since Wrap only ensures that the path given is
+// resolvable) and returns some form of error.
+type WrapFunc func(path string) error
+
 // Wrap will wrap a given function, and call it in a context where all of the
 // parent directories in the given path argument are such that the path can be
 // resolved (you may need to make your own changes to the path to make it
@@ -63,7 +67,7 @@ func splitpath(path string) []string {
 // if the error returned is such that !os.IsPermission(err), then no trickery
 // will be performed. If fn returns an error, so will this function. All of the
 // trickery is reverted when this function returns (which is when fn returns).
-func Wrap(path string, fn func(path string) error) error {
+func Wrap(path string, fn WrapFunc) error {
 	// FIXME: Should we be calling fn() here first?
 	if err := fn(path); err == nil || !os.IsPermission(errors.Cause(err)) {
 		return err
@@ -299,10 +303,50 @@ func Remove(path string) error {
 	return errors.Wrap(Wrap(path, os.Remove), "unpriv.remove")
 }
 
-// RemoveAll is similar to os.RemoveAll but in order to implement it properly
-// all of the internal functions were wrapped with unpriv.Wrap to make it
-// possible to remove a path (even if it has child paths) even if you do not
-// currently have enough access bits.
+// foreachSubpath executes WrapFunc for each child of the given path (not
+// including the path itself). If path is not a directory, then WrapFunc will
+// not be called and no error will be returned. This should be called within a
+// context where path has already been made resolveable. If WrapFunc returns an
+// error, the first error is returned and iteration is halted.
+func foreachSubpath(path string, wrapFn WrapFunc) error {
+	// Is the path a directory?
+	fi, err := os.Lstat(path)
+	if err != nil {
+		return errors.WithStack(err)
+	}
+	if !fi.IsDir() {
+		return nil
+	}
+
+	// Open the directory.
+	fd, err := Open(path)
+	if err != nil {
+		return errors.WithStack(err)
+	}
+	defer fd.Close()
+
+	// We need to change the mode to Readdirnames. We don't need to worry about
+	// permissions because we're already in a context with filepath.Dir(path)
+	// is at least a+rx.
+	os.Chmod(path, fi.Mode()|0400)
+	defer fiRestore(path, fi)
+
+	// Remove contents recursively.
+	names, err := fd.Readdirnames(-1)
+	if err != nil {
+		return errors.WithStack(err)
+	}
+	for _, name := range names {
+		if err := wrapFn(filepath.Join(path, name)); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// RemoveAll is similar to os.RemoveAll but with all of the internal functions
+// wrapped with unpriv.Wrap to make it possible to remove a path (even if it
+// has child paths) even if you do not currently have enough access bits.
 func RemoveAll(path string) error {
 	return errors.Wrap(Wrap(path, func(path string) error {
 		// If remove works, we're done.
@@ -324,49 +368,25 @@ func RemoveAll(path string) error {
 		if !fi.IsDir() {
 			return errors.Wrap(err, "remove non-directory")
 		}
-
-		// Open the directory.
-		fd, err := Open(path)
-		if err != nil {
-			// We hit a race, but don't worry about it.
-			if os.IsNotExist(errors.Cause(err)) {
-				err = nil
-			}
-			return errors.Wrap(err, "opendir")
-		}
-
-		// We need to change the mode to Readdirnames. We don't need to worry
-		// about permissions because we're already in a context with
-		// filepath.Dir(path) is writeable.
-		os.Chmod(path, fi.Mode()|0400)
-		defer fiRestore(path, fi)
-
-		// Remove contents recursively.
 		err = nil
-		for {
-			names, err1 := fd.Readdirnames(128)
-			for _, name := range names {
-				err1 := RemoveAll(filepath.Join(path, name))
-				if err == nil {
-					err = err1
-				}
-			}
-			if err1 == io.EOF {
-				break
-			}
+
+		err1 := foreachSubpath(path, func(subpath string) error {
+			err2 := RemoveAll(subpath)
 			if err == nil {
-				err = err1
+				err = err2
 			}
-			if len(names) == 0 {
-				break
+			return nil
+		})
+		if err1 != nil {
+			// We must have hit a race, but we don't care.
+			if os.IsNotExist(errors.Cause(err1)) {
+				err1 = nil
 			}
+			return errors.Wrap(err1, "foreach subpath")
 		}
 
-		// Close the directory.
-		fd.Close()
-
 		// Remove the directory. This should now work.
-		err1 := os.Remove(path)
+		err1 = os.Remove(path)
 		if err1 == nil || os.IsNotExist(errors.Cause(err1)) {
 			return nil
 		}

From 6b737f6b92f8c7157e357fbb11fe2a94b4ee686b Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Thu, 8 Mar 2018 22:14:14 +1100
Subject: [PATCH 4/5] pkg: unpriv: add Walk implementation

This adds a filepath.Walk implementation to unpriv, that works
unprivileged. At the moment the WalkFunc is Wrap'd internally, though
this results in the function being called more than once if there's a
permission error. This might not be ideal for all users, but it is more
transparent I guess?

Also add Walk to the fseval interfaces so we can use Walk in oci/layer
for the purposes of handling opaque whiteouts.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 pkg/fseval/fseval.go          |  4 ++
 pkg/fseval/fseval_default.go  |  6 +++
 pkg/fseval/fseval_rootless.go |  6 +++
 pkg/unpriv/unpriv.go          | 73 +++++++++++++++++++++++---
 pkg/unpriv/unpriv_test.go     | 99 +++++++++++++++++++++++++++++++++++
 5 files changed, 181 insertions(+), 7 deletions(-)

diff --git a/pkg/fseval/fseval.go b/pkg/fseval/fseval.go
index bd14beff8..df2534edb 100644
--- a/pkg/fseval/fseval.go
+++ b/pkg/fseval/fseval.go
@@ -19,6 +19,7 @@ package fseval
 
 import (
 	"os"
+	"path/filepath"
 	"time"
 
 	"github.com/vbatts/go-mtree"
@@ -95,4 +96,7 @@ type FsEval interface {
 
 	// KeywordFunc returns a wrapper around the given mtree.KeywordFunc.
 	KeywordFunc(fn mtree.KeywordFunc) mtree.KeywordFunc
+
+	// Walk is equivalent to filepath.Walk.
+	Walk(root string, fn filepath.WalkFunc) error
 }
diff --git a/pkg/fseval/fseval_default.go b/pkg/fseval/fseval_default.go
index 458342309..e554f24c1 100644
--- a/pkg/fseval/fseval_default.go
+++ b/pkg/fseval/fseval_default.go
@@ -19,6 +19,7 @@ package fseval
 
 import (
 	"os"
+	"path/filepath"
 	"time"
 
 	"github.com/openSUSE/umoci/pkg/system"
@@ -146,3 +147,8 @@ func (fs osFsEval) Lclearxattrs(path string) error {
 func (fs osFsEval) KeywordFunc(fn mtree.KeywordFunc) mtree.KeywordFunc {
 	return fn
 }
+
+// Walk is equivalent to filepath.Walk.
+func (fs osFsEval) Walk(root string, fn filepath.WalkFunc) error {
+	return filepath.Walk(root, fn)
+}
diff --git a/pkg/fseval/fseval_rootless.go b/pkg/fseval/fseval_rootless.go
index 1d62195f6..2ab5673b5 100644
--- a/pkg/fseval/fseval_rootless.go
+++ b/pkg/fseval/fseval_rootless.go
@@ -20,6 +20,7 @@ package fseval
 import (
 	"io"
 	"os"
+	"path/filepath"
 	"time"
 
 	"github.com/openSUSE/umoci/pkg/unpriv"
@@ -148,3 +149,8 @@ func (fs unprivFsEval) KeywordFunc(fn mtree.KeywordFunc) mtree.KeywordFunc {
 		return kv, err
 	}
 }
+
+// Walk is equivalent to filepath.Walk.
+func (fs unprivFsEval) Walk(root string, fn filepath.WalkFunc) error {
+	return unpriv.Walk(root, fn)
+}
diff --git a/pkg/unpriv/unpriv.go b/pkg/unpriv/unpriv.go
index ec04b5faa..8f6ca5837 100644
--- a/pkg/unpriv/unpriv.go
+++ b/pkg/unpriv/unpriv.go
@@ -21,6 +21,7 @@ import (
 	"archive/tar"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
 	"time"
 
@@ -306,7 +307,7 @@ func Remove(path string) error {
 // foreachSubpath executes WrapFunc for each child of the given path (not
 // including the path itself). If path is not a directory, then WrapFunc will
 // not be called and no error will be returned. This should be called within a
-// context where path has already been made resolveable. If WrapFunc returns an
+// context where path has already been made resolveable, however the . If WrapFunc returns an
 // error, the first error is returned and iteration is halted.
 func foreachSubpath(path string, wrapFn WrapFunc) error {
 	// Is the path a directory?
@@ -327,17 +328,23 @@ func foreachSubpath(path string, wrapFn WrapFunc) error {
 
 	// We need to change the mode to Readdirnames. We don't need to worry about
 	// permissions because we're already in a context with filepath.Dir(path)
-	// is at least a+rx.
-	os.Chmod(path, fi.Mode()|0400)
-	defer fiRestore(path, fi)
-
-	// Remove contents recursively.
+	// is at least a+rx. However, because we are calling wrapFn we need to
+	// restore the original mode immediately.
+	os.Chmod(path, fi.Mode()|0444)
 	names, err := fd.Readdirnames(-1)
+	fiRestore(path, fi)
 	if err != nil {
 		return errors.WithStack(err)
 	}
+
+	// Make iteration order consistent.
+	sort.Strings(names)
+
+	// Call on all the sub-directories. We run it in a Wrap context to ensure
+	// that the path we pass is resolveable when executed.
 	for _, name := range names {
-		if err := wrapFn(filepath.Join(path, name)); err != nil {
+		subpath := filepath.Join(path, name)
+		if err := Wrap(subpath, wrapFn); err != nil {
 			return err
 		}
 	}
@@ -521,3 +528,55 @@ func Lclearxattrs(path string) error {
 		return nil
 	}), "unpriv.lclearxattrs")
 }
+
+// walk is the inner implementation of Walk.
+func walk(path string, info os.FileInfo, walkFn filepath.WalkFunc) error {
+	// Always run walkFn first. If we're not a directory there's no children to
+	// iterate over and so we bail even if there wasn't an error.
+	err := walkFn(path, info, nil)
+	if !info.IsDir() || err != nil {
+		return err
+	}
+
+	// Now just execute walkFn over each subpath.
+	return foreachSubpath(path, func(subpath string) error {
+		info, err := Lstat(subpath)
+		if err != nil {
+			// If it doesn't exist, just pass it directly to walkFn.
+			if err := walkFn(subpath, info, err); err != nil {
+				// Ignore SkipDir.
+				if errors.Cause(err) != filepath.SkipDir {
+					return err
+				}
+			}
+		} else {
+			if err := walk(subpath, info, walkFn); err != nil {
+				// Ignore error if it's SkipDir and subpath is a directory.
+				if !(info.IsDir() && errors.Cause(err) == filepath.SkipDir) {
+					return err
+				}
+			}
+		}
+		return nil
+	})
+}
+
+// Walk is a reimplementation of filepath.Walk, wrapping all of the relevant
+// function calls with Wrap, allowing you to walk over a tree even in the face
+// of multiple nested cases where paths are not normally accessible. The
+// os.FileInfo passed to walkFn is the "pristine" version (as opposed to the
+// currently-on-disk version that may have been temporarily modified by Wrap).
+func Walk(root string, walkFn filepath.WalkFunc) error {
+	return Wrap(root, func(root string) error {
+		info, err := Lstat(root)
+		if err != nil {
+			err = walkFn(root, nil, err)
+		} else {
+			err = walk(root, info, walkFn)
+		}
+		if errors.Cause(err) == filepath.SkipDir {
+			err = nil
+		}
+		return errors.Wrap(err, "unpriv.walk")
+	})
+}
diff --git a/pkg/unpriv/unpriv_test.go b/pkg/unpriv/unpriv_test.go
index 64c672509..ece6b7703 100644
--- a/pkg/unpriv/unpriv_test.go
+++ b/pkg/unpriv/unpriv_test.go
@@ -23,6 +23,7 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
+	"reflect"
 	"testing"
 	"time"
 
@@ -1791,3 +1792,101 @@ func TestMkdirRPerm(t *testing.T) {
 		t.Errorf("unexpected modeperm for path %s: %o", fi.Name(), fi.Mode()&os.ModePerm)
 	}
 }
+
+func TestWalk(t *testing.T) {
+	// There are two important things to make sure of here. That we actually
+	// hit all of the paths (once), and that the fileinfo we get is the one we
+	// expected.
+
+	if os.Geteuid() == 0 {
+		t.Log("unpriv.* tests only work with non-root privileges")
+		t.Skip()
+	}
+
+	dir, err := ioutil.TempDir("", "umoci-unpriv.TestWalk")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer RemoveAll(dir)
+
+	// Create some structure.
+	if err := os.MkdirAll(filepath.Join(dir, "some", "parent", "directories"), 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(filepath.Join(dir, "some", "parent", "directories", "file"), []byte("some content"), 0555); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(filepath.Join(dir, "some", "parent", "directories", "file"), 0); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(filepath.Join(dir, "some", "parent", "directories"), 0123); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(filepath.Join(dir, "some", "parent"), 0); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(filepath.Join(dir, "some"), 0); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(dir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Walk over it.
+	seen := map[string]int{}
+	err = Walk(dir, func(path string, info os.FileInfo, err error) error {
+		// Don't expect errors.
+		if err != nil {
+			t.Errorf("unexpected error in walkfunc(%s): %v", path, err)
+			return err
+		}
+
+		// Run Lstat first, and return an error if it would fail so Wrap "works".
+		newFi, err := os.Lstat(path)
+		if err != nil {
+			return err
+		}
+
+		// Figure out the expected mode.
+		expectedMode := os.FileMode(0xFFFFFFFF)
+		switch path {
+		case dir:
+			expectedMode = 0755 | os.ModeDir
+		case filepath.Join(dir, "some"),
+			filepath.Join(dir, "some", "parent"):
+			expectedMode = os.ModeDir
+		case filepath.Join(dir, "some", "parent", "directories"):
+			expectedMode = 0123 | os.ModeDir
+		case filepath.Join(dir, "some", "parent", "directories", "file"):
+			expectedMode = 0
+		default:
+			t.Errorf("saw unexpected path %s", path)
+			return nil
+		}
+
+		// Check the mode.
+		if info.Mode() != expectedMode {
+			t.Errorf("got unexpected mode on %s: expected %o got %o", path, info.Mode(), expectedMode)
+		}
+		if !reflect.DeepEqual(info, newFi) {
+			t.Errorf("got different info after lstat: before=%#v after=%#v", info, newFi)
+		}
+
+		// Update seen map.
+		seen[path]++
+		return nil
+	})
+	if err != nil {
+		t.Errorf("unexpected walk error: %v", err)
+	}
+
+	// Check the seen map.
+	for path, num := range seen {
+		if num != 1 {
+			t.Errorf("path %s seen an unexpected number of times %d (expected %d)", path, num, 1)
+		}
+	}
+	if len(seen) != 5 {
+		t.Errorf("saw an unexpected number of paths: len(%v) != %v", seen, 5)
+	}
+}

From 08da5c6d0fd94da8b19710f2da86560c5dfb824f Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Fri, 9 Mar 2018 23:03:09 +1100
Subject: [PATCH 5/5] oci: tar extract: full opaque whiteout support

Add full opaque whiteout support, including the handling of new files
present in the upper layer (and thus should not be removed). The code
implementing this is fairly fragile and has to deal with a lot of really
annoying edge cases, so many new tests were added for this portion.

There are a few semantics differences found between unpriv.Walk and
filepath.Walk by this patch (especially when it comes to handling ENOENT
during the walk). These semantics have been fixed in the WalkFunc used,
but probably deserve a refactor in future.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 CHANGELOG.md                  |   5 +
 oci/layer/tar_extract.go      | 100 +++++++++++---
 oci/layer/tar_extract_test.go | 240 +++++++++++++++++++++++++++++++++-
 3 files changed, 328 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ae5f2160..064a0c7bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
   fork][as-proot-fork] (though we do not test its interoperability at the
   moment) as both tools use [the same protobuf
   specification][rootlesscontainers-proto]. openSUSE/umoci#227
+- `umoci unpack` now has support for opaque whiteouts (whiteouts which remove
+  all children of a directory in the lower layer), though `umoci repack` does
+  not currently have support for generating them. While this is technically a
+  spec requirement, through testing we've never encountered an actual user of
+  these whiteouts. openSUSE/umoci#224 openSUSE/umoci#229
 
 ### Fixed
 - Fix a bug in our "parent directory restore" code, which is responsible for
diff --git a/oci/layer/tar_extract.go b/oci/layer/tar_extract.go
index 76bfe248c..c7f85306d 100644
--- a/oci/layer/tar_extract.go
+++ b/oci/layer/tar_extract.go
@@ -35,11 +35,21 @@ import (
 )
 
 type tarExtractor struct {
-	// mapOptions is the set of mapping options to use when extracting filesystem layers.
+	// mapOptions is the set of mapping options to use when extracting
+	// filesystem layers.
 	mapOptions MapOptions
 
 	// fsEval is an fseval.FsEval used for extraction.
 	fsEval fseval.FsEval
+
+	// upperPaths are paths that have either been extracted in the execution of
+	// this tarExtractor or are ancestors of paths extracted. The purpose of
+	// having this stored in-memory is to be able to handle opaque whiteouts as
+	// well as some other possible ordering issues with malformed archives (the
+	// downside of this approach is that it takes up memory -- we could switch
+	// to a trie if necessary). These paths are relative to the tar root but
+	// are fully symlink-expanded so no need to worry about that line noise.
+	upperPaths map[string]struct{}
 }
 
 // newTarExtractor creates a new tarExtractor.
@@ -52,6 +62,7 @@ func newTarExtractor(opt MapOptions) *tarExtractor {
 	return &tarExtractor{
 		mapOptions: opt,
 		fsEval:     fsEval,
+		upperPaths: make(map[string]struct{}),
 	}
 }
 
@@ -233,27 +244,75 @@ func (te *tarExtractor) unpackEntry(root string, hdr *tar.Header, r io.Reader) (
 	if strings.HasPrefix(file, whPrefix) {
 		isOpaque := file == whOpaque
 		file = strings.TrimPrefix(file, whPrefix)
-		path = filepath.Join(dir, file)
 
-		// XXX: We currently don't have any way of handling opaque files if
-		//      they occur after other entries inside the path. So we have to
-		//      error out (for now).
+		// We have special handling for opaque whiteouts. All other brands of
+		// whiteouts are just removed without prejudice (with the note that we
+		// cannot error out if a layer removes a non-existant file with this
+		// implementation -- in future we could add lowerPaths that would help
+		// track whether another whiteout caused the removal to "fail" or if
+		// the path was actually missing -- which would allow us to actually
+		// error out here).
+
+		path = filepath.Join(dir, file)
 		if isOpaque {
-			return errors.Errorf("unsupported whiteout type: %s", hdr.Name)
+			path = dir
 		}
 
-		// Unfortunately we can't just stat the file here, because if we hit a
-		// parent directory whiteout earlier than this one then stating here
-		// would fail. The best solution would be to keep a list of whiteouts
-		// we've seen and then Lstat accordingly (though it won't help in some
-		// cases).
+		removeFunc := te.fsEval.RemoveAll
+		if isOpaque {
+			removeFunc = func(path string) error {
+				// Check that root exists.
+				if fi, err := te.fsEval.Lstat(path); err != nil {
+					return errors.Wrap(err, "check whiteout root")
+				} else if !fi.IsDir() {
+					return errors.Errorf("expected whiteout root to be directory: %v", path)
+				}
 
-		// Just remove the path. The defer will reapply the correct parent
-		// metadata. We have nothing left to do here.
-		if err := te.fsEval.RemoveAll(path); err != nil {
-			return errors.Wrap(err, "whiteout remove all")
+				// Walk over the path to remove children.
+				err := te.fsEval.Walk(path, func(subpath string, info os.FileInfo, err error) error {
+					// If we are passed an error, bail unless it's ENOENT.
+					if err != nil {
+						// If something was deleted outside of our knowledge
+						// it's not the end of the world. We've already checked
+						// that the root path exists.
+						if os.IsNotExist(errors.Cause(err)) {
+							err = filepath.SkipDir
+						}
+						return err
+					}
+
+					// Skip the top-level dir.
+					if CleanPath(path) == CleanPath(subpath) {
+						return nil
+					}
+
+					// Get the relative form of subpath to root to match
+					// te.upperPaths.
+					upperPath, err := filepath.Rel(root, subpath)
+					if err != nil {
+						return errors.Wrap(err, "find relative-to-root [should never happen]")
+					}
+
+					// Remove the path only if it hasn't been touched.
+					if _, ok := te.upperPaths[upperPath]; !ok {
+						err := errors.Wrap(te.fsEval.RemoveAll(subpath), "whiteout subpath")
+						// Skip anything underneath the subpath if it's a
+						// directory, since we just purged it.
+						if err == nil && info.IsDir() {
+							err = filepath.SkipDir
+						}
+						return err
+					}
+					return nil
+				})
+				return errors.Wrap(err, "opaque whiteout")
+			}
 		}
 
+		// Run the removal function now.
+		if err := removeFunc(path); err != nil {
+			return errors.Wrap(err, "whiteout remove")
+		}
 		return nil
 	}
 
@@ -412,5 +471,16 @@ out:
 		}
 	}
 
+	// Everything is done -- the path now exists. Add it (and all its
+	// ancestors) to the set of upper paths. We first have to figure out the
+	// proper path corresponding to hdr.Name though.
+	upperPath, err := filepath.Rel(root, path)
+	if err != nil {
+		// Really shouldn't happen because of the guarantees of SecureJoinVFS.
+		return errors.Wrap(err, "find relative-to-root [should never happen]")
+	}
+	for pth := upperPath; pth != filepath.Dir(pth); pth = filepath.Dir(pth) {
+		te.upperPaths[pth] = struct{}{}
+	}
 	return nil
 }
diff --git a/oci/layer/tar_extract_test.go b/oci/layer/tar_extract_test.go
index e16453390..a4e361bf6 100644
--- a/oci/layer/tar_extract_test.go
+++ b/oci/layer/tar_extract_test.go
@@ -20,6 +20,8 @@ package layer
 import (
 	"archive/tar"
 	"bytes"
+	"crypto/rand"
+	"io"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -27,6 +29,7 @@ import (
 	"time"
 
 	rspec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
 )
 
@@ -307,6 +310,241 @@ func TestUnpackEntryWhiteout(t *testing.T) {
 	}(t)
 }
 
+// TestUnpackOpaqueWhiteout checks whether *opaque* whiteout handling is done
+// correctly, as well as ensuring that the metadata of the parent is
+// maintained -- and that upperdir entries are handled.
+func TestUnpackOpaqueWhiteout(t *testing.T) {
+	type pseudoHdr struct {
+		path     string
+		linkname string
+		typeflag byte
+		upper    bool
+	}
+
+	fromPseudoHdr := func(ph pseudoHdr) (*tar.Header, io.Reader) {
+		var r io.Reader
+		var size int64
+		if ph.typeflag == tar.TypeReg || ph.typeflag == tar.TypeRegA {
+			size = 256 * 1024
+			r = &io.LimitedReader{
+				R: rand.Reader,
+				N: size,
+			}
+		}
+
+		mode := os.FileMode(0777)
+		if ph.typeflag == tar.TypeDir {
+			mode |= os.ModeDir
+		}
+
+		return &tar.Header{
+			Name:       ph.path,
+			Linkname:   ph.linkname,
+			Typeflag:   ph.typeflag,
+			Mode:       int64(mode),
+			Size:       size,
+			ModTime:    time.Unix(1210393, 4528036),
+			AccessTime: time.Unix(7892829, 2341211),
+			ChangeTime: time.Unix(8731293, 8218947),
+		}, r
+	}
+
+	// TODO: Modify this to use subtests once Go 1.7 is in enough places.
+	func(t *testing.T) {
+		for _, test := range []struct {
+			name          string
+			ignoreExist   bool // ignore if extra upper files exist
+			pseudoHeaders []pseudoHdr
+		}{
+			{"EmptyDir", false, nil},
+			{"OneLevel", false, []pseudoHdr{
+				{"file", "", tar.TypeReg, false},
+				{"link", "..", tar.TypeSymlink, true},
+				{"badlink", "./nothing", tar.TypeSymlink, true},
+				{"fifo", "", tar.TypeFifo, false},
+			}},
+			{"OneLevelNoUpper", false, []pseudoHdr{
+				{"file", "", tar.TypeReg, false},
+				{"link", "..", tar.TypeSymlink, false},
+				{"badlink", "./nothing", tar.TypeSymlink, false},
+				{"fifo", "", tar.TypeFifo, false},
+			}},
+			{"TwoLevel", false, []pseudoHdr{
+				{"file", "", tar.TypeReg, true},
+				{"link", "..", tar.TypeSymlink, false},
+				{"badlink", "./nothing", tar.TypeSymlink, false},
+				{"dir", "", tar.TypeDir, true},
+				{"dir/file", "", tar.TypeRegA, true},
+				{"dir/link", "../badlink", tar.TypeSymlink, false},
+				{"dir/verybadlink", "../../../../../../../../../../../../etc/shadow", tar.TypeSymlink, true},
+				{"dir/verybadlink2", "/../../../../../../../../../../../../etc/shadow", tar.TypeSymlink, false},
+			}},
+			{"TwoLevelNoUpper", false, []pseudoHdr{
+				{"file", "", tar.TypeReg, false},
+				{"link", "..", tar.TypeSymlink, false},
+				{"badlink", "./nothing", tar.TypeSymlink, false},
+				{"dir", "", tar.TypeDir, false},
+				{"dir/file", "", tar.TypeRegA, false},
+				{"dir/link", "../badlink", tar.TypeSymlink, false},
+				{"dir/verybadlink", "../../../../../../../../../../../../etc/shadow", tar.TypeSymlink, false},
+				{"dir/verybadlink2", "/../../../../../../../../../../../../etc/shadow", tar.TypeSymlink, false},
+			}},
+			{"MultiLevel", false, []pseudoHdr{
+				{"level1_file", "", tar.TypeReg, true},
+				{"level1_link", "..", tar.TypeSymlink, false},
+				{"level1a", "", tar.TypeDir, true},
+				{"level1a/level2_file", "", tar.TypeRegA, false},
+				{"level1a/level2_link", "../../../", tar.TypeSymlink, true},
+				{"level1a/level2a", "", tar.TypeDir, false},
+				{"level1a/level2a/level3_fileA", "", tar.TypeReg, false},
+				{"level1a/level2a/level3_fileB", "", tar.TypeReg, false},
+				{"level1a/level2b", "", tar.TypeDir, true},
+				{"level1a/level2b/level3_fileA", "", tar.TypeReg, true},
+				{"level1a/level2b/level3_fileB", "", tar.TypeReg, false},
+				{"level1a/level2b/level3", "", tar.TypeDir, false},
+				{"level1a/level2b/level3/level4", "", tar.TypeDir, false},
+				{"level1a/level2b/level3/level4", "", tar.TypeDir, false},
+				{"level1a/level2b/level3_fileA", "", tar.TypeReg, true},
+				{"level1b", "", tar.TypeDir, false},
+				{"level1b/level2_fileA", "", tar.TypeReg, false},
+				{"level1b/level2_fileB", "", tar.TypeReg, false},
+				{"level1b/level2", "", tar.TypeDir, false},
+				{"level1b/level2/level3_file", "", tar.TypeReg, false},
+			}},
+			{"MultiLevelNoUpper", false, []pseudoHdr{
+				{"level1_file", "", tar.TypeReg, false},
+				{"level1_link", "..", tar.TypeSymlink, false},
+				{"level1a", "", tar.TypeDir, false},
+				{"level1a/level2_file", "", tar.TypeRegA, false},
+				{"level1a/level2_link", "../../../", tar.TypeSymlink, false},
+				{"level1a/level2a", "", tar.TypeDir, false},
+				{"level1a/level2a/level3_fileA", "", tar.TypeReg, false},
+				{"level1a/level2a/level3_fileB", "", tar.TypeReg, false},
+				{"level1a/level2b", "", tar.TypeDir, false},
+				{"level1a/level2b/level3_fileA", "", tar.TypeReg, false},
+				{"level1a/level2b/level3_fileB", "", tar.TypeReg, false},
+				{"level1a/level2b/level3", "", tar.TypeDir, false},
+				{"level1a/level2b/level3/level4", "", tar.TypeDir, false},
+				{"level1a/level2b/level3/level4", "", tar.TypeDir, false},
+				{"level1a/level2b/level3_fileA", "", tar.TypeReg, false},
+				{"level1b", "", tar.TypeDir, false},
+				{"level1b/level2_fileA", "", tar.TypeReg, false},
+				{"level1b/level2_fileB", "", tar.TypeReg, false},
+				{"level1b/level2", "", tar.TypeDir, false},
+				{"level1b/level2/level3_file", "", tar.TypeReg, false},
+			}},
+			{"MissingUpperAncestor", true, []pseudoHdr{
+				{"some", "", tar.TypeDir, false},
+				{"some/dir", "", tar.TypeDir, false},
+				{"some/dir/somewhere", "", tar.TypeReg, true},
+				{"another", "", tar.TypeDir, false},
+				{"another/dir", "", tar.TypeDir, false},
+				{"another/dir/somewhere", "", tar.TypeReg, false},
+			}},
+		} {
+			t.Logf("running Test%s", test.name)
+			mapOptions := MapOptions{
+				Rootless: os.Geteuid() != 0,
+			}
+
+			dir, err := ioutil.TempDir("", "umoci-TestUnpackOpaqueWhiteout")
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer os.RemoveAll(dir)
+
+			// We do all whiteouts in a subdirectory.
+			whiteoutDir := "test-subdir"
+			whiteoutRoot := filepath.Join(dir, whiteoutDir)
+			if err := os.MkdirAll(whiteoutRoot, 0755); err != nil {
+				t.Fatal(err)
+			}
+
+			// Track if we have upper entries.
+			numUpper := 0
+
+			// First we apply the non-upper files in a new tarExtractor.
+			te := newTarExtractor(mapOptions)
+			for _, ph := range test.pseudoHeaders {
+				// Skip upper.
+				if ph.upper {
+					numUpper++
+					continue
+				}
+				hdr, rdr := fromPseudoHdr(ph)
+				hdr.Name = filepath.Join(whiteoutDir, hdr.Name)
+				if err := te.unpackEntry(dir, hdr, rdr); err != nil {
+					t.Errorf("unpackEntry %s failed: %v", hdr.Name, err)
+				}
+			}
+
+			// Now we apply the upper files in another tarExtractor.
+			te = newTarExtractor(mapOptions)
+			for _, ph := range test.pseudoHeaders {
+				// Skip non-upper.
+				if !ph.upper {
+					continue
+				}
+				hdr, rdr := fromPseudoHdr(ph)
+				hdr.Name = filepath.Join(whiteoutDir, hdr.Name)
+				if err := te.unpackEntry(dir, hdr, rdr); err != nil {
+					t.Errorf("unpackEntry %s failed: %v", hdr.Name, err)
+				}
+			}
+
+			// And now apply a whiteout for the whiteoutRoot.
+			whHdr := &tar.Header{
+				Name:     filepath.Join(whiteoutDir, whOpaque),
+				Typeflag: tar.TypeReg,
+			}
+			if err := te.unpackEntry(dir, whHdr, nil); err != nil {
+				t.Errorf("unpack whiteout %s failed: %v", whiteoutRoot, err)
+				continue
+			}
+
+			// Now we double-check it worked. If the file was in "upper" it
+			// should have survived. If it was in lower it shouldn't. We don't
+			// bother checking the contents here.
+			for _, ph := range test.pseudoHeaders {
+				fullPath := filepath.Join(whiteoutRoot, ph.path)
+
+				_, err := te.fsEval.Lstat(fullPath)
+				if err != nil && !os.IsNotExist(errors.Cause(err)) {
+					t.Errorf("unexpected lstat error of %s: %v", ph.path, err)
+				} else if ph.upper && err != nil {
+					t.Errorf("expected upper %s to exist: got %v", ph.path, err)
+				} else if !ph.upper && err == nil {
+					if !test.ignoreExist {
+						t.Errorf("expected lower %s to not exist", ph.path)
+					}
+				}
+			}
+
+			// Make sure the whiteoutRoot still exists.
+			if fi, err := te.fsEval.Lstat(whiteoutRoot); err != nil {
+				if os.IsNotExist(errors.Cause(err)) {
+					t.Errorf("expected whiteout root to still exist: %v", err)
+				} else {
+					t.Errorf("unexpected error in lstat of whiteout root: %v", err)
+				}
+			} else if !fi.IsDir() {
+				t.Errorf("expected whiteout root to still be a directory")
+			}
+
+			// Check that the directory is empty if there's no uppers.
+			if numUpper == 0 {
+				if fd, err := os.Open(whiteoutRoot); err != nil {
+					t.Errorf("unexpected error opening whiteoutRoot: %v", err)
+				} else if names, err := fd.Readdirnames(-1); err != nil {
+					t.Errorf("unexpected error reading dirnames: %v", err)
+				} else if len(names) != 0 {
+					t.Errorf("expected empty opaque'd dir: got %v", names)
+				}
+			}
+		}
+	}(t)
+}
+
 // TestUnpackHardlink makes sure that hardlinks are correctly unpacked in all
 // cases. In particular when it comes to hardlinks to symlinks.
 func TestUnpackHardlink(t *testing.T) {
@@ -475,8 +713,6 @@ func TestUnpackEntryMap(t *testing.T) {
 			}
 			defer os.RemoveAll(dir)
 
-			t.Logf("running with uid=%#v gid=%#v", test.uidMap, test.gidMap)
-
 			var (
 				hdrUID, hdrGID, uUID, uGID int
 				hdr                        *tar.Header