Skip to content

Commit

Permalink
Absolute symlinks are now handled properly
Browse files Browse the repository at this point in the history
  • Loading branch information
jssblck committed Dec 13, 2024
1 parent 956235a commit 200ec5b
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 7 deletions.
28 changes: 28 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

# v0.3.1

- `extract`: absolute symlinks are now correctly made relative to the target directory.

# v0.3.0

- Add `list` subcommand.
- `--platform`: Select the platform to list.
- `--username`: Authenticate to the registry using a username.
- `--password`: Authenticate to the registry using a password.

# v0.2.0

- `extract`: Support filtering layers and files within them.
- `--layer-glob`, `--lg`: Filter layer digests using a glob pattern.
- `--file-glob`, `--fg`: Filter files within layers using a glob pattern.
- `--layer-regex`, `--lr`: Filter layer digests using a regex pattern.
- `--file-regex`, `--fr`: Filter files within layers using a regex pattern.

# v0.1.0

- Add `extract` subcommand.
- `--platform`: Select the platform to extract.
- `--username`: Authenticate to the registry using a username.
- `--password`: Authenticate to the registry using a password.
- `--layers`: Select which layers to extract.
- `--overwrite`: Overwrite the existing target directory.
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,3 @@ Ideally we'll fix these in the future; feel free to make a contribution or open
- [ ] circe does not currently download layers concurrently.
Since network transfer is effectively always the bottleneck, adding concurrent downloads would likely speed up `circe` significantly.
That being said, as of our tests today `circe` is already about as fast as `docker pull && docker save`.
- [ ] symlinks are unpacked with the same destination as written in the actual container.
This means e.g. they can link to files outside of the output directory
(the example case I found was files in `usr/bin`, linking to `/bin/`).
2 changes: 1 addition & 1 deletion bin/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "circe"
version = "0.3.0"
version = "0.3.1"
edition = "2021"
authors = ["Jessica Black <me@jessica.black>", "FOSSA Inc. <support@fossa.com>"]
description = "Extracts and examines the contents of containers"
Expand Down
2 changes: 1 addition & 1 deletion lib/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "circe_lib"
version = "0.3.0"
version = "0.3.1"
edition = "2021"
authors = ["Jessica Black <me@jessica.black>", "FOSSA Inc. <support@fossa.com>"]
description = "Extracts and examines the contents of containers"
Expand Down
145 changes: 143 additions & 2 deletions lib/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{
};

use bytes::Bytes;
use color_eyre::eyre::{Context, Result};
use color_eyre::eyre::{Context, OptionExt, Result};
use derive_more::Debug;
use futures_lite::{Stream, StreamExt};
use oci_client::{
Expand All @@ -16,7 +16,9 @@ use oci_client::{
Client, Reference as OciReference, RegistryOperation,
};
use os_str_bytes::OsStrBytesExt;
use tokio_tar::Archive;
use tap::Pipe;
use tokio::io::AsyncRead;
use tokio_tar::{Archive, Entry};
use tokio_util::io::StreamReader;
use tracing::{debug, warn};

Expand Down Expand Up @@ -344,6 +346,22 @@ async fn apply_tarball(
continue;
}

// The tar library mostly handles symlinks properly, but still allows them to link to absolute paths.
// This doesn't technically break anything from a security standpoint, but might for analysis.
// Intercept its handling of absolute symlinks to handle this case.
if entry.header().entry_type().is_symlink() {
let handled = unwrap_warn!(
safe_symlink(&entry, output).await,
continue,
"create symlink {path:?}"
);

// But if the function didn't handle it, fall back to the default behavior.
if handled {
continue;
}
}

// Future improvement: symlinks are unpacked with the same destination as written in the actual container;
// this means e.g. they can link to files outside of the output directory
// (the example case I found was in `usr/bin`, linking to `/bin/`).
Expand Down Expand Up @@ -381,6 +399,115 @@ async fn enumerate_tarball(stream: impl Stream<Item = Chunk> + Unpin) -> Result<
Ok(files)
}

/// Special handling for symlinks that link to an absolute path.
/// It effectively forces the destination into a path relative to the output directory.
///
/// Returns true if the symlink was handled;
/// false if the symlink should fall back to standard handling from `async_tar`.
async fn safe_symlink<R: AsyncRead + Unpin>(entry: &Entry<R>, dir: &Path) -> Result<bool> {
let header = entry.header();
let kind = header.entry_type();
if !kind.is_symlink() {
return Ok(false);
}

let link = entry.path().context("read symlink source")?;
let target = header
.link_name()
.context("read symlink target")?
.ok_or_eyre("no symlink target")?;

// If the target is relative, we should let `async_tar` handle it;
// this function only needs to intercept absolute symlinks.
if !target.is_absolute() {
return Ok(false);
}

let safe_link = dir.join(&link);
let safe_target = dir.join(strip_root(&target));

let rel_target = compute_relative(&safe_link, &safe_target)
.with_context(|| format!("compute relative path from {safe_link:?} to {safe_target:?}"))?;
tracing::info!(
?link,
?target,
?safe_link,
?safe_target,
?rel_target,
"create symlink"
);

if let Some(parent) = safe_link.parent() {
tokio::fs::create_dir_all(parent)
.await
.context("create parent directory")?;
}

symlink(&rel_target, &safe_link)
.await
.map(|_| true)
.with_context(|| {
format!("create symlink from {safe_link:?} to {safe_target:?} as {rel_target:?}")
})
}

fn compute_relative(src: &Path, dst: &Path) -> Result<PathBuf> {
let common_prefix = src
.components()
.zip(dst.components())
.by_ref()
.take_while(|(src, dst)| src == dst)
.map(|(src, _)| src)
.collect::<PathBuf>();

let src_rel = src
.strip_prefix(&common_prefix)
.context("strip common prefix from src")?;
let dst_rel = dst
.strip_prefix(&common_prefix)
.context("strip common prefix from dst")?;

// `bridge` is the path from the source to the common prefix.
let bridge = src_rel
.components()
.skip(1)
.map(|_| "..")
.collect::<PathBuf>();
let rel = bridge.join(dst_rel);

// `.` indicates that the source and destination are the same file.
if rel.to_string_lossy().is_empty() {
Ok(PathBuf::from("."))
} else {
Ok(rel)
}
}

/// Strips any root and prefix from a path, if they exist.
fn strip_root(path: impl AsRef<Path>) -> PathBuf {
path.as_ref()
.components()
.filter(|c| match c {
std::path::Component::Prefix(_) => false,
std::path::Component::RootDir => false,
_ => true,
})
.pipe(PathBuf::from_iter)
}

#[cfg(windows)]
async fn symlink(src: &Path, dst: &Path) -> std::io::Result<()> {
let (src, dst) = (src.to_owned(), dst.to_owned());
tokio::task::spawn_blocking(|| std::os::windows::fs::symlink_file(src, dst))
.await
.expect("join tokio task")
}

#[cfg(any(unix, target_os = "redox"))]
async fn symlink(src: &Path, dst: &Path) -> std::io::Result<()> {
tokio::fs::symlink(src, dst).await
}

impl From<&Reference> for OciReference {
fn from(reference: &Reference) -> Self {
match &reference.version {
Expand Down Expand Up @@ -544,6 +671,8 @@ const fn go_arch() -> &'static str {
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use simple_test_case::test_case;

#[test]
fn test_is_whiteout() {
Expand All @@ -553,4 +682,16 @@ mod tests {
Some(PathBuf::from("foo"))
);
}

#[test_case(Path::new("/a/b/c"), Path::new("/a/b/d/e/f"), PathBuf::from("../d/e/f"); "one_level")]
#[test_case(Path::new("/usr/local/bin/ls"), Path::new("/bin/ls"), PathBuf::from("../../../../bin/ls"); "usr_local_bin_to_bin")]
#[test_case(Path::new("/usr/local/bin/ls"), Path::new("/usr/bin/ls"), PathBuf::from("../../../bin/ls"); "usr_local_bin_to_usr_bin")]
#[test_case(Path::new("/usr/local/bin/ls"), Path::new("/usr/local/bin/ls"), PathBuf::from("."); "same_file")]
#[test_case(Path::new("/usr/local/bin/eza"), Path::new("/usr/local/bin/ls"), PathBuf::from("./ls"); "same_dir")]
#[tokio::test]
async fn compute_relative(src: &Path, dst: &Path, expected: PathBuf) -> Result<()> {
let relative = compute_relative(src, dst)?;
pretty_assertions::assert_eq!(relative, expected);
Ok(())
}
}

0 comments on commit 200ec5b

Please sign in to comment.