From 104e4699848559afb438a7c90564624dbe9fa8e2 Mon Sep 17 00:00:00 2001 From: Jessica Black Date: Mon, 9 Dec 2024 17:11:50 -0800 Subject: [PATCH] Support extracting layers --- bin/src/cmd_extract.rs | 10 +-- lib/Cargo.toml | 2 + lib/src/ext.rs | 60 ++++++++++++++++ lib/src/lib.rs | 7 +- lib/src/registry.rs | 124 ++++++++++++++++++++++++++++++-- lib/{ => tests}/it/main.rs | 1 + lib/{ => tests}/it/platform.rs | 0 lib/{ => tests}/it/reference.rs | 10 +-- lib/tests/it/registry.rs | 27 +++++++ 9 files changed, 225 insertions(+), 16 deletions(-) create mode 100644 lib/src/ext.rs rename lib/{ => tests}/it/main.rs (67%) rename lib/{ => tests}/it/platform.rs (100%) rename lib/{ => tests}/it/reference.rs (89%) create mode 100644 lib/tests/it/registry.rs diff --git a/bin/src/cmd_extract.rs b/bin/src/cmd_extract.rs index e28522a..89cf9b0 100644 --- a/bin/src/cmd_extract.rs +++ b/bin/src/cmd_extract.rs @@ -22,13 +22,15 @@ pub struct Options { /// If the image is multi-platform and this argument is not provided, /// the platform is chosen according to the following priority list: /// - /// 1. The current platform (if available) + /// 1. The first platform-independent image /// - /// 2. The `linux` platform for the current architecture + /// 2. The current platform (if available) /// - /// 3. The `linux` platform for the `amd64` architecture + /// 3. The `linux` platform for the current architecture /// - /// 4. The first platform in the image manifest + /// 4. The `linux` platform for the `amd64` architecture + /// + /// 5. 
The first platform in the image manifest #[arg(long, value_parser = Platform::from_str)] platform: Option, diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 62f0696..049afd2 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -18,6 +18,7 @@ color-eyre = "0.6.3" derive_more = { version = "1.0.0", features = ["full"] } hex = "0.4.3" hex-magic = "0.0.2" +itertools = "0.13.0" oci-client = "0.14.0" static_assertions = "1.1.0" tap = "1.0.1" @@ -27,3 +28,4 @@ tracing = "0.1.41" pretty_assertions = "1.4.1" proptest = "1.5.0" simple_test_case = "1.2.0" +tokio = { version = "1.42.0", features = ["full"] } diff --git a/lib/src/ext.rs b/lib/src/ext.rs new file mode 100644 index 0000000..426e734 --- /dev/null +++ b/lib/src/ext.rs @@ -0,0 +1,60 @@ +use itertools::Itertools; + +/// Implements `priority_find` for collections. +pub trait PriorityFind { + /// Searches through a list of items using the provided prioritization function. + /// Priorities are such that a lower number is higher priority, meaning that `0` is the highest possible priority. + /// + /// As the search is performed: + /// - If an item with priority `0` (the highest possible priority) is found, it is immediately returned and the rest of the search is aborted. + /// - Otherwise, the highest priority item found is retained until the end of the search, at which point it is returned. + fn priority_find usize>(self, prioritize: F) -> Option; +} + +impl PriorityFind for I +where + I: Iterator, +{ + fn priority_find usize>(self, prioritize: F) -> Option { + priority_find(self, prioritize) + } +} + +/// Searches through a list of items using a priority function returning a non-negative number. +/// Priorities are such that a lower number is higher priority, meaning that `0` is the highest possible priority. +/// +/// As the search is performed: +/// - If an item with priority `0` (the highest possible priority) is found, it is immediately returned and the rest of the search is aborted. 
+/// - Otherwise, the highest priority item found is retained until the end of the search, at which point it is returned. +fn priority_find usize>( + items: impl IntoIterator, + prioritize: F, +) -> Option { + items + .into_iter() + // Mapping here allows the function to use `take_while_inclusive` to bound the search below + // instead of using more complex logic in `fold`. + .map(|item| (prioritize(&item), item)) + // This ensures that the fold stops after finding the first priority 0 item, which constitutes an early termination condition. + // Any item that isn't at priority 0 doesn't allow the function to early return: it might find a higher priority item later. + .take_while_inclusive(|(priority, _)| *priority > 0) + // The job of fold is now simple: just always select the item with higher priority. + .fold(None, |result, (incoming, item)| { + match result { + // No result yet, so incoming item is automatically highest priority. + None => Some((incoming, item)), + + // Remember that "lower number" means "higher priority". + // If the new item isn't higher priority, keep the current pick: + // this ensures the first item encountered at a given priority is chosen. + Some((current, _)) => { + if current > incoming { + Some((incoming, item)) + } else { + result + } + } + } + }) + .map(|(_, item)| item) +} diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 419c027..288e53e 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -9,6 +9,7 @@ use derive_more::derive::Display; use std::str::FromStr; use tap::Pipe; +mod ext; pub mod registry; /// Platform represents the platform a container image is built for. @@ -218,12 +219,12 @@ macro_rules! 
digest { circe::digest!($algorithm, $hex, 32) }}; ($algorithm:expr, $hex:expr, $size:expr) => {{ - const hash: [u8; $size] = hex_magic::hex!($hex); - static_assertions::const_assert_ne!(hash.len(), 0); + const HASH: [u8; $size] = hex_magic::hex!($hex); + static_assertions::const_assert_ne!(HASH.len(), 0); static_assertions::const_assert_ne!($algorithm.len(), 0); circe::Digest { algorithm: $algorithm.to_string(), - hash: hash.to_vec(), + hash: HASH.to_vec(), } }}; } diff --git a/lib/src/registry.rs b/lib/src/registry.rs index c2fcf84..b62a094 100644 --- a/lib/src/registry.rs +++ b/lib/src/registry.rs @@ -1,10 +1,124 @@ //! Interacts with remote OCI registries. -use color_eyre::eyre::Result; +use std::str::FromStr; -use crate::Reference; +use color_eyre::eyre::{Context, Result}; +use oci_client::{ + client::ClientConfig, manifest::ImageIndexEntry, secrets::RegistryAuth, Client, + Reference as OciReference, +}; -/// Enumerate layers for a reference in the remote registry. -pub async fn layers(reference: &Reference) -> Result> { - Ok(vec![]) +use crate::{ext::PriorityFind, LayerReference, Platform, Reference, Version}; + +/// Enumerate layers for a container reference in the remote registry. +/// Layers are returned in order from the base image to the application. 
+#[tracing::instrument] +pub async fn layers( + platform: Option<&Platform>, + reference: &Reference, +) -> Result> { + let client = client(platform.cloned()); + let auth = RegistryAuth::Anonymous; + + let oci_ref = OciReference::from(reference); + let (manifest, _) = client + .pull_image_manifest(&oci_ref, &auth) + .await + .with_context(|| format!("pull image manifest: {oci_ref}"))?; + + manifest + .layers + .into_iter() + .map(|layer| LayerReference::from_str(&layer.digest)) + .collect() +} + +impl From<&Reference> for OciReference { + fn from(reference: &Reference) -> Self { + match &reference.version { + Version::Tag(tag) => Self::with_tag( + reference.host.clone(), + reference.repository.clone(), + tag.clone(), + ), + Version::Digest(digest) => Self::with_digest( + reference.host.clone(), + reference.repository.clone(), + digest.to_string(), + ), + } + } +} + +fn client(platform: Option) -> Client { + let mut config = ClientConfig::default(); + config.platform_resolver = match platform { + Some(platform) => Some(Box::new(target_platform_resolver(platform))), + None => Some(Box::new(current_platform_resolver)), + }; + Client::new(config) +} + +fn target_platform_resolver(target: Platform) -> impl Fn(&[ImageIndexEntry]) -> Option { + move |entries: &[ImageIndexEntry]| { + entries + .iter() + .find(|entry| { + entry.platform.as_ref().map_or(false, |platform| { + platform.os == target.os && platform.architecture == target.architecture + }) + }) + .map(|entry| entry.digest.clone()) + } +} + +fn current_platform_resolver(entries: &[ImageIndexEntry]) -> Option { + let current_os = go_os(); + let current_arch = go_arch(); + let linux = Platform::LINUX; + let amd64 = Platform::AMD64; + entries + .iter() + .priority_find(|entry| match entry.platform.as_ref() { + None => 0, + Some(p) if p.os == current_os && p.architecture == current_arch => 1, + Some(p) if p.os == linux && p.architecture == current_arch => 2, + Some(p) if p.os == linux && p.architecture == amd64 => 3, + _ => 4, + }) + 
.map(|entry| entry.digest.clone()) +} + +/// Returns the current OS as a string that matches a `GOOS` constant. +/// This is required because the OCI spec requires the OS to be a valid GOOS value. +// If you get a compile error here, you need to add a new `cfg` branch for your platform. +// Valid GOOS values may be gathered from here: https://go.dev/doc/install/source#environment +const fn go_os() -> &'static str { + #[cfg(target_os = "linux")] + { + "linux" + } + #[cfg(target_os = "macos")] + { + "darwin" + } + #[cfg(target_os = "windows")] + { + "windows" + } +} + +/// Returns the current architecture as a string that matches a `GOARCH` constant. +/// This is required because the OCI spec requires the architecture to be a valid GOARCH value. +// If you get a compile error here, you need to add a new `cfg` branch for your platform. +// Valid GOARCH values may be gathered from here: https://go.dev/doc/install/source#environment +const fn go_arch() -> &'static str { + #[cfg(target_arch = "x86_64")] + { + "amd64" + } + #[cfg(target_arch = "aarch64")] + { + "arm64" + } } diff --git a/lib/it/main.rs b/lib/tests/it/main.rs similarity index 67% rename from lib/it/main.rs rename to lib/tests/it/main.rs index 6aeb56f..21c87db 100644 --- a/lib/it/main.rs +++ b/lib/tests/it/main.rs @@ -1,2 +1,3 @@ mod platform; mod reference; +mod registry; diff --git a/lib/it/platform.rs b/lib/tests/it/platform.rs similarity index 100% rename from lib/it/platform.rs rename to lib/tests/it/platform.rs diff --git a/lib/it/reference.rs b/lib/tests/it/reference.rs similarity index 89% rename from lib/it/reference.rs rename to lib/tests/it/reference.rs index 807beae..b4d535f 100644 --- a/lib/it/reference.rs +++ b/lib/tests/it/reference.rs @@ -1,9 +1,9 @@ -use circe::Reference; +use circe::{Digest, Reference}; use proptest::prelude::*; use simple_test_case::test_case; #[test_case("docker.io/library/ubuntu:latest", 
Reference::builder().host("docker.io").repository("library/ubuntu").tag("latest").build(); "docker.io/library/ubuntu:latest")] -#[test_case("ghcr.io/user/repo@sha256:123abc", Reference::builder().host("ghcr.io").repository("user/repo").digest("sha256:123abc").build(); "ghcr.io/user/repo@sha256:123abc")] +#[test_case("ghcr.io/user/repo@sha256:123abc", Reference::builder().host("ghcr.io").repository("user/repo").digest(circe::digest!("sha256", "123abc", 3)).build(); "ghcr.io/user/repo@sha256:123abc")] #[test_case("docker.io/library/ubuntu", Reference::builder().host("docker.io").repository("library/ubuntu").build(); "docker.io/library/ubuntu")] #[test] fn parse(input: &str, expected: Reference) { @@ -12,7 +12,7 @@ fn parse(input: &str, expected: Reference) { } #[test_case(Reference::builder().host("docker.io").repository("library/ubuntu").tag("latest").build(), "docker.io/library/ubuntu:latest"; "docker.io/library/ubuntu:latest")] -#[test_case(Reference::builder().host("ghcr.io").repository("user/repo").digest("sha256:123abc").build(), "ghcr.io/user/repo@sha256:123abc"; "ghcr.io/user/repo@sha256:123abc")] +#[test_case(Reference::builder().host("ghcr.io").repository("user/repo").digest(circe::digest!("sha256", "123abc", 3)).build(), "ghcr.io/user/repo@sha256:123abc"; "ghcr.io/user/repo@sha256:123abc")] #[test_case(Reference::builder().host("docker.io").repository("library/ubuntu").build(), "docker.io/library/ubuntu:latest"; "docker.io/library/ubuntu")] #[test] fn display(reference: Reference, expected: &str) { @@ -60,7 +60,9 @@ fn reference_strategy() -> impl Strategy { repository_strategy(), prop_oneof![ tag_strategy().prop_map(circe::Version::Tag), - digest_strategy().prop_map(circe::Version::Digest) + digest_strategy().prop_map(|digest| { + circe::Version::Digest(digest.parse::().expect("parse digest")) + }) ], ) .prop_map(|(host, repository, version)| Reference { diff --git a/lib/tests/it/registry.rs b/lib/tests/it/registry.rs new file mode 100644 index 
0000000..4e9034e --- /dev/null +++ b/lib/tests/it/registry.rs @@ -0,0 +1,27 @@ +use circe::{Platform, Reference}; +use color_eyre::Result; +use simple_test_case::test_case; + +#[test_case("docker.io/library/alpine:latest", None; "docker.io/library/alpine:latest")] +#[test_case("docker.io/library/ubuntu:latest", None; "docker.io/library/ubuntu:latest")] +#[tokio::test] +async fn single_platform_layers(image: &str, platform: Option) -> Result<()> { + let reference = image.parse::()?; + let layers = circe::registry::layers(platform.as_ref(), &reference).await?; + + // Verify we got some layers back + assert!(!layers.is_empty(), "image should have at least one layer"); + Ok(()) +} + +#[test_case("docker.io/library/golang:latest", Platform::linux_amd64(); "docker.io/library/golang:latest.linux_amd64")] +#[test_case("docker.io/library/golang:latest", Platform::linux_arm64(); "docker.io/library/golang:latest.linux_arm64")] +#[tokio::test] +async fn multi_platform_layers(image: &str, platform: Platform) -> Result<()> { + let reference = image.parse::()?; + let layers = circe::registry::layers(Some(&platform), &reference).await?; + + // Verify we got some layers back + assert!(!layers.is_empty(), "image should have at least one layer"); + Ok(()) +}