diff --git a/Cargo.lock b/Cargo.lock index d7e7ac2..6d8c469 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,6 +116,12 @@ dependencies = [ "backtrace", ] +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + [[package]] name = "async-stream" version = "0.3.5" @@ -144,6 +150,35 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "aya" +version = "0.12.0" +source = "git+https://github.com/aya-rs/aya.git#542306d295e51ac1ec117ce453544f201875af3d" +dependencies = [ + "assert_matches", + "aya-obj", + "bitflags 2.4.2", + "bytes", + "lazy_static", + "libc", + "log", + "object", + "thiserror", +] + +[[package]] +name = "aya-obj" +version = "0.1.0" +source = "git+https://github.com/aya-rs/aya.git#542306d295e51ac1ec117ce453544f201875af3d" +dependencies = [ + "bytes", + "core-error", + "hashbrown 0.14.3", + "log", + "object", + "thiserror", +] + [[package]] name = "backtrace" version = "0.3.69" @@ -235,9 +270,9 @@ checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "cc" -version = "1.0.88" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc" +checksum = "a0ba8f7aaa012f30d5b2861462f6708eccd49c3c39863fe083a308035f63d723" [[package]] name = "cfg-if" @@ -323,6 +358,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-stream", + "aya", "bitflags 2.4.2", "bollard", "bytes", @@ -342,6 +378,15 @@ dependencies = [ "walkdir", ] +[[package]] +name = "core-error" +version = "0.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efcdb2972eb64230b4c50646d8498ff73f5128d196a90c7236eec4cbe8619b8f" +dependencies = [ + 
"version_check", +] + [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -543,9 +588,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b32afd38673a8016f7c9ae69e5af41a58f81b1d31689040f2f1959594ce194ea" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -730,13 +775,19 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" -version = "0.3.68" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.153" @@ -1024,9 +1075,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", @@ -1488,9 +1539,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -1513,9 
+1564,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.91" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1523,9 +1574,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.91" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", @@ -1538,9 +1589,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.91" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1548,9 +1599,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.91" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", @@ -1561,9 +1612,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.91" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "winapi" diff --git a/Cargo.toml 
b/Cargo.toml index 455b114..9ddb451 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ bollard = "0.16" futures = "0.3" rustix = { version = "0.38", features = ["fs", "stdio", "termios"] } bitflags = "2" +aya = { git = "https://github.com/aya-rs/aya.git" } [build-dependencies] anyhow = { version = "1", features = ["backtrace"] } diff --git a/README.md b/README.md index 661b085..03baa33 100644 --- a/README.md +++ b/README.md @@ -25,10 +25,7 @@ Another concern is providing a container with well known paths for the devices. On bare-metal systems this would usually be achieved with a `SYMLINK` directive in a udev rule. This program tries to provide a similar functionality for containers, allowing you to specify symlinks for certain devices. -## Limitations - -`container-hotplug` needs to be run as root and relies on `cgroup v1`. It does not support `cgroup v2`. -On distributions with `cgroup v2`, you can switch back to `cgroup v1` by setting the [kernel parameter](https://wiki.ubuntu.com/Kernel/KernelBootParameters) `systemd.unified_cgroup_hierarchy=0`. +This tool supports both cgroup v1 and v2. ## Example diff --git a/src/docker/cgroup.rs b/src/docker/cgroup.rs index 026d6ae..4ac5691 100644 --- a/src/docker/cgroup.rs +++ b/src/docker/cgroup.rs @@ -1,4 +1,8 @@ -use anyhow::{ensure, Result}; +use anyhow::{ensure, Context, Result}; +use aya::maps::{HashMap, MapData}; +use aya::programs::{CgroupDevice, Link}; +use std::fs::File; +use std::mem::ManuallyDrop; use std::path::PathBuf; // The numerical representation below needs to match BPF_DEVCG constants. @@ -26,6 +30,10 @@ pub trait DeviceAccessController { minor: u32, access: Access, ) -> Result<()>; + + /// Stop performing access control. This may allow all accesses, so should only be used when + /// the cgroup is shut down.
+ fn stop(self: Box) -> Result<()>; } pub struct DeviceAccessControllerV1 { @@ -96,4 +104,95 @@ impl DeviceAccessController for DeviceAccessControllerV1 { Ok(()) } + + fn stop(self: Box) -> Result<()> { + Ok(()) + } +} + +#[repr(C)] // This is read as POD by the BPF program. +#[derive(Clone, Copy)] +struct Device { + device_type: u32, + major: u32, + minor: u32, +} + +// SAFETY: Device is `repr(C)` and has no padding. +unsafe impl aya::Pod for Device {} + +pub struct DeviceAccessControllerV2 { + map: HashMap, + pin: PathBuf, +} + +impl DeviceAccessControllerV2 { + pub fn new(id: &str) -> Result { + // We want to take control of the device cgroup filtering from docker. To do this, we attach our own + // filter program and detach the one installed by docker. + let cgroup_path = format!("/sys/fs/cgroup/system.slice/docker-{id}.scope"); + let cgroup = File::open(cgroup_path)?; + + let mut bpf = aya::Bpf::load(include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/cgroup_device_filter/target/bpfel-unknown-none/release/cgroup_device_filter" + )))?; + + let program: &mut CgroupDevice = bpf + .program_mut("check_device") + .context("cannot find check_device program")? + .try_into()?; + + program.load()?; + + // Iterate existing programs. We'll need to detach them later. + // Wrap this inside `ManuallyDrop` to prevent accidental detaching. + let existing_programs = ManuallyDrop::new(CgroupDevice::query(&cgroup)?); + + program.attach(&cgroup)?; + + // Pin the program so that if container-hotplug accidentally exits, the filter won't be removed from the docker + // container. + let pin: PathBuf = format!("/sys/fs/bpf/docker-{id}-device-filter").into(); + program.pin(&pin)?; + + // Now that our new filter is attached, detach all docker filters. + for existing_program in ManuallyDrop::into_inner(existing_programs) { + existing_program.detach()?; + } + + let map: HashMap<_, Device, u32> = bpf + .take_map("DEVICE_PERM") + .context("cannot find DEVICE_PERM map")?
+ .try_into()?; + + Ok(Self { map, pin }) + } +} + +impl DeviceAccessController for DeviceAccessControllerV2 { + fn set_permission( + &mut self, + ty: DeviceType, + major: u32, + minor: u32, + access: Access, + ) -> Result<()> { + let device = Device { + device_type: ty as u32, + major, + minor, + }; + if access.is_empty() { + self.map.remove(&device)?; + } else { + self.map.insert(device, access.bits(), 0)?; + } + Ok(()) + } + + fn stop(self: Box) -> Result<()> { + CgroupDevice::from_pin(&self.pin)?.unpin()?; + Ok(()) + } } diff --git a/src/docker/container.rs b/src/docker/container.rs index e7a9525..7821c95 100644 --- a/src/docker/container.rs +++ b/src/docker/container.rs @@ -10,7 +10,7 @@ use tokio::signal::unix::{signal, SignalKind}; use tokio::task::{spawn, JoinHandle}; use tokio_stream::StreamExt; -use super::cgroup::{Access, DeviceAccessController, DeviceAccessControllerV1, DeviceType}; +use super::cgroup::{Access, DeviceAccessController, DeviceAccessControllerV1, DeviceAccessControllerV2, DeviceType}; use super::{IoStream, IoStreamSource}; #[derive(Clone)] @@ -18,7 +18,7 @@ pub struct Container { id: String, docker: bollard::Docker, remove_event: Shared>>, - cgroup_device_filter: Arc>>, + cgroup_device_filter: Arc>>>, } impl Container { @@ -40,13 +40,19 @@ impl Container { .shared(); let cgroup_device_filter: Box = - Box::new(DeviceAccessControllerV1::new(id)?); + match DeviceAccessControllerV2::new(id) { + Ok(v) => Box::new(v), + Err(err) => match DeviceAccessControllerV1::new(id) { + Ok(v) => Box::new(v), + Err(_) => Err(err).context("neither cgroup v1 and cgroup v2 works")?, + }, + }; Ok(Self { id: id.to_owned(), docker: docker.clone(), remove_event: remove_evevnt, - cgroup_device_filter: Arc::new(Mutex::new(cgroup_device_filter)), + cgroup_device_filter: Arc::new(Mutex::new(Some(cgroup_device_filter))), }) } @@ -83,6 +89,14 @@ impl Container { .context("no destroy event")?; } + // Stop the cgroup device filter. 
Only do so once we're sure that the container is removed. + self.cgroup_device_filter + .lock() + .unwrap() + .take() + .unwrap() + .stop()?; + Ok(()) } @@ -229,7 +243,7 @@ impl Container { let controller = self.cgroup_device_filter.clone(); tokio::task::spawn_blocking(move || -> Result<()> { let mut controller = controller.lock().unwrap(); - controller.set_permission( + controller.as_mut().unwrap().set_permission( DeviceType::Character, major, minor, diff --git a/src/main.rs b/src/main.rs index b0418d8..0bf6372 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,11 +7,11 @@ use cli::{Action, Device, Symlink}; use docker::{Container, Docker}; use hotplug::{Event as HotPlugEvent, HotPlug, PluggedDevice}; +use std::fmt::Display; use std::pin::pin; -use std::{fmt::Display, path::Path}; use tokio_stream::StreamExt; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use clap::Parser; use clap_verbosity_flag::{InfoLevel, LogLevel, Verbosity}; use log::info; @@ -98,10 +98,6 @@ fn run_hotplug( async fn run(param: cli::Run, verbosity: Verbosity) -> Result { let mut status = 0; - if !Path::new("/sys/fs/cgroup/devices/").is_dir() { - bail!("Could not find cgroup v1"); - } - let docker = Docker::connect_with_defaults()?; let container = docker.run(param.docker_args).await?; drop(container.pipe_signals());