diff --git a/Cargo.lock b/Cargo.lock
index 00ac659..8c9c063 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -323,6 +323,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "async-stream",
+ "bitflags 2.4.2",
  "bollard",
  "bytes",
  "clap",
diff --git a/Cargo.toml b/Cargo.toml
index f4112df..84cec1b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,12 @@ license = "MIT OR Apache-2.0"
 anyhow = { version = "1", features = ["backtrace"] }
 log = "0.4"
 env_logger = "0.11"
-clap = { version = "4", features = ["default", "derive", "unicode", "wrap_help"] }
+clap = { version = "4", features = [
+    "default",
+    "derive",
+    "unicode",
+    "wrap_help",
+] }
 clap-verbosity-flag = "2"
 humantime = "2"
 bytes = "1"
@@ -24,3 +29,4 @@ udev = "0.8"
 bollard = "0.16"
 futures = "0.3"
 rustix = { version = "0.38", features = ["fs", "stdio", "termios"] }
+bitflags = "2"
diff --git a/src/docker/cgroup.rs b/src/docker/cgroup.rs
new file mode 100644
index 0000000..026d6ae
--- /dev/null
+++ b/src/docker/cgroup.rs
@@ -0,0 +1,114 @@
+//! Per-container device permissions, enforced through the cgroup device controller.
+
+use anyhow::{ensure, Context, Result};
+use std::path::PathBuf;
+
+/// Kind of device node a permission rule applies to.
+///
+/// The numerical representation needs to match the BPF_DEVCG constants.
+// NOTE(review): presumably here because `Block` is not constructed anywhere yet — confirm.
+#[allow(unused)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(u32)]
+pub enum DeviceType {
+    Block = 1,
+    Character = 2,
+}
+
+bitflags::bitflags! {
+    /// Operations that can be granted on a device.
+    ///
+    /// The numerical representation needs to match the BPF_DEVCG constants.
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+    pub struct Access: u32 {
+        const MKNOD = 1;
+        const READ = 2;
+        const WRITE = 4;
+    }
+}
+
+/// Grants or revokes a container's access to individual devices.
+pub trait DeviceAccessController {
+    /// Set the permission for a specific device.
+    ///
+    /// Operations contained in `access` are allowed; all others are denied.
+    fn set_permission(
+        &mut self,
+        ty: DeviceType,
+        major: u32,
+        minor: u32,
+        access: Access,
+    ) -> Result<()>;
+}
+
+/// [`DeviceAccessController`] that writes rules to the `devices.allow` and
+/// `devices.deny` files of a container's cgroup directory.
+pub struct DeviceAccessControllerV1 {
+    cgroup: PathBuf,
+}
+
+impl DeviceAccessControllerV1 {
+    /// Creates a controller for the Docker container with the given `id`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `/sys/fs/cgroup/devices/docker/{id}` is not a directory.
+    pub fn new(id: &str) -> Result<Self> {
+        let cgroup: PathBuf = format!("/sys/fs/cgroup/devices/docker/{id}").into();
+
+        ensure!(
+            cgroup.is_dir(),
+            "cgroup {} does not exist",
+            cgroup.display()
+        );
+
+        Ok(Self { cgroup })
+    }
+
+    /// Writes `rule` to `file` inside the cgroup directory, naming the file on failure.
+    fn write_rule(&self, file: &str, rule: &str) -> Result<()> {
+        let path = self.cgroup.join(file);
+        std::fs::write(&path, rule)
+            .with_context(|| format!("cannot write {}", path.display()))
+    }
+}
+
+impl DeviceAccessController for DeviceAccessControllerV1 {
+    fn set_permission(
+        &mut self,
+        ty: DeviceType,
+        major: u32,
+        minor: u32,
+        access: Access,
+    ) -> Result<()> {
+        let ty = match ty {
+            DeviceType::Character => 'c',
+            DeviceType::Block => 'b',
+        };
+
+        // Sort each operation letter into the set to grant or the set to revoke.
+        let mut allowed = String::with_capacity(3);
+        let mut denied = String::with_capacity(3);
+        for (flag, letter) in [
+            (Access::READ, 'r'),
+            (Access::WRITE, 'w'),
+            (Access::MKNOD, 'm'),
+        ] {
+            if access.contains(flag) {
+                allowed.push(letter);
+            } else {
+                denied.push(letter);
+            }
+        }
+
+        if !denied.is_empty() {
+            self.write_rule("devices.deny", &format!("{ty} {major}:{minor} {denied}"))?;
+        }
+
+        if !allowed.is_empty() {
+            self.write_rule("devices.allow", &format!("{ty} {major}:{minor} {allowed}"))?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/docker/container.rs b/src/docker/container.rs
index 9b81a3e..e7a9525 100644
--- a/src/docker/container.rs
+++ b/src/docker/container.rs
@@ -1,22 +1,24 @@
 use std::pin::pin;
+use std::sync::{Arc, Mutex};
 use std::time::Duration;
 
-use super::{IoStream, IoStreamSource};
-
 use anyhow::{anyhow, Context, Error, Result};
 use bollard::service::EventMessage;
 use futures::future::{BoxFuture, Shared};
 use futures::FutureExt;
-use tokio::io::AsyncWriteExt;
 use tokio::signal::unix::{signal, SignalKind};
 use tokio::task::{spawn, JoinHandle};
 use tokio_stream::StreamExt;
 
+use super::cgroup::{Access, DeviceAccessController, DeviceAccessControllerV1, DeviceType};
+use super::{IoStream, IoStreamSource};
+
 #[derive(Clone)]
 pub struct Container {
     id: String,
     docker: bollard::Docker,
     remove_event: Shared>>,
+    cgroup_device_filter: Arc<Mutex<Box<dyn DeviceAccessController + Send>>>,
 }
 
 impl Container {
@@ -37,10 +39,14 @@ impl Container {
             .boxed()
             .shared();
 
+        let cgroup_device_filter: Box<dyn DeviceAccessController + Send> =
+            Box::new(DeviceAccessControllerV1::new(id)?);
+
         Ok(Self {
             id: id.to_owned(),
             docker: docker.clone(),
             remove_event: remove_evevnt,
+            cgroup_device_filter: Arc::new(Mutex::new(cgroup_device_filter)),
         })
     }
 
@@ -76,6 +82,7 @@ impl Container {
                 .await
                 .context("no destroy event")?;
         }
+
         Ok(())
     }
 
@@ -219,24 +226,22 @@ impl Container {
         (major, minor): (u32, u32),
         (r, w, m): (bool, bool, bool),
     ) -> Result<()> {
-        let mut permissions = String::new();
-        if r {
-            permissions.push('r');
-        }
-        if w {
-            permissions.push('w');
-        }
-        if m {
-            permissions.push('m');
-        }
-
-        deny_device_cgroup1(&self.id, major, minor, "rwm").await?;
+        // `set_permission` does blocking file I/O, so run it off the async executor.
+        let controller = self.cgroup_device_filter.clone();
+        tokio::task::spawn_blocking(move || -> Result<()> {
+            let mut controller = controller.lock().unwrap();
+            controller.set_permission(
+                DeviceType::Character,
+                major,
+                minor,
+                if r { Access::READ } else { Access::empty() }
+                    | if w { Access::WRITE } else { Access::empty() }
+                    | if m { Access::MKNOD } else { Access::empty() },
+            )?;
 
-        if permissions != "" {
-            allow_device_cgroup1(&self.id, major, minor, permissions.as_ref()).await?;
-        }
-
-        Ok(())
+            Ok(())
+        })
+        .await?
     }
 
     pub async fn pipe_signals(&self) -> JoinHandle> {
@@ -270,32 +275,6 @@ impl Container {
     }
 }
 
-async fn allow_device_cgroup1(
-    container_id: &str,
-    major: u32,
-    minor: u32,
-    permissions: &str,
-) -> Result<()> {
-    let path = format!("/sys/fs/cgroup/devices/docker/{container_id}/devices.allow");
-    let mut file = tokio::fs::OpenOptions::new().write(true).open(path).await?;
-    let mut data = bytes::Bytes::from(format!("c {major}:{minor} {permissions}"));
-    file.write_all_buf(&mut data).await?;
-    Ok(())
-}
-
-async fn deny_device_cgroup1(
-    container_id: &str,
-    major: u32,
-    minor: u32,
-    permissions: &str,
-) -> Result<()> {
-    let path = format!("/sys/fs/cgroup/devices/docker/{container_id}/devices.deny");
-    let mut file = tokio::fs::OpenOptions::new().write(true).open(path).await?;
-    let mut data = bytes::Bytes::from(format!("c {major}:{minor} {permissions}"));
-    file.write_all_buf(&mut data).await?;
-    Ok(())
-}
-
 fn signal_stream(kind: SignalKind) -> impl tokio_stream::Stream> {
     async_stream::try_stream! {
         let sig_kind = SignalKind::hangup();
diff --git a/src/docker/mod.rs b/src/docker/mod.rs
index 4025f5d..f015187 100644
--- a/src/docker/mod.rs
+++ b/src/docker/mod.rs
@@ -1,3 +1,4 @@
+mod cgroup;
 mod container;
 mod docker;
 mod iostream;