From fff5c2a78a75b0b4537a13b68e88a7ee086352e6 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 17:40:20 -0700 Subject: [PATCH 01/25] Implement `select`. Add a `select` function, defined only on platforms where it doesn't have an `FD_SETSIZE` limitation. --- src/backend/libc/event/syscalls.rs | 54 +++++++++ src/event/mod.rs | 4 + src/event/poll.rs | 2 +- src/event/select.rs | 57 +++++++++ src/lib.rs | 2 + tests/event/main.rs | 2 + tests/event/select.rs | 180 +++++++++++++++++++++++++++++ 7 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 src/event/select.rs create mode 100644 tests/event/select.rs diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index dcd0135f0..fdba539a4 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -16,6 +16,8 @@ use crate::event::port::Event; target_os = "espidf" ))] use crate::event::EventfdFlags; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +use crate::event::FdSetElement; use crate::event::PollFd; use crate::io; #[cfg(solarish)] @@ -125,6 +127,58 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, +) -> io::Result { + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `c::timeval`. + timeout_data = c::timeval { + tv_sec: timeout.tv_sec, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => core::ptr::null(), + }; + + // On Apple platforms, use the specially mangled `select` which doesn't + // have an `FD_SETSIZE` limitation. + #[cfg(apple)] + { + extern "C" { + #[link_name = "select$DARWIN_EXTSN$NOCANCEL"] + fn select( + nfds: c::c_int, + readfds: *mut FdSetElement, + writefds: *mut FdSetElement, + errorfds: *mut FdSetElement, + timeout: *const c::timeval, + ) -> c::c_int; + } + + ret_c_int(select(nfds, readfds, writefds, exceptfds, timeout_ptr)) + } + + // Otherwise just use the normal `select`. 
+ #[cfg(not(apple))] + { + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr as *mut c::timeval, + )) + } +} + #[cfg(solarish)] pub(crate) fn port_create() -> io::Result { unsafe { ret_owned_fd(c::port_create()) } diff --git a/src/event/mod.rs b/src/event/mod.rs index dab9c6932..be0f1bc89 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -16,6 +16,8 @@ mod pause; mod poll; #[cfg(solarish)] pub mod port; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +mod select; #[cfg(any( linux_kernel, @@ -27,3 +29,5 @@ pub use eventfd::{eventfd, EventfdFlags}; #[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] pub use pause::*; pub use poll::{poll, PollFd, PollFlags}; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +pub use select::{select, FdSetElement, Timespec}; diff --git a/src/event/poll.rs b/src/event/poll.rs index 0937dd6fd..2b60a920f 100644 --- a/src/event/poll.rs +++ b/src/event/poll.rs @@ -2,7 +2,7 @@ use crate::{backend, io}; pub use backend::event::poll_fd::{PollFd, PollFlags}; -/// `poll(self.fds, timeout)` +/// `poll(self.fds, timeout)`—Wait for events on lists of file descriptors. /// /// # References /// - [Beej's Guide to Network Programming] diff --git a/src/event/select.rs b/src/event/select.rs new file mode 100644 index 000000000..85a5b6a9b --- /dev/null +++ b/src/event/select.rs @@ -0,0 +1,57 @@ +use crate::{backend, io}; + +pub use crate::timespec::Timespec; + +/// Bitfield array element type for use with [`select`]. +#[cfg(all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") +))] +pub type FdSetElement = i64; + +/// Bitfield array element type for use with [`select`]. +#[cfg(not(all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") +)))] +pub type FdSetElement = i32; + +/// `select(nfds, readfds, writefds, exceptfds, timeout)`—Wait for events on +/// sets of file descriptors. 
+/// +/// This `select` wrapper differs from POSIX in that `nfds` is not limited to +/// `FD_SETSIZE`. Instead of using the opaque fixed-sized `fd_set` type, this +/// function takes raw pointers to arrays of `nfds / size_of::()` +/// elements of type `FdSetElement`. +/// +/// In particular, on Apple platforms, it behaves as if +/// `_DARWIN_UNLIMITED_SELECT` were predefined. And on Linux platforms, it is +/// not defined because Linux's `select` always has an `FD_SETSIZE` limitation. +/// On Linux, it is recommended to use [`poll`] instead. +/// +/// # Safety +/// +/// `readfds`, `writefds`, `exceptfds` must point to arrays of `FdSetElement` +/// containing at least `nfds.div_ceil(size_of::())` elements. +/// +/// # References +/// - [POSIX] +/// - [Apple] +/// - [FreeBSD] +/// - [NetBSD] +/// - [DragonFly BSD] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/select.html +/// [Apple]: https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/select.2.html +/// [FreeBSD]: https://man.freebsd.org/cgi/man.cgi?query=select&sektion=2 +/// [NetBSD]: https://man.netbsd.org/select.2 +/// [DragonFly BSD]: https://man.dragonflybsd.org/?command=select§ion=2 +pub unsafe fn select( + nfds: i32, + readfds: *mut FdSetElement, + writefds: *mut FdSetElement, + exceptfds: *mut FdSetElement, + timeout: Option<&Timespec>, +) -> io::Result { + backend::event::syscalls::select(nfds, readfds, writefds, exceptfds, timeout) +} diff --git a/src/lib.rs b/src/lib.rs index 0fd0dc4f8..ec545d438 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,6 +71,7 @@ //! - Provide y2038 compatibility, on platforms which support this. //! - Correct selected platform bugs, such as behavioral differences when //! running under seccomp. +//! - Use `timespec` for timestamps instead of `timeval`. //! //! Things they don't do include: //! 
- Detecting whether functions are supported at runtime, except in specific @@ -362,6 +363,7 @@ mod signal; feature = "runtime", feature = "thread", feature = "time", + all(feature = "event", any(apple, freebsdlike, target_os = "netbsd")), all( linux_raw, not(feature = "use-libc-auxv"), diff --git a/tests/event/main.rs b/tests/event/main.rs index 68f999737..72da82f56 100644 --- a/tests/event/main.rs +++ b/tests/event/main.rs @@ -10,3 +10,5 @@ mod epoll; #[cfg(not(target_os = "wasi"))] mod eventfd; mod poll; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +mod select; diff --git a/tests/event/select.rs b/tests/event/select.rs new file mode 100644 index 000000000..6eeb97b38 --- /dev/null +++ b/tests/event/select.rs @@ -0,0 +1,180 @@ +#[cfg(feature = "pipe")] +use { + rustix::event::{select, FdSetElement}, + rustix::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}, + rustix::io::retry_on_intr, + std::cmp::max, +}; + +#[cfg(feature = "pipe")] +#[test] +fn test_select() { + use core::mem::size_of; + use core::ptr::null_mut; + use rustix::event::Timespec; + use rustix::io::{read, write}; + use rustix::pipe::pipe; + + // The number of bits in an `fd_set` element. + const BITS: usize = size_of::() * 8; + + // Create a pipe. + let (reader, writer) = pipe().unwrap(); + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + + // Write a byte to the pipe. + assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); + + // `select` should now say there's data to be read. 
+ let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) + }) + .unwrap(); + assert_eq!(num, 1); + assert_eq!( + readfds[reader.as_raw_fd() as usize / BITS], + 1 << (reader.as_raw_fd() as usize % BITS) + ); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!(retry_on_intr(|| read(&reader, &mut buf)).unwrap(), 1); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); +} + +#[cfg(feature = "pipe")] +#[test] +fn test_select_with_great_fds() { + use core::cmp::max; + use core::mem::size_of; + use core::ptr::null_mut; + use rustix::event::select; + use rustix::event::Timespec; + use rustix::io::{read, write}; + use rustix::pipe::pipe; + use rustix::process::{getrlimit, setrlimit, Resource}; + + // The number of bits in an `fd_set` element. + const BITS: usize = size_of::() * 8; + + // Create a pipe. + let (reader, writer) = pipe().unwrap(); + + // Raise the file descriptor limit so that we can test fds above + // `FD_SETSIZE`. + let orig_rlimit = getrlimit(Resource::Nofile); + let mut rlimit = orig_rlimit; + if let Some(current) = rlimit.current { + rlimit.current = Some(max(current, libc::FD_SETSIZE as u64 + 2)); + } + setrlimit(Resource::Nofile, rlimit).unwrap(); + + // Create a fd at `FD_SETSIZE + 1` out of thin air. 
Use `libc` instead + // of `OwnedFd::from_raw_fd` because grabbing a fd out of thin air + // violates Rust's concept of I/O safety (and wouldn't make sense to do + // in anything other than a test like this). + let great_fd = unsafe { libc::dup2(reader.as_raw_fd(), libc::FD_SETSIZE as RawFd + 1) }; + let reader = unsafe { OwnedFd::from_raw_fd(great_fd) }; + + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + + // Write a byte to the pipe. + assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); + + // `select` should now say there's data to be read. + let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) + }) + .unwrap(); + assert_eq!(num, 1); + assert_eq!( + readfds[reader.as_raw_fd() as usize / BITS], + 1 << (reader.as_raw_fd() as usize % BITS) + ); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!(retry_on_intr(|| read(&reader, &mut buf)).unwrap(), 1); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. 
+ readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + + // Reset the process limit. + setrlimit(Resource::Nofile, orig_rlimit).unwrap(); +} From dd44e098af66d7ef2ddf71b1b926d9fcbaef4b5c Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 19:10:28 -0700 Subject: [PATCH 02/25] Fix test hangs on macos. In the `waitpid` tests, ensure that the child process has exited, as dropping `Command` otherwise leaves the process running. This fixes test hangs on macos. --- tests/process/wait.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/process/wait.rs b/tests/process/wait.rs index 23c41b1bb..b4f74adc8 100644 --- a/tests/process/wait.rs +++ b/tests/process/wait.rs @@ -23,6 +23,9 @@ fn test_waitpid_none() { .unwrap(); assert_eq!(pid, process::Pid::from_child(&child)); assert!(status.stopped()); + + // Clean up the child process. + unsafe { kill(child.id() as _, SIGKILL) }; } #[test] @@ -41,6 +44,9 @@ fn test_waitpid_some() { .unwrap(); assert_eq!(rpid, pid); assert!(status.stopped()); + + // Clean up the child process. + unsafe { kill(child.id() as _, SIGKILL) }; } #[test] @@ -59,6 +65,9 @@ fn test_waitpgid() { .unwrap(); assert_eq!(pid, process::Pid::from_child(&child)); assert!(status.stopped()); + + // Clean up the child process. + unsafe { kill(child.id() as _, SIGKILL) }; } #[cfg(not(any( From 11a0802fe40d6b133c3c02b685f107674abe74e1 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 19:24:57 -0700 Subject: [PATCH 03/25] Wait for the child process after signaling it. 
--- tests/process/wait.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/process/wait.rs b/tests/process/wait.rs index b4f74adc8..1149d660c 100644 --- a/tests/process/wait.rs +++ b/tests/process/wait.rs @@ -26,6 +26,12 @@ fn test_waitpid_none() { // Clean up the child process. unsafe { kill(child.id() as _, SIGKILL) }; + + let (pid, status) = process::waitpid(None, process::WaitOptions::UNTRACED) + .expect("failed to wait") + .unwrap(); + assert_eq!(pid, process::Pid::from_child(&child)); + assert!(status.signaled()); } #[test] @@ -47,6 +53,12 @@ fn test_waitpid_some() { // Clean up the child process. unsafe { kill(child.id() as _, SIGKILL) }; + + let (rpid, status) = process::waitpid(Some(pid), process::WaitOptions::UNTRACED) + .expect("failed to wait") + .unwrap(); + assert_eq!(rpid, pid); + assert!(status.signaled()); } #[test] @@ -68,6 +80,12 @@ fn test_waitpgid() { // Clean up the child process. unsafe { kill(child.id() as _, SIGKILL) }; + + let (pid, status) = process::waitpgid(pgid, process::WaitOptions::UNTRACED) + .expect("failed to wait") + .unwrap(); + assert_eq!(pid, process::Pid::from_child(&child)); + assert!(status.signaled()); } #[cfg(not(any( From 51c076333442a7d8ab65d75b510651e8ca826589 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 19:39:03 -0700 Subject: [PATCH 04/25] Fix the vector sizes in the test. --- tests/event/select.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/event/select.rs b/tests/event/select.rs index 6eeb97b38..f0f739a75 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -23,7 +23,7 @@ fn test_select() { let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. 
- let mut readfds = vec![0 as FdSetElement; nfds as usize]; + let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); let num = retry_on_intr(|| unsafe { select( @@ -45,7 +45,7 @@ fn test_select() { assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. - let mut readfds = vec![0 as FdSetElement; nfds as usize]; + let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); let num = retry_on_intr(|| unsafe { select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) @@ -118,7 +118,7 @@ fn test_select_with_great_fds() { let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. - let mut readfds = vec![0 as FdSetElement; nfds as usize]; + let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); let num = retry_on_intr(|| unsafe { select( @@ -140,7 +140,7 @@ fn test_select_with_great_fds() { assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. 
- let mut readfds = vec![0 as FdSetElement; nfds as usize]; + let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); let num = retry_on_intr(|| unsafe { select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) From e6837847ded936b0b9cfec457e5a4398fba0fc1a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 19:42:20 -0700 Subject: [PATCH 05/25] rustfmt --- tests/event/select.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/event/select.rs b/tests/event/select.rs index f0f739a75..12dfc2b86 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -87,8 +87,7 @@ fn test_select_with_great_fds() { use core::cmp::max; use core::mem::size_of; use core::ptr::null_mut; - use rustix::event::select; - use rustix::event::Timespec; + use rustix::event::{select, Timespec}; use rustix::io::{read, write}; use rustix::pipe::pipe; use rustix::process::{getrlimit, setrlimit, Resource}; From faacb146aa82ecb63eda05f9940720db2451438b Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 19:59:01 -0700 Subject: [PATCH 06/25] More comments. --- src/event/poll.rs | 5 +++++ src/event/select.rs | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/event/poll.rs b/src/event/poll.rs index 2b60a920f..00014eb1e 100644 --- a/src/event/poll.rs +++ b/src/event/poll.rs @@ -4,6 +4,11 @@ pub use backend::event::poll_fd::{PollFd, PollFlags}; /// `poll(self.fds, timeout)`—Wait for events on lists of file descriptors. /// +/// On macOS, `poll` doesn't work on fds for /dev/tty or /dev/null, however +/// [`select`] is available on macOS and does work on these fds. 
+/// +/// [`select`]: https://docs.rs/rustix/*/x86_64-apple-darwin/rustix/event/fn.select.html +/// /// # References /// - [Beej's Guide to Network Programming] /// - [POSIX] diff --git a/src/event/select.rs b/src/event/select.rs index 85a5b6a9b..401be6e6b 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -21,8 +21,11 @@ pub type FdSetElement = i32; /// /// This `select` wrapper differs from POSIX in that `nfds` is not limited to /// `FD_SETSIZE`. Instead of using the opaque fixed-sized `fd_set` type, this -/// function takes raw pointers to arrays of `nfds / size_of::()` -/// elements of type `FdSetElement`. +/// function takes raw pointers to arrays of +/// `nfds.div_ceil(size_of::())` elements of type `FdSetElement`, +/// where a fd `fd` is in the set if the element at index +/// `fd / (size_of::() * 8)` has the bit +/// `1 << (fd % (size_of::() * 8))` set. /// /// In particular, on Apple platforms, it behaves as if /// `_DARWIN_UNLIMITED_SELECT` were predefined. And on Linux platforms, it is From c6b92bd3d4d9b3603399e560e0d7f78d0d595b6c Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 20:11:54 -0700 Subject: [PATCH 07/25] More comments. --- src/event/select.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/event/select.rs b/src/event/select.rs index 401be6e6b..6f475e496 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -2,14 +2,14 @@ use crate::{backend, io}; pub use crate::timespec::Timespec; -/// Bitfield array element type for use with [`select`]. +/// Bitvector element type for use with [`select`]. #[cfg(all( target_pointer_width = "64", any(target_os = "freebsd", target_os = "dragonfly") ))] pub type FdSetElement = i64; -/// Bitfield array element type for use with [`select`]. +/// Bitvector element type for use with [`select`]. 
#[cfg(not(all( target_pointer_width = "64", any(target_os = "freebsd", target_os = "dragonfly") @@ -27,10 +27,15 @@ pub type FdSetElement = i32; /// `fd / (size_of::() * 8)` has the bit /// `1 << (fd % (size_of::() * 8))` set. /// -/// In particular, on Apple platforms, it behaves as if -/// `_DARWIN_UNLIMITED_SELECT` were predefined. And on Linux platforms, it is -/// not defined because Linux's `select` always has an `FD_SETSIZE` limitation. -/// On Linux, it is recommended to use [`poll`] instead. +/// In particular, on Apple platforms, this function behaves as if +/// `_DARWIN_UNLIMITED_SELECT` were predefined. +/// +/// On Linux, illumos, and OpenBSD, this function is not defined because the +/// `select` functions on these platforms always has an `FD_SETSIZE` +/// limitation, following POSIX. These platforms' documentation recommend using +/// [`poll`] instead. +/// +/// [`poll`]: crate::event::poll /// /// # Safety /// From f249fc35564716f4464e29b8e6454608c22d0138 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 14 Sep 2024 05:07:42 -0700 Subject: [PATCH 08/25] Add `fd_set` and other convenience functions. --- src/event/mod.rs | 2 +- src/event/select.rs | 41 ++++++++++++++++++++++++++++++-- tests/event/select.rs | 54 +++++++++++++++++-------------------------- 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/src/event/mod.rs b/src/event/mod.rs index be0f1bc89..d0ebee5be 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -30,4 +30,4 @@ pub use eventfd::{eventfd, EventfdFlags}; pub use pause::*; pub use poll::{poll, PollFd, PollFlags}; #[cfg(any(apple, freebsdlike, target_os = "netbsd"))] -pub use select::{select, FdSetElement, Timespec}; +pub use select::*; diff --git a/src/event/select.rs b/src/event/select.rs index 6f475e496..528c1573b 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -23,9 +23,11 @@ pub type FdSetElement = i32; /// `FD_SETSIZE`. 
Instead of using the opaque fixed-sized `fd_set` type, this /// function takes raw pointers to arrays of /// `nfds.div_ceil(size_of::())` elements of type `FdSetElement`, -/// where a fd `fd` is in the set if the element at index +/// representing bitvectors where a fd `fd` is set if the element at index /// `fd / (size_of::() * 8)` has the bit -/// `1 << (fd % (size_of::() * 8))` set. +/// `1 << (fd % (size_of::() * 8))` set. Convenience functions +/// [`fd_set`], [`fd_clr`], [`fd_isset`], and [`fd_bitvector_len`] are provided +/// for setting, clearing, testing, and sizing bitvectors. /// /// In particular, on Apple platforms, this function behaves as if /// `_DARWIN_UNLIMITED_SELECT` were predefined. @@ -63,3 +65,38 @@ pub unsafe fn select( ) -> io::Result { backend::event::syscalls::select(nfds, readfds, writefds, exceptfds, timeout) } + +const BITS: usize = size_of::() * 8; +use crate::fd::RawFd; + +/// Set `fd` in the bitvector pointed to by `fds`. +#[doc(alias = "FD_SET")] +#[inline] +pub fn fd_set(fd: RawFd, fds: &mut [FdSetElement]) { + let fd = fd as usize; + fds[fd / BITS] |= 1 << (fd % BITS); +} + +/// Clear `fd` in the bitvector pointed to by `fds`. +#[doc(alias = "FD_CLR")] +#[inline] +pub fn fd_clr(fd: RawFd, fds: &mut [FdSetElement]) { + let fd = fd as usize; + fds[fd / BITS] &= !(1 << (fd % BITS)); +} + +/// Test whether `fd` is set in the bitvector pointed to by `fds`. +#[doc(alias = "FD_ISSET")] +#[inline] +pub fn fd_isset(fd: RawFd, fds: &[FdSetElement]) -> bool { + let fd = fd as usize; + (fds[fd / BITS] & (1 << (fd % BITS))) != 0 +} + +/// Compute the number of `FdSetElement`s needed to hold a bitvector which can +/// contain file descriptors less than `nfds`. 
+#[inline] +pub fn fd_bitvector_len(nfds: RawFd) -> usize { + let nfds = nfds as usize; + (nfds + (BITS - 1)) / BITS +} diff --git a/tests/event/select.rs b/tests/event/select.rs index 12dfc2b86..c6f7d977e 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -1,6 +1,6 @@ #[cfg(feature = "pipe")] use { - rustix::event::{select, FdSetElement}, + rustix::event::{fd_bitvector_len, fd_clr, fd_isset, fd_set, select, FdSetElement, Timespec}, rustix::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}, rustix::io::retry_on_intr, std::cmp::max, @@ -9,22 +9,17 @@ use { #[cfg(feature = "pipe")] #[test] fn test_select() { - use core::mem::size_of; use core::ptr::null_mut; - use rustix::event::Timespec; use rustix::io::{read, write}; use rustix::pipe::pipe; - // The number of bits in an `fd_set` element. - const BITS: usize = size_of::() * 8; - // Create a pipe. let (reader, writer) = pipe().unwrap(); let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. - let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; - readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; + fd_set(reader.as_raw_fd(), &mut readfds); let num = retry_on_intr(|| unsafe { select( nfds, @@ -39,23 +34,22 @@ fn test_select() { }) .unwrap(); assert_eq!(num, 0); - assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + assert!(!fd_isset(reader.as_raw_fd(), &readfds)); // Write a byte to the pipe. assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. 
- let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; - readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; + fd_set(reader.as_raw_fd(), &mut readfds); let num = retry_on_intr(|| unsafe { select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) }) .unwrap(); assert_eq!(num, 1); - assert_eq!( - readfds[reader.as_raw_fd() as usize / BITS], - 1 << (reader.as_raw_fd() as usize % BITS) - ); + assert!(fd_isset(reader.as_raw_fd(), &readfds)); + fd_clr(reader.as_raw_fd(), &mut readfds); + assert!(!fd_isset(reader.as_raw_fd(), &readfds)); // Read the byte from the pipe. let mut buf = [b'\0']; @@ -63,7 +57,7 @@ fn test_select() { assert_eq!(buf[0], b'a'); // Select should now say there's no more data to be read. - readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + fd_set(reader.as_raw_fd(), &mut readfds); let num = retry_on_intr(|| unsafe { select( nfds, @@ -78,23 +72,18 @@ fn test_select() { }) .unwrap(); assert_eq!(num, 0); - assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + assert!(!fd_isset(reader.as_raw_fd(), &readfds)); } #[cfg(feature = "pipe")] #[test] fn test_select_with_great_fds() { use core::cmp::max; - use core::mem::size_of; use core::ptr::null_mut; - use rustix::event::{select, Timespec}; use rustix::io::{read, write}; use rustix::pipe::pipe; use rustix::process::{getrlimit, setrlimit, Resource}; - // The number of bits in an `fd_set` element. - const BITS: usize = size_of::() * 8; - // Create a pipe. let (reader, writer) = pipe().unwrap(); @@ -117,8 +106,8 @@ fn test_select_with_great_fds() { let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. 
- let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; - readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; + fd_set(reader.as_raw_fd(), &mut readfds); let num = retry_on_intr(|| unsafe { select( nfds, @@ -133,23 +122,22 @@ fn test_select_with_great_fds() { }) .unwrap(); assert_eq!(num, 0); - assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + assert!(!fd_isset(reader.as_raw_fd(), &readfds)); // Write a byte to the pipe. assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. - let mut readfds = vec![0 as FdSetElement; (nfds as usize + (bits - 1)) / bits]; - readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; + fd_set(reader.as_raw_fd(), &mut readfds); let num = retry_on_intr(|| unsafe { select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) }) .unwrap(); assert_eq!(num, 1); - assert_eq!( - readfds[reader.as_raw_fd() as usize / BITS], - 1 << (reader.as_raw_fd() as usize % BITS) - ); + assert!(fd_isset(reader.as_raw_fd(), &readfds)); + fd_clr(reader.as_raw_fd(), &mut readfds); + assert!(!fd_isset(reader.as_raw_fd(), &readfds)); // Read the byte from the pipe. let mut buf = [b'\0']; @@ -157,7 +145,7 @@ fn test_select_with_great_fds() { assert_eq!(buf[0], b'a'); // Select should now say there's no more data to be read. - readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + fd_set(reader.as_raw_fd(), &mut readfds); let num = retry_on_intr(|| unsafe { select( nfds, @@ -172,7 +160,7 @@ fn test_select_with_great_fds() { }) .unwrap(); assert_eq!(num, 0); - assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + assert!(!fd_isset(reader.as_raw_fd(), &readfds)); // Reset the process limit. 
setrlimit(Resource::Nofile, orig_rlimit).unwrap(); From d0667e2d79601e09ffe90dd26faf8cb464c491fb Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 14 Sep 2024 05:28:14 -0700 Subject: [PATCH 09/25] Switch to a safe API. --- src/backend/libc/event/syscalls.rs | 40 ++++++++++++++++++++++----- src/event/select.rs | 18 ++++++------ tests/event/select.rs | 44 ++++++++++++------------------ 3 files changed, 59 insertions(+), 43 deletions(-) diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index fdba539a4..cf8cceb02 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -128,13 +128,39 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, timeout: Option<&crate::timespec::Timespec>, ) -> io::Result { + use core::ptr::{null, null_mut}; + + let len = crate::event::fd_bitvector_len(nfds); + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= len); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= len); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= len); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + let timeout_data; let timeout_ptr = match timeout { Some(timeout) => { @@ -145,13 +171,13 @@ pub(crate) unsafe fn select( }; &timeout_data } - None => core::ptr::null(), + None => null(), }; // On Apple platforms, use the specially mangled `select` which doesn't // have an `FD_SETSIZE` limitation. #[cfg(apple)] - { + unsafe { extern "C" { #[link_name = "select$DARWIN_EXTSN$NOCANCEL"] fn select( @@ -168,7 +194,7 @@ pub(crate) unsafe fn select( // Otherwise just use the normal `select`. 
#[cfg(not(apple))] - { + unsafe { ret_c_int(c::select( nfds, readfds.cast(), diff --git a/src/event/select.rs b/src/event/select.rs index 528c1573b..e5d788128 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -19,6 +19,9 @@ pub type FdSetElement = i32; /// `select(nfds, readfds, writefds, exceptfds, timeout)`—Wait for events on /// sets of file descriptors. /// +/// `readfds`, `writefds`, `exceptfds` must point to arrays of `FdSetElement` +/// containing at least `nfds.div_ceil(size_of::())` elements. +/// /// This `select` wrapper differs from POSIX in that `nfds` is not limited to /// `FD_SETSIZE`. Instead of using the opaque fixed-sized `fd_set` type, this /// function takes raw pointers to arrays of @@ -37,12 +40,7 @@ pub type FdSetElement = i32; /// limitation, following POSIX. These platforms' documentation recommend using /// [`poll`] instead. /// -/// [`poll`]: crate::event::poll -/// -/// # Safety -/// -/// `readfds`, `writefds`, `exceptfds` must point to arrays of `FdSetElement` -/// containing at least `nfds.div_ceil(size_of::())` elements. 
+/// [`poll`]: crate::event::poll() /// /// # References /// - [POSIX] @@ -56,11 +54,11 @@ pub type FdSetElement = i32; /// [FreeBSD]: https://man.freebsd.org/cgi/man.cgi?query=select&sektion=2 /// [NetBSD]: https://man.netbsd.org/select.2 /// [DragonFly BSD]: https://man.dragonflybsd.org/?command=select§ion=2 -pub unsafe fn select( +pub fn select( nfds: i32, - readfds: *mut FdSetElement, - writefds: *mut FdSetElement, - exceptfds: *mut FdSetElement, + readfds: Option<&mut [FdSetElement]>, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, timeout: Option<&Timespec>, ) -> io::Result { backend::event::syscalls::select(nfds, readfds, writefds, exceptfds, timeout) diff --git a/tests/event/select.rs b/tests/event/select.rs index c6f7d977e..ceed19454 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -9,7 +9,6 @@ use { #[cfg(feature = "pipe")] #[test] fn test_select() { - use core::ptr::null_mut; use rustix::io::{read, write}; use rustix::pipe::pipe; @@ -20,12 +19,12 @@ fn test_select() { // `select` should say there's nothing ready to be read from the pipe. let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| unsafe { + let num = retry_on_intr(|| { select( nfds, - readfds.as_mut_ptr(), - null_mut(), - null_mut(), + Some(&mut readfds), + None, + None, Some(&Timespec { tv_sec: 0, tv_nsec: 0, @@ -42,10 +41,7 @@ fn test_select() { // `select` should now say there's data to be read. 
let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| unsafe { - select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) - }) - .unwrap(); + let num = retry_on_intr(|| select(nfds, Some(&mut readfds), None, None, None)).unwrap(); assert_eq!(num, 1); assert!(fd_isset(reader.as_raw_fd(), &readfds)); fd_clr(reader.as_raw_fd(), &mut readfds); @@ -58,12 +54,12 @@ fn test_select() { // Select should now say there's no more data to be read. fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| unsafe { + let num = retry_on_intr(|| { select( nfds, - readfds.as_mut_ptr(), - null_mut(), - null_mut(), + Some(&mut readfds), + None, + None, Some(&Timespec { tv_sec: 0, tv_nsec: 0, @@ -79,7 +75,6 @@ fn test_select() { #[test] fn test_select_with_great_fds() { use core::cmp::max; - use core::ptr::null_mut; use rustix::io::{read, write}; use rustix::pipe::pipe; use rustix::process::{getrlimit, setrlimit, Resource}; @@ -108,12 +103,12 @@ fn test_select_with_great_fds() { // `select` should say there's nothing ready to be read from the pipe. let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| unsafe { + let num = retry_on_intr(|| { select( nfds, - readfds.as_mut_ptr(), - null_mut(), - null_mut(), + Some(&mut readfds), + None, + None, Some(&Timespec { tv_sec: 0, tv_nsec: 0, @@ -130,10 +125,7 @@ fn test_select_with_great_fds() { // `select` should now say there's data to be read. 
let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| unsafe { - select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) - }) - .unwrap(); + let num = retry_on_intr(|| select(nfds, Some(&mut readfds), None, None, None)).unwrap(); assert_eq!(num, 1); assert!(fd_isset(reader.as_raw_fd(), &readfds)); fd_clr(reader.as_raw_fd(), &mut readfds); @@ -146,12 +138,12 @@ fn test_select_with_great_fds() { // Select should now say there's no more data to be read. fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| unsafe { + let num = retry_on_intr(|| { select( nfds, - readfds.as_mut_ptr(), - null_mut(), - null_mut(), + Some(&mut readfds), + None, + None, Some(&Timespec { tv_sec: 0, tv_nsec: 0, From 592ccdac8879d76492145b8c915175f59098262a Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 14 Sep 2024 10:39:33 -0700 Subject: [PATCH 10/25] Support `select` on Linux and Windows too. And make `select` unsafe due to I/O safety. 
--- src/backend/libc/event/syscalls.rs | 22 ++--- src/backend/libc/event/windows_syscalls.rs | 63 +++++++++++- src/backend/libc/winsock_c.rs | 6 ++ src/backend/linux_raw/event/syscalls.rs | 85 +++++++++++++++- src/event/mod.rs | 4 +- src/event/poll.rs | 4 +- src/event/select.rs | 101 ++++++++++++++++--- src/io_uring.rs | 2 +- src/lib.rs | 3 +- src/thread/clock.rs | 2 +- src/thread/futex.rs | 2 +- tests/event/main.rs | 2 +- tests/event/select.rs | 107 +++++++++++++++------ 13 files changed, 337 insertions(+), 66 deletions(-) diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index cf8cceb02..27f13f504 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -16,7 +16,7 @@ use crate::event::port::Event; target_os = "espidf" ))] use crate::event::EventfdFlags; -#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +#[cfg(any(bsd, linux_kernel))] use crate::event::FdSetElement; use crate::event::PollFd; use crate::io; @@ -30,7 +30,9 @@ use crate::utils::as_ptr; all(feature = "alloc", any(linux_kernel, target_os = "redox")), ))] use core::mem::MaybeUninit; -#[cfg(any(linux_kernel, solarish, target_os = "redox"))] +#[cfg(any(bsd, linux_kernel))] +use core::ptr::null; +#[cfg(any(bsd, linux_kernel, solarish, target_os = "redox"))] use core::ptr::null_mut; #[cfg(any( linux_kernel, @@ -50,7 +52,7 @@ use {crate::backend::conv::borrowed_fd, crate::fd::BorrowedFd}; ))] use {crate::backend::conv::ret_owned_fd, crate::fd::OwnedFd}; #[cfg(all(feature = "alloc", bsd))] -use {crate::event::kqueue::Event, crate::utils::as_ptr, core::ptr::null}; +use {crate::event::kqueue::Event, crate::utils::as_ptr}; #[cfg(any( linux_kernel, @@ -127,17 +129,15 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, writefds: Option<&mut [FdSetElement]>, exceptfds: Option<&mut [FdSetElement]>, timeout: Option<&crate::timespec::Timespec>, ) -> io::Result { - use core::ptr::{null, null_mut}; - - let len = 
crate::event::fd_bitvector_len(nfds); + let len = crate::event::fd_set_num_elements(nfds); let readfds = match readfds { Some(readfds) => { @@ -177,7 +177,7 @@ pub(crate) fn select( // On Apple platforms, use the specially mangled `select` which doesn't // have an `FD_SETSIZE` limitation. #[cfg(apple)] - unsafe { + { extern "C" { #[link_name = "select$DARWIN_EXTSN$NOCANCEL"] fn select( @@ -194,7 +194,7 @@ pub(crate) fn select( // Otherwise just use the normal `select`. #[cfg(not(apple))] - unsafe { + { ret_c_int(c::select( nfds, readfds.cast(), @@ -287,7 +287,7 @@ pub(crate) fn port_send( unsafe { ret(c::port_send(borrowed_fd(port), events, userdata)) } } -#[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] +#[cfg(not(any(target_os = "redox", target_os = "wasi")))] pub(crate) fn pause() { let r = unsafe { c::pause() }; let errno = libc_errno::errno().0; diff --git a/src/backend/libc/event/windows_syscalls.rs b/src/backend/libc/event/windows_syscalls.rs index 8ccad4794..90f6459d0 100644 --- a/src/backend/libc/event/windows_syscalls.rs +++ b/src/backend/libc/event/windows_syscalls.rs @@ -2,7 +2,8 @@ use crate::backend::c; use crate::backend::conv::ret_c_int; -use crate::event::PollFd; +use crate::event::{FdSetElement, PollFd}; +use crate::fd::RawFd; use crate::io; pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result { @@ -14,3 +15,63 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + use core::ptr::{null, null_mut}; + + let len = crate::event::fd_set_num_elements(nfds as RawFd); + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= len); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= len); + writefds.as_mut_ptr() + } + None => 
null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= len); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `TIMEVAL`. + timeout_data = c::TIMEVAL { + tv_sec: timeout + .tv_sec + .try_into() + .map_err(|_| io::Errno::OPNOTSUPP)?, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => null(), + }; + + unsafe { + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr, + )) + } +} diff --git a/src/backend/libc/winsock_c.rs b/src/backend/libc/winsock_c.rs index ee2704ade..36e5d2162 100644 --- a/src/backend/libc/winsock_c.rs +++ b/src/backend/libc/winsock_c.rs @@ -57,3 +57,9 @@ pub(crate) use WinSock::{ WSAEWOULDBLOCK as EWOULDBLOCK, WSAEWOULDBLOCK as EAGAIN, WSAPOLLFD as pollfd, WSA_E_CANCELLED as ECANCELED, *, }; + +pub struct timespec { + pub tv_sec: time_t, + pub tv_nsec: i64, +} +pub type time_t = i64; diff --git a/src/backend/linux_raw/event/syscalls.rs b/src/backend/linux_raw/event/syscalls.rs index ac199adfa..ec572bb32 100644 --- a/src/backend/linux_raw/event/syscalls.rs +++ b/src/backend/linux_raw/event/syscalls.rs @@ -7,13 +7,15 @@ use crate::backend::c; use crate::backend::conv::{ - by_ref, c_int, c_uint, ret, ret_error, ret_owned_fd, ret_usize, slice_mut, zero, + by_ref, c_int, c_uint, ret, ret_c_int, ret_error, ret_owned_fd, ret_usize, slice_mut, zero, }; -use crate::event::{epoll, EventfdFlags, PollFd}; +use crate::event::{epoll, EventfdFlags, FdSetElement, PollFd}; use crate::fd::{BorrowedFd, OwnedFd}; use crate::io; +use crate::utils::as_mut_ptr; #[cfg(feature = "alloc")] use core::mem::MaybeUninit; +use core::ptr::null_mut; use linux_raw_sys::general::{EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD}; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use { @@ -50,6 +52,85 @@ pub(crate) fn poll(fds: &mut 
[PollFd<'_>], timeout: c::c_int) -> io::Result, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + let len = crate::event::fd_set_num_elements(nfds); + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= len); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= len); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= len); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + + // Linux's `pselect6` mutates the timeout argument. Our public interface + // does not do this, because it's not portable to other platforms, so we + // create a temporary value to hide this behavior. + let mut timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + timeout_data = timeout.clone(); + as_mut_ptr(&mut timeout_data) + } + None => null_mut(), + }; + + #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "sparc", + target_arch = "csky", + target_arch = "x86", + target_arch = "mips32r6", + target_arch = "riscv32", + target_arch = "mips" + ))] + { + ret_c_int(syscall!( + __NR_pselect6_time64, + c_int(nfds), + readfds, + writefds, + exceptfds, + timeout_ptr, + zero() + )) + } + + #[cfg(target_pointer_width = "64")] + { + ret_c_int(syscall!( + __NR_pselect6, + c_int(nfds), + readfds, + writefds, + exceptfds, + timeout_ptr, + zero() + )) + } +} + #[inline] pub(crate) fn epoll_create(flags: epoll::CreateFlags) -> io::Result { // SAFETY: `__NR_epoll_create1` doesn't access any user memory. 
diff --git a/src/event/mod.rs b/src/event/mod.rs index d0ebee5be..ded159f93 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -16,7 +16,7 @@ mod pause; mod poll; #[cfg(solarish)] pub mod port; -#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +#[cfg(any(bsd, linux_kernel, windows))] mod select; #[cfg(any( @@ -29,5 +29,5 @@ pub use eventfd::{eventfd, EventfdFlags}; #[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] pub use pause::*; pub use poll::{poll, PollFd, PollFlags}; -#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +#[cfg(any(bsd, linux_kernel, windows))] pub use select::*; diff --git a/src/event/poll.rs b/src/event/poll.rs index 00014eb1e..8c86a0f3f 100644 --- a/src/event/poll.rs +++ b/src/event/poll.rs @@ -5,9 +5,9 @@ pub use backend::event::poll_fd::{PollFd, PollFlags}; /// `poll(self.fds, timeout)`—Wait for events on lists of file descriptors. /// /// On macOS, `poll` doesn't work on fds for /dev/tty or /dev/null, however -/// [`select`] is available on macOS and does work on these fds. +/// [`select`] is available and does work on these fds. /// -/// [`select`]: https://docs.rs/rustix/*/x86_64-apple-darwin/rustix/event/fn.select.html +/// [`select`]: crate::event::select /// /// # References /// - [Beej's Guide to Network Programming] diff --git a/src/event/select.rs b/src/event/select.rs index e5d788128..ae1ee0194 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -1,20 +1,27 @@ +//! The `select` function. +//! +//! # Safety +//! +//! `select` is unsafe due to I/O safety. +#![allow(unsafe_code)] + use crate::{backend, io}; -pub use crate::timespec::Timespec; +pub use crate::timespec::{Nsecs, Secs, Timespec}; /// Bitvector element type for use with [`select`]. #[cfg(all( target_pointer_width = "64", any(target_os = "freebsd", target_os = "dragonfly") ))] -pub type FdSetElement = i64; +pub type FdSetElement = u64; /// Bitvector element type for use with [`select`]. 
#[cfg(not(all( target_pointer_width = "64", any(target_os = "freebsd", target_os = "dragonfly") )))] -pub type FdSetElement = i32; +pub type FdSetElement = u32; /// `select(nfds, readfds, writefds, exceptfds, timeout)`—Wait for events on /// sets of file descriptors. @@ -29,32 +36,44 @@ pub type FdSetElement = i32; /// representing bitvectors where a fd `fd` is set if the element at index /// `fd / (size_of::<FdSetElement>() * 8)` has the bit /// `1 << (fd % (size_of::<FdSetElement>() * 8))` set. Convenience functions -/// [`fd_set`], [`fd_clr`], [`fd_isset`], and [`fd_bitvector_len`] are provided -/// for setting, clearing, testing, and sizing bitvectors. +/// [`fd_set_insert`], [`fd_set_remove`], [`fd_set_contains`], +/// [`fd_set_num_elements`], and [`FdSetIter`] are provided for setting, +/// clearing, testing, sizing, and iterating through bitvectors. /// /// In particular, on Apple platforms, this function behaves as if /// `_DARWIN_UNLIMITED_SELECT` were predefined. /// -/// On Linux, illumos, and OpenBSD, this function is not defined because the -/// `select` functions on these platforms always has an `FD_SETSIZE` -/// limitation, following POSIX. These platforms' documentation recommend using -/// [`poll`] instead. +/// On illumos, this function is not defined because the `select` function on +/// this platform always has an `FD_SETSIZE` limitation, following POSIX. This +/// platform's documentation recommends using [`poll`] instead. /// /// [`poll`]: crate::event::poll() /// +/// # Safety +/// +/// All set bits in all the sets must correspond to open file descriptors. 
+/// +/// # References /// - [POSIX] +/// - [Linux] /// - [Apple] /// - [FreeBSD] /// - [NetBSD] +/// - [OpenBSD] /// - [DragonFly BSD] +/// - [Winsock] +/// - [glibc] /// /// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/select.html +/// [Linux]: https://man7.org/linux/man-pages/man2/select.2.html /// [Apple]: https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/select.2.html /// [FreeBSD]: https://man.freebsd.org/cgi/man.cgi?query=select&sektion=2 /// [NetBSD]: https://man.netbsd.org/select.2 +/// [OpenBSD]: https://man.openbsd.org/select.2 /// [DragonFly BSD]: https://man.dragonflybsd.org/?command=select&section=2 -pub fn select( +/// [Winsock]: https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-select +/// [glibc]: https://sourceware.org/glibc/manual/latest/html_node/Waiting-for-I_002fO.html#index-select +pub unsafe fn select( nfds: i32, readfds: Option<&mut [FdSetElement]>, writefds: Option<&mut [FdSetElement]>, @@ -70,7 +89,7 @@ use crate::fd::RawFd; /// Set `fd` in the bitvector pointed to by `fds`. #[doc(alias = "FD_SET")] #[inline] -pub fn fd_set(fd: RawFd, fds: &mut [FdSetElement]) { +pub fn fd_set_insert(fds: &mut [FdSetElement], fd: RawFd) { let fd = fd as usize; fds[fd / BITS] |= 1 << (fd % BITS); } @@ -78,7 +97,7 @@ pub fn fd_set(fd: RawFd, fds: &mut [FdSetElement]) { /// Clear `fd` in the bitvector pointed to by `fds`. #[doc(alias = "FD_CLR")] #[inline] -pub fn fd_clr(fd: RawFd, fds: &mut [FdSetElement]) { +pub fn fd_set_remove(fds: &mut [FdSetElement], fd: RawFd) { let fd = fd as usize; fds[fd / BITS] &= !(1 << (fd % BITS)); } @@ -86,15 +105,69 @@ pub fn fd_clr(fd: RawFd, fds: &mut [FdSetElement]) { /// Test whether `fd` is set in the bitvector pointed to by `fds`. 
#[doc(alias = "FD_ISSET")] #[inline] -pub fn fd_isset(fd: RawFd, fds: &[FdSetElement]) -> bool { +pub fn fd_set_contains(fds: &[FdSetElement], fd: RawFd) -> bool { let fd = fd as usize; (fds[fd / BITS] & (1 << (fd % BITS))) != 0 } +/// Compute the minimum `nfds` value needed for the bitvector pointed to by +/// `fds`. +#[inline] +pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { + if let Some(position) = fds.iter().rposition(|element| *element != 0) { + let element = fds[position]; + (position * BITS + (BITS - element.leading_zeros() as usize)) as RawFd + } else { + 0 + } +} + /// Compute the number of `FdSetElement`s needed to hold a bitvector which can /// contain file descriptors less than `nfds`. #[inline] -pub fn fd_bitvector_len(nfds: RawFd) -> usize { +pub fn fd_set_num_elements(nfds: RawFd) -> usize { let nfds = nfds as usize; (nfds + (BITS - 1)) / BITS } + +/// An iterator over the set fds in a bitvector. +pub struct FdSetIter<'a> { + current: RawFd, + fds: &'a [FdSetElement], +} + +impl<'a> FdSetIter<'a> { + /// Construct a `FdSetIter` for the given bitvector. + pub fn new(fds: &'a [FdSetElement]) -> Self { + Self { current: 0, fds } + } +} + +impl<'a> Iterator for FdSetIter<'a> { + type Item = RawFd; + + fn next(&mut self) -> Option { + if let Some(element) = self.fds.get(self.current as usize / BITS) { + // Test whether the current element has more bits set. + let shifted = element >> ((self.current as usize % BITS) as u32); + if shifted != 0 { + let fd = self.current + shifted.trailing_zeros() as RawFd; + self.current = fd + 1; + return Some(fd); + } + + // Search through the array for the next element with bits set. + if let Some(index) = self.fds[(self.current as usize / BITS) + 1..] 
+ .iter() + .position(|element| *element != 0) + { + let index = index + (self.current as usize / BITS) + 1; + let element = self.fds[index]; + let fd = (index * BITS) as RawFd + element.trailing_zeros() as RawFd; + self.current = fd + 1; + return Some(fd); + } + } + None + } +} diff --git a/src/io_uring.rs b/src/io_uring.rs index de272188f..bc15595c0 100644 --- a/src/io_uring.rs +++ b/src/io_uring.rs @@ -40,7 +40,7 @@ pub use crate::fs::{ }; pub use crate::io::ReadWriteFlags; pub use crate::net::{RecvFlags, SendFlags, SocketFlags}; -pub use crate::timespec::Timespec; +pub use crate::timespec::{Nsecs, Secs, Timespec}; pub use linux_raw_sys::general::sigset_t; pub use net::{__kernel_sockaddr_storage as sockaddr_storage, msghdr, sockaddr, socklen_t}; diff --git a/src/lib.rs b/src/lib.rs index ec545d438..e92162556 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -356,14 +356,13 @@ mod prctl; #[cfg(not(any(windows, target_os = "espidf", target_os = "wasi")))] #[cfg(any(feature = "process", feature = "runtime", all(bsd, feature = "event")))] mod signal; -#[cfg(not(windows))] #[cfg(any( feature = "fs", feature = "process", feature = "runtime", feature = "thread", feature = "time", - all(feature = "event", any(apple, freebsdlike, target_os = "netbsd")), + all(feature = "event", any(bsd, linux_kernel, windows)), all( linux_raw, not(feature = "use-libc-auxv"), diff --git a/src/thread/clock.rs b/src/thread/clock.rs index 8023f5466..a5302ba3a 100644 --- a/src/thread/clock.rs +++ b/src/thread/clock.rs @@ -1,7 +1,7 @@ use crate::{backend, io}; use core::fmt; -pub use crate::timespec::Timespec; +pub use crate::timespec::{Nsecs, Secs, Timespec}; #[cfg(not(any( apple, diff --git a/src/thread/futex.rs b/src/thread/futex.rs index 35c1f550d..d05ab5257 100644 --- a/src/thread/futex.rs +++ b/src/thread/futex.rs @@ -34,7 +34,7 @@ use crate::fd::{FromRawFd, OwnedFd, RawFd}; use crate::utils::option_as_ptr; use crate::{backend, io}; -pub use crate::timespec::Timespec; +pub use 
crate::timespec::{Nsecs, Secs, Timespec}; pub use backend::thread::futex::{Flags, OWNER_DIED, WAITERS}; diff --git a/tests/event/main.rs b/tests/event/main.rs index 72da82f56..fd29d1dcb 100644 --- a/tests/event/main.rs +++ b/tests/event/main.rs @@ -10,5 +10,5 @@ mod epoll; #[cfg(not(target_os = "wasi"))] mod eventfd; mod poll; -#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +#[cfg(any(bsd, linux_kernel, windows))] mod select; diff --git a/tests/event/select.rs b/tests/event/select.rs index ceed19454..f61563067 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -1,12 +1,19 @@ +use rustix::event::{ + fd_set_bound, fd_set_contains, fd_set_insert, fd_set_num_elements, fd_set_remove, FdSetElement, + FdSetIter, +}; +use rustix::fd::RawFd; #[cfg(feature = "pipe")] +#[cfg(not(windows))] use { - rustix::event::{fd_bitvector_len, fd_clr, fd_isset, fd_set, select, FdSetElement, Timespec}, - rustix::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}, + rustix::event::{select, Timespec}, + rustix::fd::{AsRawFd, FromRawFd, OwnedFd}, rustix::io::retry_on_intr, std::cmp::max, }; #[cfg(feature = "pipe")] +#[cfg(not(windows))] #[test] fn test_select() { use rustix::io::{read, write}; @@ -17,9 +24,9 @@ fn test_select() { let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. - let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; - fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| { + let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { select( nfds, Some(&mut readfds), @@ -33,19 +40,23 @@ fn test_select() { }) .unwrap(); assert_eq!(num, 0); - assert!(!fd_isset(reader.as_raw_fd(), &readfds)); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); // Write a byte to the pipe. 
assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. - let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; - fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| select(nfds, Some(&mut readfds), None, None, None)).unwrap(); + let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); assert_eq!(num, 1); - assert!(fd_isset(reader.as_raw_fd(), &readfds)); - fd_clr(reader.as_raw_fd(), &mut readfds); - assert!(!fd_isset(reader.as_raw_fd(), &readfds)); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); // Read the byte from the pipe. let mut buf = [b'\0']; @@ -53,8 +64,8 @@ fn test_select() { assert_eq!(buf[0], b'a'); // Select should now say there's no more data to be read. - fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| { + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { select( nfds, Some(&mut readfds), @@ -68,10 +79,12 @@ fn test_select() { }) .unwrap(); assert_eq!(num, 0); - assert!(!fd_isset(reader.as_raw_fd(), &readfds)); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); } #[cfg(feature = "pipe")] +#[cfg(not(windows))] #[test] fn test_select_with_great_fds() { use core::cmp::max; @@ -101,9 +114,9 @@ fn test_select_with_great_fds() { let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. 
- let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; - fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| { + let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { select( nfds, Some(&mut readfds), @@ -117,19 +130,23 @@ fn test_select_with_great_fds() { }) .unwrap(); assert_eq!(num, 0); - assert!(!fd_isset(reader.as_raw_fd(), &readfds)); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); // Write a byte to the pipe. assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. - let mut readfds = vec![0 as FdSetElement; fd_bitvector_len(nfds)]; - fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| select(nfds, Some(&mut readfds), None, None, None)).unwrap(); + let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); assert_eq!(num, 1); - assert!(fd_isset(reader.as_raw_fd(), &readfds)); - fd_clr(reader.as_raw_fd(), &mut readfds); - assert!(!fd_isset(reader.as_raw_fd(), &readfds)); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); // Read the byte from the pipe. let mut buf = [b'\0']; @@ -137,8 +154,8 @@ fn test_select_with_great_fds() { assert_eq!(buf[0], b'a'); // Select should now say there's no more data to be read. 
- fd_set(reader.as_raw_fd(), &mut readfds); - let num = retry_on_intr(|| { + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { select( nfds, Some(&mut readfds), @@ -152,8 +169,42 @@ fn test_select_with_great_fds() { }) .unwrap(); assert_eq!(num, 0); - assert!(!fd_isset(reader.as_raw_fd(), &readfds)); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); // Reset the process limit. setrlimit(Resource::Nofile, orig_rlimit).unwrap(); } + +#[test] +fn test_select_iter() { + for stuff in [ + &[1, 3, 31, 64, 128, 1024, 1025, 1030][..], + &[100, 101, 102, 103, 104, 105, 106, 107, 2999][..], + &[0, 8, 32, 64, 128][..], + &[0, 1, 2, 3, 31, 32, 33, 34, 35][..], + &[500][..], + &[128][..], + &[127][..], + &[0][..], + &[][..], + ] { + let nfds = if stuff.is_empty() { + 0 + } else { + *stuff.last().unwrap() + 1 + }; + let mut fds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + for fd in stuff { + assert!(!fd_set_contains(&mut fds, *fd)); + fd_set_insert(&mut fds, *fd); + assert!(fd_set_contains(&mut fds, *fd)); + fd_set_remove(&mut fds, *fd); + assert!(!fd_set_contains(&mut fds, *fd)); + fd_set_insert(&mut fds, *fd); + assert!(fd_set_contains(&mut fds, *fd)); + } + assert_eq!(fd_set_bound(&fds), nfds); + assert_eq!(FdSetIter::new(&fds).collect::>(), stuff); + } +} From 4a1ae311f610dd280296f10f828abe99f21e89be Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 14:29:34 -0700 Subject: [PATCH 11/25] Fix qemu to implement arbitrary-sized fd sets for `select`. 
--- .github/workflows/main.yml | 3 + ci/select-setsize.patch | 269 +++++++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 ci/select-setsize.patch diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9ec7356cc..a5730eb53 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -486,6 +486,7 @@ jobs: patch -p1 < $GITHUB_WORKSPACE/ci/tiocgsid.patch patch -p1 < $GITHUB_WORKSPACE/ci/more-sockopts.patch patch -p1 < $GITHUB_WORKSPACE/ci/pidfd-open.patch + patch -p1 < $GITHUB_WORKSPACE/ci/select-setsize.patch ./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache }}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs ninja -C build install if: matrix.qemu != '' && matrix.os == 'ubuntu-latest' @@ -624,6 +625,7 @@ jobs: patch -p1 < $GITHUB_WORKSPACE/ci/tiocgsid.patch patch -p1 < $GITHUB_WORKSPACE/ci/more-sockopts.patch patch -p1 < $GITHUB_WORKSPACE/ci/pidfd-open.patch + patch -p1 < $GITHUB_WORKSPACE/ci/select-setsize.patch ./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache }}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs ninja -C build install if: matrix.qemu != '' && matrix.os == 'ubuntu-latest' @@ -718,6 +720,7 @@ jobs: patch -p1 < $GITHUB_WORKSPACE/ci/tiocgsid.patch patch -p1 < $GITHUB_WORKSPACE/ci/more-sockopts.patch patch -p1 < $GITHUB_WORKSPACE/ci/pidfd-open.patch + patch -p1 < $GITHUB_WORKSPACE/ci/select-setsize.patch ./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache }}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs ninja -C build install if: matrix.qemu != '' && matrix.os == 'ubuntu-latest' diff --git a/ci/select-setsize.patch b/ci/select-setsize.patch new file mode 100644 index 000000000..6631dd414 --- /dev/null +++ b/ci/select-setsize.patch @@ -0,0 +1,269 @@ +From Dan Gohman +Subject: 
[PATCH] Remove the `FD_SETSIZE` limitation in `select` + +The `fd_set` type is limited to a fixed `FD_SETSIZE` number of file +descriptors, however Linux's `select` has no such limitation. Change +the `select` implementation to use manual bit-vector logic to better +implement the Linux semantics. + +diff -ur a/linux-user/syscall.c b/linux-user/syscall.c +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -664,8 +664,9 @@ + char **, argv, char **, envp, int, flags) + #if defined(TARGET_NR_select) || defined(TARGET_NR__newselect) || \ + defined(TARGET_NR_pselect6) || defined(TARGET_NR_pselect6_time64) +-safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ +- fd_set *, exceptfds, struct timespec *, timeout, void *, sig) ++safe_syscall6(int, pselect6, int, nfds, unsigned long *, readfds, \ ++ unsigned long *, writefds, unsigned long *, exceptfds, \ ++ struct timespec *, timeout, void *, sig) + #endif + #if defined(TARGET_NR_ppoll) || defined(TARGET_NR_ppoll_time64) + safe_syscall5(int, ppoll, struct pollfd *, ufds, unsigned int, nfds, +@@ -861,7 +862,7 @@ + + #if defined(TARGET_NR_select) || defined(TARGET_NR__newselect) || \ + defined(TARGET_NR_pselect6) || defined(TARGET_NR_pselect6_time64) +-static inline abi_long copy_from_user_fdset(fd_set *fds, ++static inline abi_long copy_from_user_fdset(unsigned long *fds, + abi_ulong target_fds_addr, + int n) + { +@@ -875,7 +876,8 @@ + 1))) + return -TARGET_EFAULT; + +- FD_ZERO(fds); ++ memset(fds, 0, DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); + k = 0; + for (i = 0; i < nw; i++) { + /* grab the abi_ulong */ +@@ -883,7 +885,8 @@ + for (j = 0; j < TARGET_ABI_BITS; j++) { + /* check the bit inside the abi_ulong */ + if ((b >> j) & 1) +- FD_SET(k, fds); ++ fds[k / (sizeof(unsigned long) * 8)] |= ++ 1 << (k % (sizeof(unsigned long) * 8)); + k++; + } + } +@@ -893,7 +896,8 @@ + return 0; + } + +-static inline abi_ulong copy_from_user_fdset_ptr(fd_set *fds, fd_set 
**fds_ptr, ++static inline abi_ulong copy_from_user_fdset_ptr(unsigned long *fds, ++ unsigned long **fds_ptr, + abi_ulong target_fds_addr, + int n) + { +@@ -908,7 +912,7 @@ + } + + static inline abi_long copy_to_user_fdset(abi_ulong target_fds_addr, +- const fd_set *fds, ++ const unsigned long *fds, + int n) + { + int i, nw, j, k; +@@ -926,7 +930,10 @@ + for (i = 0; i < nw; i++) { + v = 0; + for (j = 0; j < TARGET_ABI_BITS; j++) { +- v |= ((abi_ulong)(FD_ISSET(k, fds) != 0) << j); ++ bool set = ++ (fds[k / (sizeof(unsigned long) * 8)] & ++ (1 << (k % (sizeof(unsigned long) * 8)))) != 0; ++ v |= ((abi_ulong)set << j); + k++; + } + __put_user(v, &target_fds[i]); +@@ -1295,28 +1302,40 @@ + abi_ulong rfd_addr, abi_ulong wfd_addr, + abi_ulong efd_addr, abi_ulong target_tv_addr) + { +- fd_set rfds, wfds, efds; +- fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; ++ unsigned long *rfds, *wfds, *efds; ++ unsigned long *rfds_ptr, *wfds_ptr, *efds_ptr; + struct timeval tv; + struct timespec ts, *ts_ptr; + abi_long ret; + +- ret = copy_from_user_fdset_ptr(&rfds, &rfds_ptr, rfd_addr, n); ++ rfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ wfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ efds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ ++ ret = copy_from_user_fdset_ptr(rfds, &rfds_ptr, rfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&wfds, &wfds_ptr, wfd_addr, n); ++ ret = copy_from_user_fdset_ptr(wfds, &wfds_ptr, wfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&efds, &efds_ptr, efd_addr, n); ++ ret = copy_from_user_fdset_ptr(efds, &efds_ptr, efd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } + + if (target_tv_addr) { +- if (copy_from_user_timeval(&tv, target_tv_addr)) ++ if 
(copy_from_user_timeval(&tv, target_tv_addr)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; ++ } + ts.tv_sec = tv.tv_sec; + ts.tv_nsec = tv.tv_usec * 1000; + ts_ptr = &ts; +@@ -1328,22 +1347,30 @@ + ts_ptr, NULL)); + + if (!is_error(ret)) { +- if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) ++ if (rfd_addr && copy_to_user_fdset(rfd_addr, rfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; +- if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n)) ++ } ++ if (wfd_addr && copy_to_user_fdset(wfd_addr, wfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; +- if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) ++ } ++ if (efd_addr && copy_to_user_fdset(efd_addr, efds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; ++ } + + if (target_tv_addr) { + tv.tv_sec = ts.tv_sec; + tv.tv_usec = ts.tv_nsec / 1000; + if (copy_to_user_timeval(target_tv_addr, &tv)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } + } + ++ free(rfds); free(wfds); free(efds); + return ret; + } + +@@ -1377,8 +1404,8 @@ + bool time64) + { + abi_long rfd_addr, wfd_addr, efd_addr, n, ts_addr; +- fd_set rfds, wfds, efds; +- fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; ++ unsigned long *rfds, *wfds, *efds; ++ unsigned long *rfds_ptr, *wfds_ptr, *efds_ptr; + struct timespec ts, *ts_ptr; + abi_long ret; + +@@ -1399,16 +1426,26 @@ + efd_addr = arg4; + ts_addr = arg5; + +- ret = copy_from_user_fdset_ptr(&rfds, &rfds_ptr, rfd_addr, n); ++ rfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ wfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ efds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ ++ ret = copy_from_user_fdset_ptr(rfds, &rfds_ptr, rfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&wfds, &wfds_ptr, wfd_addr, 
n); ++ ret = copy_from_user_fdset_ptr(wfds, &wfds_ptr, wfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&efds, &efds_ptr, efd_addr, n); ++ ret = copy_from_user_fdset_ptr(efds, &efds_ptr, efd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } + +@@ -1419,10 +1456,12 @@ + if (ts_addr) { + if (time64) { + if (target_to_host_timespec64(&ts, ts_addr)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } else { + if (target_to_host_timespec(&ts, ts_addr)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } +@@ -1436,6 +1475,7 @@ + if (arg6) { + arg7 = lock_user(VERIFY_READ, arg6, sizeof(*arg7) * 2, 1); + if (!arg7) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + arg_sigset = tswapal(arg7[0]); +@@ -1445,6 +1485,7 @@ + if (arg_sigset) { + ret = process_sigsuspend_mask(&sig.set, arg_sigset, arg_sigsize); + if (ret != 0) { ++ free(rfds); free(wfds); free(efds); + return ret; + } + sig_ptr = &sig; +@@ -1460,25 +1501,31 @@ + } + + if (!is_error(ret)) { +- if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) { ++ if (rfd_addr && copy_to_user_fdset(rfd_addr, rfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } +- if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n)) { ++ if (wfd_addr && copy_to_user_fdset(wfd_addr, wfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } +- if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) { ++ if (efd_addr && copy_to_user_fdset(efd_addr, efds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + if (time64) { + if (ts_addr && host_to_target_timespec64(ts_addr, &ts)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } else { + if (ts_addr && host_to_target_timespec(ts_addr, &ts)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } + } ++ free(rfds); 
free(wfds); free(efds); + return ret; + } + #endif From 0d513848e6d0b3158613ab08e7452456f4061fd7 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 14:30:00 -0700 Subject: [PATCH 12/25] Fix compilation on Windows. --- src/backend/libc/event/syscalls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index 27f13f504..9b5d368b0 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -166,7 +166,7 @@ pub(crate) unsafe fn select( Some(timeout) => { // Convert from `Timespec` to `c::timeval`. timeout_data = c::timeval { - tv_sec: timeout.tv_sec, + tv_sec: timeout.tv_sec.try_into().map_err(|_| io::Errno::OVERFLOW)?, tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, }; &timeout_data From a20c73496ce7bb7dae48e07c663ff96872d1b0e8 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 14:37:56 -0700 Subject: [PATCH 13/25] Compile fixes. --- src/backend/linux_raw/process/syscalls.rs | 4 ++-- src/event/select.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/linux_raw/process/syscalls.rs b/src/backend/linux_raw/process/syscalls.rs index d562aab33..85c6fbbad 100644 --- a/src/backend/linux_raw/process/syscalls.rs +++ b/src/backend/linux_raw/process/syscalls.rs @@ -348,12 +348,12 @@ pub(crate) fn prlimit(pid: Option, limit: Resource, new: Rlimit) -> io::Res /// Convert a C `rlimit64` to a Rust `Rlimit`. 
#[inline] fn rlimit_from_linux(lim: rlimit64) -> Rlimit { - let current = if lim.rlim_cur == RLIM64_INFINITY as _ { + let current = if lim.rlim_cur == RLIM64_INFINITY as u64 { None } else { Some(lim.rlim_cur) }; - let maximum = if lim.rlim_max == RLIM64_INFINITY as _ { + let maximum = if lim.rlim_max == RLIM64_INFINITY as u64 { None } else { Some(lim.rlim_max) diff --git a/src/event/select.rs b/src/event/select.rs index ae1ee0194..bb26b75ea 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -83,7 +83,7 @@ pub unsafe fn select( backend::event::syscalls::select(nfds, readfds, writefds, exceptfds, timeout) } -const BITS: usize = size_of::() * 8; +const BITS: usize = core::mem::size_of::() * 8; use crate::fd::RawFd; /// Set `fd` in the bitvector pointed to by `fds`. From be71be73f702a350f662804c33e5ae90b9a57030 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 16 Sep 2024 03:20:41 -0700 Subject: [PATCH 14/25] Minor code cleanup. --- src/backend/linux_raw/event/syscalls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/linux_raw/event/syscalls.rs b/src/backend/linux_raw/event/syscalls.rs index ec572bb32..0e5b815ec 100644 --- a/src/backend/linux_raw/event/syscalls.rs +++ b/src/backend/linux_raw/event/syscalls.rs @@ -89,7 +89,7 @@ pub(crate) unsafe fn select( let mut timeout_data; let timeout_ptr = match timeout { Some(timeout) => { - timeout_data = timeout.clone(); + timeout_data = *timeout; as_mut_ptr(&mut timeout_data) } None => null_mut(), From 680ec5e0d254249d93dbe75d70ab30e4bbf7166b Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 16 Sep 2024 03:21:01 -0700 Subject: [PATCH 15/25] Drop Windows support. Windows has a different `FD_SET` representation. Supporting it might be possible, though the documentation is ambiguous about whether it supports arbitrary `FD_SETSIZE` values. But even if so, it would require a more elaborate abstraction, so just drop it for now. 
--- src/backend/libc/event/syscalls.rs | 2 +- src/backend/libc/event/windows_syscalls.rs | 63 +--------------------- src/backend/libc/winsock_c.rs | 6 --- src/event/mod.rs | 4 +- src/event/select.rs | 5 +- src/lib.rs | 3 +- tests/event/main.rs | 2 +- tests/event/select.rs | 3 -- 8 files changed, 10 insertions(+), 78 deletions(-) diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index 9b5d368b0..27f13f504 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -166,7 +166,7 @@ pub(crate) unsafe fn select( Some(timeout) => { // Convert from `Timespec` to `c::timeval`. timeout_data = c::timeval { - tv_sec: timeout.tv_sec.try_into().map_err(|_| io::Errno::OVERFLOW)?, + tv_sec: timeout.tv_sec, tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, }; &timeout_data diff --git a/src/backend/libc/event/windows_syscalls.rs b/src/backend/libc/event/windows_syscalls.rs index 90f6459d0..8ccad4794 100644 --- a/src/backend/libc/event/windows_syscalls.rs +++ b/src/backend/libc/event/windows_syscalls.rs @@ -2,8 +2,7 @@ use crate::backend::c; use crate::backend::conv::ret_c_int; -use crate::event::{FdSetElement, PollFd}; -use crate::fd::RawFd; +use crate::event::PollFd; use crate::io; pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result { @@ -15,63 +14,3 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, - writefds: Option<&mut [FdSetElement]>, - exceptfds: Option<&mut [FdSetElement]>, - timeout: Option<&crate::timespec::Timespec>, -) -> io::Result { - use core::ptr::{null, null_mut}; - - let len = crate::event::fd_set_num_elements(nfds as RawFd); - - let readfds = match readfds { - Some(readfds) => { - assert!(readfds.len() >= len); - readfds.as_mut_ptr() - } - None => null_mut(), - }; - let writefds = match writefds { - Some(writefds) => { - assert!(writefds.len() >= len); - writefds.as_mut_ptr() - } - None => null_mut(), - }; - let exceptfds = match exceptfds { - 
Some(exceptfds) => { - assert!(exceptfds.len() >= len); - exceptfds.as_mut_ptr() - } - None => null_mut(), - }; - - let timeout_data; - let timeout_ptr = match timeout { - Some(timeout) => { - // Convert from `Timespec` to `TIMEVAL`. - timeout_data = c::TIMEVAL { - tv_sec: timeout - .tv_sec - .try_into() - .map_err(|_| io::Errno::OPNOTSUPP)?, - tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, - }; - &timeout_data - } - None => null(), - }; - - unsafe { - ret_c_int(c::select( - nfds, - readfds.cast(), - writefds.cast(), - exceptfds.cast(), - timeout_ptr, - )) - } -} diff --git a/src/backend/libc/winsock_c.rs b/src/backend/libc/winsock_c.rs index 36e5d2162..ee2704ade 100644 --- a/src/backend/libc/winsock_c.rs +++ b/src/backend/libc/winsock_c.rs @@ -57,9 +57,3 @@ pub(crate) use WinSock::{ WSAEWOULDBLOCK as EWOULDBLOCK, WSAEWOULDBLOCK as EAGAIN, WSAPOLLFD as pollfd, WSA_E_CANCELLED as ECANCELED, *, }; - -pub struct timespec { - pub tv_sec: time_t, - pub tv_nsec: i64, -} -pub type time_t = i64; diff --git a/src/event/mod.rs b/src/event/mod.rs index ded159f93..16edc646c 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -16,7 +16,7 @@ mod pause; mod poll; #[cfg(solarish)] pub mod port; -#[cfg(any(bsd, linux_kernel, windows))] +#[cfg(any(bsd, linux_kernel))] mod select; #[cfg(any( @@ -29,5 +29,5 @@ pub use eventfd::{eventfd, EventfdFlags}; #[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] pub use pause::*; pub use poll::{poll, PollFd, PollFlags}; -#[cfg(any(bsd, linux_kernel, windows))] +#[cfg(any(bsd, linux_kernel))] pub use select::*; diff --git a/src/event/select.rs b/src/event/select.rs index bb26b75ea..63a480790 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -47,6 +47,9 @@ pub type FdSetElement = u32; /// this platform always has an `FD_SETSIZE` limitation, following POSIX. This /// platform's documentation recommends using [`poll`] instead. 
/// +/// On Windows, this function is not defined because the `select` function on +/// this platform doesn't use bitvectors for file descriptor sets. +/// /// [`poll`]: crate::event::poll() /// /// # Safety @@ -61,7 +64,6 @@ pub type FdSetElement = u32; /// - [NetBSD] /// - [OpenBSD] /// - [DragonFly BSD] -/// - [Winsock] /// - [glibc] /// /// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/select.html @@ -71,7 +73,6 @@ pub type FdSetElement = u32; /// [NetBSD]: https://man.netbsd.org/select.2 /// [OpenBSD]: https://man.openbsd.org/select.2 /// [DragonFly BSD]: https://man.dragonflybsd.org/?command=select&section=2 -/// [Winsock]: https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-select /// [glibc]: https://sourceware.org/glibc/manual/latest/html_node/Waiting-for-I_002fO.html#index-select pub unsafe fn select( nfds: i32, diff --git a/src/lib.rs b/src/lib.rs index e92162556..8405becc5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -356,13 +356,14 @@ mod prctl; #[cfg(not(any(windows, target_os = "espidf", target_os = "wasi")))] #[cfg(any(feature = "process", feature = "runtime", all(bsd, feature = "event")))] mod signal; +#[cfg(not(windows))] #[cfg(any( feature = "fs", feature = "process", feature = "runtime", feature = "thread", feature = "time", - all(feature = "event", any(bsd, linux_kernel, windows)), + all(feature = "event", any(bsd, linux_kernel)), all( linux_raw, not(feature = "use-libc-auxv"), diff --git a/tests/event/main.rs b/tests/event/main.rs index fd29d1dcb..8e93f0ab4 100644 --- a/tests/event/main.rs +++ b/tests/event/main.rs @@ -10,5 +10,5 @@ mod epoll; #[cfg(not(target_os = "wasi"))] mod eventfd; mod poll; -#[cfg(any(bsd, linux_kernel, windows))] +#[cfg(any(bsd, linux_kernel))] mod select; diff --git a/tests/event/select.rs b/tests/event/select.rs index f61563067..c791c9ea1 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -4,7 +4,6 @@ use rustix::event::{ }; use rustix::fd::RawFd; #[cfg(feature = 
"pipe")] -#[cfg(not(windows))] use { rustix::event::{select, Timespec}, rustix::fd::{AsRawFd, FromRawFd, OwnedFd}, @@ -13,7 +12,6 @@ use { }; #[cfg(feature = "pipe")] -#[cfg(not(windows))] #[test] fn test_select() { use rustix::io::{read, write}; @@ -84,7 +82,6 @@ fn test_select() { } #[cfg(feature = "pipe")] -#[cfg(not(windows))] #[test] fn test_select_with_great_fds() { use core::cmp::max; From 791cd7471c00dc7eb514fe413a04ca01b5bcd0fd Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 16 Sep 2024 03:30:17 -0700 Subject: [PATCH 16/25] Fix compilation on some 32-bit platforms. --- src/backend/libc/event/syscalls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index 27f13f504..9b5d368b0 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -166,7 +166,7 @@ pub(crate) unsafe fn select( Some(timeout) => { // Convert from `Timespec` to `c::timeval`. timeout_data = c::timeval { - tv_sec: timeout.tv_sec, + tv_sec: timeout.tv_sec.try_into().map_err(|_| io::Errno::OVERFLOW)?, tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, }; &timeout_data From 52381f050b1f7e91467c2e171b5c510d031727bc Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 20 Sep 2024 09:12:58 -0700 Subject: [PATCH 17/25] Re-enable Windows support. This uses a trick where we still allow users to allocate a `FdSetElement` array, but we just allocate a `FD_SET` on Windows out of it. 
--- src/backend/libc/event/syscalls.rs | 2 +- src/backend/libc/event/windows_syscalls.rs | 60 ++++- src/backend/libc/winsock_c.rs | 8 + src/backend/linux_raw/event/syscalls.rs | 2 +- src/event/mod.rs | 4 +- src/event/select.rs | 209 +++++++++++++---- src/lib.rs | 3 +- tests/event/main.rs | 2 +- tests/event/select.rs | 247 +++++++++++++++++++-- 9 files changed, 462 insertions(+), 75 deletions(-) diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index 9b5d368b0..244fb6b36 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -137,7 +137,7 @@ pub(crate) unsafe fn select( exceptfds: Option<&mut [FdSetElement]>, timeout: Option<&crate::timespec::Timespec>, ) -> io::Result { - let len = crate::event::fd_set_num_elements(nfds); + let len = crate::event::fd_set_num_elements_for_bitvector(nfds); let readfds = match readfds { Some(readfds) => { diff --git a/src/backend/libc/event/windows_syscalls.rs b/src/backend/libc/event/windows_syscalls.rs index 8ccad4794..c152411bf 100644 --- a/src/backend/libc/event/windows_syscalls.rs +++ b/src/backend/libc/event/windows_syscalls.rs @@ -2,7 +2,7 @@ use crate::backend::c; use crate::backend::conv::ret_c_int; -use crate::event::PollFd; +use crate::event::{FdSetElement, PollFd}; use crate::io; pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result { @@ -14,3 +14,61 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + use core::ptr::{null, null_mut}; + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= readfds[0].0 as usize); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= writefds[0].0 as usize); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + 
let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= exceptfds[0].0 as usize); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `TIMEVAL`. + timeout_data = c::TIMEVAL { + tv_sec: timeout + .tv_sec + .try_into() + .map_err(|_| io::Errno::OPNOTSUPP)?, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => null(), + }; + + unsafe { + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr, + )) + } +} diff --git a/src/backend/libc/winsock_c.rs b/src/backend/libc/winsock_c.rs index ee2704ade..007cda69c 100644 --- a/src/backend/libc/winsock_c.rs +++ b/src/backend/libc/winsock_c.rs @@ -57,3 +57,11 @@ pub(crate) use WinSock::{ WSAEWOULDBLOCK as EWOULDBLOCK, WSAEWOULDBLOCK as EAGAIN, WSAPOLLFD as pollfd, WSA_E_CANCELLED as ECANCELED, *, }; + +// Windows doesn't have `timespec`, just `timeval`. Rustix only uses `timespec` +// in its public API. So define one, and we'll convert it internally. 
+pub struct timespec { + pub tv_sec: time_t, + pub tv_nsec: i64, +} +pub type time_t = i64; diff --git a/src/backend/linux_raw/event/syscalls.rs b/src/backend/linux_raw/event/syscalls.rs index 0e5b815ec..3886fa7e6 100644 --- a/src/backend/linux_raw/event/syscalls.rs +++ b/src/backend/linux_raw/event/syscalls.rs @@ -59,7 +59,7 @@ pub(crate) unsafe fn select( exceptfds: Option<&mut [FdSetElement]>, timeout: Option<&crate::timespec::Timespec>, ) -> io::Result { - let len = crate::event::fd_set_num_elements(nfds); + let len = crate::event::fd_set_num_elements_for_bitvector(nfds); let readfds = match readfds { Some(readfds) => { diff --git a/src/event/mod.rs b/src/event/mod.rs index 16edc646c..a1a51c14f 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -16,7 +16,7 @@ mod pause; mod poll; #[cfg(solarish)] pub mod port; -#[cfg(any(bsd, linux_kernel))] +#[cfg(any(bsd, linux_kernel, windows, target_os = "wasi"))] mod select; #[cfg(any( @@ -29,5 +29,5 @@ pub use eventfd::{eventfd, EventfdFlags}; #[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] pub use pause::*; pub use poll::{poll, PollFd, PollFlags}; -#[cfg(any(bsd, linux_kernel))] +#[cfg(any(bsd, linux_kernel, windows, target_os = "wasi"))] pub use select::*; diff --git a/src/event/select.rs b/src/event/select.rs index 63a480790..8100157d6 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -9,19 +9,29 @@ use crate::{backend, io}; pub use crate::timespec::{Nsecs, Secs, Timespec}; -/// Bitvector element type for use with [`select`]. -#[cfg(all( - target_pointer_width = "64", - any(target_os = "freebsd", target_os = "dragonfly") +/// Storage element type for use with [`select`]. +#[cfg(any( + windows, + all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") + ) ))] -pub type FdSetElement = u64; +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) u64); -/// Bitvector element type for use with [`select`]. 
-#[cfg(not(all( - target_pointer_width = "64", - any(target_os = "freebsd", target_os = "dragonfly") +/// Storage element type for use with [`select`]. +#[cfg(not(any( + windows, + all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") + ) )))] -pub type FdSetElement = u32; +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) u32); /// `select(nfds, readfds, writefds, exceptfds, timeout)`—Wait for events on /// sets of file descriptors. @@ -30,15 +40,11 @@ pub type FdSetElement = u32; /// containing at least `nfds.div_ceil(size_of::())` elements. /// /// This `select` wrapper differs from POSIX in that `nfds` is not limited to -/// `FD_SETSIZE`. Instead of using the opaque fixed-sized `fd_set` type, this -/// function takes raw pointers to arrays of -/// `nfds.div_ceil(size_of::())` elements of type `FdSetElement`, -/// representing bitvectors where a fd `fd` is set if the element at index -/// `fd / (size_of::() * 8)` has the bit -/// `1 << (fd % (size_of::() * 8))` set. Convenience functions -/// [`fd_set_insert`], [`fd_set_remove`], [`fd_set_contains`], -/// [`fd_set_num_elements`], and [`FdSetIter`] are provided for setting, -/// clearing, testing, sizing, and iterating through bitvectors. +/// `FD_SETSIZE`. Instead of using the fixed-sized `fd_set` type, this function +/// takes raw pointers to arrays of `fd_set_num_elements(max_fd + 1, num_fds)`, +/// where `max_fd` is the maximum value of any fd that will be inserted into +/// the set, and `num_fds` is the maximum number of fds that will be inserted +/// into the set. /// /// In particular, on Apple platforms, this function behaves as if /// `_DARWIN_UNLIMITED_SELECT` were predefined. @@ -47,14 +53,14 @@ pub type FdSetElement = u32; /// this platform always has an `FD_SETSIZE` limitation, following POSIX. This /// platform's documentation recommends using [`poll`] instead. 
/// -/// On Windows, this function is not defined because the `select` function on -/// this platform doesn't use bitvectors for file descriptor sets. +/// [`fd_set_insert`], [`fd_set_remove`], and [`FdSetIter`] are provided for +/// setting, clearing, and iterating with sets. /// /// [`poll`]: crate::event::poll() /// /// # Safety /// -/// All set bits in all the sets must correspond to open file descriptors. +/// All fds in all the sets must correspond to open file descriptors. /// /// # References /// - [POSIX] @@ -64,6 +70,7 @@ pub type FdSetElement = u32; /// - [NetBSD] /// - [OpenBSD] /// - [DragonFly BSD] +/// - [Winsock] /// - [glibc] /// /// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/select.html @@ -73,6 +80,7 @@ pub type FdSetElement = u32; /// [NetBSD]: https://man.netbsd.org/select.2 /// [OpenBSD]: https://man.openbsd.org/select.2 /// [DragonFly BSD]: https://man.dragonflybsd.org/?command=select&section=2 +/// [Winsock]: https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-select /// [glibc]: https://sourceware.org/glibc/manual/latest/html_node/Waiting-for-I_002fO.html#index-select pub unsafe fn select( nfds: i32, @@ -87,70 +95,126 @@ pub unsafe fn select( const BITS: usize = core::mem::size_of::() * 8; use crate::fd::RawFd; -/// Set `fd` in the bitvector pointed to by `fds`. +/// Set `fd` in the set pointed to by `fds`. #[doc(alias = "FD_SET")] #[inline] pub fn fd_set_insert(fds: &mut [FdSetElement], fd: RawFd) { let fd = fd as usize; - fds[fd / BITS] |= 1 << (fd % BITS); + fds[fd / BITS].0 |= 1 << (fd % BITS); } -/// Clear `fd` in the bitvector pointed to by `fds`. +/// Clear `fd` in the set pointed to by `fds`. #[doc(alias = "FD_CLR")] #[inline] pub fn fd_set_remove(fds: &mut [FdSetElement], fd: RawFd) { let fd = fd as usize; - fds[fd / BITS] &= !(1 << (fd % BITS)); + fds[fd / BITS].0 &= !(1 << (fd % BITS)); } -/// Test whether `fd` is set in the bitvector pointed to by `fds`. 
-#[doc(alias = "FD_ISSET")] +/// Compute the minimum `nfds` value needed for the set pointed to by +/// `fds`. #[inline] -pub fn fd_set_contains(fds: &[FdSetElement], fd: RawFd) -> bool { - let fd = fd as usize; - (fds[fd / BITS] & (1 << (fd % BITS))) != 0 +pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { + #[cfg(any(windows, target_os = "wasi"))] + { + assert!(cfg!(target_endian = "little"), "what"); + + let fd_count = fds[0].0 as usize; + (fd_count + 1) as RawFd + } + + #[cfg(not(any(windows, target_os = "wasi")))] + { + if let Some(position) = fds.iter().rposition(|element| element.0 != 0) { + let element = fds[position].0; + (position * BITS + (BITS - element.leading_zeros() as usize)) as RawFd + } else { + 0 + } + } } -/// Compute the minimum `nfds` value needed for the bitvector pointed to by -/// `fds`. +/// Compute the number of `FdSetElement`s needed to hold a set which can +/// contain up to `set_count` file descriptors with values less than `nfds`. #[inline] -pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { - if let Some(position) = fds.iter().rposition(|element| *element != 0) { - let element = fds[position]; - (position * BITS + (BITS - element.leading_zeros() as usize)) as RawFd - } else { - 0 +pub fn fd_set_num_elements(set_count: usize, nfds: RawFd) -> usize { + #[cfg(any(windows, target_os = "wasi"))] + { + let _ = nfds; + + fd_set_num_elements_for_fd_array(set_count) + } + + #[cfg(not(any(windows, target_os = "wasi")))] + { + let _ = set_count; + + fd_set_num_elements_for_bitvector(nfds) } } -/// Compute the number of `FdSetElement`s needed to hold a bitvector which can -/// contain file descriptors less than `nfds`. +/// `fd_set_num_elements` implementation on platforms with fd array +/// implementations. +#[cfg(any(windows, target_os = "wasi"))] +#[inline] +pub(crate) fn fd_set_num_elements_for_fd_array(set_count: usize) -> usize { + // Allocate space for an `fd_count` field, plus `set_count` elements + // for the `fd_array` field. 
+ 1 + set_count +} + +/// `fd_set_num_elements` implementation on platforms with bitvector +/// implementations. +#[cfg(not(any(windows, target_os = "wasi")))] #[inline] -pub fn fd_set_num_elements(nfds: RawFd) -> usize { +pub(crate) fn fd_set_num_elements_for_bitvector(nfds: RawFd) -> usize { + // Allocate space for a dense bitvector for `nfds` bits. let nfds = nfds as usize; - (nfds + (BITS - 1)) / BITS + div_ceil(nfds, BITS) +} + +#[cfg(not(any(windows, target_os = "wasi")))] +fn div_ceil(lhs: usize, rhs: usize) -> usize { + let d = lhs / rhs; + let r = lhs % rhs; + if r > 0 { + d + 1 + } else { + d + } } -/// An iterator over the set fds in a bitvector. +/// An iterator over the fds in a set. +#[doc(alias = "FD_ISSET")] +#[cfg(not(any(windows, target_os = "wasi")))] pub struct FdSetIter<'a> { current: RawFd, fds: &'a [FdSetElement], } +/// An iterator over the fds in a set. +#[doc(alias = "FD_ISSET")] +#[cfg(any(windows, target_os = "wasi"))] +pub struct FdSetIter<'a> { + current: usize, + fds: &'a [FdSetElement], +} + impl<'a> FdSetIter<'a> { - /// Construct a `FdSetIter` for the given bitvector. + /// Construct a `FdSetIter` for the given set. pub fn new(fds: &'a [FdSetElement]) -> Self { Self { current: 0, fds } } } +#[cfg(not(any(windows, target_os = "wasi")))] impl<'a> Iterator for FdSetIter<'a> { type Item = RawFd; fn next(&mut self) -> Option { if let Some(element) = self.fds.get(self.current as usize / BITS) { // Test whether the current element has more bits set. - let shifted = element >> ((self.current as usize % BITS) as u32); + let shifted = element.0 >> ((self.current as usize % BITS) as u32); if shifted != 0 { let fd = self.current + shifted.trailing_zeros() as RawFd; self.current = fd + 1; @@ -160,10 +224,10 @@ impl<'a> Iterator for FdSetIter<'a> { // Search through the array for the next element with bits set. if let Some(index) = self.fds[(self.current as usize / BITS) + 1..] 
.iter() - .position(|element| *element != 0) + .position(|element| element.0 != 0) { let index = index + (self.current as usize / BITS) + 1; - let element = self.fds[index]; + let element = self.fds[index].0; let fd = (index * BITS) as RawFd + element.trailing_zeros() as RawFd; self.current = fd + 1; return Some(fd); @@ -172,3 +236,52 @@ impl<'a> Iterator for FdSetIter<'a> { None } } + +#[cfg(any(windows, target_os = "wasi"))] +impl<'a> Iterator for FdSetIter<'a> { + type Item = RawFd; + + fn next(&mut self) -> Option { + assert!(cfg!(target_endian = "little"), "what"); + + let current = self.current; + if current as u64 == self.fds[0].0 { + return None; + } + let fd = self.fds[current as usize + 1].0; + self.current = current + 1; + Some(fd) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + #[cfg(windows)] + fn layouts() { + use windows_sys::Win32::Networking::WinSock::FD_SET; + + // The first element of the `FdSetElement` array corresponds to the + // `fd_count` field. + assert_eq!(memoffset::offset_of!(FD_SET, fd_count), 0); + + // The following elements of the `FdSetElement` array correspond to the + // `fd_array` field. + let array = [FdSetElement::default()]; + assert_eq!(memoffset::offset_of!(FD_SET, fd_array), unsafe { + array[1..1].as_ptr().offset_from(array[0..0].as_ptr()) as usize + }); + + // The `FdSetElement` array should be suitably aligned. + assert_eq!(align_of::(), align_of::()); + } + + #[test] + #[cfg(any(bsd, linux_kernel))] + fn layouts() { + // The `FdSetElement` array should be suitably aligned. 
+ assert_eq!(align_of::(), align_of::()); + } +} diff --git a/src/lib.rs b/src/lib.rs index 8405becc5..e92162556 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -356,14 +356,13 @@ mod prctl; #[cfg(not(any(windows, target_os = "espidf", target_os = "wasi")))] #[cfg(any(feature = "process", feature = "runtime", all(bsd, feature = "event")))] mod signal; -#[cfg(not(windows))] #[cfg(any( feature = "fs", feature = "process", feature = "runtime", feature = "thread", feature = "time", - all(feature = "event", any(bsd, linux_kernel)), + all(feature = "event", any(bsd, linux_kernel, windows)), all( linux_raw, not(feature = "use-libc-auxv"), diff --git a/tests/event/main.rs b/tests/event/main.rs index 8e93f0ab4..e15c419f1 100644 --- a/tests/event/main.rs +++ b/tests/event/main.rs @@ -10,5 +10,5 @@ mod epoll; #[cfg(not(target_os = "wasi"))] mod eventfd; mod poll; -#[cfg(any(bsd, linux_kernel))] +#[cfg(any(bsd, linux_kernel, windows, target_os = "wasi"))] mod select; diff --git a/tests/event/select.rs b/tests/event/select.rs index c791c9ea1..847e6f414 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -1,19 +1,20 @@ use rustix::event::{ - fd_set_bound, fd_set_contains, fd_set_insert, fd_set_num_elements, fd_set_remove, FdSetElement, - FdSetIter, + fd_set_bound, fd_set_insert, fd_set_num_elements, fd_set_remove, FdSetElement, FdSetIter, }; -use rustix::fd::RawFd; +use rustix::event::{select, Timespec}; +use rustix::fd::{AsRawFd, RawFd}; #[cfg(feature = "pipe")] -use { - rustix::event::{select, Timespec}, - rustix::fd::{AsRawFd, FromRawFd, OwnedFd}, - rustix::io::retry_on_intr, - std::cmp::max, -}; +#[cfg(not(windows))] +use rustix::fd::{FromRawFd, OwnedFd}; +use rustix::io::retry_on_intr; +#[cfg(not(windows))] +use serial_test::serial; +use std::cmp::max; #[cfg(feature = "pipe")] +#[cfg(not(windows))] #[test] -fn test_select() { +fn test_select_with_pipes() { use rustix::io::{read, write}; use rustix::pipe::pipe; @@ -22,7 +23,7 @@ fn test_select() { let nfds = 
max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. - let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; fd_set_insert(&mut readfds, reader.as_raw_fd()); let num = retry_on_intr(|| unsafe { select( @@ -45,7 +46,7 @@ fn test_select() { assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. - let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; fd_set_insert(&mut readfds, reader.as_raw_fd()); let num = retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); @@ -82,7 +83,9 @@ fn test_select() { } #[cfg(feature = "pipe")] +#[cfg(not(windows))] #[test] +#[serial] // for `setrlimit` usage fn test_select_with_great_fds() { use core::cmp::max; use rustix::io::{read, write}; @@ -111,7 +114,7 @@ fn test_select_with_great_fds() { let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; // `select` should say there's nothing ready to be read from the pipe. - let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; fd_set_insert(&mut readfds, reader.as_raw_fd()); let num = retry_on_intr(|| unsafe { select( @@ -134,7 +137,7 @@ fn test_select_with_great_fds() { assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); // `select` should now say there's data to be read. 
- let mut readfds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; fd_set_insert(&mut readfds, reader.as_raw_fd()); let num = retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); @@ -173,6 +176,206 @@ fn test_select_with_great_fds() { setrlimit(Resource::Nofile, orig_rlimit).unwrap(); } +#[cfg(feature = "net")] +#[test] +fn test_select_with_sockets() { + use rustix::net::{recv, send, AddressFamily, RecvFlags, SendFlags, SocketType}; + use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + + // Create a socket pair (but don't use `socketpair` because we want this + // to work on Windows too). + + let localhost = IpAddr::V4(Ipv4Addr::LOCALHOST); + let addr = SocketAddr::new(localhost, 0); + let listener = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::bind(&listener, &addr).expect("bind"); + rustix::net::listen(&listener, 1).expect("listen"); + let local_addr = rustix::net::getsockname(&listener).unwrap(); + let writer = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::connect_any(&writer, &local_addr).expect("connect"); + let reader = rustix::net::accept(&listener).expect("accept"); + + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + #[cfg(windows)] + let nfds: i32 = nfds.try_into().unwrap(); + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Write a byte to the pipe. 
+ assert_eq!( + retry_on_intr(|| send(&writer, b"a", SendFlags::empty())).unwrap(), + 1 + ); + + // `select` should now say there's data to be read. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); + assert_eq!(num, 1); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!( + retry_on_intr(|| recv(&reader, &mut buf, RecvFlags::empty())).unwrap(), + 1 + ); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); +} + +// Like `test_select_with_sockets` but test with the maximum permitted +// fd value. 
+#[cfg(feature = "net")] +#[cfg(not(windows))] // for `dup2` usage +#[test] +#[serial] // for `setrlimit` usage +fn test_select_with_maxfd_sockets() { + use rustix::net::{recv, send, AddressFamily, RecvFlags, SendFlags, SocketType}; + use rustix::process::{getrlimit, setrlimit, Resource}; + use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + + let localhost = IpAddr::V4(Ipv4Addr::LOCALHOST); + let addr = SocketAddr::new(localhost, 0); + let listener = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::bind(&listener, &addr).expect("bind"); + rustix::net::listen(&listener, 1).expect("listen"); + let local_addr = rustix::net::getsockname(&listener).unwrap(); + let writer = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::connect_any(&writer, &local_addr).expect("connect"); + let reader = rustix::net::accept(&listener).expect("accept"); + + // Raise the fd limit to the maximum. + let orig_rlimit = getrlimit(Resource::Nofile); + let mut rlimit = orig_rlimit; + let mut fd_limit = libc::FD_SETSIZE as RawFd; + if let Some(maximum) = rlimit.maximum { + rlimit.current = Some(maximum); + fd_limit = maximum as RawFd; + } + setrlimit(Resource::Nofile, rlimit).unwrap(); + + // Renumber the fds to the maximum possible values. + let great_fd = unsafe { libc::dup2(reader.as_raw_fd(), fd_limit as RawFd - 1) }; + let reader = unsafe { OwnedFd::from_raw_fd(great_fd) }; + let great_fd = unsafe { libc::dup2(writer.as_raw_fd(), fd_limit as RawFd - 2) }; + let writer = unsafe { OwnedFd::from_raw_fd(great_fd) }; + + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + #[cfg(windows)] + let nfds: i32 = nfds.try_into().unwrap(); + + // `select` should say there's nothing ready to be read from the pipe. 
+ let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Write a byte to the pipe. + assert_eq!( + retry_on_intr(|| send(&writer, b"a", SendFlags::empty())).unwrap(), + 1 + ); + + // `select` should now say there's data to be read. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); + assert_eq!(num, 1); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!( + retry_on_intr(|| recv(&reader, &mut buf, RecvFlags::empty())).unwrap(), + 1 + ); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. 
+ fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + setrlimit(Resource::Nofile, orig_rlimit).unwrap(); +} + #[test] fn test_select_iter() { for stuff in [ @@ -191,17 +394,23 @@ fn test_select_iter() { } else { *stuff.last().unwrap() + 1 }; - let mut fds = vec![0 as FdSetElement; fd_set_num_elements(nfds)]; + let mut fds = vec![FdSetElement::default(); fd_set_num_elements(stuff.len(), nfds)]; for fd in stuff { - assert!(!fd_set_contains(&mut fds, *fd)); + assert!(!fd_set_contains(&fds, *fd)); fd_set_insert(&mut fds, *fd); - assert!(fd_set_contains(&mut fds, *fd)); + assert!(fd_set_contains(&fds, *fd)); fd_set_remove(&mut fds, *fd); - assert!(!fd_set_contains(&mut fds, *fd)); + assert!(!fd_set_contains(&fds, *fd)); fd_set_insert(&mut fds, *fd); - assert!(fd_set_contains(&mut fds, *fd)); + assert!(fd_set_contains(&fds, *fd)); } assert_eq!(fd_set_bound(&fds), nfds); assert_eq!(FdSetIter::new(&fds).collect::>(), stuff); } } + +// This isn't in rustix's public API because it isn't constant time. On +// bitvector platforms it could be, but on fd array platforms it can't be. +fn fd_set_contains(fds: &[FdSetElement], fd: RawFd) -> bool { + FdSetIter::new(fds).any(|x| x == fd) +} From d3714c94fcfa2ac88909a91bf6448162af0c697e Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 23 Sep 2024 16:45:21 -0700 Subject: [PATCH 18/25] Support WASI. 
--- src/backend/libc/event/syscalls.rs | 61 ++++++++++++++++++- src/event/select.rs | 96 +++++++++++++++++++++++++----- 2 files changed, 138 insertions(+), 19 deletions(-) diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index 244fb6b36..763f2e2c0 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -16,7 +16,7 @@ use crate::event::port::Event; target_os = "espidf" ))] use crate::event::EventfdFlags; -#[cfg(any(bsd, linux_kernel))] +#[cfg(any(bsd, linux_kernel, target_os = "wasi"))] use crate::event::FdSetElement; use crate::event::PollFd; use crate::io; @@ -30,9 +30,9 @@ use crate::utils::as_ptr; all(feature = "alloc", any(linux_kernel, target_os = "redox")), ))] use core::mem::MaybeUninit; -#[cfg(any(bsd, linux_kernel))] +#[cfg(any(bsd, linux_kernel, target_os = "wasi"))] use core::ptr::null; -#[cfg(any(bsd, linux_kernel, solarish, target_os = "redox"))] +#[cfg(any(bsd, linux_kernel, solarish, target_os = "redox", target_os = "wasi"))] use core::ptr::null_mut; #[cfg(any( linux_kernel, @@ -205,6 +205,61 @@ pub(crate) unsafe fn select( } } +// WASI uses a count + array instead of a bitvector. 
+#[cfg(target_os = "wasi")] +pub(crate) unsafe fn select( + nfds: i32, + readfds: Option<&mut [FdSetElement]>, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + let len = crate::event::fd_set_num_elements_for_fd_array(nfds as usize); + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= len); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= len); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= len); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `c::timeval`. + timeout_data = c::timeval { + tv_sec: timeout.tv_sec.try_into().map_err(|_| io::Errno::OVERFLOW)?, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => null(), + }; + + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr as *mut c::timeval, + )) +} + #[cfg(solarish)] pub(crate) fn port_create() -> io::Result { unsafe { ret_owned_fd(c::port_create()) } diff --git a/src/event/select.rs b/src/event/select.rs index 8100157d6..521d1fd06 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -5,10 +5,28 @@ //! `select` is unsafe due to I/O safety. #![allow(unsafe_code)] +use crate::fd::RawFd; use crate::{backend, io}; +#[cfg(any(windows, target_os = "wasi"))] +use core::slice; pub use crate::timespec::{Nsecs, Secs, Timespec}; +/// wasi-libc's `fd_set` type. The libc bindings for it have private fields, +/// so we redeclare it for ourselves so that we can access the fields. They're +/// publicly exposed in wasi-libc. 
+#[cfg(target_os = "wasi")] +#[repr(C)] +struct FD_SET { + /// The wasi-libc headers call this `__nfds`. + fd_count: usize, + /// The wasi-libc headers call this `__fds`. + fd_array: [i32; libc::FD_SETSIZE], +} + +#[cfg(windows)] +use windows_sys::Win32::Networking::WinSock::FD_SET; + /// Storage element type for use with [`select`]. #[cfg(any( windows, @@ -24,6 +42,7 @@ pub struct FdSetElement(pub(crate) u64); /// Storage element type for use with [`select`]. #[cfg(not(any( windows, + target_os = "wasi", all( target_pointer_width = "64", any(target_os = "freebsd", target_os = "dragonfly") @@ -33,6 +52,12 @@ pub struct FdSetElement(pub(crate) u64); #[derive(Copy, Clone, Default)] pub struct FdSetElement(pub(crate) u32); +/// Storage element type for use with [`select`]. +#[cfg(target_os = "wasi")] +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) usize); + /// `select(nfds, readfds, writefds, exceptfds, timeout)`—Wait for events on /// sets of file descriptors. /// @@ -92,37 +117,62 @@ pub unsafe fn select( backend::event::syscalls::select(nfds, readfds, writefds, exceptfds, timeout) } +#[cfg(not(any(windows, target_os = "wasi")))] const BITS: usize = core::mem::size_of::() * 8; -use crate::fd::RawFd; /// Set `fd` in the set pointed to by `fds`. 
#[doc(alias = "FD_SET")] #[inline] pub fn fd_set_insert(fds: &mut [FdSetElement], fd: RawFd) { - let fd = fd as usize; - fds[fd / BITS].0 |= 1 << (fd % BITS); + #[cfg(not(any(windows, target_os = "wasi")))] + { + let fd = fd as usize; + fds[fd / BITS].0 |= 1 << (fd % BITS); + } + + #[cfg(any(windows, target_os = "wasi"))] + { + let set = unsafe { &mut *fds.as_mut_ptr().cast::() }; + let fd_count = set.fd_count; + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; + + if !fd_array.iter().any(|p| *p as RawFd == fd) { + let fd_array = unsafe { + slice::from_raw_parts_mut(set.fd_array.as_mut_ptr(), fd_count as usize + 1) + }; + set.fd_count = fd_count + 1; + fd_array[fd_count as usize] = fd as _; + } + } } /// Clear `fd` in the set pointed to by `fds`. #[doc(alias = "FD_CLR")] #[inline] pub fn fd_set_remove(fds: &mut [FdSetElement], fd: RawFd) { - let fd = fd as usize; - fds[fd / BITS].0 &= !(1 << (fd % BITS)); -} + #[cfg(not(any(windows, target_os = "wasi")))] + { + let fd = fd as usize; + fds[fd / BITS].0 &= !(1 << (fd % BITS)); + } -/// Compute the minimum `nfds` value needed for the set pointed to by -/// `fds`. -#[inline] -pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { #[cfg(any(windows, target_os = "wasi"))] { - assert!(cfg!(target_endian = "little"), "what"); + let set = unsafe { &mut *fds.as_mut_ptr().cast::() }; + let fd_count = set.fd_count; + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; - let fd_count = fds[0].0 as usize; - (fd_count + 1) as RawFd + if let Some(pos) = fd_array.iter().position(|p| *p as RawFd == fd) { + set.fd_count = fd_count - 1; + set.fd_array[pos] = *set.fd_array.last().unwrap(); + } } +} +/// Compute the minimum `nfds` value needed for the set pointed to by +/// `fds`. 
+#[inline] +pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { #[cfg(not(any(windows, target_os = "wasi")))] { if let Some(position) = fds.iter().rposition(|element| element.0 != 0) { @@ -132,6 +182,15 @@ pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { 0 } } + + #[cfg(any(windows, target_os = "wasi"))] + { + assert!(cfg!(target_endian = "little"), "what"); + + let set = unsafe { &*fds.as_ptr().cast::() }; + let fd_count = set.fd_count; + fd_count as RawFd + } } /// Compute the number of `FdSetElement`s needed to hold a set which can @@ -245,12 +304,17 @@ impl<'a> Iterator for FdSetIter<'a> { assert!(cfg!(target_endian = "little"), "what"); let current = self.current; - if current as u64 == self.fds[0].0 { + + let set = unsafe { &*self.fds.as_ptr().cast::() }; + let fd_count = set.fd_count; + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; + + if current == fd_count as usize { return None; } - let fd = self.fds[current as usize + 1].0; + let fd = fd_array[current as usize]; self.current = current + 1; - Some(fd) + Some(fd as RawFd) } } From 2ca56ecfe5f2f46fa81e9abf6c42f9e887e9bb14 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 23 Sep 2024 16:55:39 -0700 Subject: [PATCH 19/25] Fix compilation. --- src/event/select.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/event/select.rs b/src/event/select.rs index 521d1fd06..7e2886965 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -321,6 +321,7 @@ impl<'a> Iterator for FdSetIter<'a> { #[cfg(test)] mod test { use super::*; + use core::mem::align_of; #[test] #[cfg(windows)] From 8130b4c354249ad1be2954a191bfaa759bf7a019 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 24 Sep 2024 10:20:41 -0700 Subject: [PATCH 20/25] Ignore "unstable name collisions" warnings for now. 
--- tests/path/arg.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/path/arg.rs b/tests/path/arg.rs index 66f0bfd23..2b330b95b 100644 --- a/tests/path/arg.rs +++ b/tests/path/arg.rs @@ -1,3 +1,6 @@ +// TODO: Rename `Arg::as_str` to avoid collisions. +#![allow(unstable_name_collisions)] + use rustix::ffi::{CStr, CString}; use rustix::io; use rustix::path::Arg; From 7ce5bdf79618b3d8a19fff1fe260c2c2f624ae56 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 24 Sep 2024 10:03:02 -0700 Subject: [PATCH 21/25] Fix types in the QEMU select implementation. --- ci/select-setsize.patch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/select-setsize.patch b/ci/select-setsize.patch index 6631dd414..48f9824d7 100644 --- a/ci/select-setsize.patch +++ b/ci/select-setsize.patch @@ -46,7 +46,7 @@ diff -ur a/linux-user/syscall.c b/linux-user/syscall.c if ((b >> j) & 1) - FD_SET(k, fds); + fds[k / (sizeof(unsigned long) * 8)] |= -+ 1 << (k % (sizeof(unsigned long) * 8)); ++ 1ul << (k % (sizeof(unsigned long) * 8)); k++; } } @@ -76,7 +76,7 @@ diff -ur a/linux-user/syscall.c b/linux-user/syscall.c - v |= ((abi_ulong)(FD_ISSET(k, fds) != 0) << j); + bool set = + (fds[k / (sizeof(unsigned long) * 8)] & -+ (1 << (k % (sizeof(unsigned long) * 8)))) != 0; ++ (1ul << (k % (sizeof(unsigned long) * 8)))) != 0; + v |= ((abi_ulong)set << j); k++; } From f321b9acc8bf5fb1b5e4cfde83e5c843e208eabe Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 23 Sep 2024 17:13:10 -0700 Subject: [PATCH 22/25] Various fixes for Linux, Windows, and macOS. --- src/event/select.rs | 37 ++++++++++++++++++------------------- tests/event/main.rs | 29 +++++++++++++++++++++++++++++ tests/event/select.rs | 16 ++++++++++++++-- 3 files changed, 61 insertions(+), 21 deletions(-) diff --git a/src/event/select.rs b/src/event/select.rs index 7e2886965..466cd72db 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -5,6 +5,8 @@ //! `select` is unsafe due to I/O safety. 
#![allow(unsafe_code)] +#[cfg(linux_like)] +use crate::backend::c; use crate::fd::RawFd; use crate::{backend, io}; #[cfg(any(windows, target_os = "wasi"))] @@ -39,8 +41,15 @@ use windows_sys::Win32::Networking::WinSock::FD_SET; #[derive(Copy, Clone, Default)] pub struct FdSetElement(pub(crate) u64); +/// Storage element type for use with [`select`]. +#[cfg(linux_like)] +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) c::c_ulong); + /// Storage element type for use with [`select`]. #[cfg(not(any( + linux_like, windows, target_os = "wasi", all( @@ -185,11 +194,16 @@ pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { #[cfg(any(windows, target_os = "wasi"))] { - assert!(cfg!(target_endian = "little"), "what"); - let set = unsafe { &*fds.as_ptr().cast::() }; let fd_count = set.fd_count; - fd_count as RawFd + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; + let mut max = 0; + for fd in fd_array { + if *fd >= max { + max = *fd + 1; + } + } + max as RawFd } } @@ -301,8 +315,6 @@ impl<'a> Iterator for FdSetIter<'a> { type Item = RawFd; fn next(&mut self) -> Option { - assert!(cfg!(target_endian = "little"), "what"); - let current = self.current; let set = unsafe { &*self.fds.as_ptr().cast::() }; @@ -324,21 +336,8 @@ mod test { use core::mem::align_of; #[test] - #[cfg(windows)] + #[cfg(any(windows, target_os = "wasi"))] fn layouts() { - use windows_sys::Win32::Networking::WinSock::FD_SET; - - // The first element of the `FdSetElement` array corresponds to the - // `fd_count` field. - assert_eq!(memoffset::offset_of!(FD_SET, fd_count), 0); - - // The following elements of the `FdSetElement` array correspond to the - // `fd_array` field. - let array = [FdSetElement::default()]; - assert_eq!(memoffset::offset_of!(FD_SET, fd_array), unsafe { - array[1..1].as_ptr().offset_from(array[0..0].as_ptr()) as usize - }); - // The `FdSetElement` array should be suitably aligned. 
assert_eq!(align_of::(), align_of::()); } diff --git a/tests/event/main.rs b/tests/event/main.rs index e15c419f1..5e8ef6936 100644 --- a/tests/event/main.rs +++ b/tests/event/main.rs @@ -12,3 +12,32 @@ mod eventfd; mod poll; #[cfg(any(bsd, linux_kernel, windows, target_os = "wasi"))] mod select; + +#[cfg(windows)] +mod windows { + use std::sync::OnceLock; + + pub struct Thing; + + impl Thing { + pub fn new() -> Self { + let _ = rustix::net::wsa_startup().unwrap(); + Self + } + } + + impl Drop for Thing { + fn drop(&mut self) { + rustix::net::wsa_cleanup().unwrap(); + } + } + + pub static CLEANUP: OnceLock = OnceLock::new(); +} + +/// Checks whether the Windows socket interface has been started already, and +/// if not, starts it. +pub fn init() { + #[cfg(windows)] + let _ = windows::CLEANUP.get_or_init(|| windows::Thing::new()); +} diff --git a/tests/event/select.rs b/tests/event/select.rs index 847e6f414..66919824c 100644 --- a/tests/event/select.rs +++ b/tests/event/select.rs @@ -7,7 +7,6 @@ use rustix::fd::{AsRawFd, RawFd}; #[cfg(not(windows))] use rustix::fd::{FromRawFd, OwnedFd}; use rustix::io::retry_on_intr; -#[cfg(not(windows))] use serial_test::serial; use std::cmp::max; @@ -178,10 +177,13 @@ fn test_select_with_great_fds() { #[cfg(feature = "net")] #[test] +#[serial] // for `crate::init` fn test_select_with_sockets() { use rustix::net::{recv, send, AddressFamily, RecvFlags, SendFlags, SocketType}; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + crate::init(); + // Create a socket pair (but don't use `socketpair` because we want this // to work on Windows too). 
@@ -271,12 +273,14 @@ fn test_select_with_sockets() { #[cfg(feature = "net")] #[cfg(not(windows))] // for `dup2` usage #[test] -#[serial] // for `setrlimit` usage +#[serial] // for `setrlimit` usage, and `crate::init` fn test_select_with_maxfd_sockets() { use rustix::net::{recv, send, AddressFamily, RecvFlags, SendFlags, SocketType}; use rustix::process::{getrlimit, setrlimit, Resource}; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + crate::init(); + let localhost = IpAddr::V4(Ipv4Addr::LOCALHOST); let addr = SocketAddr::new(localhost, 0); let listener = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); @@ -299,6 +303,14 @@ fn test_select_with_maxfd_sockets() { // Renumber the fds to the maximum possible values. let great_fd = unsafe { libc::dup2(reader.as_raw_fd(), fd_limit as RawFd - 1) }; + + // On old versions of macOS, the above `dup2` call fails with `EBADF`. Just + // skip the rest of this test in that case. + #[cfg(apple)] + if great_fd == -1 && libc_errno::errno().0 == libc::EBADF { + return; + } + let reader = unsafe { OwnedFd::from_raw_fd(great_fd) }; let great_fd = unsafe { libc::dup2(writer.as_raw_fd(), fd_limit as RawFd - 2) }; let writer = unsafe { OwnedFd::from_raw_fd(great_fd) }; From d56f394f69bdda7f9ace677387c7bba63b2f8eab Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 24 Sep 2024 12:36:31 -0700 Subject: [PATCH 23/25] Various fixes. 
--- src/backend/linux_raw/c.rs | 1 + src/event/select.rs | 30 ++++++++++++++++++++++++------ src/lib.rs | 2 +- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/backend/linux_raw/c.rs b/src/backend/linux_raw/c.rs index 4035bf945..95f701b16 100644 --- a/src/backend/linux_raw/c.rs +++ b/src/backend/linux_raw/c.rs @@ -9,6 +9,7 @@ pub(crate) type size_t = usize; pub(crate) use linux_raw_sys::ctypes::*; pub(crate) use linux_raw_sys::errno::EINVAL; +pub(crate) use linux_raw_sys::general::{__kernel_fd_set as fd_set, __FD_SETSIZE as FD_SETSIZE}; pub(crate) use linux_raw_sys::ioctl::{FIONBIO, FIONREAD}; // Import the kernel's `uid_t` and `gid_t` if they're 32-bit. #[cfg(not(any(target_arch = "arm", target_arch = "sparc", target_arch = "x86")))] diff --git a/src/event/select.rs b/src/event/select.rs index 466cd72db..c8dc4fcc8 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -5,7 +5,7 @@ //! `select` is unsafe due to I/O safety. #![allow(unsafe_code)] -#[cfg(linux_like)] +#[cfg(any(linux_like, target_os = "wasi"))] use crate::backend::c; use crate::fd::RawFd; use crate::{backend, io}; @@ -23,7 +23,7 @@ struct FD_SET { /// The wasi-libc headers call this `__nfds`. fd_count: usize, /// The wasi-libc headers call this `__fds`. - fd_array: [i32; libc::FD_SETSIZE], + fd_array: [i32; c::FD_SETSIZE], } #[cfg(windows)] @@ -233,7 +233,10 @@ pub fn fd_set_num_elements(set_count: usize, nfds: RawFd) -> usize { pub(crate) fn fd_set_num_elements_for_fd_array(set_count: usize) -> usize { // Allocate space for an `fd_count` field, plus `set_count` elements // for the `fd_array` field. 
- 1 + set_count + div_ceil( + align_of::() + set_count * size_of::(), + size_of::(), + ) } /// `fd_set_num_elements` implementation on platforms with bitvector @@ -246,7 +249,6 @@ pub(crate) fn fd_set_num_elements_for_bitvector(nfds: RawFd) -> usize { div_ceil(nfds, BITS) } -#[cfg(not(any(windows, target_os = "wasi")))] fn div_ceil(lhs: usize, rhs: usize) -> usize { let d = lhs / rhs; let r = lhs % rhs; @@ -333,19 +335,35 @@ impl<'a> Iterator for FdSetIter<'a> { #[cfg(test)] mod test { use super::*; - use core::mem::align_of; + use core::mem::{align_of, size_of}; #[test] #[cfg(any(windows, target_os = "wasi"))] fn layouts() { // The `FdSetElement` array should be suitably aligned. assert_eq!(align_of::(), align_of::()); + + // The layout of `FD_SET` should match our layout of a set of the same + // size. + assert_eq!( + fd_set_num_elements_for_fd_array( + memoffset::span_of!(FD_SET, fd_array).len() / size_of::() + ) * size_of::(), + size_of::() + ); } #[test] #[cfg(any(bsd, linux_kernel))] fn layouts() { // The `FdSetElement` array should be suitably aligned. - assert_eq!(align_of::(), align_of::()); + assert_eq!(align_of::(), align_of::()); + + // The layout of `fd_set` should match our layout of a set of the same + // size. + assert_eq!( + fd_set_num_elements_for_bitvector(c::FD_SETSIZE as RawFd) * size_of::(), + size_of::() + ); } } diff --git a/src/lib.rs b/src/lib.rs index e92162556..e53e263ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -362,7 +362,7 @@ mod signal; feature = "runtime", feature = "thread", feature = "time", - all(feature = "event", any(bsd, linux_kernel, windows)), + all(feature = "event", any(bsd, linux_kernel, windows, target_os = "wasi")), all( linux_raw, not(feature = "use-libc-auxv"), From 974bc80d86ac57119a2f0967564796c0d7f454ea Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 24 Sep 2024 12:42:43 -0700 Subject: [PATCH 24/25] Fix test compilation. 
--- src/event/select.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/event/select.rs b/src/event/select.rs index c8dc4fcc8..e38f27efd 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -356,6 +356,8 @@ mod test { #[test] #[cfg(any(bsd, linux_kernel))] fn layouts() { + use crate::backend::c; + // The `FdSetElement` array should be suitably aligned. assert_eq!(align_of::(), align_of::()); From 489d9d39ec9dccd89b9308f268670aaf712cc494 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 24 Sep 2024 13:47:02 -0700 Subject: [PATCH 25/25] Fix imports. --- src/event/select.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/event/select.rs b/src/event/select.rs index e38f27efd..d124e13c3 100644 --- a/src/event/select.rs +++ b/src/event/select.rs @@ -10,6 +10,8 @@ use crate::backend::c; use crate::fd::RawFd; use crate::{backend, io}; #[cfg(any(windows, target_os = "wasi"))] +use core::mem::{align_of, size_of}; +#[cfg(any(windows, target_os = "wasi"))] use core::slice; pub use crate::timespec::{Nsecs, Secs, Timespec};