rustix/backend/linux_raw/event/
syscalls.rs

//! linux_raw syscalls supporting `rustix::event`.
//!
//! # Safety
//!
//! See the `rustix::backend` module documentation for details.
6#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
7
8use crate::backend::conv::{
9    by_ref, c_int, c_uint, opt_mut, opt_ref, pass_usize, ret, ret_c_int, ret_error, ret_owned_fd,
10    ret_usize, size_of, slice_mut, zero,
11};
12use crate::event::{epoll, EventfdFlags, FdSetElement, PollFd, Timespec};
13use crate::fd::{BorrowedFd, OwnedFd};
14use crate::io;
15use core::ptr::null_mut;
16use linux_raw_sys::general::{kernel_sigset_t, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD};
17
18#[inline]
19pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: Option<&Timespec>) -> io::Result<usize> {
20    let (fds_addr_mut, fds_len) = slice_mut(fds);
21
22    #[cfg(target_pointer_width = "32")]
23    unsafe {
24        // If we don't have Linux 5.1, and the timeout fits in a
25        // `__kernel_old_timespec`, use plain `ppoll`.
26        //
27        // We do this unconditionally, rather than trying `ppoll_time64` and
28        // falling back on `Errno::NOSYS`, because seccomp configurations will
29        // sometimes abort the process on syscalls they don't recognize.
30        #[cfg(not(feature = "linux_5_1"))]
31        {
32            use linux_raw_sys::general::__kernel_old_timespec;
33
34            // If we don't have a timeout, or if we can convert the timeout to
35            // a `__kernel_old_timespec`, the use `__NR_ppoll`.
36            fn convert(timeout: &Timespec) -> Option<__kernel_old_timespec> {
37                Some(__kernel_old_timespec {
38                    tv_sec: timeout.tv_sec.try_into().ok()?,
39                    tv_nsec: timeout.tv_nsec.try_into().ok()?,
40                })
41            }
42            let old_timeout = if let Some(timeout) = timeout {
43                match convert(timeout) {
44                    // Could not convert timeout.
45                    None => None,
46                    // Could convert timeout. Ok!
47                    Some(old_timeout) => Some(Some(old_timeout)),
48                }
49            } else {
50                // No timeout. Ok!
51                Some(None)
52            };
53            if let Some(mut old_timeout) = old_timeout {
54                // Call `ppoll`.
55                //
56                // Linux's `ppoll` mutates the timeout argument. Our public
57                // interface does not do this, because it's not portable to other
58                // platforms, so we create a temporary value to hide this behavior.
59                return ret_usize(syscall!(
60                    __NR_ppoll,
61                    fds_addr_mut,
62                    fds_len,
63                    opt_mut(old_timeout.as_mut()),
64                    zero(),
65                    size_of::<kernel_sigset_t, _>()
66                ));
67            }
68        }
69
70        // We either have Linux 5.1 or the timeout didn't fit in
71        // `__kernel_old_timespec` so `__NR_ppoll_time64` will either
72        // succeed or fail due to our having no other options.
73
74        // Call `ppoll_time64`.
75        //
76        // Linux's `ppoll_time64` mutates the timeout argument. Our public
77        // interface does not do this, because it's not portable to other
78        // platforms, so we create a temporary value to hide this behavior.
79        ret_usize(syscall!(
80            __NR_ppoll_time64,
81            fds_addr_mut,
82            fds_len,
83            opt_mut(timeout.copied().as_mut()),
84            zero(),
85            size_of::<kernel_sigset_t, _>()
86        ))
87    }
88
89    #[cfg(target_pointer_width = "64")]
90    unsafe {
91        // Call `ppoll`.
92        //
93        // Linux's `ppoll` mutates the timeout argument. Our public interface
94        // does not do this, because it's not portable to other platforms, so
95        // we create a temporary value to hide this behavior.
96        ret_usize(syscall!(
97            __NR_ppoll,
98            fds_addr_mut,
99            fds_len,
100            opt_mut(timeout.copied().as_mut()),
101            zero(),
102            size_of::<kernel_sigset_t, _>()
103        ))
104    }
105}
106
107pub(crate) unsafe fn select(
108    nfds: i32,
109    readfds: Option<&mut [FdSetElement]>,
110    writefds: Option<&mut [FdSetElement]>,
111    exceptfds: Option<&mut [FdSetElement]>,
112    timeout: Option<&crate::timespec::Timespec>,
113) -> io::Result<i32> {
114    let len = crate::event::fd_set_num_elements_for_bitvector(nfds);
115
116    let readfds = match readfds {
117        Some(readfds) => {
118            assert!(readfds.len() >= len);
119            readfds.as_mut_ptr()
120        }
121        None => null_mut(),
122    };
123    let writefds = match writefds {
124        Some(writefds) => {
125            assert!(writefds.len() >= len);
126            writefds.as_mut_ptr()
127        }
128        None => null_mut(),
129    };
130    let exceptfds = match exceptfds {
131        Some(exceptfds) => {
132            assert!(exceptfds.len() >= len);
133            exceptfds.as_mut_ptr()
134        }
135        None => null_mut(),
136    };
137
138    #[cfg(target_pointer_width = "32")]
139    {
140        // If we don't have Linux 5.1, and the timeout fits in a
141        // `__kernel_old_timespec`, use plain `pselect6`.
142        //
143        // We do this unconditionally, rather than trying `pselect6_time64` and
144        // falling back on `Errno::NOSYS`, because seccomp configurations will
145        // sometimes abort the process on syscalls they don't recognize.
146        #[cfg(not(feature = "linux_5_1"))]
147        {
148            use linux_raw_sys::general::__kernel_old_timespec;
149
150            // If we don't have a timeout, or if we can convert the timeout to
151            // a `__kernel_old_timespec`, the use `__NR_pselect6`.
152            fn convert(timeout: &Timespec) -> Option<__kernel_old_timespec> {
153                Some(__kernel_old_timespec {
154                    tv_sec: timeout.tv_sec.try_into().ok()?,
155                    tv_nsec: timeout.tv_nsec.try_into().ok()?,
156                })
157            }
158            let old_timeout = if let Some(timeout) = timeout {
159                match convert(timeout) {
160                    // Could not convert timeout.
161                    None => None,
162                    // Could convert timeout. Ok!
163                    Some(old_timeout) => Some(Some(old_timeout)),
164                }
165            } else {
166                // No timeout. Ok!
167                Some(None)
168            };
169            if let Some(mut old_timeout) = old_timeout {
170                // Call `pselect6`.
171                //
172                // Linux's `pselect6` mutates the timeout argument. Our public
173                // interface does not do this, because it's not portable to other
174                // platforms, so we create a temporary value to hide this behavior.
175                return ret_c_int(syscall!(
176                    __NR_pselect6,
177                    c_int(nfds),
178                    readfds,
179                    writefds,
180                    exceptfds,
181                    opt_mut(old_timeout.as_mut()),
182                    zero()
183                ));
184            }
185        }
186
187        // We either have Linux 5.1 or the timeout didn't fit in
188        // `__kernel_old_timespec` so `__NR_pselect6_time64` will either
189        // succeed or fail due to our having no other options.
190
191        // Call `pselect6_time64`.
192        //
193        // Linux's `pselect6_time64` mutates the timeout argument. Our public
194        // interface does not do this, because it's not portable to other
195        // platforms, so we create a temporary value to hide this behavior.
196        ret_c_int(syscall!(
197            __NR_pselect6_time64,
198            c_int(nfds),
199            readfds,
200            writefds,
201            exceptfds,
202            opt_mut(timeout.copied().as_mut()),
203            zero()
204        ))
205    }
206
207    #[cfg(target_pointer_width = "64")]
208    {
209        // Call `pselect6`.
210        //
211        // Linux's `pselect6` mutates the timeout argument. Our public interface
212        // does not do this, because it's not portable to other platforms, so we
213        // create a temporary value to hide this behavior.
214        ret_c_int(syscall!(
215            __NR_pselect6,
216            c_int(nfds),
217            readfds,
218            writefds,
219            exceptfds,
220            opt_mut(timeout.copied().as_mut()),
221            zero()
222        ))
223    }
224}
225
226#[inline]
227pub(crate) fn epoll_create(flags: epoll::CreateFlags) -> io::Result<OwnedFd> {
228    // SAFETY: `__NR_epoll_create1` doesn't access any user memory.
229    unsafe { ret_owned_fd(syscall_readonly!(__NR_epoll_create1, flags)) }
230}
231
232#[inline]
233pub(crate) fn epoll_add(
234    epfd: BorrowedFd<'_>,
235    fd: BorrowedFd<'_>,
236    event: &epoll::Event,
237) -> io::Result<()> {
238    // SAFETY: `__NR_epoll_ctl` with `EPOLL_CTL_ADD` doesn't modify any user
239    // memory, and it only reads from `event`.
240    unsafe {
241        ret(syscall_readonly!(
242            __NR_epoll_ctl,
243            epfd,
244            c_uint(EPOLL_CTL_ADD),
245            fd,
246            by_ref(event)
247        ))
248    }
249}
250
251#[inline]
252pub(crate) fn epoll_mod(
253    epfd: BorrowedFd<'_>,
254    fd: BorrowedFd<'_>,
255    event: &epoll::Event,
256) -> io::Result<()> {
257    // SAFETY: `__NR_epoll_ctl` with `EPOLL_CTL_MOD` doesn't modify any user
258    // memory, and it only reads from `event`.
259    unsafe {
260        ret(syscall_readonly!(
261            __NR_epoll_ctl,
262            epfd,
263            c_uint(EPOLL_CTL_MOD),
264            fd,
265            by_ref(event)
266        ))
267    }
268}
269
270#[inline]
271pub(crate) fn epoll_del(epfd: BorrowedFd<'_>, fd: BorrowedFd<'_>) -> io::Result<()> {
272    // SAFETY: `__NR_epoll_ctl` with `EPOLL_CTL_DEL` doesn't access any user
273    // memory.
274    unsafe {
275        ret(syscall_readonly!(
276            __NR_epoll_ctl,
277            epfd,
278            c_uint(EPOLL_CTL_DEL),
279            fd,
280            zero()
281        ))
282    }
283}
284
285#[inline]
286pub(crate) unsafe fn epoll_wait(
287    epfd: BorrowedFd<'_>,
288    events: (*mut crate::event::epoll::Event, usize),
289    timeout: Option<&Timespec>,
290) -> io::Result<usize> {
291    // If we don't have Linux 5.1, and the timeout fits in an `i32`, use plain
292    // `epoll_pwait`.
293    //
294    // We do this unconditionally, rather than trying `epoll_pwait2` and
295    // falling back on `Errno::NOSYS`, because seccomp configurations will
296    // sometimes abort the process on syscalls they don't recognize.
297    #[cfg(not(feature = "linux_5_11"))]
298    {
299        // If we don't have a timeout, or if we can convert the timeout to an
300        // `i32`, the use `__NR_epoll_pwait`.
301        let old_timeout = if let Some(timeout) = timeout {
302            // Try to convert the timeout; if this is `Some`, we're ok!
303            timeout.as_c_int_millis()
304        } else {
305            // No timeout. Ok!
306            Some(-1)
307        };
308        if let Some(old_timeout) = old_timeout {
309            // Call `epoll_pwait`.
310            return ret_usize(syscall!(
311                __NR_epoll_pwait,
312                epfd,
313                events.0,
314                pass_usize(events.1),
315                c_int(old_timeout),
316                zero()
317            ));
318        }
319    }
320
321    // Call `epoll_pwait2`.
322    //
323    // We either have Linux 5.1 or the timeout didn't fit in an `i32`, so
324    // `__NR_epoll_pwait2` will either succeed or fail due to our having no
325    // other options.
326    ret_usize(syscall!(
327        __NR_epoll_pwait2,
328        epfd,
329        events.0,
330        pass_usize(events.1),
331        opt_ref(timeout),
332        zero()
333    ))
334}
335
336#[inline]
337pub(crate) fn eventfd(initval: u32, flags: EventfdFlags) -> io::Result<OwnedFd> {
338    unsafe { ret_owned_fd(syscall_readonly!(__NR_eventfd2, c_uint(initval), flags)) }
339}
340
341#[inline]
342pub(crate) fn pause() {
343    unsafe {
344        #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
345        let error = ret_error(syscall_readonly!(
346            __NR_ppoll,
347            zero(),
348            zero(),
349            zero(),
350            zero()
351        ));
352
353        #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
354        let error = ret_error(syscall_readonly!(__NR_pause));
355
356        debug_assert_eq!(error, io::Errno::INTR);
357    }
358}