rustix/backend/linux_raw/event/syscalls.rs
1//! linux_raw syscalls supporting `rustix::event`.
2//!
3//! # Safety
4//!
5//! See the `rustix::backend` module documentation for details.
6#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
7
8use crate::backend::conv::{
9 by_ref, c_int, c_uint, opt_mut, opt_ref, pass_usize, ret, ret_c_int, ret_error, ret_owned_fd,
10 ret_usize, size_of, slice_mut, zero,
11};
12use crate::event::{epoll, EventfdFlags, FdSetElement, PollFd, Timespec};
13use crate::fd::{BorrowedFd, OwnedFd};
14use crate::io;
15use core::ptr::null_mut;
16use linux_raw_sys::general::{kernel_sigset_t, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD};
17
18#[inline]
19pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: Option<&Timespec>) -> io::Result<usize> {
20 let (fds_addr_mut, fds_len) = slice_mut(fds);
21
22 #[cfg(target_pointer_width = "32")]
23 unsafe {
24 // If we don't have Linux 5.1, and the timeout fits in a
25 // `__kernel_old_timespec`, use plain `ppoll`.
26 //
27 // We do this unconditionally, rather than trying `ppoll_time64` and
28 // falling back on `Errno::NOSYS`, because seccomp configurations will
29 // sometimes abort the process on syscalls they don't recognize.
30 #[cfg(not(feature = "linux_5_1"))]
31 {
32 use linux_raw_sys::general::__kernel_old_timespec;
33
34 // If we don't have a timeout, or if we can convert the timeout to
35 // a `__kernel_old_timespec`, the use `__NR_ppoll`.
36 fn convert(timeout: &Timespec) -> Option<__kernel_old_timespec> {
37 Some(__kernel_old_timespec {
38 tv_sec: timeout.tv_sec.try_into().ok()?,
39 tv_nsec: timeout.tv_nsec.try_into().ok()?,
40 })
41 }
42 let old_timeout = if let Some(timeout) = timeout {
43 match convert(timeout) {
44 // Could not convert timeout.
45 None => None,
46 // Could convert timeout. Ok!
47 Some(old_timeout) => Some(Some(old_timeout)),
48 }
49 } else {
50 // No timeout. Ok!
51 Some(None)
52 };
53 if let Some(mut old_timeout) = old_timeout {
54 // Call `ppoll`.
55 //
56 // Linux's `ppoll` mutates the timeout argument. Our public
57 // interface does not do this, because it's not portable to other
58 // platforms, so we create a temporary value to hide this behavior.
59 return ret_usize(syscall!(
60 __NR_ppoll,
61 fds_addr_mut,
62 fds_len,
63 opt_mut(old_timeout.as_mut()),
64 zero(),
65 size_of::<kernel_sigset_t, _>()
66 ));
67 }
68 }
69
70 // We either have Linux 5.1 or the timeout didn't fit in
71 // `__kernel_old_timespec` so `__NR_ppoll_time64` will either
72 // succeed or fail due to our having no other options.
73
74 // Call `ppoll_time64`.
75 //
76 // Linux's `ppoll_time64` mutates the timeout argument. Our public
77 // interface does not do this, because it's not portable to other
78 // platforms, so we create a temporary value to hide this behavior.
79 ret_usize(syscall!(
80 __NR_ppoll_time64,
81 fds_addr_mut,
82 fds_len,
83 opt_mut(timeout.copied().as_mut()),
84 zero(),
85 size_of::<kernel_sigset_t, _>()
86 ))
87 }
88
89 #[cfg(target_pointer_width = "64")]
90 unsafe {
91 // Call `ppoll`.
92 //
93 // Linux's `ppoll` mutates the timeout argument. Our public interface
94 // does not do this, because it's not portable to other platforms, so
95 // we create a temporary value to hide this behavior.
96 ret_usize(syscall!(
97 __NR_ppoll,
98 fds_addr_mut,
99 fds_len,
100 opt_mut(timeout.copied().as_mut()),
101 zero(),
102 size_of::<kernel_sigset_t, _>()
103 ))
104 }
105}
106
107pub(crate) unsafe fn select(
108 nfds: i32,
109 readfds: Option<&mut [FdSetElement]>,
110 writefds: Option<&mut [FdSetElement]>,
111 exceptfds: Option<&mut [FdSetElement]>,
112 timeout: Option<&crate::timespec::Timespec>,
113) -> io::Result<i32> {
114 let len = crate::event::fd_set_num_elements_for_bitvector(nfds);
115
116 let readfds = match readfds {
117 Some(readfds) => {
118 assert!(readfds.len() >= len);
119 readfds.as_mut_ptr()
120 }
121 None => null_mut(),
122 };
123 let writefds = match writefds {
124 Some(writefds) => {
125 assert!(writefds.len() >= len);
126 writefds.as_mut_ptr()
127 }
128 None => null_mut(),
129 };
130 let exceptfds = match exceptfds {
131 Some(exceptfds) => {
132 assert!(exceptfds.len() >= len);
133 exceptfds.as_mut_ptr()
134 }
135 None => null_mut(),
136 };
137
138 #[cfg(target_pointer_width = "32")]
139 {
140 // If we don't have Linux 5.1, and the timeout fits in a
141 // `__kernel_old_timespec`, use plain `pselect6`.
142 //
143 // We do this unconditionally, rather than trying `pselect6_time64` and
144 // falling back on `Errno::NOSYS`, because seccomp configurations will
145 // sometimes abort the process on syscalls they don't recognize.
146 #[cfg(not(feature = "linux_5_1"))]
147 {
148 use linux_raw_sys::general::__kernel_old_timespec;
149
150 // If we don't have a timeout, or if we can convert the timeout to
151 // a `__kernel_old_timespec`, the use `__NR_pselect6`.
152 fn convert(timeout: &Timespec) -> Option<__kernel_old_timespec> {
153 Some(__kernel_old_timespec {
154 tv_sec: timeout.tv_sec.try_into().ok()?,
155 tv_nsec: timeout.tv_nsec.try_into().ok()?,
156 })
157 }
158 let old_timeout = if let Some(timeout) = timeout {
159 match convert(timeout) {
160 // Could not convert timeout.
161 None => None,
162 // Could convert timeout. Ok!
163 Some(old_timeout) => Some(Some(old_timeout)),
164 }
165 } else {
166 // No timeout. Ok!
167 Some(None)
168 };
169 if let Some(mut old_timeout) = old_timeout {
170 // Call `pselect6`.
171 //
172 // Linux's `pselect6` mutates the timeout argument. Our public
173 // interface does not do this, because it's not portable to other
174 // platforms, so we create a temporary value to hide this behavior.
175 return ret_c_int(syscall!(
176 __NR_pselect6,
177 c_int(nfds),
178 readfds,
179 writefds,
180 exceptfds,
181 opt_mut(old_timeout.as_mut()),
182 zero()
183 ));
184 }
185 }
186
187 // We either have Linux 5.1 or the timeout didn't fit in
188 // `__kernel_old_timespec` so `__NR_pselect6_time64` will either
189 // succeed or fail due to our having no other options.
190
191 // Call `pselect6_time64`.
192 //
193 // Linux's `pselect6_time64` mutates the timeout argument. Our public
194 // interface does not do this, because it's not portable to other
195 // platforms, so we create a temporary value to hide this behavior.
196 ret_c_int(syscall!(
197 __NR_pselect6_time64,
198 c_int(nfds),
199 readfds,
200 writefds,
201 exceptfds,
202 opt_mut(timeout.copied().as_mut()),
203 zero()
204 ))
205 }
206
207 #[cfg(target_pointer_width = "64")]
208 {
209 // Call `pselect6`.
210 //
211 // Linux's `pselect6` mutates the timeout argument. Our public interface
212 // does not do this, because it's not portable to other platforms, so we
213 // create a temporary value to hide this behavior.
214 ret_c_int(syscall!(
215 __NR_pselect6,
216 c_int(nfds),
217 readfds,
218 writefds,
219 exceptfds,
220 opt_mut(timeout.copied().as_mut()),
221 zero()
222 ))
223 }
224}
225
226#[inline]
227pub(crate) fn epoll_create(flags: epoll::CreateFlags) -> io::Result<OwnedFd> {
228 // SAFETY: `__NR_epoll_create1` doesn't access any user memory.
229 unsafe { ret_owned_fd(syscall_readonly!(__NR_epoll_create1, flags)) }
230}
231
232#[inline]
233pub(crate) fn epoll_add(
234 epfd: BorrowedFd<'_>,
235 fd: BorrowedFd<'_>,
236 event: &epoll::Event,
237) -> io::Result<()> {
238 // SAFETY: `__NR_epoll_ctl` with `EPOLL_CTL_ADD` doesn't modify any user
239 // memory, and it only reads from `event`.
240 unsafe {
241 ret(syscall_readonly!(
242 __NR_epoll_ctl,
243 epfd,
244 c_uint(EPOLL_CTL_ADD),
245 fd,
246 by_ref(event)
247 ))
248 }
249}
250
251#[inline]
252pub(crate) fn epoll_mod(
253 epfd: BorrowedFd<'_>,
254 fd: BorrowedFd<'_>,
255 event: &epoll::Event,
256) -> io::Result<()> {
257 // SAFETY: `__NR_epoll_ctl` with `EPOLL_CTL_MOD` doesn't modify any user
258 // memory, and it only reads from `event`.
259 unsafe {
260 ret(syscall_readonly!(
261 __NR_epoll_ctl,
262 epfd,
263 c_uint(EPOLL_CTL_MOD),
264 fd,
265 by_ref(event)
266 ))
267 }
268}
269
270#[inline]
271pub(crate) fn epoll_del(epfd: BorrowedFd<'_>, fd: BorrowedFd<'_>) -> io::Result<()> {
272 // SAFETY: `__NR_epoll_ctl` with `EPOLL_CTL_DEL` doesn't access any user
273 // memory.
274 unsafe {
275 ret(syscall_readonly!(
276 __NR_epoll_ctl,
277 epfd,
278 c_uint(EPOLL_CTL_DEL),
279 fd,
280 zero()
281 ))
282 }
283}
284
285#[inline]
286pub(crate) unsafe fn epoll_wait(
287 epfd: BorrowedFd<'_>,
288 events: (*mut crate::event::epoll::Event, usize),
289 timeout: Option<&Timespec>,
290) -> io::Result<usize> {
291 // If we don't have Linux 5.1, and the timeout fits in an `i32`, use plain
292 // `epoll_pwait`.
293 //
294 // We do this unconditionally, rather than trying `epoll_pwait2` and
295 // falling back on `Errno::NOSYS`, because seccomp configurations will
296 // sometimes abort the process on syscalls they don't recognize.
297 #[cfg(not(feature = "linux_5_11"))]
298 {
299 // If we don't have a timeout, or if we can convert the timeout to an
300 // `i32`, the use `__NR_epoll_pwait`.
301 let old_timeout = if let Some(timeout) = timeout {
302 // Try to convert the timeout; if this is `Some`, we're ok!
303 timeout.as_c_int_millis()
304 } else {
305 // No timeout. Ok!
306 Some(-1)
307 };
308 if let Some(old_timeout) = old_timeout {
309 // Call `epoll_pwait`.
310 return ret_usize(syscall!(
311 __NR_epoll_pwait,
312 epfd,
313 events.0,
314 pass_usize(events.1),
315 c_int(old_timeout),
316 zero()
317 ));
318 }
319 }
320
321 // Call `epoll_pwait2`.
322 //
323 // We either have Linux 5.1 or the timeout didn't fit in an `i32`, so
324 // `__NR_epoll_pwait2` will either succeed or fail due to our having no
325 // other options.
326 ret_usize(syscall!(
327 __NR_epoll_pwait2,
328 epfd,
329 events.0,
330 pass_usize(events.1),
331 opt_ref(timeout),
332 zero()
333 ))
334}
335
336#[inline]
337pub(crate) fn eventfd(initval: u32, flags: EventfdFlags) -> io::Result<OwnedFd> {
338 unsafe { ret_owned_fd(syscall_readonly!(__NR_eventfd2, c_uint(initval), flags)) }
339}
340
341#[inline]
342pub(crate) fn pause() {
343 unsafe {
344 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
345 let error = ret_error(syscall_readonly!(
346 __NR_ppoll,
347 zero(),
348 zero(),
349 zero(),
350 zero()
351 ));
352
353 #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
354 let error = ret_error(syscall_readonly!(__NR_pause));
355
356 debug_assert_eq!(error, io::Errno::INTR);
357 }
358}