rustix/backend/linux_raw/
vdso_wrappers.rs

1//! Implement syscalls using the vDSO.
2//!
3//! <https://man7.org/linux/man-pages/man7/vdso.7.html>
4//!
5//! # Safety
6//!
7//! Similar to syscalls.rs, this file performs raw system calls, and sometimes
8//! passes them uninitialized memory buffers. This file also calls vDSO
9//! functions.
10#![allow(unsafe_code)]
11#![allow(clippy::missing_transmute_annotations)]
12
13#[cfg(target_arch = "x86")]
14use super::reg::{ArgReg, RetReg, SyscallNumber, A0, A1, A2, A3, A4, A5, R0};
15use super::vdso;
16#[cfg(target_arch = "x86")]
17use core::arch::global_asm;
18#[cfg(feature = "thread")]
19#[cfg(any(
20    target_arch = "x86_64",
21    target_arch = "x86",
22    target_arch = "riscv64",
23    target_arch = "powerpc",
24    target_arch = "powerpc64",
25    target_arch = "s390x",
26))]
27use core::ffi::c_void;
28use core::mem::transmute;
29use core::ptr::null_mut;
30use core::sync::atomic::AtomicPtr;
31use core::sync::atomic::Ordering::Relaxed;
32#[cfg(target_pointer_width = "32")]
33#[cfg(feature = "time")]
34use linux_raw_sys::general::timespec as __kernel_old_timespec;
35#[cfg(any(
36    all(
37        feature = "thread",
38        any(
39            target_arch = "x86_64",
40            target_arch = "x86",
41            target_arch = "riscv64",
42            target_arch = "powerpc",
43            target_arch = "powerpc64",
44            target_arch = "s390x"
45        )
46    ),
47    feature = "time"
48))]
49use {super::c, super::conv::ret, core::mem::MaybeUninit};
50#[cfg(feature = "time")]
51use {
52    super::conv::c_int,
53    crate::clockid::{ClockId, DynamicClockId},
54    crate::io,
55    crate::timespec::Timespec,
56    linux_raw_sys::general::__kernel_clockid_t,
57};
58
/// Reads the current time of the clock `id`.
///
/// The call goes through the function whose address is cached in
/// `CLOCK_GETTIME`; on first use the cache is populated via
/// `init_clock_gettime`.
///
/// # Panics
///
/// Panics if the underlying call reports a non-zero status.
#[cfg(feature = "time")]
#[inline]
#[must_use]
pub(crate) fn clock_gettime(id: ClockId) -> Timespec {
    let mut now = MaybeUninit::<Timespec>::uninit();

    // SAFETY: `CLOCK_GETTIME` contains either null or the address of a
    // function with an ABI like libc `clock_gettime`, and calling it has the
    // side effect of writing to the result buffer, and no others.
    unsafe {
        let cached: Option<ClockGettimeType> = transmute(CLOCK_GETTIME.load(Relaxed));
        let get_time = match cached {
            Some(resolved) => resolved,
            None => init_clock_gettime(),
        };

        let status = get_time(id as c::c_int, now.as_mut_ptr());

        // `ClockId` only lists clocks that are not expected to fail, but they
        // still can on uncommon kernel configs. This stays a real assertion
        // (not a `debug_assert_eq`) so that a failure can never lead to
        // reading the uninitialized buffer below.
        assert_eq!(status, 0);

        now.assume_init()
    }
}
81
/// Reads the current time of a clock that may fail or that is identified by
/// a file descriptor.
///
/// # Errors
///
/// Returns `io::Errno::INVAL` if the cached `clock_gettime` function reports
/// `-EINVAL`. For any other non-zero status the request is repeated through
/// `_clock_gettime_via_syscall`, and that call's error (if any) is returned.
#[cfg(feature = "time")]
#[inline]
pub(crate) fn clock_gettime_dynamic(id: DynamicClockId<'_>) -> io::Result<Timespec> {
    // Translate the high-level id into the raw kernel clock id.
    let raw_id = match id {
        DynamicClockId::Known(known) => known as __kernel_clockid_t,

        DynamicClockId::Dynamic(fd) => {
            // See `FD_TO_CLOCKID` in Linux's `clock_gettime` documentation.
            use crate::backend::fd::AsRawFd as _;
            const CLOCKFD: i32 = 3;
            ((!fd.as_raw_fd() << 3) | CLOCKFD) as __kernel_clockid_t
        }

        DynamicClockId::RealtimeAlarm => c::CLOCK_REALTIME_ALARM as __kernel_clockid_t,
        DynamicClockId::Tai => c::CLOCK_TAI as __kernel_clockid_t,
        DynamicClockId::Boottime => c::CLOCK_BOOTTIME as __kernel_clockid_t,
        DynamicClockId::BoottimeAlarm => c::CLOCK_BOOTTIME_ALARM as __kernel_clockid_t,
    };

    // SAFETY: `CLOCK_GETTIME` contains either null or the address of a
    // function with an ABI like libc `clock_gettime`, and calling it has the
    // side effect of writing to the result buffer, and no others.
    unsafe {
        // The callee reports errors as negated errno values.
        const NEG_EINVAL: c::c_int = -(c::EINVAL as c::c_int);

        let mut now = MaybeUninit::<Timespec>::uninit();
        let cached: Option<ClockGettimeType> = transmute(CLOCK_GETTIME.load(Relaxed));
        let get_time = match cached {
            Some(resolved) => resolved,
            None => init_clock_gettime(),
        };

        let status = get_time(raw_id, now.as_mut_ptr());
        if status == NEG_EINVAL {
            return Err(io::Errno::INVAL);
        }
        if status != 0 {
            // Any other failure: retry with the real system call and
            // propagate its result.
            _clock_gettime_via_syscall(raw_id, now.as_mut_ptr())?;
        }

        Ok(now.assume_init())
    }
}
119
/// Returns the number of the CPU reported by the cached `getcpu` function.
///
/// The call goes through the function whose address is cached in `GETCPU`;
/// on first use the cache is populated via `init_getcpu`.
///
/// # Panics
///
/// Panics if the underlying call reports a non-zero status.
#[cfg(feature = "thread")]
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
#[inline]
pub(crate) fn sched_getcpu() -> usize {
    // SAFETY: `GETCPU` contains either null or the address of a function with
    // an ABI like libc `getcpu`, and calling it has the side effect of writing
    // to the result buffers, and no others.
    unsafe {
        let mut cpu = MaybeUninit::<u32>::uninit();
        let callee = match transmute(GETCPU.load(Relaxed)) {
            Some(callee) => callee,
            None => init_getcpu(),
        };
        let r0 = callee(cpu.as_mut_ptr(), null_mut(), null_mut());
        // This is a real assertion rather than a `debug_assert_eq`, matching
        // `clock_gettime`: if the call ever does fail (for example when the
        // fallback system call is rejected), `cpu` was never written, and
        // reading it below would be undefined behavior in release builds.
        assert_eq!(r0, 0);
        cpu.assume_init() as usize
    }
}
145
/// System-call wrappers for 32-bit x86 that dispatch through the entry point
/// cached in `SYSCALL` instead of invoking the kernel directly.
///
/// Every wrapper resolves the entry point (initializing it on first use) and
/// hands it, together with the syscall number and arguments, to the matching
/// `asm::indirect_syscall*` routine.
#[cfg(target_arch = "x86")]
pub(super) mod x86_via_vdso {
    use super::{transmute, ArgReg, Relaxed, RetReg, SyscallNumber, A0, A1, A2, A3, A4, A5, R0};
    use crate::backend::arch::asm;

    /// Returns the cached syscall entry point, running `init_syscall` if the
    /// cache is still null.
    #[inline]
    fn entry_point() -> super::SyscallType {
        // SAFETY: `SYSCALL` holds either null or an address stored by
        // `minimal_init`/`init`; a null pointer maps to `None` and any other
        // value to `Some` of the function pointer.
        let cached: Option<super::SyscallType> =
            unsafe { transmute(super::SYSCALL.load(Relaxed)) };
        match cached {
            Some(entry) => entry,
            None => super::init_syscall(),
        }
    }

    /// Performs syscall `nr` with no arguments.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// directly; see `asm::indirect_syscall0`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall0(nr: SyscallNumber<'_>) -> RetReg<R0> {
        asm::indirect_syscall0(entry_point(), nr)
    }

    /// Performs syscall `nr` with one argument.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// with these arguments directly; see `asm::indirect_syscall1`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall1<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
    ) -> RetReg<R0> {
        asm::indirect_syscall1(entry_point(), nr, a0)
    }

    /// Performs syscall `nr` with one argument; never returns to the caller.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// with these arguments directly; see `asm::indirect_syscall1_noreturn`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall1_noreturn<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
    ) -> ! {
        asm::indirect_syscall1_noreturn(entry_point(), nr, a0)
    }

    /// Performs syscall `nr` with two arguments.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// with these arguments directly; see `asm::indirect_syscall2`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall2<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
    ) -> RetReg<R0> {
        asm::indirect_syscall2(entry_point(), nr, a0, a1)
    }

    /// Performs syscall `nr` with three arguments.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// with these arguments directly; see `asm::indirect_syscall3`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall3<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
    ) -> RetReg<R0> {
        asm::indirect_syscall3(entry_point(), nr, a0, a1, a2)
    }

    /// Performs syscall `nr` with four arguments.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// with these arguments directly; see `asm::indirect_syscall4`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall4<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
        a3: ArgReg<'a, A3>,
    ) -> RetReg<R0> {
        asm::indirect_syscall4(entry_point(), nr, a0, a1, a2, a3)
    }

    /// Performs syscall `nr` with five arguments.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// with these arguments directly; see `asm::indirect_syscall5`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall5<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
        a3: ArgReg<'a, A3>,
        a4: ArgReg<'a, A4>,
    ) -> RetReg<R0> {
        asm::indirect_syscall5(entry_point(), nr, a0, a1, a2, a3, a4)
    }

    /// Performs syscall `nr` with six arguments.
    ///
    /// # Safety
    ///
    /// Presumably the same requirements as making the raw system call `nr`
    /// with these arguments directly; see `asm::indirect_syscall6`.
    #[inline]
    pub(in crate::backend) unsafe fn syscall6<'a>(
        nr: SyscallNumber<'a>,
        a0: ArgReg<'a, A0>,
        a1: ArgReg<'a, A1>,
        a2: ArgReg<'a, A2>,
        a3: ArgReg<'a, A3>,
        a4: ArgReg<'a, A4>,
        a5: ArgReg<'a, A5>,
    ) -> RetReg<R0> {
        asm::indirect_syscall6(entry_point(), nr, a0, a1, a2, a3, a4, a5)
    }

    // With the indirect call, it isn't meaningful to do a separate
    // `_readonly` optimization, so the `_readonly` names are plain aliases.
    #[allow(unused_imports)]
    pub(in crate::backend) use {
        syscall0 as syscall0_readonly, syscall1 as syscall1_readonly,
        syscall2 as syscall2_readonly, syscall3 as syscall3_readonly,
        syscall4 as syscall4_readonly, syscall5 as syscall5_readonly,
        syscall6 as syscall6_readonly,
    };
}
269
/// Signature of the function stored in `CLOCK_GETTIME`: it takes a raw clock
/// id and a result buffer, and returns a status code.
#[cfg(feature = "time")]
type ClockGettimeType = unsafe extern "C" fn(clockid: c::c_int, res: *mut Timespec) -> c::c_int;
272
/// Signature of the function stored in `GETCPU`: it takes the three `getcpu`
/// pointer arguments and returns a status code.
#[cfg(feature = "thread")]
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
type GetcpuType = unsafe extern "C" fn(
    cpu: *mut u32,
    node: *mut u32,
    unused: *mut c_void,
) -> c::c_int;
283
/// Type of the syscall entry point stored in `SYSCALL`.
///
/// The underlying syscall functions are only called from asm, using the
/// special syscall calling convention to pass arguments and return values,
/// which the signature here doesn't reflect. Never call a value of this type
/// as an ordinary Rust/C function.
#[cfg(target_arch = "x86")]
pub(super) type SyscallType = unsafe extern "C" fn();
289
/// Runs `init` and returns the function now stored in `CLOCK_GETTIME`.
///
/// Marked `#[cold]` because it only runs when a caller found the cache still
/// null.
#[cfg(feature = "time")]
#[cold]
fn init_clock_gettime() -> ClockGettimeType {
    init();

    let resolved = CLOCK_GETTIME.load(Relaxed);

    // SAFETY: `init` has just populated the static storage (at minimum with
    // the system-call fallback installed by `minimal_init`), so the loaded
    // address is that of a function with the `ClockGettimeType` signature.
    unsafe { transmute::<*mut Function, ClockGettimeType>(resolved) }
}
299
/// Runs `init` and returns the function now stored in `GETCPU`.
///
/// Marked `#[cold]` because it only runs when a caller found the cache still
/// null.
#[cfg(feature = "thread")]
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
#[cold]
fn init_getcpu() -> GetcpuType {
    init();

    let resolved = GETCPU.load(Relaxed);

    // SAFETY: `init` has just populated the static storage (at minimum with
    // the system-call fallback installed by `minimal_init`), so the loaded
    // address is that of a function with the `GetcpuType` signature.
    unsafe { transmute::<*mut Function, GetcpuType>(resolved) }
}
317
/// Runs `init` and returns the entry point now stored in `SYSCALL`.
///
/// Marked `#[cold]` because it only runs when a caller found the cache still
/// null.
#[cfg(target_arch = "x86")]
#[cold]
fn init_syscall() -> SyscallType {
    init();

    let resolved = SYSCALL.load(Relaxed);

    // SAFETY: `init` has just populated the static storage (at minimum with
    // the `int 0x80` fallback installed by `minimal_init`), so the loaded
    // address is that of a syscall entry point.
    unsafe { transmute::<*mut Function, SyscallType>(resolved) }
}
327
/// Opaque stand-in for "some function".
///
/// `AtomicPtr` can't hold a `fn` pointer, so the statics below store a `*`
/// pointer to this placeholder type, and the value is cast (transmuted) to
/// the real function-pointer type where it is used. The type is never
/// instantiated.
struct Function;
/// Cached address of the `clock_gettime` implementation to call.
///
/// Null until first use; `minimal_init` installs the system-call fallback and
/// `init` replaces it with the vDSO symbol when one is found.
#[cfg(feature = "time")]
static CLOCK_GETTIME: AtomicPtr<Function> = AtomicPtr::new(null_mut::<Function>());
/// Cached address of the `getcpu` implementation to call.
///
/// Null until first use; `minimal_init` installs the system-call fallback and
/// `init` replaces it with the vDSO symbol when one is found.
#[cfg(feature = "thread")]
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
static GETCPU: AtomicPtr<Function> = AtomicPtr::new(null_mut::<Function>());
/// Cached address of the x86 syscall entry point.
///
/// Null until first use; `minimal_init` installs `rustix_x86_int_0x80` and
/// `init` replaces it with the vDSO's `__kernel_vsyscall` when a vDSO is
/// available.
#[cfg(target_arch = "x86")]
static SYSCALL: AtomicPtr<Function> = AtomicPtr::new(null_mut::<Function>());
345
/// Fallback for `CLOCK_GETTIME` that uses the real system call.
///
/// Has the `ClockGettimeType` signature: returns `0` on success, or the
/// negated raw OS error code on failure.
///
/// # Safety
///
/// `res` must be valid for writing a `Timespec`.
#[cfg(feature = "time")]
#[must_use]
unsafe extern "C" fn clock_gettime_via_syscall(clockid: c::c_int, res: *mut Timespec) -> c::c_int {
    if let Err(err) = _clock_gettime_via_syscall(clockid, res) {
        err.raw_os_error().wrapping_neg()
    } else {
        0
    }
}
354
/// Reads clock `clockid` into `res` with the `clock_gettime64` system call,
/// falling back to the old `clock_gettime` call when the kernel reports
/// `NOSYS`.
///
/// # Safety
///
/// `res` must be valid for writing a `Timespec`.
#[cfg(feature = "time")]
#[cfg(target_pointer_width = "32")]
unsafe fn _clock_gettime_via_syscall(clockid: c::c_int, res: *mut Timespec) -> io::Result<()> {
    let outcome = ret(syscall!(__NR_clock_gettime64, c_int(clockid), res));

    if matches!(outcome, Err(io::Errno::NOSYS)) {
        // The 64-bit-time syscall is unavailable; use the old one instead.
        _clock_gettime_via_syscall_old(clockid, res)
    } else {
        outcome
    }
}
364
/// Reads clock `clockid` with the old `clock_gettime` system call and
/// converts the old-layout result into a `Timespec` stored in `*res`.
///
/// On error, `*res` is left untouched and the error is returned.
///
/// # Safety
///
/// `res` must be valid for writing a `Timespec`.
#[cfg(feature = "time")]
#[cfg(target_pointer_width = "32")]
unsafe fn _clock_gettime_via_syscall_old(clockid: c::c_int, res: *mut Timespec) -> io::Result<()> {
    // Ordinarily `rustix` doesn't like to emulate system calls, but in the
    // case of time APIs, it's specific to Linux, specific to 32-bit
    // architectures *and* specific to old kernel versions, and it's not that
    // hard to fix up here, so that no other code needs to worry about this.
    let mut legacy = MaybeUninit::<__kernel_old_timespec>::uninit();
    ret(syscall!(__NR_clock_gettime, c_int(clockid), &mut legacy))?;

    // The call succeeded, so the old-layout buffer has been filled in.
    let legacy = legacy.assume_init();
    *res = Timespec {
        tv_sec: legacy.tv_sec.into(),
        tv_nsec: legacy.tv_nsec.into(),
    };
    Ok(())
}
386
/// Reads clock `clockid` into `res` with the `clock_gettime` system call.
///
/// # Safety
///
/// `res` must be valid for writing a `Timespec`.
#[cfg(feature = "time")]
#[cfg(target_pointer_width = "64")]
unsafe fn _clock_gettime_via_syscall(clockid: c::c_int, res: *mut Timespec) -> io::Result<()> {
    let r0 = syscall!(__NR_clock_gettime, c_int(clockid), res);
    ret(r0)
}
392
/// Fallback for `GETCPU` that uses the real `getcpu` system call.
///
/// Has the `GetcpuType` signature: returns `0` on success, or the negated raw
/// OS error code on failure.
///
/// # Safety
///
/// The pointer arguments are passed to the kernel unchanged, so they must
/// satisfy whatever the `getcpu` system call requires of them.
#[cfg(feature = "thread")]
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
unsafe extern "C" fn getcpu_via_syscall(
    cpu: *mut u32,
    node: *mut u32,
    unused: *mut c_void,
) -> c::c_int {
    let outcome = ret(syscall!(__NR_getcpu, cpu, node, unused));
    if let Err(err) = outcome {
        err.raw_os_error().wrapping_neg()
    } else {
        0
    }
}
412
#[cfg(target_arch = "x86")]
extern "C" {
    /// A symbol pointing to an x86 `int 0x80` instruction. This “function”
    /// is only called from assembly, and only with the x86 syscall calling
    /// convention, so its signature here is not its true signature.
    ///
    /// The symbol itself is defined by the `global_asm!` block in this file,
    /// and `minimal_init` stores its address in `SYSCALL` as the fallback
    /// entry point.
    ///
    /// This extern block and the `global_asm!` below can be replaced with
    /// `#[naked]` if it's stabilized.
    fn rustix_x86_int_0x80();
}
423
// Defines `rustix_x86_int_0x80`: an `int 0x80` instruction followed by `ret`,
// placed in its own text section.
//
// This uses `.weak` so that it doesn't conflict if multiple versions of rustix
// are linked together in non-LTO builds, and `.ifndef` so that it doesn't
// conflict if multiple versions of rustix are linked together in LTO builds.
#[cfg(target_arch = "x86")]
global_asm!(
    r#"
    .ifndef     rustix_x86_int_0x80
    .section    .text.rustix_x86_int_0x80,"ax",@progbits
    .p2align    4
    .weak       rustix_x86_int_0x80
    .hidden     rustix_x86_int_0x80
    .type       rustix_x86_int_0x80, @function
rustix_x86_int_0x80:
    .cfi_startproc
    int    0x80
    ret
    .cfi_endproc
    .size rustix_x86_int_0x80, .-rustix_x86_int_0x80
    .endif
"#
);
445
/// Installs the non-vDSO fallback into every function-pointer static that is
/// still null.
///
/// This runs before the vDSO is read so that any system calls made while
/// reading it already work. Each static is only written if it is currently
/// null, so if the memory happens to already be initialized this is redundant,
/// but not harmful.
fn minimal_init() {
    #[cfg(feature = "time")]
    {
        let fallback = clock_gettime_via_syscall as *mut Function;
        // A failed exchange just means a value is already present.
        let _ = CLOCK_GETTIME.compare_exchange(null_mut(), fallback, Relaxed, Relaxed);
    }

    #[cfg(feature = "thread")]
    #[cfg(any(
        target_arch = "x86_64",
        target_arch = "x86",
        target_arch = "riscv64",
        target_arch = "powerpc",
        target_arch = "powerpc64",
        target_arch = "s390x",
    ))]
    {
        let fallback = getcpu_via_syscall as *mut Function;
        // A failed exchange just means a value is already present.
        let _ = GETCPU.compare_exchange(null_mut(), fallback, Relaxed, Relaxed);
    }

    #[cfg(target_arch = "x86")]
    {
        let fallback = rustix_x86_int_0x80 as *mut Function;
        // A failed exchange just means a value is already present.
        let _ = SYSCALL.compare_exchange(null_mut(), fallback, Relaxed, Relaxed);
    }
}
495
496fn init() {
497    minimal_init();
498
499    if let Some(vdso) = vdso::Vdso::new() {
500        #[cfg(feature = "time")]
501        {
502            // Look up the platform-specific `clock_gettime` symbol as
503            // documented [here], except on 32-bit platforms where we look up
504            // the `64`-suffixed variant and fail if we don't find it.
505            //
506            // [here]: https://man7.org/linux/man-pages/man7/vdso.7.html
507            #[cfg(target_arch = "x86_64")]
508            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
509            #[cfg(target_arch = "arm")]
510            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
511            #[cfg(target_arch = "aarch64")]
512            let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
513            #[cfg(target_arch = "x86")]
514            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
515            #[cfg(target_arch = "riscv64")]
516            let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
517            #[cfg(target_arch = "powerpc")]
518            let ptr = vdso.sym(cstr!("LINUX_5.11"), cstr!("__kernel_clock_gettime64"));
519            #[cfg(target_arch = "powerpc64")]
520            let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
521            #[cfg(target_arch = "s390x")]
522            let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_gettime"));
523            #[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
524            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
525            #[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
526            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
527
528            // On all 64-bit platforms, the 64-bit `clock_gettime` symbols are
529            // always available.
530            #[cfg(target_pointer_width = "64")]
531            let ok = true;
532
533            // On some 32-bit platforms, the 64-bit `clock_gettime` symbols are
534            // not available on older kernel versions.
535            #[cfg(any(
536                target_arch = "arm",
537                target_arch = "mips",
538                target_arch = "mips32r6",
539                target_arch = "powerpc",
540                target_arch = "x86"
541            ))]
542            let ok = !ptr.is_null();
543
544            if ok {
545                assert!(!ptr.is_null());
546
547                // Store the computed function addresses in static storage so
548                // that we don't need to compute them again (but if we do, it
549                // doesn't hurt anything).
550                CLOCK_GETTIME.store(ptr.cast(), Relaxed);
551            }
552        }
553
554        #[cfg(feature = "thread")]
555        #[cfg(any(
556            target_arch = "x86_64",
557            target_arch = "x86",
558            target_arch = "riscv64",
559            target_arch = "powerpc",
560            target_arch = "powerpc64",
561            target_arch = "s390x",
562        ))]
563        {
564            // Look up the platform-specific `getcpu` symbol as documented
565            // [here].
566            //
567            // [here]: https://man7.org/linux/man-pages/man7/vdso.7.html
568            #[cfg(target_arch = "x86_64")]
569            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_getcpu"));
570            #[cfg(target_arch = "x86")]
571            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_getcpu"));
572            #[cfg(target_arch = "riscv64")]
573            let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_getcpu"));
574            #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
575            let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_getcpu"));
576            #[cfg(target_arch = "s390x")]
577            let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_getcpu"));
578
579            #[cfg(any(
580                target_arch = "x86_64",
581                target_arch = "riscv64",
582                target_arch = "powerpc",
583                target_arch = "powerpc64",
584                target_arch = "s390x"
585            ))]
586            let ok = true;
587
588            // On 32-bit x86, the symbol doesn't appear present sometimes.
589            #[cfg(target_arch = "x86")]
590            let ok = !ptr.is_null();
591
592            #[cfg(any(
593                target_arch = "aarch64",
594                target_arch = "arm",
595                target_arch = "mips",
596                target_arch = "mips32r6",
597                target_arch = "mips64",
598                target_arch = "mips64r6",
599            ))]
600            let ok = false;
601
602            if ok {
603                assert!(!ptr.is_null());
604
605                // Store the computed function addresses in static storage so
606                // that we don't need to compute them again (but if we do, it
607                // doesn't hurt anything).
608                GETCPU.store(ptr.cast(), Relaxed);
609            }
610        }
611
612        // On x86, also look up the vsyscall entry point.
613        #[cfg(target_arch = "x86")]
614        {
615            let ptr = vdso.sym(cstr!("LINUX_2.5"), cstr!("__kernel_vsyscall"));
616            assert!(!ptr.is_null());
617
618            // As above, store the computed function addresses in
619            // static storage.
620            SYSCALL.store(ptr.cast(), Relaxed);
621        }
622    }
623}