// rustix/backend/linux_raw/vdso.rs

//! Parse the Linux vDSO.
//!
//! The following code is transliterated from
//! tools/testing/selftests/vDSO/parse_vdso.c in Linux 6.13, which is licensed
//! with Creative Commons Zero License, version 1.0,
//! available at <https://creativecommons.org/publicdomain/zero/1.0/legalcode>.
//!
//! It also incorporates the patch at:
//! <https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/tools/testing/selftests/vDSO?h=next&id=01587d80b04f29747b6fd6d766c3bfa632f14eb0>,
//! with changes to fix the pointer arithmetic on s390x.
//!
//! # Safety
//!
//! Parsing the vDSO involves a lot of raw pointer manipulation. This
//! implementation follows Linux's reference implementation, and adds several
//! additional safety checks.
17#![allow(unsafe_code)]
18
19use super::c;
20use crate::ffi::CStr;
21use crate::utils::check_raw_pointer;
22use core::ffi::c_void;
23use core::mem::size_of;
24use core::ptr::{null, null_mut};
25use linux_raw_sys::elf::*;
26
/// Entry type of the SysV `DT_HASH` table, which is 64-bit on s390x. Note
/// that `DT_GNU_HASH` tables use 32-bit entries even on s390x.
#[cfg(target_arch = "s390x")]
type ElfHashEntry = u64;
/// Entry type of the SysV `DT_HASH` table on every architecture other than
/// s390x.
#[cfg(not(target_arch = "s390x"))]
type ElfHashEntry = u32;
31
/// Parsed view of the vDSO image found through `AT_SYSINFO_EHDR`.
///
/// All pointers refer into the vDSO mapping and are filled in by
/// `init_from_sysinfo_ehdr`; pointers for tables that are absent stay null.
pub(super) struct Vdso {
    // Load information
    /// Address of the vDSO's ELF header; every offset is applied to this base.
    load_addr: *const Elf_Ehdr,
    /// The end of the first `PT_LOAD` segment.
    load_end: *const c_void,
    /// `p_offset - p_vaddr` of that `PT_LOAD` segment (wrapping). It is added
    /// to ELF addresses to turn them into offsets from `load_addr`.
    pv_offset: usize,

    // Symbol table
    /// The `DT_SYMTAB` symbol table.
    symtab: *const Elf_Sym,
    /// The `DT_STRTAB` string table that symbol and version names index into.
    symstrings: *const u8,
    /// The `DT_GNU_HASH` table header, or null if the vDSO has none.
    gnu_hash: *const u32,
    /// Bucket array of the hash table in use. For `DT_GNU_HASH` it is read
    /// through a `*const u32` cast, because those entries are always 32-bit.
    bucket: *const ElfHashEntry,
    /// Chain array of the `DT_HASH` table; left null when `DT_GNU_HASH` is
    /// used.
    chain: *const ElfHashEntry,
    /// Number of buckets in the hash table in use.
    nbucket: ElfHashEntry,
    // The `nchain` word of the `DT_HASH` header is not needed, so it is not
    // stored.
    //nchain: ElfHashEntry,

    // Version table
    /// The `DT_VERSYM` table; null if absent or if there is no `DT_VERDEF`.
    versym: *const u16,
    /// The `DT_VERDEF` version definition table, or null if absent.
    verdef: *const Elf_Verdef,
}
51
/// The classic SysV ELF hash, as given in the ELF specification.
///
/// Used for `DT_HASH` bucket selection and for comparing against the
/// `vd_hash` field of version definitions.
fn elf_hash(name: &CStr) -> u32 {
    name.to_bytes().iter().fold(0_u32, |acc, &byte| {
        let shifted = (acc << 4).wrapping_add(u32::from(byte));
        let top = shifted & 0xf000_0000;
        // `top >> 24` is zero whenever `top` is zero, so the fold-down of the
        // top nibble can be applied unconditionally before clearing it.
        (shifted ^ (top >> 24)) & !top
    })
}
67
/// The hash function used by `DT_GNU_HASH` tables: start from 5381 and, for
/// each byte, multiply the running hash by 33 and add the byte, all modulo
/// 2^32.
fn gnu_hash(name: &CStr) -> u32 {
    name.to_bytes()
        .iter()
        .fold(5381_u32, |acc, &byte| {
            acc.wrapping_mul(33).wrapping_add(u32::from(byte))
        })
}
77
78/// Create a `Vdso` value by parsing the vDSO at the `sysinfo_ehdr` address.
79fn init_from_sysinfo_ehdr() -> Option<Vdso> {
80    // SAFETY: The auxv initialization code does extensive checks to ensure
81    // that the value we get really is an `AT_SYSINFO_EHDR` value from the
82    // kernel.
83    unsafe {
84        let hdr = super::param::auxv::sysinfo_ehdr();
85
86        // If the platform doesn't provide a `AT_SYSINFO_EHDR`, we can't locate
87        // the vDSO.
88        if hdr.is_null() {
89            return None;
90        }
91
92        let mut vdso = Vdso {
93            load_addr: hdr,
94            load_end: hdr.cast(),
95            pv_offset: 0,
96            symtab: null(),
97            symstrings: null(),
98            gnu_hash: null(),
99            bucket: null(),
100            chain: null(),
101            nbucket: 0,
102            //nchain: 0,
103            versym: null(),
104            verdef: null(),
105        };
106
107        let hdr = &*hdr;
108        let pt = check_raw_pointer::<Elf_Phdr>(vdso.base_plus(hdr.e_phoff)? as *mut _)?.as_ptr();
109        let mut dyn_: *const Elf_Dyn = null();
110        let mut num_dyn = 0;
111
112        // We need two things from the segment table: the load offset
113        // and the dynamic table.
114        let mut found_vaddr = false;
115        for i in 0..hdr.e_phnum {
116            let phdr = &*pt.add(i as usize);
117            if phdr.p_type == PT_LOAD && !found_vaddr {
118                // The segment should be readable and executable, because it
119                // contains the symbol table and the function bodies.
120                if phdr.p_flags & (PF_R | PF_X) != (PF_R | PF_X) {
121                    return None;
122                }
123                found_vaddr = true;
124                vdso.load_end = vdso.base_plus(phdr.p_offset.checked_add(phdr.p_memsz)?)?;
125                vdso.pv_offset = phdr.p_offset.wrapping_sub(phdr.p_vaddr);
126            } else if phdr.p_type == PT_DYNAMIC {
127                // If `p_offset` is zero, it's more likely that we're looking
128                // at memory that has been zeroed than that the kernel has
129                // somehow aliased the `Ehdr` and the `Elf_Dyn` array.
130                if phdr.p_offset < size_of::<Elf_Ehdr>() {
131                    return None;
132                }
133
134                dyn_ = check_raw_pointer::<Elf_Dyn>(vdso.base_plus(phdr.p_offset)? as *mut _)?
135                    .as_ptr();
136                num_dyn = phdr.p_memsz / size_of::<Elf_Dyn>();
137            } else if phdr.p_type == PT_INTERP || phdr.p_type == PT_GNU_RELRO {
138                // Don't trust any ELF image that has an “interpreter” or
139                // that uses RELRO, which is likely to be a user ELF image
140                // rather and not the kernel vDSO.
141                return None;
142            }
143        }
144
145        if !found_vaddr || dyn_.is_null() {
146            return None; // Failed
147        }
148
149        // Fish out the useful bits of the dynamic table.
150        let mut hash: *const ElfHashEntry = null();
151        vdso.symstrings = null();
152        vdso.symtab = null();
153        vdso.versym = null();
154        vdso.verdef = null();
155        let mut i = 0;
156        loop {
157            if i == num_dyn {
158                return None;
159            }
160            let d = &*dyn_.add(i);
161            match d.d_tag {
162                DT_STRTAB => {
163                    vdso.symstrings =
164                        check_raw_pointer::<u8>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
165                            .as_ptr();
166                }
167                DT_SYMTAB => {
168                    vdso.symtab =
169                        check_raw_pointer::<Elf_Sym>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
170                            .as_ptr();
171                }
172                DT_HASH => {
173                    hash = check_raw_pointer::<ElfHashEntry>(
174                        vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _
175                    )?
176                    .as_ptr();
177                }
178                DT_GNU_HASH => {
179                    vdso.gnu_hash =
180                        check_raw_pointer::<u32>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
181                            .as_ptr()
182                }
183                DT_VERSYM => {
184                    vdso.versym =
185                        check_raw_pointer::<u16>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
186                            .as_ptr();
187                }
188                DT_VERDEF => {
189                    vdso.verdef = check_raw_pointer::<Elf_Verdef>(
190                        vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _,
191                    )?
192                    .as_ptr();
193                }
194                DT_SYMENT => {
195                    if d.d_un.d_ptr != size_of::<Elf_Sym>() {
196                        return None; // Failed
197                    }
198                }
199                DT_NULL => break,
200                _ => {}
201            }
202            i = i.checked_add(1)?;
203        }
204        // `check_raw_pointer` will have checked these pointers for null,
205        // however they could still be null if the expected dynamic table
206        // entries are absent.
207        if vdso.symstrings.is_null()
208            || vdso.symtab.is_null()
209            || (hash.is_null() && vdso.gnu_hash.is_null())
210        {
211            return None; // Failed
212        }
213
214        if vdso.verdef.is_null() {
215            vdso.versym = null();
216        }
217
218        // Parse the hash table header.
219        if !vdso.gnu_hash.is_null() {
220            vdso.nbucket = ElfHashEntry::from(*vdso.gnu_hash);
221            // The bucket array is located after the header (4 uint32) and the
222            // bloom filter (size_t array of gnu_hash[2] elements).
223            vdso.bucket = vdso
224                .gnu_hash
225                .add(4)
226                .add(size_of::<c::size_t>() / 4 * *vdso.gnu_hash.add(2) as usize)
227                .cast();
228        } else {
229            vdso.nbucket = *hash.add(0);
230            //vdso.nchain = *hash.add(1);
231            vdso.bucket = hash.add(2);
232            vdso.chain = hash.add(vdso.nbucket as usize + 2);
233        }
234
235        // That's all we need.
236        Some(vdso)
237    }
238}
239
240impl Vdso {
241    /// Parse the vDSO.
242    ///
243    /// Returns `None` if the vDSO can't be located or if it doesn't conform to
244    /// our expectations.
245    #[inline]
246    pub(super) fn new() -> Option<Self> {
247        init_from_sysinfo_ehdr()
248    }
249
250    /// Check the version for a symbol.
251    ///
252    /// # Safety
253    ///
254    /// The raw pointers inside `self` must be valid.
255    unsafe fn match_version(&self, mut ver: u16, name: &CStr, hash: u32) -> bool {
256        // This is a helper function to check if the version indexed by
257        // ver matches name (which hashes to hash).
258        //
259        // The version definition table is a mess, and I don't know how
260        // to do this in better than linear time without allocating memory
261        // to build an index. I also don't know why the table has
262        // variable size entries in the first place.
263        //
264        // For added fun, I can't find a comprehensible specification of how
265        // to parse all the weird flags in the table.
266        //
267        // So I just parse the whole table every time.
268
269        // First step: find the version definition
270        ver &= 0x7fff; // Apparently bit 15 means "hidden"
271        let mut def = self.verdef;
272        loop {
273            if (*def).vd_version != VER_DEF_CURRENT {
274                return false; // Failed
275            }
276
277            if ((*def).vd_flags & VER_FLG_BASE) == 0 && ((*def).vd_ndx & 0x7fff) == ver {
278                break;
279            }
280
281            if (*def).vd_next == 0 {
282                return false; // No definition.
283            }
284
285            def = def
286                .cast::<u8>()
287                .add((*def).vd_next as usize)
288                .cast::<Elf_Verdef>();
289        }
290
291        // Now figure out whether it matches.
292        let aux = &*(def.cast::<u8>())
293            .add((*def).vd_aux as usize)
294            .cast::<Elf_Verdaux>();
295        (*def).vd_hash == hash
296            && (name == CStr::from_ptr(self.symstrings.add(aux.vda_name as usize).cast()))
297    }
298
299    /// Check to see if the symbol is the one we're looking for.
300    ///
301    /// # Safety
302    ///
303    /// The raw pointers inside `self` must be valid.
304    unsafe fn check_sym(
305        &self,
306        sym: &Elf_Sym,
307        i: ElfHashEntry,
308        name: &CStr,
309        version: &CStr,
310        ver_hash: u32,
311    ) -> bool {
312        // Check for a defined global or weak function w/ right name.
313        //
314        // Accept `STT_NOTYPE` in addition to `STT_FUNC` for the symbol
315        // type, for compatibility with some versions of Linux on
316        // PowerPC64. See [this commit] in Linux for more background.
317        //
318        // [this commit]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/testing/selftests/vDSO/parse_vdso.c?id=0161bd38c24312853ed5ae9a425a1c41c4ac674a
319        if ELF_ST_TYPE(sym.st_info) != STT_FUNC && ELF_ST_TYPE(sym.st_info) != STT_NOTYPE {
320            return false;
321        }
322        if ELF_ST_BIND(sym.st_info) != STB_GLOBAL && ELF_ST_BIND(sym.st_info) != STB_WEAK {
323            return false;
324        }
325        if name != CStr::from_ptr(self.symstrings.add(sym.st_name as usize).cast()) {
326            return false;
327        }
328
329        // Check symbol version.
330        if !self.versym.is_null()
331            && !self.match_version(*self.versym.add(i as usize), version, ver_hash)
332        {
333            return false;
334        }
335
336        true
337    }
338
339    /// Look up a symbol in the vDSO.
340    pub(super) fn sym(&self, version: &CStr, name: &CStr) -> *mut c::c_void {
341        let ver_hash = elf_hash(version);
342
343        // SAFETY: The pointers in `self` must be valid.
344        unsafe {
345            if !self.gnu_hash.is_null() {
346                let mut h1: u32 = gnu_hash(name);
347
348                // Changes to fix the pointer arithmetic on s390x: cast
349                // `self.bucket` to `*const u32` here, because even though
350                // s390x's `ElfHashEntry` is 64-bit for `DT_HASH` tables,
351                // it uses 32-bit entries for `DT_GNU_HASH` tables.
352                let mut i = *self
353                    .bucket
354                    .cast::<u32>()
355                    .add((ElfHashEntry::from(h1) % self.nbucket) as usize);
356                if i == 0 {
357                    return null_mut();
358                }
359                h1 |= 1;
360                // Changes to fix the pointer arithmetic on s390x: As above,
361                // cast `self.bucket` to `*const u32`.
362                let mut hashval = self
363                    .bucket
364                    .cast::<u32>()
365                    .add(self.nbucket as usize)
366                    .add((i - *self.gnu_hash.add(1)) as usize);
367                loop {
368                    let sym: &Elf_Sym = &*self.symtab.add(i as usize);
369                    let h2 = *hashval;
370                    hashval = hashval.add(1);
371                    if h1 == (h2 | 1)
372                        && self.check_sym(sym, ElfHashEntry::from(i), name, version, ver_hash)
373                    {
374                        let sum = self.addr_from_elf(sym.st_value).unwrap();
375                        assert!(
376                            sum as usize >= self.load_addr as usize
377                                && sum as usize <= self.load_end as usize
378                        );
379                        return sum as *mut c::c_void;
380                    }
381                    if (h2 & 1) != 0 {
382                        break;
383                    }
384                    i += 1;
385                }
386            } else {
387                let mut i = *self
388                    .bucket
389                    .add((ElfHashEntry::from(elf_hash(name)) % self.nbucket) as usize);
390                while i != 0 {
391                    let sym: &Elf_Sym = &*self.symtab.add(i as usize);
392                    if sym.st_shndx != SHN_UNDEF && self.check_sym(sym, i, name, version, ver_hash)
393                    {
394                        let sum = self.addr_from_elf(sym.st_value).unwrap();
395                        assert!(
396                            sum as usize >= self.load_addr as usize
397                                && sum as usize <= self.load_end as usize
398                        );
399                        return sum as *mut c::c_void;
400                    }
401                    i = *self.chain.add(i as usize);
402                }
403            }
404        }
405
406        null_mut()
407    }
408
409    /// Add the given address to the vDSO base address.
410    unsafe fn base_plus(&self, offset: usize) -> Option<*const c_void> {
411        // Check for overflow.
412        let _ = (self.load_addr as usize).checked_add(offset)?;
413        // Add the offset to the base.
414        Some(self.load_addr.cast::<u8>().add(offset).cast())
415    }
416
417    /// Translate an ELF-address-space address into a usable virtual address.
418    unsafe fn addr_from_elf(&self, elf_addr: usize) -> Option<*const c_void> {
419        self.base_plus(elf_addr.wrapping_add(self.pv_offset))
420    }
421}
422
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse the real vDSO of the running kernel and look up the time-related
    /// (and, where applicable, `getcpu`) entry points for the current
    /// architecture.
    // Disable on MIPS since QEMU on MIPS doesn't provide a vDSO. The r6
    // variants have their own `target_arch` values, so list them too.
    #[cfg(linux_raw)]
    #[test]
    #[cfg_attr(
        any(
            target_arch = "mips",
            target_arch = "mips32r6",
            target_arch = "mips64",
            target_arch = "mips64r6"
        ),
        ignore
    )]
    // Some architectures look up symbols without asserting on the result.
    #[allow(unused_variables)]
    fn test_vdso() {
        let vdso = Vdso::new().unwrap();
        assert!(!vdso.symtab.is_null());
        assert!(!vdso.symstrings.is_null());

        // `clock_gettime`
        {
            #[cfg(target_arch = "x86_64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
            #[cfg(target_arch = "arm")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
            #[cfg(target_arch = "aarch64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
            #[cfg(target_arch = "x86")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
            #[cfg(target_arch = "riscv64")]
            let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
            #[cfg(target_arch = "powerpc")]
            let _ptr = vdso.sym(cstr!("LINUX_5.11"), cstr!("__kernel_clock_gettime64"));
            #[cfg(target_arch = "powerpc64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
            #[cfg(target_arch = "s390x")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_gettime"));
            #[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
            #[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));

            // On PowerPC, "__kernel_clock_gettime64" isn't available in
            // Linux < 5.11.
            // On x86, "__vdso_clock_gettime64" isn't available in
            // Linux < 5.3.
            #[cfg(not(any(target_arch = "powerpc", target_arch = "x86")))]
            assert!(!ptr.is_null());
        }

        // `clock_getres`
        {
            #[cfg(target_arch = "x86_64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
            #[cfg(target_arch = "arm")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
            #[cfg(target_arch = "aarch64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_getres"));
            #[cfg(target_arch = "x86")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
            #[cfg(target_arch = "riscv64")]
            let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_getres"));
            #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_getres"));
            #[cfg(target_arch = "s390x")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_getres"));
            #[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
            #[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));

            // Some versions of Linux appear to lack "__vdso_clock_getres" on x86.
            #[cfg(not(target_arch = "x86"))]
            assert!(!ptr.is_null());
        }

        // `gettimeofday`
        {
            #[cfg(target_arch = "x86_64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
            #[cfg(target_arch = "arm")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
            #[cfg(target_arch = "aarch64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_gettimeofday"));
            #[cfg(target_arch = "x86")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
            #[cfg(target_arch = "riscv64")]
            let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_gettimeofday"));
            #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_gettimeofday"));
            #[cfg(target_arch = "s390x")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_gettimeofday"));
            #[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
            #[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));

            // Some versions of Linux appear to lack "__vdso_gettimeofday" on x86.
            #[cfg(not(target_arch = "x86"))]
            assert!(!ptr.is_null());
        }

        // `getcpu`, only looked up on the architectures listed here.
        #[cfg(any(
            target_arch = "x86_64",
            target_arch = "x86",
            target_arch = "riscv64",
            target_arch = "powerpc",
            target_arch = "powerpc64",
            target_arch = "s390x",
        ))]
        {
            #[cfg(target_arch = "x86_64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_getcpu"));
            #[cfg(target_arch = "x86")]
            let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_getcpu"));
            #[cfg(target_arch = "riscv64")]
            let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_getcpu"));
            #[cfg(target_arch = "powerpc")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_getcpu"));
            #[cfg(target_arch = "powerpc64")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_getcpu"));
            #[cfg(target_arch = "s390x")]
            let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_getcpu"));

            // On PowerPC, "__kernel_getcpu" isn't available in 32-bit kernels.
            // Some versions of Linux appear to lack "__vdso_getcpu" on x86.
            #[cfg(not(any(target_arch = "powerpc", target_arch = "x86")))]
            assert!(!ptr.is_null());
        }
    }
}