swash/internal/
parse.rs

1//! Parsing primitives.
2
3use core::ops::Range;
4
/// Cheap, copyable view over a borrowed byte slice, used for bounds-checked
/// reads of big endian data at arbitrary offsets.
///
/// The wrapped slice is exposed directly as the public field `0`.
#[derive(Clone, Copy)]
pub struct Bytes<'a>(pub &'a [u8]);
8
9impl<'a> Bytes<'a> {
10    /// Creates a new bytes instance for the specified buffer.
11    pub fn new(data: &'a [u8]) -> Self {
12        Self(data)
13    }
14
15    /// Creates a new bytes instance for the specified buffer and offset.
16    pub fn with_offset(data: &'a [u8], offset: usize) -> Option<Self> {
17        Some(Self(data.get(offset..)?))
18    }
19
20    /// Creates a new bytes instance with the specified range of data.
21    pub fn with_range(data: &'a [u8], range: Range<usize>) -> Option<Self> {
22        Some(Self(data.get(range)?))
23    }
24
25    /// Returns the underlying data.
26    pub fn data(&self) -> &'a [u8] {
27        self.0
28    }
29
30    /// Returns the length of the underlying data.
31    pub fn len(&self) -> usize {
32        self.0.len()
33    }
34
35    /// Returns true if the specified range is within the bounds of the
36    /// underlying data.
37    pub fn check_range(&self, offset: usize, len: usize) -> bool {
38        let end = self.0.len();
39        (offset < end) & (end - offset >= len)
40    }
41
42    /// Returns an error if the specified range is not within the bounds of
43    /// the underlying data.
44    pub fn ensure_range(&self, offset: usize, len: usize) -> Option<()> {
45        if self.check_range(offset, len) {
46            Some(())
47        } else {
48            None
49        }
50    }
51
52    /// Reads a value of the specified type at some offset.
53    #[inline(always)]
54    pub fn read<T: FromBeData>(&self, offset: usize) -> Option<T> {
55        T::from_be_data(self.0, offset)
56    }
57
58    /// Reads a u8 value at some offset.
59    #[inline(always)]
60    pub fn read_u8(&self, offset: usize) -> Option<u8> {
61        u8::from_be_data(self.0, offset)
62    }
63
64    /// Reads a u16 value at some offset.
65    #[inline(always)]
66    pub fn read_u16(&self, offset: usize) -> Option<u16> {
67        u16::from_be_data(self.0, offset)
68    }
69
70    /// Reads a u24 value at the specified offset.
71    #[inline(always)]
72    pub fn read_u24(&self, offset: usize) -> Option<u32> {
73        U24::from_be_data(self.0, offset).map(|x| x.0)
74    }
75
76    /// Reads a u32 value at some offset.
77    #[inline(always)]
78    pub fn read_u32(&self, offset: usize) -> Option<u32> {
79        u32::from_be_data(self.0, offset)
80    }
81
82    /// Reads an i8 value at some offset.
83    #[inline(always)]
84    pub fn read_i8(&self, offset: usize) -> Option<i8> {
85        i8::from_be_data(self.0, offset)
86    }
87
88    /// Reads an i16 value at some offset.
89    #[inline(always)]
90    pub fn read_i16(&self, offset: usize) -> Option<i16> {
91        i16::from_be_data(self.0, offset)
92    }
93
94    /// Reads a value of the specified type at some offset, or returns the
95    /// default value on bounds check failure.
96    pub fn read_or_default<T: FromBeData + Default>(&self, offset: usize) -> T {
97        T::from_be_data(self.0, offset).unwrap_or_default()
98    }
99
100    /// Returns a value of the specified type at some offset without bounds
101    /// checking.
102    #[inline(always)]
103    pub unsafe fn read_unchecked<T: FromBeData>(&self, offset: usize) -> T {
104        T::from_be_data_unchecked(self.0, offset)
105    }
106
107    /// Reads an array of values of the specified type and length at some
108    /// offset.
109    pub fn read_array<T: FromBeData>(&self, offset: usize, len: usize) -> Option<Array<'a, T>> {
110        let len = len * T::SIZE;
111        if !self.check_range(offset, len) {
112            return None;
113        }
114        Some(Array::new(&self.0[offset..offset + len]))
115    }
116
117    /// Reads a sequence of bytes at the specified offset and length.
118    pub fn read_bytes(&self, offset: usize, len: usize) -> Option<&'a [u8]> {
119        if !self.check_range(offset, len) {
120            return None;
121        }
122        Some(&self.0[offset..offset + len])
123    }
124
125    /// Creates a new stream at the specified offset.
126    pub fn stream_at(&self, offset: usize) -> Option<Stream<'a>> {
127        Stream::with_offset(self.0, offset)
128    }
129}
130
131impl<'a> core::ops::Deref for Bytes<'a> {
132    type Target = [u8];
133    fn deref(&self) -> &Self::Target {
134        self.0
135    }
136}
137
/// Cursor over a borrowed byte slice for safely reading big endian data in
/// sequence.
#[derive(Clone, Copy)]
pub struct Stream<'a> {
    /// Bytes the stream reads from.
    data: &'a [u8],
    /// Position of the next read, relative to the start of `data`.
    offset: usize,
}
144
145impl<'a> Stream<'a> {
146    /// Creates a new stream wrapping the specified bytes.
147    pub fn new(data: &'a [u8]) -> Self {
148        Self { data, offset: 0 }
149    }
150
151    /// Creates a new stream with the specified data and offset.
152    pub fn with_offset(data: &'a [u8], offset: usize) -> Option<Self> {
153        let data = data.get(offset..)?;
154        Some(Self { data, offset: 0 })
155    }
156
157    /// Creates a new stream with the specified range of data.
158    pub fn with_range(data: &'a [u8], range: Range<usize>) -> Option<Self> {
159        let data = data.get(range)?;
160        Some(Self { data, offset: 0 })
161    }
162
163    /// Returns the underlying buffer for the cursor.
164    pub fn data(&self) -> &'a [u8] {
165        self.data
166    }
167
168    /// Returns the length of the underlying buffer.
169    pub fn len(&self) -> usize {
170        self.data.len()
171    }
172
173    /// Returns the current offset.
174    pub fn offset(&self) -> usize {
175        self.offset
176    }
177
178    /// Returns the number of bytes available for reading.
179    pub fn remaining(&self) -> usize {
180        self.data.len() - self.offset
181    }
182
183    /// Sets the offset.
184    pub fn set_offset(&mut self, offset: usize) -> Option<()> {
185        if offset > self.data.len() {
186            return None;
187        }
188        self.offset = offset;
189        Some(())
190    }
191
192    /// Returns true if the specified number of bytes can be read.
193    pub fn check_range(&self, len: usize) -> bool {
194        self.data.len() - self.offset >= len
195    }
196
197    /// Returns an error of the specified number of bytes cannot be read.
198    pub fn ensure_range(&self, len: usize) -> Option<()> {
199        if self.check_range(len) {
200            Some(())
201        } else {
202            None
203        }
204    }
205
206    /// Skips the specified number of bytes.
207    pub fn skip(&mut self, bytes: usize) -> Option<()> {
208        self.set_offset(self.offset.checked_add(bytes)?)
209    }
210
211    /// Reads a value of the specified type and advances the offset.
212    pub fn read<T: FromBeData>(&mut self) -> Option<T> {
213        if self.data.len() - self.offset < T::SIZE {
214            None
215        } else {
216            let v = unsafe { T::from_be_data_unchecked(self.data, self.offset) };
217            self.offset += T::SIZE;
218            Some(v)
219        }
220    }
221
222    /// Reads a u8 value and advances the offset.
223    #[inline(always)]
224    pub fn read_u8(&mut self) -> Option<u8> {
225        self.read::<u8>()
226    }
227
228    /// Reads a u16 value and advances the offset.
229    #[inline(always)]
230    pub fn read_u16(&mut self) -> Option<u16> {
231        self.read::<u16>()
232    }
233
234    /// Reads a u32 value and advances the offset.
235    #[inline(always)]
236    pub fn read_u32(&mut self) -> Option<u32> {
237        self.read::<u32>()
238    }
239
240    /// Reads an i8 value and advances the offset.
241    #[inline(always)]
242    pub fn read_i8(&mut self) -> Option<i8> {
243        self.read::<i8>()
244    }
245
246    /// Reads an i16 value and advances the offset.
247    #[inline(always)]
248    pub fn read_i16(&mut self) -> Option<i16> {
249        self.read::<i16>()
250    }
251
252    /// Reads an array of values of the specified type and length and
253    /// advances the offset.
254    pub fn read_array<T: FromBeData>(&mut self, len: usize) -> Option<Array<'a, T>> {
255        let len = len * T::SIZE;
256        if !self.check_range(len) {
257            return None;
258        }
259        let arr = Array::new(&self.data[self.offset..self.offset + len]);
260        self.offset += len;
261        Some(arr)
262    }
263
264    /// Reads a sequence of bytes of the specified length and advances the
265    /// offset.
266    pub fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> {
267        if !self.check_range(len) {
268            return None;
269        }
270        let bytes = &self.data[self.offset..self.offset + len];
271        self.offset += len;
272        Some(bytes)
273    }
274}
275
276/// An array wrapping a byte buffer over a sequence of values that implement
277/// [`FromBeData`].
278#[derive(Copy, Clone)]
279pub struct Array<'a, T: FromBeData> {
280    data: &'a [u8],
281    len: usize,
282    _p: core::marker::PhantomData<T>,
283}
284
285impl<T> core::fmt::Debug for Array<'_, T>
286where
287    T: core::fmt::Debug + FromBeData,
288{
289    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
290        write!(f, "[")?;
291        for (i, value) in self.iter().enumerate() {
292            if i > 0 {
293                write!(f, ", ")?;
294            }
295            write!(f, "{:?}", value)?;
296        }
297        write!(f, "]")
298    }
299}
300
301impl<'a, T: FromBeData> Array<'a, T> {
302    pub(crate) fn new(data: &'a [u8]) -> Self {
303        Self {
304            data,
305            len: data.len() / T::SIZE,
306            _p: core::marker::PhantomData {},
307        }
308    }
309
310    /// Returns the length of the array.
311    pub fn len(&self) -> usize {
312        self.len
313    }
314
315    /// Returns the element at the specified index.
316    pub fn get(&self, index: usize) -> Option<T> {
317        if index >= self.len {
318            None
319        } else {
320            unsafe { Some(T::from_be_data_unchecked(self.data, index * T::SIZE)) }
321        }
322    }
323
324    /// Returns the element at the specified index, or some value if the index
325    /// is out of bounds.
326    pub fn get_or(&self, index: usize, or: T) -> T {
327        if index >= self.len {
328            or
329        } else {
330            unsafe { T::from_be_data_unchecked(self.data, index * T::SIZE) }
331        }
332    }
333
334    /// Returns the element at the specified index without bounds checking.
335    pub unsafe fn get_unchecked(&self, index: usize) -> T {
336        T::from_be_data_unchecked(self.data, index * T::SIZE)
337    }
338
339    /// Performs a binary search over the array using the specified comparator
340    /// function. Returns the index and value of the element on success, or
341    /// `None` if a match was not found.
342    pub fn binary_search_by<F>(&self, mut f: F) -> Option<(usize, T)>
343    where
344        F: FnMut(&T) -> core::cmp::Ordering,
345    {
346        // Taken from Rust core library.
347        use core::cmp::Ordering::*;
348        let mut size = self.len;
349        if size == 0 {
350            return None;
351        }
352        let mut base = 0usize;
353        while size > 1 {
354            let half = size / 2;
355            let mid = base + half;
356            // SAFETY: the call is made safe by the following inconstants:
357            // - `mid >= 0`: by definition
358            // - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...`
359            let element = unsafe { self.get_unchecked(mid) };
360            base = match f(&element) {
361                Greater => base,
362                Less => mid,
363                Equal => return Some((mid, element)),
364            };
365            size -= half;
366        }
367        None
368    }
369
370    /// Returns an iterator over the elements of the array.
371    pub fn iter(&self) -> ArrayIter<'a, T> {
372        ArrayIter {
373            inner: *self,
374            cur: 0,
375        }
376    }
377}
378
379/// Iterator over the elements of an array.
380#[derive(Clone)]
381#[doc(hidden)]
382pub struct ArrayIter<'a, T: FromBeData> {
383    inner: Array<'a, T>,
384    cur: usize,
385}
386
387impl<'a, T: FromBeData + 'a> Iterator for ArrayIter<'a, T> {
388    type Item = T;
389
390    fn size_hint(&self) -> (usize, Option<usize>) {
391        let remaining = self.inner.len - self.cur;
392        (remaining, Some(remaining))
393    }
394
395    fn next(&mut self) -> Option<T> {
396        if self.cur >= self.inner.len {
397            return None;
398        }
399        self.cur += 1;
400        unsafe { Some(self.inner.get_unchecked(self.cur - 1)) }
401    }
402}
403
404impl<'a, T: FromBeData + 'a> IntoIterator for Array<'a, T> {
405    type IntoIter = ArrayIter<'a, T>;
406    type Item = T;
407
408    fn into_iter(self) -> Self::IntoIter {
409        ArrayIter {
410            inner: self,
411            cur: 0,
412        }
413    }
414}
415
/// Interface for decoding a big endian value out of a byte buffer.
pub trait FromBeData: Sized + Copy + Clone {
    /// Number of bytes the encoded value occupies in the buffer. Defaults to
    /// the in-memory size of the implementing type.
    const SIZE: usize = core::mem::size_of::<Self>();

    /// Decodes a value located at `offset` in `buf`.
    ///
    /// Returns `None` unless `offset` is strictly inside the buffer and at
    /// least [`Self::SIZE`] bytes are available from there.
    #[inline(always)]
    fn from_be_data(buf: &[u8], offset: usize) -> Option<Self> {
        match buf.len().checked_sub(offset) {
            // `available != 0` rejects `offset == buf.len()`, even for types
            // whose `SIZE` is zero.
            Some(available) if available != 0 && available >= Self::SIZE => {
                // SAFETY: `SIZE` bytes are available starting at `offset`.
                Some(unsafe { Self::from_be_data_unchecked(buf, offset) })
            }
            _ => None,
        }
    }

    /// Decodes a value located at `offset` in `buf` without bounds checking.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that [`Self::SIZE`] bytes starting at
    /// `offset` lie within `buf`.
    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self;
}
432
/// True only on x86/x86_64 targets compiled without debug assertions. The
/// multi-byte readers in this file branch on it to decode a value with a
/// single unaligned load followed by a byte swap instead of assembling it
/// byte by byte.
pub(crate) const USE_UNALIGNED_READS_LE: bool = cfg!(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    not(debug_assertions)
));
435
436impl FromBeData for u8 {
437    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
438        *buf.get_unchecked(offset)
439    }
440}
441
442impl FromBeData for i8 {
443    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
444        *buf.get_unchecked(offset) as i8
445    }
446}
447
448impl FromBeData for u16 {
449    #[inline(always)]
450    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
451        if USE_UNALIGNED_READS_LE {
452            (buf.as_ptr().add(offset) as *const u16)
453                .read_unaligned()
454                .swap_bytes()
455        } else {
456            (*buf.get_unchecked(offset) as u16) << 8 | *buf.get_unchecked(offset + 1) as u16
457        }
458    }
459}
460
461impl FromBeData for i16 {
462    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
463        u16::from_be_data_unchecked(buf, offset) as i16
464    }
465}
466
467impl FromBeData for u32 {
468    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
469        if USE_UNALIGNED_READS_LE {
470            (buf.as_ptr().add(offset) as *const u32)
471                .read_unaligned()
472                .swap_bytes()
473        } else {
474            (*buf.get_unchecked(offset) as u32) << 24
475                | (*buf.get_unchecked(offset + 1) as u32) << 16
476                | (*buf.get_unchecked(offset + 2) as u32) << 8
477                | *buf.get_unchecked(offset + 3) as u32
478        }
479    }
480}
481
482impl FromBeData for i32 {
483    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
484        u32::from_be_data_unchecked(buf, offset) as i32
485    }
486}
487
488impl FromBeData for u64 {
489    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
490        if USE_UNALIGNED_READS_LE {
491            (buf.as_ptr().add(offset) as *const u64)
492                .read_unaligned()
493                .swap_bytes()
494        } else {
495            (*buf.get_unchecked(offset) as u64) << 56
496                | (*buf.get_unchecked(offset + 1) as u64) << 48
497                | (*buf.get_unchecked(offset + 2) as u64) << 40
498                | (*buf.get_unchecked(offset + 3) as u64) << 32
499                | (*buf.get_unchecked(offset + 4) as u64) << 24
500                | (*buf.get_unchecked(offset + 5) as u64) << 16
501                | (*buf.get_unchecked(offset + 6) as u64) << 8
502                | *buf.get_unchecked(offset + 7) as u64
503        }
504    }
505}
506
/// Unsigned 24-bit integer, carried in a `u32`.
#[doc(hidden)]
#[derive(Clone, Copy)]
pub struct U24(pub u32);
511
512impl FromBeData for U24 {
513    const SIZE: usize = 3;
514
515    unsafe fn from_be_data_unchecked(buf: &[u8], offset: usize) -> Self {
516        Self(
517            (*buf.get_unchecked(offset) as u32) << 16
518                | (*buf.get_unchecked(offset + 1) as u32) << 8
519                | *buf.get_unchecked(offset + 2) as u32,
520        )
521    }
522}
523
524impl FromBeData for () {
525    unsafe fn from_be_data_unchecked(_buf: &[u8], _offset: usize) -> Self {}
526}