regex_automata/util/
int.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
/*!
This module provides several integer oriented traits for converting between
both fixed size integers and integers whose size varies based on the target
(like `usize`).

The driving design principle of this module is to attempt to centralize as many
`as` casts as possible here. And in particular, we separate casts into two
buckets:

* Casts that we use for their truncating behavior. In this case, we use more
descriptive names, like `low_u32` and `high_u32`.
* Casts that we use for converting back-and-forth between `usize`. These
conversions are generally necessary because we often store indices in different
formats to save on memory, which requires converting to and from `usize`. In
this case, we very specifically do not want to overflow, and so the methods
defined here will panic if the `as` cast would be lossy in debug mode. (A
normal `as` cast will never panic!)

For `as` casts between raw pointers, we use `cast`, so `as` isn't needed there.

For regex engines, floating point is just never used, so we don't have to worry
about `as` casts for those.

Otherwise, this module pretty much covers all of our `as` needs except for one
thing: const contexts. There are a select few places in this crate where we
still need to use `as` because const functions on traits aren't stable yet.
If we wind up significantly expanding our const footprint in this crate, it
might be worth defining free functions to handle those cases. But at the time
of writing, that just seemed like too much ceremony. Instead, I comment each
such use of `as` in a const context with a "fixme" notice.

NOTE: for simplicity, we don't take target pointer width into account here for
`usize` conversions. Since we currently only panic in debug mode, skipping the
check when it can be proven it isn't needed at compile time doesn't really
matter. Now, if we wind up wanting to do as many checks as possible in release
mode, then we would want to skip those when we know the conversions are always
non-lossy.

NOTE: this module isn't an exhaustive API. For example, we still use things
like `u64::from` where possible, or even `usize::try_from()` for when we do
explicitly want to panic or when we want to return an error for overflow.
*/

// We define a little more than what we need, but I'd rather just have
// everything via a consistent and uniform API then have holes.
#![allow(dead_code)]

pub(crate) trait U8 {
    fn as_usize(self) -> usize;
}

impl U8 for u8 {
    fn as_usize(self) -> usize {
        usize::from(self)
    }
}

pub(crate) trait U16 {
    fn as_usize(self) -> usize;
    fn low_u8(self) -> u8;
    fn high_u8(self) -> u8;
}

impl U16 for u16 {
    fn as_usize(self) -> usize {
        usize::from(self)
    }

    fn low_u8(self) -> u8 {
        self as u8
    }

    fn high_u8(self) -> u8 {
        (self >> 8) as u8
    }
}

pub(crate) trait U32 {
    fn as_usize(self) -> usize;
    fn low_u8(self) -> u8;
    fn low_u16(self) -> u16;
    fn high_u16(self) -> u16;
}

impl U32 for u32 {
    fn as_usize(self) -> usize {
        #[cfg(debug_assertions)]
        {
            usize::try_from(self).expect("u32 overflowed usize")
        }
        #[cfg(not(debug_assertions))]
        {
            self as usize
        }
    }

    fn low_u8(self) -> u8 {
        self as u8
    }

    fn low_u16(self) -> u16 {
        self as u16
    }

    fn high_u16(self) -> u16 {
        (self >> 16) as u16
    }
}

pub(crate) trait U64 {
    fn as_usize(self) -> usize;
    fn low_u8(self) -> u8;
    fn low_u16(self) -> u16;
    fn low_u32(self) -> u32;
    fn high_u32(self) -> u32;
}

impl U64 for u64 {
    fn as_usize(self) -> usize {
        #[cfg(debug_assertions)]
        {
            usize::try_from(self).expect("u64 overflowed usize")
        }
        #[cfg(not(debug_assertions))]
        {
            self as usize
        }
    }

    fn low_u8(self) -> u8 {
        self as u8
    }

    fn low_u16(self) -> u16 {
        self as u16
    }

    fn low_u32(self) -> u32 {
        self as u32
    }

    fn high_u32(self) -> u32 {
        (self >> 32) as u32
    }
}

pub(crate) trait I32 {
    fn as_usize(self) -> usize;
    fn to_bits(self) -> u32;
    fn from_bits(n: u32) -> i32;
}

impl I32 for i32 {
    fn as_usize(self) -> usize {
        #[cfg(debug_assertions)]
        {
            usize::try_from(self).expect("i32 overflowed usize")
        }
        #[cfg(not(debug_assertions))]
        {
            self as usize
        }
    }

    fn to_bits(self) -> u32 {
        self as u32
    }

    fn from_bits(n: u32) -> i32 {
        n as i32
    }
}

pub(crate) trait Usize {
    fn as_u8(self) -> u8;
    fn as_u16(self) -> u16;
    fn as_u32(self) -> u32;
    fn as_u64(self) -> u64;
}

impl Usize for usize {
    fn as_u8(self) -> u8 {
        #[cfg(debug_assertions)]
        {
            u8::try_from(self).expect("usize overflowed u8")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u8
        }
    }

    fn as_u16(self) -> u16 {
        #[cfg(debug_assertions)]
        {
            u16::try_from(self).expect("usize overflowed u16")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u16
        }
    }

    fn as_u32(self) -> u32 {
        #[cfg(debug_assertions)]
        {
            u32::try_from(self).expect("usize overflowed u32")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u32
        }
    }

    fn as_u64(self) -> u64 {
        #[cfg(debug_assertions)]
        {
            u64::try_from(self).expect("usize overflowed u64")
        }
        #[cfg(not(debug_assertions))]
        {
            self as u64
        }
    }
}

// Pointers aren't integers, but we convert pointers to integers to perform
// offset arithmetic in some places. (And no, we don't convert the integers
// back to pointers.) So add 'as_usize' conversions here too for completeness.
//
// These 'as' casts are actually okay because they're always non-lossy. But the
// idea here is to just try and remove as much 'as' as possible, particularly
// in this crate where we are being really paranoid about offsets and making
// sure we don't panic on inputs that might be untrusted. This way, the 'as'
// casts become easier to audit if they're all in one place, even when some of
// them are actually okay 100% of the time.

pub(crate) trait Pointer {
    fn as_usize(self) -> usize;
}

impl<T> Pointer for *const T {
    fn as_usize(self) -> usize {
        self as usize
    }
}