tiny_skia/wide/f32x8_t.rs

// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

use bytemuck::cast;

use super::{i32x8, u32x8};

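// On AVX builds, f32x8 maps directly onto a single 256-bit `__m256`
// register; otherwise it is emulated as two `f32x4` halves, and every
// operation below is applied to each half in turn.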
cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "avx"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct f32x8(__m256);
    } else {
        use super::f32x4;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct f32x8(pub f32x4, pub f32x4);
    }
}

unsafe impl bytemuck::Zeroable for f32x8 {}
unsafe impl bytemuck::Pod for f32x8 {}

impl Default for f32x8 {
    fn default() -> Self {
        Self::splat(0.0)
    }
}

impl f32x8 {
    pub fn splat(n: f32) -> Self {
        cast([n, n, n, n, n, n, n, n])
    }

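    // `trunc_int` rounds toward zero, so for negative non-integer inputs the
    // round-trip through i32 lands *above* `self`; subtract 1.0 in exactly
    // those lanes to get a true floor.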
    pub fn floor(self) -> Self {
        let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
        roundtrip
            - roundtrip
                .cmp_gt(self)
                .blend(f32x8::splat(1.0), f32x8::default())
    }

    pub fn fract(self) -> Self {
        self - self.floor()
    }

    pub fn normalize(self) -> Self {
        self.max(f32x8::default()).min(f32x8::splat(1.0))
    }

    pub fn to_i32x8_bitcast(self) -> i32x8 {
        bytemuck::cast(self)
    }

    pub fn to_u32x8_bitcast(self) -> u32x8 {
        bytemuck::cast(self)
    }

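    // The comparisons return lane masks: all bits set in a lane where the
    // predicate holds, all bits clear where it does not. The `_OQ` predicates
    // are "ordered, quiet", so any comparison involving NaN yields false.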
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) })
            } else {
                Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1))
            }
        }
    }

    pub fn cmp_ne(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_NEQ_OQ) })
            } else {
                Self(self.0.cmp_ne(rhs.0), self.1.cmp_ne(rhs.1))
            }
        }
    }

    pub fn cmp_ge(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GE_OQ) })
            } else {
                Self(self.0.cmp_ge(rhs.0), self.1.cmp_ge(rhs.1))
            }
        }
    }

    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_GT_OQ) })
            } else {
                Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1))
            }
        }
    }

    pub fn cmp_le(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LE_OQ) })
            } else {
                Self(self.0.cmp_le(rhs.0), self.1.cmp_le(rhs.1))
            }
        }
    }

    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_LT_OQ) })
            } else {
                Self(self.0.cmp_lt(rhs.0), self.1.cmp_lt(rhs.1))
            }
        }
    }

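    // Lane-wise select: where a mask lane (as produced by the comparisons
    // above) is all ones, take the lane from `t`, otherwise from `f`.
    // Note that `_mm256_blendv_ps` only consults each lane's sign bit, which
    // is sufficient for all-ones/all-zeros comparison masks.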
    #[inline]
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_blendv_ps(f.0, t.0, self.0) })
            } else {
                Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1))
            }
        }
    }

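    // Clears the sign bit in every lane by masking with 0x7FFF_FFFF
    // (`i32::MAX` reinterpreted as an f32 bit pattern).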
    pub fn abs(self) -> Self {
        let non_sign_bits = f32x8::splat(f32::from_bits(i32::MAX as u32));
        self & non_sign_bits
    }

    pub fn max(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_max_ps(self.0, rhs.0) })
            } else {
                Self(self.0.max(rhs.0), self.1.max(rhs.1))
            }
        }
    }

    pub fn min(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_min_ps(self.0, rhs.0) })
            } else {
                Self(self.0.min(rhs.0), self.1.min(rhs.1))
            }
        }
    }

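    // A lane is non-finite (infinity or NaN) exactly when its exponent bits
    // are all ones. Shifting left by one drops the sign bit and moves the
    // exponent into the top byte, where it can be compared against the mask.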
    pub fn is_finite(self) -> Self {
        let shifted_exp_mask = u32x8::splat(0xFF000000);
        let u: u32x8 = cast(self);
        let shift_u = u.shl::<1>();
        let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
        cast(out)
    }

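    // Rounds to the nearest integer; on the AVX path this uses
    // `_MM_FROUND_TO_NEAREST_INT`, i.e. ties go to the even value.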
    pub fn round(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) })
            } else {
                Self(self.0.round(), self.1.round())
            }
        }
    }

    pub fn round_int(self) -> i32x8 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                cast(unsafe { _mm256_cvtps_epi32(self.0) })
            } else {
                i32x8(self.0.round_int(), self.1.round_int())
            }
        }
    }

    pub fn trunc_int(self) -> i32x8 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                cast(unsafe { _mm256_cvttps_epi32(self.0) })
            } else {
                i32x8(self.0.trunc_int(), self.1.trunc_int())
            }
        }
    }

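    // Fast approximate reciprocal. `_mm256_rcp_ps` guarantees only about
    // 12 bits of precision, so this is not a substitute for a real divide.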
    pub fn recip_fast(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_rcp_ps(self.0) })
            } else {
                Self(self.0.recip_fast(), self.1.recip_fast())
            }
        }
    }

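    // Fast approximate 1/sqrt(x), again with roughly 12 bits of precision
    // on the AVX path.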
    pub fn recip_sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_rsqrt_ps(self.0) })
            } else {
                Self(self.0.recip_sqrt(), self.1.recip_sqrt())
            }
        }
    }

    pub fn sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_sqrt_ps(self.0) })
            } else {
                Self(self.0.sqrt(), self.1.sqrt())
            }
        }
    }
}

impl From<[f32; 8]> for f32x8 {
    fn from(v: [f32; 8]) -> Self {
        cast(v)
    }
}

impl From<f32x8> for [f32; 8] {
    fn from(v: f32x8) -> Self {
        cast(v)
    }
}

impl core::ops::Add for f32x8 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_add_ps(self.0, rhs.0) })
            } else {
                Self(self.0 + rhs.0, self.1 + rhs.1)
            }
        }
    }
}

impl core::ops::AddAssign for f32x8 {
    fn add_assign(&mut self, rhs: f32x8) {
        *self = *self + rhs;
    }
}

impl core::ops::Sub for f32x8 {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_sub_ps(self.0, rhs.0) })
            } else {
                Self(self.0 - rhs.0, self.1 - rhs.1)
            }
        }
    }
}

impl core::ops::Mul for f32x8 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_mul_ps(self.0, rhs.0) })
            } else {
                Self(self.0 * rhs.0, self.1 * rhs.1)
            }
        }
    }
}

impl core::ops::MulAssign for f32x8 {
    fn mul_assign(&mut self, rhs: f32x8) {
        *self = *self * rhs;
    }
}

impl core::ops::Div for f32x8 {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_div_ps(self.0, rhs.0) })
            } else {
                Self(self.0 / rhs.0, self.1 / rhs.1)
            }
        }
    }
}

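// The bitwise operators work on the raw lane bits; they exist mainly so
// that comparison masks can be combined and applied to float data.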
impl core::ops::BitAnd for f32x8 {
    type Output = Self;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_and_ps(self.0, rhs.0) })
            } else {
                Self(self.0 & rhs.0, self.1 & rhs.1)
            }
        }
    }
}

impl core::ops::BitOr for f32x8 {
    type Output = Self;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_or_ps(self.0, rhs.0) })
            } else {
                Self(self.0 | rhs.0, self.1 | rhs.1)
            }
        }
    }
}

impl core::ops::BitXor for f32x8 {
    type Output = Self;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                Self(unsafe { _mm256_xor_ps(self.0, rhs.0) })
            } else {
                Self(self.0 ^ rhs.0, self.1 ^ rhs.1)
            }
        }
    }
}

impl core::ops::Neg for f32x8 {
    type Output = Self;

    fn neg(self) -> Self {
        Self::default() - self
    }
}

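// Lane-wise complement, implemented as XOR against an all-ones bit pattern
// (`u32::MAX` reinterpreted as f32); used to invert comparison masks.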
impl core::ops::Not for f32x8 {
    type Output = Self;

    fn not(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                let all_bits = unsafe { _mm256_set1_ps(f32::from_bits(u32::MAX)) };
                Self(unsafe { _mm256_xor_ps(self.0, all_bits) })
            } else {
                Self(!self.0, !self.1)
            }
        }
    }
}

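// Whole-vector equality: true only if all eight lanes compare equal (on
// AVX, all eight bits of the comparison movemask must be set). NaN lanes
// compare unequal, consistent with scalar f32.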
impl core::cmp::PartialEq for f32x8 {
    fn eq(&self, rhs: &Self) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                let mask = unsafe { _mm256_cmp_ps(self.0, rhs.0, _CMP_EQ_OQ) };
                unsafe { _mm256_movemask_ps(mask) == 0b1111_1111 }
            } else {
                self.0 == rhs.0 && self.1 == rhs.1
            }
        }
    }
}
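
// Illustrative sanity checks for the lane-wise semantics above; a sketch
// added for this edit, not part of the upstream file. All values chosen
// here are exactly representable, so `assert_eq!` on f32 arrays is safe.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn floor_and_fract() {
        let v = f32x8::from([-1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.5]);

        // `floor` rounds toward negative infinity in every lane.
        let floored: [f32; 8] = v.floor().into();
        assert_eq!(floored, [-2.0, -1.0, -1.0, 0.0, 0.0, 1.0, 1.0, 2.0]);

        // `fract` is defined as `self - self.floor()`, so it is always
        // non-negative, even for negative inputs.
        let fracts: [f32; 8] = v.fract().into();
        assert_eq!(fracts, [0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.5]);
    }
}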