tiny_skia/wide/f32x4_t.rs

// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

use bytemuck::cast;

#[cfg(all(not(feature = "std"), feature = "no-std-float"))]
use tiny_skia_path::NoStdFloat;

use super::i32x4;

cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(__m128);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        // repr(transparent) allows for directly passing the v128 on the WASM stack.
        #[derive(Clone, Copy, Debug)]
        #[repr(transparent)]
        pub struct f32x4(v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4(float32x4_t);
    } else {
        use super::FasterMinMax;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct f32x4([f32; 4]);
    }
}

unsafe impl bytemuck::Zeroable for f32x4 {}
unsafe impl bytemuck::Pod for f32x4 {}

impl Default for f32x4 {
    fn default() -> Self {
        Self::splat(0.0)
    }
}

impl f32x4 {
    pub fn splat(n: f32) -> Self {
        Self::from([n, n, n, n])
    }

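    // In the portable fallback below, `trunc_int` rounds toward zero, so for
    // negative non-integers the roundtrip value is one too large; `cmp_gt`
    // flags those lanes and `blend` subtracts 1.0 from them.
    // E.g. floor(-1.25): trunc -> -1.0; -1.0 > -1.25, so -1.0 - 1.0 = -2.0.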
    pub fn floor(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_floor(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndmq_f32(self.0) })
            } else {
                let roundtrip: f32x4 = cast(self.trunc_int().to_f32x4());
                roundtrip - roundtrip.cmp_gt(self).blend(f32x4::splat(1.0), f32x4::default())
            }
        }
    }

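    // The portable fallback clears the sign bit (bit 31) of each lane by
    // AND-ing with 0x7fff_ffff (`i32::MAX as u32`), which is exactly the
    // IEEE-754 `abs` operation.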
    pub fn abs(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_abs(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vabsq_f32(self.0) })
            } else {
                let non_sign_bits = f32x4::splat(f32::from_bits(i32::MAX as u32));
                self & non_sign_bits
            }
        }
    }

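    // Note: the wasm `f32x4_pmax`/`f32x4_pmin` intrinsics are the "pseudo"
    // min/max: plain comparison-based selects rather than the IEEE operations,
    // so NaN lanes and signed zeros get no special handling, in line with the
    // comment below.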
    pub fn max(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_max_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmax(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmaxq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].faster_max(rhs.0[0]),
                    self.0[1].faster_max(rhs.0[1]),
                    self.0[2].faster_max(rhs.0[2]),
                    self.0[3].faster_max(rhs.0[3]),
                ])
            }
        }
    }

    pub fn min(self, rhs: Self) -> Self {
        // These technically don't have the same semantics for NaN and 0, but it
        // doesn't seem to matter as Skia does it the same way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_min_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_pmin(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vminq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].faster_min(rhs.0[0]),
                    self.0[1].faster_min(rhs.0[1]),
                    self.0[2].faster_min(rhs.0[2]),
                    self.0[3].faster_min(rhs.0[3]),
                ])
            }
        }
    }

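    // The cmp_* methods return per-lane masks: all bits set for true (which
    // reinterprets as an f32 NaN bit pattern), all bits clear for false. The
    // masks are meant to drive `blend`, not to be used as arithmetic values.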
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vceqq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] == rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] == rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] == rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] == rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_ne(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpneq_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_ne(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vmvnq_u32(vceqq_f32(self.0, rhs.0)) }))
            } else {
                Self([
                    if self.0[0] != rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] != rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] != rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] != rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_ge(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpge_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_ge(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcgeq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] >= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] >= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] >= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] >= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpgt_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_gt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcgtq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] > rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] > rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] > rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] > rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_le(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmple_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_le(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcleq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] <= rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] <= rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] <= rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] <= rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmplt_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_lt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vcltq_f32(self.0, rhs.0) }))
            } else {
                Self([
                    if self.0[0] < rhs.0[0] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[1] < rhs.0[1] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[2] < rhs.0[2] { f32::from_bits(u32::MAX) } else { 0.0 },
                    if self.0[3] < rhs.0[3] { f32::from_bits(u32::MAX) } else { 0.0 },
                ])
            }
        }
    }

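    // `blend` takes bits from `t` where the mask bit in `self` is 1 and from
    // `f` where it is 0. SSE4.1 `_mm_blendv_ps` only inspects each lane's sign
    // bit, which suffices because cmp_* masks are all-ones or all-zeros.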
    #[inline]
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vbslq_u32(cast(self.0), cast(t.0), cast(f.0))) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

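    // The portable fallback rounds to nearest via the classic magic-number
    // trick: adding and then subtracting 1.0 / f32::EPSILON (= 2^23) makes the
    // FPU round away the fractional bits. The exponent field `e` is extracted
    // so that already-integral, infinite, or NaN lanes pass through unchanged
    // and lanes below 0.5 in magnitude are flushed to a signed zero.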
    pub fn round(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(
                    unsafe { _mm_round_ps(self.0, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) },
                )
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_nearest(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vrndnq_f32(self.0) })
            } else {
                use super::u32x4;

                let to_int = f32x4::splat(1.0 / f32::EPSILON);
                let u: u32x4 = cast(self);
                let e: i32x4 = cast(u.shr::<23>() & u32x4::splat(0xff));
                let mut y: f32x4;

                let no_op_magic = i32x4::splat(0x7f + 23);
                let no_op_mask: f32x4 = cast(e.cmp_gt(no_op_magic) | e.cmp_eq(no_op_magic));
                let no_op_val: f32x4 = self;

                let zero_magic = i32x4::splat(0x7f - 1);
                let zero_mask: f32x4 = cast(e.cmp_lt(zero_magic));
                let zero_val: f32x4 = self * f32x4::splat(0.0);

                let neg_bit: f32x4 = cast(cast::<u32x4, i32x4>(u).cmp_lt(i32x4::default()));
                let x: f32x4 = neg_bit.blend(-self, self);
                y = x + to_int - to_int - x;
                y = y.cmp_gt(f32x4::splat(0.5)).blend(
                    y + x - f32x4::splat(1.0),
                    y.cmp_lt(f32x4::splat(-0.5)).blend(y + x + f32x4::splat(1.0), y + x),
                );
                y = neg_bit.blend(-y, y);

                no_op_mask.blend(no_op_val, zero_mask.blend(zero_val, y))
            }
        }
    }

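    // On SSE2, `_mm_cvtps_epi32` converts using the current MXCSR rounding
    // mode, which is round-to-nearest-even by default, so no separate `round`
    // call is needed on that path.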
    pub fn round_int(self) -> i32x4 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvtps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.round().0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtnq_s32_f32(self.0) })
            } else {
                let rounded: [f32; 4] = cast(self.round());
                cast([
                    rounded[0] as i32,
                    rounded[1] as i32,
                    rounded[2] as i32,
                    rounded[3] as i32,
                ])
            }
        }
    }

    pub fn trunc_int(self) -> i32x4 {
        // These technically don't have the same semantics for NaN and out of
        // range values, but it doesn't seem to matter as Skia does it the same
        // way.
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                i32x4(unsafe { _mm_cvttps_epi32(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                i32x4(i32x4_trunc_sat_f32x4(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                i32x4(unsafe { vcvtq_s32_f32(self.0) })
            } else {
                cast([
                    self.0[0] as i32,
                    self.0[1] as i32,
                    self.0[2] as i32,
                    self.0[3] as i32,
                ])
            }
        }
    }

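    // `recip_fast` trades accuracy for speed: SSE `_mm_rcp_ps` is only a
    // ~12-bit estimate, and the NEON path refines the `vrecpeq_f32` estimate
    // with one Newton-Raphson step via `vrecpsq_f32`. The portable fallback
    // performs an exact division instead.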
    pub fn recip_fast(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rcp_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
                    let a = vrecpeq_f32(self.0);
                    let a = vmulq_f32(vrecpsq_f32(self.0, a), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0],
                    1.0 / self.0[1],
                    1.0 / self.0[2],
                    1.0 / self.0[3],
                ])
            }
        }
    }

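    // Same idea for the reciprocal square root: `vrsqrtsq_f32(x, a * a)`
    // computes the Newton-Raphson correction factor (3 - x*a*a) / 2 for
    // 1/sqrt(x), refining the initial `vrsqrteq_f32` estimate once.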
    pub fn recip_sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_rsqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(f32x4_splat(1.0), f32x4_sqrt(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe {
                    let a = vrsqrteq_f32(self.0);
                    let a = vmulq_f32(vrsqrtsq_f32(self.0, vmulq_f32(a, a)), a);
                    Self(a)
                }
            } else {
                Self::from([
                    1.0 / self.0[0].sqrt(),
                    1.0 / self.0[1].sqrt(),
                    1.0 / self.0[2].sqrt(),
                    1.0 / self.0[3].sqrt(),
                ])
            }
        }
    }

    pub fn sqrt(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sqrt_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sqrt(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsqrtq_f32(self.0) })
            } else {
                Self::from([
                    self.0[0].sqrt(),
                    self.0[1].sqrt(),
                    self.0[2].sqrt(),
                    self.0[3].sqrt(),
                ])
            }
        }
    }
}

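// `bytemuck::cast` is a size-checked transmute between `Pod` types, so these
// conversions just reinterpret the same 16 bytes.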
impl From<[f32; 4]> for f32x4 {
    fn from(v: [f32; 4]) -> Self {
        cast(v)
    }
}

impl From<f32x4> for [f32; 4] {
    fn from(v: f32x4) -> Self {
        cast(v)
    }
}

impl core::ops::Add for f32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] + rhs.0[0],
                    self.0[1] + rhs.0[1],
                    self.0[2] + rhs.0[2],
                    self.0[3] + rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::AddAssign for f32x4 {
    fn add_assign(&mut self, rhs: f32x4) {
        *self = *self + rhs;
    }
}

impl core::ops::Sub for f32x4 {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_sub_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_sub(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vsubq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] - rhs.0[0],
                    self.0[1] - rhs.0[1],
                    self.0[2] - rhs.0[2],
                    self.0[3] - rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::Mul for f32x4 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_mul_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] * rhs.0[0],
                    self.0[1] * rhs.0[1],
                    self.0[2] * rhs.0[2],
                    self.0[3] * rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::MulAssign for f32x4 {
    fn mul_assign(&mut self, rhs: f32x4) {
        *self = *self * rhs;
    }
}

impl core::ops::Div for f32x4 {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_div_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(f32x4_div(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vdivq_f32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] / rhs.0[0],
                    self.0[1] / rhs.0[1],
                    self.0[2] / rhs.0[2],
                    self.0[3] / rhs.0[3],
                ])
            }
        }
    }
}

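// The bitwise operators below act on raw lane bit patterns; on f32 data this
// is only meaningful for mask manipulation (see `cmp_*` and `blend`). The
// scalar fallback round-trips through `to_bits`/`from_bits` to do the integer
// bit twiddling.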
impl core::ops::BitAnd for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vandq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() & rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() & rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() & rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() & rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::BitOr for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vorrq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() | rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() | rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() | rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() | rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

impl core::ops::BitXor for f32x4 {
    type Output = Self;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_ps(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { veorq_u32(cast(self.0), cast(rhs.0)) }))
            } else {
                Self([
                    f32::from_bits(self.0[0].to_bits() ^ rhs.0[0].to_bits()),
                    f32::from_bits(self.0[1].to_bits() ^ rhs.0[1].to_bits()),
                    f32::from_bits(self.0[2].to_bits() ^ rhs.0[2].to_bits()),
                    f32::from_bits(self.0[3].to_bits() ^ rhs.0[3].to_bits()),
                ])
            }
        }
    }
}

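// Negation is implemented as `0.0 - self`, which maps both zeros to +0.0
// instead of flipping the sign bit the way IEEE negation would; as with the
// NaN notes above, this doesn't seem to matter in practice.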
impl core::ops::Neg for f32x4 {
    type Output = Self;

    fn neg(self) -> Self {
        Self::default() - self
    }
}

impl core::ops::Not for f32x4 {
    type Output = Self;

    fn not(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                unsafe {
                    let all_bits = _mm_set1_ps(f32::from_bits(u32::MAX));
                    Self(_mm_xor_ps(self.0, all_bits))
                }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_not(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(cast(unsafe { vmvnq_u32(cast(self.0)) }))
            } else {
                self ^ Self::splat(cast(u32::MAX))
            }
        }
    }
}

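// Lane-wise equality: all four lanes must compare equal, so any NaN lane makes
// `eq` return false, matching scalar `f32` semantics.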
impl core::cmp::PartialEq for f32x4 {
    fn eq(&self, rhs: &Self) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                unsafe { _mm_movemask_ps(_mm_cmpeq_ps(self.0, rhs.0)) == 0b1111 }
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                unsafe { vminvq_u32(vceqq_f32(self.0, rhs.0)) != 0 }
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                u32x4_all_true(f32x4_eq(self.0, rhs.0))
            } else {
                self.0 == rhs.0
            }
        }
    }
}
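
// A minimal sanity-check sketch, not part of the upstream file: it exercises a
// few of the methods above with values whose expected results follow directly
// from the definitions. Assumes a `std` test environment.
#[cfg(test)]
mod tests {
    use super::f32x4;

    #[test]
    fn floor_and_abs() {
        assert_eq!(
            f32x4::from([1.7, -1.25, 0.0, -3.0]).floor(),
            f32x4::from([1.0, -2.0, 0.0, -3.0]),
        );
        assert_eq!(
            f32x4::from([-1.5, 2.0, -0.0, 3.25]).abs(),
            f32x4::from([1.5, 2.0, 0.0, 3.25]),
        );
    }

    #[test]
    fn blend_selects_by_mask() {
        // Lanes greater than 2.0 take the `t` value, the rest take `f`.
        let mask = f32x4::from([1.0, 5.0, 3.0, 0.0]).cmp_gt(f32x4::splat(2.0));
        let picked = mask.blend(f32x4::splat(1.0), f32x4::splat(-1.0));
        assert_eq!(picked, f32x4::from([-1.0, 1.0, 1.0, -1.0]));
    }
}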