// tiny_skia/wide/i32x4_t.rs

// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

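//! A 4-lane `i32` vector with platform-specific SIMD backends
//! (SSE2, WASM simd128, AArch64 NEON) and a portable `[i32; 4]` fallback.
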
use bytemuck::cast;

use super::f32x4;

cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub __m128i);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4(pub int32x4_t);
    } else {
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct i32x4([i32; 4]);
    }
}

unsafe impl bytemuck::Zeroable for i32x4 {}
unsafe impl bytemuck::Pod for i32x4 {}

impl Default for i32x4 {
    fn default() -> Self {
        Self::splat(0)
    }
}

impl i32x4 {
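    /// Returns a vector with all four lanes set to `n`.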
    pub fn splat(n: i32) -> Self {
        cast([n, n, n, n])
    }

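    /// Lanewise bit-select: where a bit in the `self` mask is set, the result
    /// takes that bit from `t`, otherwise from `f`. Masks produced by the
    /// `cmp_*` methods (all-ones or all-zeros per lane) are valid on every
    /// backend, including the per-byte `_mm_blendv_epi8` path, which keys off
    /// the high bit of each byte.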
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_bitselect(t.0, f.0, self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vbslq_s32(cast(self.0), t.0, f.0) })
            } else {
                super::generic_bit_blend(self, t, f)
            }
        }
    }

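    /// Lanewise `==`: each lane becomes `-1` (all bits set) when equal, else `0`.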
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vceqq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] == rhs.0[0] { -1 } else { 0 },
                    if self.0[1] == rhs.0[1] { -1 } else { 0 },
                    if self.0[2] == rhs.0[2] { -1 } else { 0 },
                    if self.0[3] == rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

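    /// Lanewise signed `>`: `-1` where `self > rhs`, else `0`.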
    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpgt_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_gt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcgtq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] > rhs.0[0] { -1 } else { 0 },
                    if self.0[1] > rhs.0[1] { -1 } else { 0 },
                    if self.0[2] > rhs.0[2] { -1 } else { 0 },
                    if self.0[3] > rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

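    /// Lanewise signed `<`: `-1` where `self < rhs`, else `0`.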
    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmplt_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_lt(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { cast(vcltq_s32(self.0, rhs.0)) })
            } else {
                Self([
                    if self.0[0] < rhs.0[0] { -1 } else { 0 },
                    if self.0[1] < rhs.0[1] { -1 } else { 0 },
                    if self.0[2] < rhs.0[2] { -1 } else { 0 },
                    if self.0[3] < rhs.0[3] { -1 } else { 0 },
                ])
            }
        }
    }

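    /// Converts each lane to `f32` numerically (not a bitcast; see
    /// `to_f32x4_bitcast` for that).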
    pub fn to_f32x4(self) -> f32x4 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                // The inner cast stores the `__m128` result in our `__m128i`
                // field; the outer cast reinterprets `Self` as `f32x4`.
                cast(Self(cast(unsafe { _mm_cvtepi32_ps(self.0) })))
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                cast(Self(f32x4_convert_i32x4(self.0)))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                cast(Self(unsafe { cast(vcvtq_f32_s32(self.0)) }))
            } else {
                let arr: [i32; 4] = cast(self);
                cast([
                    arr[0] as f32,
                    arr[1] as f32,
                    arr[2] as f32,
                    arr[3] as f32,
                ])
            }
        }
    }

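    /// Reinterprets the raw bits as `f32x4` without any numeric conversion.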
    pub fn to_f32x4_bitcast(self) -> f32x4 {
        bytemuck::cast(self)
    }
}

impl From<[i32; 4]> for i32x4 {
    fn from(v: [i32; 4]) -> Self {
        cast(v)
    }
}

impl From<i32x4> for [i32; 4] {
    fn from(v: i32x4) -> Self {
        cast(v)
    }
}

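// Lanewise addition; wraps on overflow on every backend.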
impl core::ops::Add for i32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].wrapping_add(rhs.0[0]),
                    self.0[1].wrapping_add(rhs.0[1]),
                    self.0[2].wrapping_add(rhs.0[2]),
                    self.0[3].wrapping_add(rhs.0[3]),
                ])
            }
        }
    }
}

impl core::ops::BitAnd for i32x4 {
    type Output = Self;

    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vandq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] & rhs.0[0],
                    self.0[1] & rhs.0[1],
                    self.0[2] & rhs.0[2],
                    self.0[3] & rhs.0[3],
                ])
            }
        }
    }
}

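// Lanewise multiplication, keeping the low 32 bits of each product.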
impl core::ops::Mul for i32x4 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
                Self(unsafe { _mm_mullo_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(i32x4_mul(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmulq_s32(self.0, rhs.0) })
            } else {
                // The casts are required: plain SSE2 has no 32-bit lane
                // multiply (`_mm_mullo_epi32` is SSE4.1), so this branch may
                // hold a `__m128i` that must be unpacked for scalar math.
                let a: [i32; 4] = cast(self);
                let b: [i32; 4] = cast(rhs);
                Self(cast([
                    a[0].wrapping_mul(b[0]),
                    a[1].wrapping_mul(b[1]),
                    a[2].wrapping_mul(b[2]),
                    a[3].wrapping_mul(b[3]),
                ]))
            }
        }
    }
}

impl core::ops::BitOr for i32x4 {
    type Output = Self;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_or_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_or(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vorrq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] | rhs.0[0],
                    self.0[1] | rhs.0[1],
                    self.0[2] | rhs.0[2],
                    self.0[3] | rhs.0[3],
                ])
            }
        }
    }
}

impl core::ops::BitXor for i32x4 {
    type Output = Self;

    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_xor_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_xor(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { veorq_s32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] ^ rhs.0[0],
                    self.0[1] ^ rhs.0[1],
                    self.0[2] ^ rhs.0[2],
                    self.0[3] ^ rhs.0[3],
                ])
            }
        }
    }
}
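
// A minimal sanity-check sketch (not part of the original file): it exercises
// the comparison, blend, and wrapping-arithmetic paths on whichever backend
// the build selects. Lane values are illustrative.
#[cfg(test)]
mod i32x4_tests {
    use super::*;

    #[test]
    fn cmp_masks_drive_blend() {
        let a = i32x4::from([1, 2, 3, 4]);
        let b = i32x4::from([4, 2, 0, 4]);
        // Equal lanes become all-ones masks (-1), the rest zero.
        let mask = a.cmp_eq(b);
        assert_eq!(<[i32; 4]>::from(mask), [0, -1, 0, -1]);
        // `blend` takes `t` where the mask is set and `f` elsewhere.
        let r = mask.blend(i32x4::splat(10), i32x4::splat(20));
        assert_eq!(<[i32; 4]>::from(r), [20, 10, 20, 10]);
    }

    #[test]
    fn add_and_mul_wrap_like_scalar() {
        let a = i32x4::from([1, -2, 3, i32::MAX]);
        let b = i32x4::from([5, 6, -7, 1]);
        // Every backend wraps on overflow, matching wrapping_add/wrapping_mul.
        assert_eq!(<[i32; 4]>::from(a + b), [6, 4, -4, i32::MIN]);
        assert_eq!(<[i32; 4]>::from(a * b), [5, -12, -21, i32::MAX]);
    }
}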