fast_srgb8/
sse2.rs
1use super::TO_SRGB8_TABLE;
2#[cfg(target_arch = "x86")]
3use core::arch::x86::*;
4#[cfg(target_arch = "x86_64")]
5use core::arch::x86_64::*;
6use core::mem::transmute;
7
/// Upper clamp bound, broadcast to all four lanes: `0x3f7f_ffff` is the
/// largest `f32` strictly below `1.0` (whose bit pattern is `0x3f80_0000`).
const MAXV: __m128 = unsafe { transmute::<[u32; 4], __m128>([0x3f7f_ffff; 4]) };

/// Lower clamp bound, broadcast to all four lanes: `0x3900_0000` has biased
/// exponent `127 - 13` and a zero mantissa, i.e. the value `2^-13`.
const MINV: __m128 = unsafe { transmute::<[u32; 4], __m128>([0x3900_0000; 4]) };

/// Keeps only the low 8 bits of every 32-bit lane.
const MANT_MASK: __m128i = unsafe { transmute::<[u32; 4], __m128i>([0x0000_00ff; 4]) };

/// Places the constant `0x0200` (512) in the upper 16 bits of every 32-bit
/// lane, leaving the lower 16 bits zero.
const TOP_SCALE: __m128i = unsafe { transmute::<[u32; 4], __m128i>([0x0200_0000; 4]) };
12
13#[inline]
14#[target_feature(enable = "sse2")]
15unsafe fn simd_to_srgb8_sse2(input: __m128) -> __m128i {
16 let clamped = _mm_min_ps(_mm_max_ps(input, MINV), MAXV);
18 let tab_index = _mm_srli_epi32(_mm_castps_si128(clamped), 20);
20 let indices: [u32; 4] = transmute(tab_index);
24 #[cfg(all(not(unstable_bench), test))]
25 {
26 for &i in &indices {
27 debug_assert!(TO_SRGB8_TABLE
28 .get(i.checked_sub((127 - 13) * 8).unwrap() as usize)
29 .is_some());
30 }
31 }
32 let loaded: [u32; 4] = [
33 *TO_SRGB8_TABLE.get_unchecked(*indices.get_unchecked(0) as usize - (127 - 13) * 8),
34 *TO_SRGB8_TABLE.get_unchecked(*indices.get_unchecked(1) as usize - (127 - 13) * 8),
35 *TO_SRGB8_TABLE.get_unchecked(*indices.get_unchecked(2) as usize - (127 - 13) * 8),
36 *TO_SRGB8_TABLE.get_unchecked(*indices.get_unchecked(3) as usize - (127 - 13) * 8),
37 ];
38 let entry: __m128i = transmute(loaded);
39
40 let tabmult1 = _mm_srli_epi32(_mm_castps_si128(clamped), 12);
41 let tabmult2 = _mm_and_si128(tabmult1, MANT_MASK);
42 let tabmult3 = _mm_or_si128(tabmult2, TOP_SCALE);
43 let tabprod = _mm_madd_epi16(entry, tabmult3);
44 _mm_srli_epi32(tabprod, 16)
45}
46
47#[inline]
48pub unsafe fn simd_to_srgb8(input: [f32; 4]) -> [u8; 4] {
49 let res: __m128i = simd_to_srgb8_sse2(transmute(input));
50 let [a, b, c, d]: [u32; 4] = transmute(res);
51 #[cfg(all(not(unstable_bench), test))]
52 {
53 debug_assert!([a, b, c, d].iter().all(|v| *v < 256), "{:?}", [a, b, c, d]);
54 }
55 [a as u8, b as u8, c as u8, d as u8]
56 }