// simd_adler32/imp/wasm.rs

1use super::Adler32Imp;
2
3/// Resolves update implementation if CPU supports simd128 instructions.
4pub fn get_imp() -> Option<Adler32Imp> {
5  get_imp_inner()
6}
7
/// simd128 was enabled at compile time, so the SIMD update path is
/// unconditionally available.
#[inline]
#[cfg(target_feature = "simd128")]
fn get_imp_inner() -> Option<Adler32Imp> {
  Some(imp::update)
}
13
14#[inline]
15#[cfg(not(target_feature = "simd128"))]
16fn get_imp_inner() -> Option<Adler32Imp> {
17  None
18}
19
#[cfg(target_feature = "simd128")]
mod imp {
  /// Largest prime below 2^16; Adler-32 sums are reduced modulo this value.
  const MOD: u32 = 65521;
  /// Maximum number of bytes that can be accumulated before the 32-bit
  /// sums must be reduced modulo `MOD` to avoid overflow.
  const NMAX: usize = 5552;
  /// Bytes consumed per SIMD iteration: two 16-byte `v128` loads.
  const BLOCK_SIZE: usize = 32;
  /// `NMAX` rounded down to a whole number of blocks, so chunk processing
  /// can run block-by-block and still reduce in time.
  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

  #[cfg(target_arch = "wasm32")]
  use core::arch::wasm32::*;
  #[cfg(target_arch = "wasm64")]
  use core::arch::wasm64::*;

  /// Updates the Adler-32 state `(a, b)` with `data` using simd128
  /// instructions and returns the new state.
  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    update_imp(a, b, data)
  }

  #[inline]
  #[target_feature(enable = "simd128")]
  fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    let mut a = a as u32;
    let mut b = b as u32;

    // Split into NMAX-bounded chunks; within one chunk the accumulators
    // provably fit in u32, so one modulo reduction per chunk suffices.
    let chunks = data.chunks_exact(CHUNK_SIZE);
    let remainder = chunks.remainder();
    for chunk in chunks {
      update_chunk_block(&mut a, &mut b, chunk);
    }

    update_block(&mut a, &mut b, remainder);

    (a as u16, b as u16)
  }

  /// Processes one full `CHUNK_SIZE` chunk, then reduces both sums.
  fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert_eq!(
      chunk.len(),
      CHUNK_SIZE,
      "Unexpected chunk size (expected {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    // A full chunk is an exact multiple of BLOCK_SIZE, so the SIMD path
    // consumes every byte and there is no scalar tail here.
    reduce_add_blocks(a, b, chunk);

    *a %= MOD;
    *b %= MOD;
  }

  /// Processes the final partial chunk: SIMD for the whole blocks, then a
  /// scalar loop over the leftover bytes, then a final reduction.
  fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert!(
      chunk.len() <= CHUNK_SIZE,
      "Unexpected chunk size (expected <= {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    for byte in reduce_add_blocks(a, b, chunk) {
      *a += *byte as u32;
      *b += *a;
    }

    *a %= MOD;
    *b %= MOD;
  }

  /// Accumulates all whole `BLOCK_SIZE` blocks of `chunk` into `a` and `b`
  /// with SIMD, returning the sub-block remainder for scalar handling.
  ///
  /// Uses the standard vectorization of Adler-32: per block, `b` grows by
  /// `BLOCK_SIZE * a_before_block` (tracked in `p_v`, applied via the
  /// shift by 5 since BLOCK_SIZE == 32) plus the position-weighted byte
  /// sum (weights 32..=1 across the two 16-byte halves).
  #[inline(always)]
  fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
    if chunk.len() < BLOCK_SIZE {
      return chunk;
    }

    let blocks = chunk.chunks_exact(BLOCK_SIZE);
    let blocks_remainder = blocks.remainder();

    let weight_hi_v = get_weight_hi();
    let weight_lo_v = get_weight_lo();

    let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
    let mut a_v = u32x4(0, 0, 0, 0);
    let mut b_v = u32x4(*b, 0, 0, 0);

    for block in blocks {
      let block_ptr = block.as_ptr() as *const v128;
      // SAFETY: `block` is exactly BLOCK_SIZE (32) bytes long, so it holds
      // two consecutive 16-byte v128 values, and `read_unaligned` imposes
      // no alignment requirement on the pointer.
      let v_lo = unsafe { block_ptr.read_unaligned() };
      let v_hi = unsafe { block_ptr.add(1).read_unaligned() };

      // Accumulate the running `a` sums before this block; each unit here
      // ultimately contributes BLOCK_SIZE to `b`.
      p_v = u32x4_add(p_v, a_v);

      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
      let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
      b_v = u32x4_add(b_v, mad);

      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
      let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
      b_v = u32x4_add(b_v, mad);
    }

    // p_v counts `a`-units; each is worth BLOCK_SIZE (= 1 << 5) in `b`.
    b_v = u32x4_add(b_v, u32x4_shl(p_v, 5));

    *a += reduce_add(a_v);
    *b = reduce_add(b_v);

    blocks_remainder
  }

  /// Widening dot product: treats both inputs as u8x16, extends to u16x8,
  /// and produces four i32 lane sums of pairwise products. Byte values
  /// (<= 255) times weights (<= 32) stay well inside i16/i32 range.
  #[inline(always)]
  fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
    let a_lo = u16x8_extend_low_u8x16(a);
    let a_hi = u16x8_extend_high_u8x16(a);

    let b_lo = u16x8_extend_low_u8x16(b);
    let b_hi = u16x8_extend_high_u8x16(b);

    let lo = i32x4_dot_i16x8(a_lo, b_lo);
    let hi = i32x4_dot_i16x8(a_hi, b_hi);

    i32x4_add(lo, hi)
  }

  /// Sums each group of four adjacent u8 lanes into a u32 lane.
  #[inline(always)]
  fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
    u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
  }

  /// Horizontal sum of the four u32 lanes of `v`.
  ///
  /// Implemented with simd128 shuffles and a lane extract instead of a
  /// `std::mem::transmute`, keeping the module `core`-only (no_std
  /// compatible) and free of `unsafe`.
  #[inline(always)]
  fn reduce_add(v: v128) -> u32 {
    // Fold the high lane pair onto the low pair, then lane 1 onto lane 0.
    let shuf = i32x4_shuffle::<2, 3, 2, 3>(v, v);
    let sum = u32x4_add(v, shuf);
    let shuf = i32x4_shuffle::<1, 1, 1, 1>(sum, sum);
    let sum = u32x4_add(sum, shuf);
    u32x4_extract_lane::<0>(sum)
  }

  /// Position weights for the first 16 bytes of a block (offsets 0..16,
  /// i.e. 32 down to 17 bytes from the end of the block).
  #[inline(always)]
  fn get_weight_lo() -> v128 {
    u8x16(
      32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
    )
  }

  /// Position weights for the last 16 bytes of a block (16 down to 1).
  #[inline(always)]
  fn get_weight_hi() -> v128 {
    u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
  }
}
166
#[cfg(test)]
mod tests {
  use rand::Rng;

  /// Verifies the SIMD implementation against the reference `adler` crate
  /// for `data`. Silently skips when no SIMD implementation is available.
  fn assert_sum_eq(data: &[u8]) {
    let update = match super::get_imp() {
      Some(update) => update,
      None => return,
    };

    let (a, b) = update(1, 0, data);
    let actual = u32::from(b) << 16 | u32::from(a);
    let expected = adler::adler32_slice(data);

    assert_eq!(actual, expected, "len({})", data.len());
  }

  #[test]
  fn zeroes() {
    for &len in &[0usize, 1, 2, 100, 1024, 512 * 1024] {
      assert_sum_eq(&vec![0; len]);
    }
  }

  #[test]
  fn ones() {
    for &len in &[0usize, 1, 2, 100, 1024, 512 * 1024] {
      assert_sum_eq(&vec![1; len]);
    }
  }

  #[test]
  fn random() {
    let mut data = [0; 512 * 1024];
    rand::thread_rng().fill(&mut data[..]);

    for &len in &[1usize, 100, 1024, 512 * 1024] {
      assert_sum_eq(&data[..len]);
    }
  }

  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
  #[test]
  fn wiki() {
    assert_sum_eq(b"Wikipedia");
  }
}