ini_core/parse/
sse2.rs
1
2#[cfg(target_arch = "x86")]
3use core::arch::x86::*;
4#[cfg(target_arch = "x86_64")]
5use core::arch::x86_64::*;
6
7#[inline]
8pub fn find_nl(s: &[u8]) -> usize {
9 let mut offset = 0;
10
11 unsafe {
12 let n_lit = _mm_set1_epi8(b'\n' as i8);
13 let r_lit = _mm_set1_epi8(b'\r' as i8);
14
15 while offset + 16 <= s.len() {
16 let block = _mm_loadu_si128(s.as_ptr().add(offset) as *const _);
17
18 let n_eq = _mm_cmpeq_epi8(n_lit, block);
19 let r_eq = _mm_cmpeq_epi8(r_lit, block);
20
21 let mask = _mm_movemask_epi8(_mm_or_si128(n_eq, r_eq));
22
23 if mask != 0 {
24 return offset + mask.trailing_zeros() as usize;
25 }
26
27 offset += 16;
28 }
29 }
30
31 unsafe_assert!(offset <= s.len());
32 offset += super::generic::find_nl(&s[offset..]);
33 unsafe_assert!(offset <= s.len());
34 return offset;
35}
36
37#[inline]
38pub fn find_nl_chr(s: &[u8], chr: u8) -> usize {
39 let mut offset = 0;
40
41 unsafe {
42 let n_lit = _mm_set1_epi8(b'\n' as i8);
43 let r_lit = _mm_set1_epi8(b'\r' as i8);
44 let c_lit = _mm_set1_epi8(chr as i8);
45
46 while offset + 16 <= s.len() {
47 let block = _mm_loadu_si128(s.as_ptr().add(offset) as *const _);
48
49 let n_eq = _mm_cmpeq_epi8(n_lit, block);
50 let r_eq = _mm_cmpeq_epi8(r_lit, block);
51 let c_eq = _mm_cmpeq_epi8(c_lit, block);
52
53 let mask = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(n_eq, r_eq), c_eq));
54
55 if mask != 0 {
56 return offset + mask.trailing_zeros() as usize;
57 }
58
59 offset += 16;
60 }
61 }
62
63 unsafe_assert!(offset <= s.len());
64 offset += super::generic::find_nl_chr(&s[offset..], chr);
65 unsafe_assert!(offset <= s.len());
66 return offset;
67}