ini_core/parse/
sse2.rs

1
2#[cfg(target_arch = "x86")]
3use core::arch::x86::*;
4#[cfg(target_arch = "x86_64")]
5use core::arch::x86_64::*;
6
7#[inline]
8pub fn find_nl(s: &[u8]) -> usize {
9	let mut offset = 0;
10
11	unsafe {
12		let n_lit = _mm_set1_epi8(b'\n' as i8);
13		let r_lit = _mm_set1_epi8(b'\r' as i8);
14
15		while offset + 16 <= s.len() {
16			let block = _mm_loadu_si128(s.as_ptr().add(offset) as *const _);
17
18			let n_eq = _mm_cmpeq_epi8(n_lit, block);
19			let r_eq = _mm_cmpeq_epi8(r_lit, block);
20
21			let mask = _mm_movemask_epi8(_mm_or_si128(n_eq, r_eq));
22
23			if mask != 0 {
24				return offset + mask.trailing_zeros() as usize;
25			}
26
27			offset += 16;
28		}
29	}
30
31	unsafe_assert!(offset <= s.len());
32	offset += super::generic::find_nl(&s[offset..]);
33	unsafe_assert!(offset <= s.len());
34	return offset;
35}
36
37#[inline]
38pub fn find_nl_chr(s: &[u8], chr: u8) -> usize {
39	let mut offset = 0;
40
41	unsafe {
42		let n_lit = _mm_set1_epi8(b'\n' as i8);
43		let r_lit = _mm_set1_epi8(b'\r' as i8);
44		let c_lit = _mm_set1_epi8(chr as i8);
45
46		while offset + 16 <= s.len() {
47			let block = _mm_loadu_si128(s.as_ptr().add(offset) as *const _);
48
49			let n_eq = _mm_cmpeq_epi8(n_lit, block);
50			let r_eq = _mm_cmpeq_epi8(r_lit, block);
51			let c_eq = _mm_cmpeq_epi8(c_lit, block);
52
53			let mask = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(n_eq, r_eq), c_eq));
54
55			if mask != 0 {
56				return offset + mask.trailing_zeros() as usize;
57			}
58
59			offset += 16;
60		}
61	}
62
63	unsafe_assert!(offset <= s.len());
64	offset += super::generic::find_nl_chr(&s[offset..], chr);
65	unsafe_assert!(offset <= s.len());
66	return offset;
67}