1use core::convert::TryInto;
2
3use crate::common::BytesPerPixel;
4
#[cfg(all(feature = "unstable", target_arch = "x86_64"))]
mod simd {
    use std::simd::num::{SimdInt, SimdUint};
    use std::simd::{u8x4, u8x8, LaneCount, Simd, SimdElement, SupportedLaneCount};

    /// Applies the scalar Paeth predictor (`super::filter_paeth_stbi_i16`)
    /// lane-by-lane to `i16` vectors; the loop over lanes is expected to be
    /// auto-vectorized by the compiler.
    fn paeth_predictor<const N: usize>(
        a: Simd<i16, N>,
        b: Simd<i16, N>,
        c: Simd<i16, N>,
    ) -> Simd<i16, N>
    where
        LaneCount<N>: SupportedLaneCount,
    {
        let mut out = [0; N];
        for i in 0..N {
            out[i] = super::filter_paeth_stbi_i16(a[i].into(), b[i].into(), c[i].into());
        }
        out.into()
    }

    /// Like `paeth_predictor`, but stays in `u8` lanes by delegating to the
    /// `u8`-domain scalar helper `super::filter_paeth_stbi`.
    fn paeth_predictor_u8<const N: usize>(
        a: Simd<u8, N>,
        b: Simd<u8, N>,
        c: Simd<u8, N>,
    ) -> Simd<u8, N>
    where
        LaneCount<N>: SupportedLaneCount,
    {
        let mut out = [0; N];
        for i in 0..N {
            out[i] = super::filter_paeth_stbi(a[i].into(), b[i].into(), c[i].into());
        }
        out.into()
    }

    /// Rolling state carried across Paeth steps within one row:
    /// `a` is the already-decoded pixel to the left, `c` is the pixel
    /// up-and-to-the-left (i.e. the previous step's `b`).
    #[derive(Default)]
    struct PaethState<T, const N: usize>
    where
        T: SimdElement,
        LaneCount<N>: SupportedLaneCount,
    {
        // Upper-left neighbour (previous row, previous pixel).
        c: Simd<T, N>,

        // Left neighbour (current row, previous pixel), already unfiltered.
        a: Simd<T, N>,
    }

    /// One Paeth decode step in the `i16` domain: `x += paeth(a, b, c)`,
    /// then advance the rolling state for the next pixel.
    fn paeth_step<const N: usize>(
        state: &mut PaethState<i16, N>,
        b: Simd<u8, N>,
        x: &mut Simd<u8, N>,
    ) where
        LaneCount<N>: SupportedLaneCount,
    {
        // Widen the "above" bytes once; both the predictor and the next
        // step's `c` need the i16 form.
        let b = b.cast::<i16>();

        let predictor = paeth_predictor(state.a, b, state.c);
        // `Simd<u8, N>` addition wraps, matching the scalar wrapping_add path.
        *x += predictor.cast::<u8>();

        state.c = b;
        state.a = x.cast::<i16>();
    }

    /// One Paeth decode step entirely in `u8` lanes (no widening needed,
    /// since `filter_paeth_stbi` never leaves the byte range).
    fn paeth_step_u8<const N: usize>(
        state: &mut PaethState<u8, N>,
        b: Simd<u8, N>,
        x: &mut Simd<u8, N>,
    ) where
        LaneCount<N>: SupportedLaneCount,
    {
        *x += paeth_predictor_u8(state.a, b, state.c);

        state.c = b;
        state.a = *x;
    }

    /// Loads 3 bytes into the low lanes of a `u8x4`; lane 3 is zeroed.
    fn load3(src: &[u8]) -> u8x4 {
        u8x4::from_array([src[0], src[1], src[2], 0])
    }

    /// Writes only the low 3 lanes of a `u8x4` back to `dest`.
    fn store3(src: u8x4, dest: &mut [u8]) {
        dest[0..3].copy_from_slice(&src.to_array()[0..3])
    }

    /// Paeth-unfilters a 3-bytes-per-pixel row using 4-lane vectors.
    ///
    /// Each iteration loads 4 bytes but advances by only 3: lane 3's result
    /// is speculative (computed with the wrong state) but is never stored by
    /// `store3`, and the correct value is recomputed as lane 0 of the next
    /// window, so the wrong lane never reaches the output.
    ///
    /// NOTE(review): the tail `load3` indexes `src[0..3]`, so this assumes a
    /// non-empty row (len % 3 == 0 and len >= 3) — confirm callers guarantee
    /// this.
    pub fn unfilter_paeth3(mut prev_row: &[u8], mut curr_row: &mut [u8]) {
        debug_assert_eq!(prev_row.len(), curr_row.len());
        debug_assert_eq!(prev_row.len() % 3, 0);

        let mut state = PaethState::<i16, 4>::default();
        while prev_row.len() >= 4 {
            let b = u8x4::from_slice(prev_row);
            let mut x = u8x4::from_slice(curr_row);

            paeth_step(&mut state, b, &mut x);

            store3(x, curr_row);

            prev_row = &prev_row[3..];
            curr_row = &mut curr_row[3..];
        }
        // Final pixel: only 3 bytes remain, so use the padded load/store.
        let b = load3(prev_row);
        let mut x = load3(curr_row);
        paeth_step(&mut state, b, &mut x);
        store3(x, curr_row);
    }

    /// Paeth-unfilters rows whose bytes-per-pixel equals the lane count
    /// (N = 4 or 8): windows align exactly with pixels, so no speculative
    /// lanes are needed and all math stays in `u8`.
    pub fn unfilter_paeth_u8<const N: usize>(prev_row: &[u8], curr_row: &mut [u8])
    where
        LaneCount<N>: SupportedLaneCount,
    {
        debug_assert_eq!(prev_row.len(), curr_row.len());
        debug_assert_eq!(prev_row.len() % N, 0);
        assert!(matches!(N, 4 | 8));

        let mut state = PaethState::<u8, N>::default();
        for (prev_row, curr_row) in prev_row.chunks_exact(N).zip(curr_row.chunks_exact_mut(N)) {
            let b = Simd::from_slice(prev_row);
            let mut x = Simd::from_slice(curr_row);

            paeth_step_u8(&mut state, b, &mut x);

            curr_row[..N].copy_from_slice(&x.to_array()[..N]);
        }
    }

    /// Loads 6 bytes into the low lanes of a `u8x8`; lanes 6-7 are zeroed.
    fn load6(src: &[u8]) -> u8x8 {
        u8x8::from_array([src[0], src[1], src[2], src[3], src[4], src[5], 0, 0])
    }

    /// Writes only the low 6 lanes of a `u8x8` back to `dest`.
    fn store6(src: u8x8, dest: &mut [u8]) {
        dest[0..6].copy_from_slice(&src.to_array()[0..6])
    }

    /// Paeth-unfilters a 6-bytes-per-pixel row using 8-lane vectors.
    /// Same speculative-lane scheme as `unfilter_paeth3`, with two extra
    /// lanes that are computed but never stored.
    pub fn unfilter_paeth6(mut prev_row: &[u8], mut curr_row: &mut [u8]) {
        debug_assert_eq!(prev_row.len(), curr_row.len());
        debug_assert_eq!(prev_row.len() % 6, 0);

        let mut state = PaethState::<i16, 8>::default();
        while prev_row.len() >= 8 {
            let b = u8x8::from_slice(prev_row);
            let mut x = u8x8::from_slice(curr_row);

            paeth_step(&mut state, b, &mut x);

            store6(x, curr_row);

            prev_row = &prev_row[6..];
            curr_row = &mut curr_row[6..];
        }
        // Final pixel: only 6 bytes remain, so use the padded load/store.
        let b = load6(prev_row);
        let mut x = load6(curr_row);
        paeth_step(&mut state, b, &mut x);
        store6(x, curr_row);
    }
}
221
/// PNG scanline filter method — the filter-type byte (0-4) that precedes
/// each scanline in the image data, per the PNG specification.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum FilterType {
    /// Bytes are stored unmodified.
    NoFilter = 0,
    /// Each byte is the difference from the byte one pixel to the left.
    Sub = 1,
    /// Each byte is the difference from the byte directly above.
    Up = 2,
    /// Each byte is the difference from the average of left and above.
    Avg = 3,
    /// Each byte is the difference from the Paeth predictor of the
    /// left, above, and upper-left bytes.
    Paeth = 4,
}
238
239impl Default for FilterType {
240 fn default() -> Self {
241 FilterType::Sub
242 }
243}
244
245impl FilterType {
246 pub fn from_u8(n: u8) -> Option<FilterType> {
248 match n {
249 0 => Some(FilterType::NoFilter),
250 1 => Some(FilterType::Sub),
251 2 => Some(FilterType::Up),
252 3 => Some(FilterType::Avg),
253 4 => Some(FilterType::Paeth),
254 _ => None,
255 }
256 }
257}
258
/// Controls whether `filter` applies the caller-specified filter as-is
/// (`NonAdaptive`) or picks a filter per scanline with the
/// minimum-sum-of-absolute-differences heuristic (`Adaptive`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum AdaptiveFilterType {
    Adaptive,
    NonAdaptive,
}
273
274impl Default for AdaptiveFilterType {
275 fn default() -> Self {
276 AdaptiveFilterType::NonAdaptive
277 }
278}
279
/// Straightforward Paeth predictor (RFC 2083 formulation).
///
/// For predictor `p = a + b - c`, the distances simplify algebraically to
/// `|p - a| = |b - c|`, `|p - b| = |a - c|`, `|p - c| = |a + b - 2c|`.
/// Returns the neighbour closest to `p`; ties prefer `a`, then `b`.
fn filter_paeth(a: u8, b: u8, c: u8) -> u8 {
    let (wa, wb, wc) = (i16::from(a), i16::from(b), i16::from(c));
    let pa = (wb - wc).abs();
    let pb = (wa - wc).abs();
    let pc = ((wa - wc) + (wb - wc)).abs();

    if pa <= pb && pa <= pc {
        a
    } else if pb <= pc {
        b
    } else {
        c
    }
}
301
/// Branch-light Paeth predictor using the stb_image reformulation.
///
/// Produces exactly the same results as `filter_paeth` for all byte inputs
/// (see the `paeth_impls_are_equivalent` test), but the two conditional
/// selects compile to cmov-style code on most targets.
fn filter_paeth_stbi(a: u8, b: u8, c: u8) -> u8 {
    // `thresh` partitions the input space so that comparing it against
    // min(a, b) and max(a, b) picks the same neighbour as the distance
    // comparisons in the classic formulation.
    let thresh = i16::from(c) * 3 - (i16::from(a) + i16::from(b));
    let lo = a.min(b);
    let hi = a.max(b);
    let t0 = if hi as i16 <= thresh { lo } else { c };
    // Idiomatic tail expression instead of an explicit `return`.
    if thresh <= lo as i16 {
        hi
    } else {
        t0
    }
}
318
/// `i16`-domain variant of `filter_paeth_stbi`, used by the SIMD paths.
///
/// Inputs are expected to be widened bytes (`0..=255`) — the SIMD callers
/// cast `u8` lanes to `i16` before calling — so `c * 3` and `a + b` cannot
/// overflow `i16` in practice.
#[cfg(any(test, all(feature = "unstable", target_arch = "x86_64")))]
fn filter_paeth_stbi_i16(a: i16, b: i16, c: i16) -> i16 {
    let thresh = c * 3 - (a + b);
    let lo = a.min(b);
    let hi = a.max(b);
    let t0 = if hi <= thresh { lo } else { c };
    // Idiomatic tail expression instead of an explicit `return`.
    if thresh <= lo {
        hi
    } else {
        t0
    }
}
330
/// Paeth predictor in the fpnge style: all arithmetic stays in `u8`.
///
/// `pa` and `pb` are the usual `|b - c|` and `|a - c|`; `pc` uses the trick
/// that when `a` and `b` lie on the same side of `c`, `|a + b - 2c|` equals
/// `|pa - pb|`, and otherwise `c` can never win, so `pc` is forced to 255.
fn filter_paeth_fpnge(a: u8, b: u8, c: u8) -> u8 {
    let pa = b.abs_diff(c);
    let pb = a.abs_diff(c);
    let pc = if (a < c) == (c < b) {
        pa.abs_diff(pb)
    } else {
        255
    };

    // Smallest distance wins; ties prefer a, then b (same as filter_paeth).
    if pa > pb || pa > pc {
        if pb <= pc {
            b
        } else {
            c
        }
    } else {
        a
    }
}
371
/// Reverses one scanline's filter in place.
///
/// `previous` is the already-unfiltered row above (empty for the first row)
/// and `current` is the filtered row, decoded in place. `tbpp` selects a
/// per-bytes-per-pixel specialization so the per-pixel state arrays have a
/// compile-time length, which lets the compiler keep them in registers.
pub(crate) fn unfilter(
    mut filter: FilterType,
    tbpp: BytesPerPixel,
    previous: &[u8],
    current: &mut [u8],
) {
    use self::FilterType::*;

    // On the first row there is no row above: Paeth degenerates to Sub and
    // Up degenerates to NoFilter. Avg keeps its own empty-previous arm below.
    if previous.is_empty() {
        if filter == Paeth {
            filter = Sub;
        } else if filter == Up {
            filter = NoFilter;
        }
    }

    match filter {
        NoFilter => {}
        // Sub: add the decoded byte one pixel to the left (wrapping).
        Sub => match tbpp {
            BytesPerPixel::One => {
                current.iter_mut().reduce(|&mut prev, curr| {
                    *curr = curr.wrapping_add(prev);
                    curr
                });
            }
            BytesPerPixel::Two => {
                let mut prev = [0; 2];
                for chunk in current.chunks_exact_mut(2) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0]),
                        chunk[1].wrapping_add(prev[1]),
                    ];
                    *TryInto::<&mut [u8; 2]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Three => {
                let mut prev = [0; 3];
                for chunk in current.chunks_exact_mut(3) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0]),
                        chunk[1].wrapping_add(prev[1]),
                        chunk[2].wrapping_add(prev[2]),
                    ];
                    *TryInto::<&mut [u8; 3]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Four => {
                let mut prev = [0; 4];
                for chunk in current.chunks_exact_mut(4) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0]),
                        chunk[1].wrapping_add(prev[1]),
                        chunk[2].wrapping_add(prev[2]),
                        chunk[3].wrapping_add(prev[3]),
                    ];
                    *TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Six => {
                let mut prev = [0; 6];
                for chunk in current.chunks_exact_mut(6) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0]),
                        chunk[1].wrapping_add(prev[1]),
                        chunk[2].wrapping_add(prev[2]),
                        chunk[3].wrapping_add(prev[3]),
                        chunk[4].wrapping_add(prev[4]),
                        chunk[5].wrapping_add(prev[5]),
                    ];
                    *TryInto::<&mut [u8; 6]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Eight => {
                let mut prev = [0; 8];
                for chunk in current.chunks_exact_mut(8) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0]),
                        chunk[1].wrapping_add(prev[1]),
                        chunk[2].wrapping_add(prev[2]),
                        chunk[3].wrapping_add(prev[3]),
                        chunk[4].wrapping_add(prev[4]),
                        chunk[5].wrapping_add(prev[5]),
                        chunk[6].wrapping_add(prev[6]),
                        chunk[7].wrapping_add(prev[7]),
                    ];
                    *TryInto::<&mut [u8; 8]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
        },
        // Up: add the byte directly above (wrapping).
        Up => {
            for (curr, &above) in current.iter_mut().zip(previous) {
                *curr = curr.wrapping_add(above);
            }
        }
        // Avg on the first row: the byte above is implicitly 0, so the
        // predictor reduces to left / 2.
        Avg if previous.is_empty() => match tbpp {
            BytesPerPixel::One => {
                current.iter_mut().reduce(|&mut prev, curr| {
                    *curr = curr.wrapping_add(prev / 2);
                    curr
                });
            }
            BytesPerPixel::Two => {
                let mut prev = [0; 2];
                for chunk in current.chunks_exact_mut(2) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0] / 2),
                        chunk[1].wrapping_add(prev[1] / 2),
                    ];
                    *TryInto::<&mut [u8; 2]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Three => {
                let mut prev = [0; 3];
                for chunk in current.chunks_exact_mut(3) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0] / 2),
                        chunk[1].wrapping_add(prev[1] / 2),
                        chunk[2].wrapping_add(prev[2] / 2),
                    ];
                    *TryInto::<&mut [u8; 3]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Four => {
                let mut prev = [0; 4];
                for chunk in current.chunks_exact_mut(4) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0] / 2),
                        chunk[1].wrapping_add(prev[1] / 2),
                        chunk[2].wrapping_add(prev[2] / 2),
                        chunk[3].wrapping_add(prev[3] / 2),
                    ];
                    *TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Six => {
                let mut prev = [0; 6];
                for chunk in current.chunks_exact_mut(6) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0] / 2),
                        chunk[1].wrapping_add(prev[1] / 2),
                        chunk[2].wrapping_add(prev[2] / 2),
                        chunk[3].wrapping_add(prev[3] / 2),
                        chunk[4].wrapping_add(prev[4] / 2),
                        chunk[5].wrapping_add(prev[5] / 2),
                    ];
                    *TryInto::<&mut [u8; 6]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
            BytesPerPixel::Eight => {
                let mut prev = [0; 8];
                for chunk in current.chunks_exact_mut(8) {
                    let new_chunk = [
                        chunk[0].wrapping_add(prev[0] / 2),
                        chunk[1].wrapping_add(prev[1] / 2),
                        chunk[2].wrapping_add(prev[2] / 2),
                        chunk[3].wrapping_add(prev[3] / 2),
                        chunk[4].wrapping_add(prev[4] / 2),
                        chunk[5].wrapping_add(prev[5] / 2),
                        chunk[6].wrapping_add(prev[6] / 2),
                        chunk[7].wrapping_add(prev[7] / 2),
                    ];
                    *TryInto::<&mut [u8; 8]>::try_into(chunk).unwrap() = new_chunk;
                    prev = new_chunk;
                }
            }
        },
        // Avg: add floor((left + above) / 2); the sum is computed in u16 so
        // it cannot overflow before the division.
        Avg => match tbpp {
            BytesPerPixel::One => {
                let mut lprev = [0; 1];
                for (chunk, above) in current.chunks_exact_mut(1).zip(previous.chunks_exact(1)) {
                    let new_chunk =
                        [chunk[0].wrapping_add(((above[0] as u16 + lprev[0] as u16) / 2) as u8)];
                    *TryInto::<&mut [u8; 1]>::try_into(chunk).unwrap() = new_chunk;
                    lprev = new_chunk;
                }
            }
            BytesPerPixel::Two => {
                let mut lprev = [0; 2];
                for (chunk, above) in current.chunks_exact_mut(2).zip(previous.chunks_exact(2)) {
                    let new_chunk = [
                        chunk[0].wrapping_add(((above[0] as u16 + lprev[0] as u16) / 2) as u8),
                        chunk[1].wrapping_add(((above[1] as u16 + lprev[1] as u16) / 2) as u8),
                    ];
                    *TryInto::<&mut [u8; 2]>::try_into(chunk).unwrap() = new_chunk;
                    lprev = new_chunk;
                }
            }
            BytesPerPixel::Three => {
                let mut lprev = [0; 3];
                for (chunk, above) in current.chunks_exact_mut(3).zip(previous.chunks_exact(3)) {
                    let new_chunk = [
                        chunk[0].wrapping_add(((above[0] as u16 + lprev[0] as u16) / 2) as u8),
                        chunk[1].wrapping_add(((above[1] as u16 + lprev[1] as u16) / 2) as u8),
                        chunk[2].wrapping_add(((above[2] as u16 + lprev[2] as u16) / 2) as u8),
                    ];
                    *TryInto::<&mut [u8; 3]>::try_into(chunk).unwrap() = new_chunk;
                    lprev = new_chunk;
                }
            }
            BytesPerPixel::Four => {
                let mut lprev = [0; 4];
                for (chunk, above) in current.chunks_exact_mut(4).zip(previous.chunks_exact(4)) {
                    let new_chunk = [
                        chunk[0].wrapping_add(((above[0] as u16 + lprev[0] as u16) / 2) as u8),
                        chunk[1].wrapping_add(((above[1] as u16 + lprev[1] as u16) / 2) as u8),
                        chunk[2].wrapping_add(((above[2] as u16 + lprev[2] as u16) / 2) as u8),
                        chunk[3].wrapping_add(((above[3] as u16 + lprev[3] as u16) / 2) as u8),
                    ];
                    *TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
                    lprev = new_chunk;
                }
            }
            BytesPerPixel::Six => {
                let mut lprev = [0; 6];
                for (chunk, above) in current.chunks_exact_mut(6).zip(previous.chunks_exact(6)) {
                    let new_chunk = [
                        chunk[0].wrapping_add(((above[0] as u16 + lprev[0] as u16) / 2) as u8),
                        chunk[1].wrapping_add(((above[1] as u16 + lprev[1] as u16) / 2) as u8),
                        chunk[2].wrapping_add(((above[2] as u16 + lprev[2] as u16) / 2) as u8),
                        chunk[3].wrapping_add(((above[3] as u16 + lprev[3] as u16) / 2) as u8),
                        chunk[4].wrapping_add(((above[4] as u16 + lprev[4] as u16) / 2) as u8),
                        chunk[5].wrapping_add(((above[5] as u16 + lprev[5] as u16) / 2) as u8),
                    ];
                    *TryInto::<&mut [u8; 6]>::try_into(chunk).unwrap() = new_chunk;
                    lprev = new_chunk;
                }
            }
            BytesPerPixel::Eight => {
                let mut lprev = [0; 8];
                for (chunk, above) in current.chunks_exact_mut(8).zip(previous.chunks_exact(8)) {
                    let new_chunk = [
                        chunk[0].wrapping_add(((above[0] as u16 + lprev[0] as u16) / 2) as u8),
                        chunk[1].wrapping_add(((above[1] as u16 + lprev[1] as u16) / 2) as u8),
                        chunk[2].wrapping_add(((above[2] as u16 + lprev[2] as u16) / 2) as u8),
                        chunk[3].wrapping_add(((above[3] as u16 + lprev[3] as u16) / 2) as u8),
                        chunk[4].wrapping_add(((above[4] as u16 + lprev[4] as u16) / 2) as u8),
                        chunk[5].wrapping_add(((above[5] as u16 + lprev[5] as u16) / 2) as u8),
                        chunk[6].wrapping_add(((above[6] as u16 + lprev[6] as u16) / 2) as u8),
                        chunk[7].wrapping_add(((above[7] as u16 + lprev[7] as u16) / 2) as u8),
                    ];
                    *TryInto::<&mut [u8; 8]>::try_into(chunk).unwrap() = new_chunk;
                    lprev = new_chunk;
                }
            }
        },
        // Paeth: add the Paeth predictor of (left, above, upper-left).
        // With the "unstable" feature on x86_64 the cfg'd blocks below
        // dispatch to the SIMD module and `return` early, which is why the
        // scalar fallbacks need `#[allow(unreachable_code)]`.
        #[allow(unreachable_code)]
        Paeth => {
            // Choice of scalar predictor per target; presumably based on
            // benchmarking (stbi variant on x86_64, classic elsewhere).
            let filter_paeth_decode = if cfg!(target_arch = "x86_64") {
                filter_paeth_stbi
            } else {
                filter_paeth
            };

            match tbpp {
                BytesPerPixel::One => {
                    let mut a_bpp = [0; 1];
                    let mut c_bpp = [0; 1];
                    for (chunk, b_bpp) in current.chunks_exact_mut(1).zip(previous.chunks_exact(1))
                    {
                        let new_chunk = [chunk[0]
                            .wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0]))];
                        *TryInto::<&mut [u8; 1]>::try_into(chunk).unwrap() = new_chunk;
                        a_bpp = new_chunk;
                        c_bpp = b_bpp.try_into().unwrap();
                    }
                }
                BytesPerPixel::Two => {
                    let mut a_bpp = [0; 2];
                    let mut c_bpp = [0; 2];
                    for (chunk, b_bpp) in current.chunks_exact_mut(2).zip(previous.chunks_exact(2))
                    {
                        let new_chunk = [
                            chunk[0]
                                .wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
                            chunk[1]
                                .wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
                        ];
                        *TryInto::<&mut [u8; 2]>::try_into(chunk).unwrap() = new_chunk;
                        a_bpp = new_chunk;
                        c_bpp = b_bpp.try_into().unwrap();
                    }
                }
                BytesPerPixel::Three => {
                    #[cfg(all(feature = "unstable", target_arch = "x86_64"))]
                    {
                        simd::unfilter_paeth3(previous, current);
                        return;
                    }

                    let mut a_bpp = [0; 3];
                    let mut c_bpp = [0; 3];
                    for (chunk, b_bpp) in current.chunks_exact_mut(3).zip(previous.chunks_exact(3))
                    {
                        let new_chunk = [
                            chunk[0]
                                .wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
                            chunk[1]
                                .wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
                            chunk[2]
                                .wrapping_add(filter_paeth_decode(a_bpp[2], b_bpp[2], c_bpp[2])),
                        ];
                        *TryInto::<&mut [u8; 3]>::try_into(chunk).unwrap() = new_chunk;
                        a_bpp = new_chunk;
                        c_bpp = b_bpp.try_into().unwrap();
                    }
                }
                BytesPerPixel::Four => {
                    #[cfg(all(feature = "unstable", target_arch = "x86_64"))]
                    {
                        simd::unfilter_paeth_u8::<4>(previous, current);
                        return;
                    }

                    let mut a_bpp = [0; 4];
                    let mut c_bpp = [0; 4];
                    for (chunk, b_bpp) in current.chunks_exact_mut(4).zip(previous.chunks_exact(4))
                    {
                        let new_chunk = [
                            chunk[0]
                                .wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
                            chunk[1]
                                .wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
                            chunk[2]
                                .wrapping_add(filter_paeth_decode(a_bpp[2], b_bpp[2], c_bpp[2])),
                            chunk[3]
                                .wrapping_add(filter_paeth_decode(a_bpp[3], b_bpp[3], c_bpp[3])),
                        ];
                        *TryInto::<&mut [u8; 4]>::try_into(chunk).unwrap() = new_chunk;
                        a_bpp = new_chunk;
                        c_bpp = b_bpp.try_into().unwrap();
                    }
                }
                BytesPerPixel::Six => {
                    #[cfg(all(feature = "unstable", target_arch = "x86_64"))]
                    {
                        simd::unfilter_paeth6(previous, current);
                        return;
                    }

                    let mut a_bpp = [0; 6];
                    let mut c_bpp = [0; 6];
                    for (chunk, b_bpp) in current.chunks_exact_mut(6).zip(previous.chunks_exact(6))
                    {
                        let new_chunk = [
                            chunk[0]
                                .wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
                            chunk[1]
                                .wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
                            chunk[2]
                                .wrapping_add(filter_paeth_decode(a_bpp[2], b_bpp[2], c_bpp[2])),
                            chunk[3]
                                .wrapping_add(filter_paeth_decode(a_bpp[3], b_bpp[3], c_bpp[3])),
                            chunk[4]
                                .wrapping_add(filter_paeth_decode(a_bpp[4], b_bpp[4], c_bpp[4])),
                            chunk[5]
                                .wrapping_add(filter_paeth_decode(a_bpp[5], b_bpp[5], c_bpp[5])),
                        ];
                        *TryInto::<&mut [u8; 6]>::try_into(chunk).unwrap() = new_chunk;
                        a_bpp = new_chunk;
                        c_bpp = b_bpp.try_into().unwrap();
                    }
                }
                BytesPerPixel::Eight => {
                    #[cfg(all(feature = "unstable", target_arch = "x86_64"))]
                    {
                        simd::unfilter_paeth_u8::<8>(previous, current);
                        return;
                    }

                    let mut a_bpp = [0; 8];
                    let mut c_bpp = [0; 8];
                    for (chunk, b_bpp) in current.chunks_exact_mut(8).zip(previous.chunks_exact(8))
                    {
                        let new_chunk = [
                            chunk[0]
                                .wrapping_add(filter_paeth_decode(a_bpp[0], b_bpp[0], c_bpp[0])),
                            chunk[1]
                                .wrapping_add(filter_paeth_decode(a_bpp[1], b_bpp[1], c_bpp[1])),
                            chunk[2]
                                .wrapping_add(filter_paeth_decode(a_bpp[2], b_bpp[2], c_bpp[2])),
                            chunk[3]
                                .wrapping_add(filter_paeth_decode(a_bpp[3], b_bpp[3], c_bpp[3])),
                            chunk[4]
                                .wrapping_add(filter_paeth_decode(a_bpp[4], b_bpp[4], c_bpp[4])),
                            chunk[5]
                                .wrapping_add(filter_paeth_decode(a_bpp[5], b_bpp[5], c_bpp[5])),
                            chunk[6]
                                .wrapping_add(filter_paeth_decode(a_bpp[6], b_bpp[6], c_bpp[6])),
                            chunk[7]
                                .wrapping_add(filter_paeth_decode(a_bpp[7], b_bpp[7], c_bpp[7])),
                        ];
                        *TryInto::<&mut [u8; 8]>::try_into(chunk).unwrap() = new_chunk;
                        a_bpp = new_chunk;
                        c_bpp = b_bpp.try_into().unwrap();
                    }
                }
            }
        }
    }
}
867
/// Applies filter `method` to the scanline `current` (with `previous` as
/// the unfiltered row above) and writes the filtered bytes to `output`.
///
/// Returns the filter that was applied (always `method`; the return value
/// mirrors the adaptive path in `filter`). `len` is `current.len()`;
/// `previous` and `output` are expected to be the same length as `current`.
fn filter_internal(
    method: FilterType,
    bpp: usize,
    len: usize,
    previous: &[u8],
    current: &[u8],
    output: &mut [u8],
) -> FilterType {
    use self::FilterType::*;

    // Fixed-size chunks let the compiler hoist bounds checks out of the
    // inner loops and vectorize them; remainders are handled byte-wise.
    const CHUNK_SIZE: usize = 32;

    match method {
        NoFilter => {
            output.copy_from_slice(current);
            NoFilter
        }
        Sub => {
            // "prev" here is the byte `bpp` positions to the left in the
            // same row; the first `bpp` bytes have no left neighbour and
            // are copied verbatim below.
            let mut out_chunks = output[bpp..].chunks_exact_mut(CHUNK_SIZE);
            let mut cur_chunks = current[bpp..].chunks_exact(CHUNK_SIZE);
            let mut prev_chunks = current[..len - bpp].chunks_exact(CHUNK_SIZE);

            for ((out, cur), prev) in (&mut out_chunks).zip(&mut cur_chunks).zip(&mut prev_chunks) {
                for i in 0..CHUNK_SIZE {
                    out[i] = cur[i].wrapping_sub(prev[i]);
                }
            }

            for ((out, cur), &prev) in out_chunks
                .into_remainder()
                .iter_mut()
                .zip(cur_chunks.remainder())
                .zip(prev_chunks.remainder())
            {
                *out = cur.wrapping_sub(prev);
            }

            output[..bpp].copy_from_slice(&current[..bpp]);
            Sub
        }
        Up => {
            let mut out_chunks = output.chunks_exact_mut(CHUNK_SIZE);
            let mut cur_chunks = current.chunks_exact(CHUNK_SIZE);
            let mut prev_chunks = previous.chunks_exact(CHUNK_SIZE);

            for ((out, cur), prev) in (&mut out_chunks).zip(&mut cur_chunks).zip(&mut prev_chunks) {
                for i in 0..CHUNK_SIZE {
                    out[i] = cur[i].wrapping_sub(prev[i]);
                }
            }

            for ((out, cur), &prev) in out_chunks
                .into_remainder()
                .iter_mut()
                .zip(cur_chunks.remainder())
                .zip(prev_chunks.remainder())
            {
                *out = cur.wrapping_sub(prev);
            }
            Up
        }
        Avg => {
            let mut out_chunks = output[bpp..].chunks_exact_mut(CHUNK_SIZE);
            let mut cur_chunks = current[bpp..].chunks_exact(CHUNK_SIZE);
            let mut cur_minus_bpp_chunks = current[..len - bpp].chunks_exact(CHUNK_SIZE);
            let mut prev_chunks = previous[bpp..].chunks_exact(CHUNK_SIZE);

            for (((out, cur), cur_minus_bpp), prev) in (&mut out_chunks)
                .zip(&mut cur_chunks)
                .zip(&mut cur_minus_bpp_chunks)
                .zip(&mut prev_chunks)
            {
                for i in 0..CHUNK_SIZE {
                    // (a & b) + ((a ^ b) >> 1) == floor((a + b) / 2) without
                    // the overflow a plain u8 addition would risk.
                    out[i] = cur[i].wrapping_sub(
                        (cur_minus_bpp[i] & prev[i]) + ((cur_minus_bpp[i] ^ prev[i]) >> 1),
                    );
                }
            }

            for (((out, cur), &cur_minus_bpp), &prev) in out_chunks
                .into_remainder()
                .iter_mut()
                .zip(cur_chunks.remainder())
                .zip(cur_minus_bpp_chunks.remainder())
                .zip(prev_chunks.remainder())
            {
                *out = cur.wrapping_sub((cur_minus_bpp & prev) + ((cur_minus_bpp ^ prev) >> 1));
            }

            // First pixel: no left neighbour, predictor is above / 2.
            for i in 0..bpp {
                output[i] = current[i].wrapping_sub(previous[i] / 2);
            }
            Avg
        }
        Paeth => {
            let mut out_chunks = output[bpp..].chunks_exact_mut(CHUNK_SIZE);
            let mut cur_chunks = current[bpp..].chunks_exact(CHUNK_SIZE);
            let mut a_chunks = current[..len - bpp].chunks_exact(CHUNK_SIZE);
            let mut b_chunks = previous[bpp..].chunks_exact(CHUNK_SIZE);
            let mut c_chunks = previous[..len - bpp].chunks_exact(CHUNK_SIZE);

            for ((((out, cur), a), b), c) in (&mut out_chunks)
                .zip(&mut cur_chunks)
                .zip(&mut a_chunks)
                .zip(&mut b_chunks)
                .zip(&mut c_chunks)
            {
                for i in 0..CHUNK_SIZE {
                    out[i] = cur[i].wrapping_sub(filter_paeth_fpnge(a[i], b[i], c[i]));
                }
            }

            for ((((out, cur), &a), &b), &c) in out_chunks
                .into_remainder()
                .iter_mut()
                .zip(cur_chunks.remainder())
                .zip(a_chunks.remainder())
                .zip(b_chunks.remainder())
                .zip(c_chunks.remainder())
            {
                *out = cur.wrapping_sub(filter_paeth_fpnge(a, b, c));
            }

            // First pixel: left and upper-left are 0, and
            // filter_paeth_fpnge(0, b, 0) evaluates to b (the byte above).
            for i in 0..bpp {
                output[i] = current[i].wrapping_sub(filter_paeth_fpnge(0, previous[i], 0));
            }
            Paeth
        }
    }
}
1006
1007pub(crate) fn filter(
1008 method: FilterType,
1009 adaptive: AdaptiveFilterType,
1010 bpp: BytesPerPixel,
1011 previous: &[u8],
1012 current: &[u8],
1013 output: &mut [u8],
1014) -> FilterType {
1015 use FilterType::*;
1016 let bpp = bpp.into_usize();
1017 let len = current.len();
1018
1019 match adaptive {
1020 AdaptiveFilterType::NonAdaptive => {
1021 filter_internal(method, bpp, len, previous, current, output)
1022 }
1023 AdaptiveFilterType::Adaptive => {
1024 let mut min_sum: u64 = u64::MAX;
1025 let mut filter_choice = FilterType::NoFilter;
1026 for &filter in [Sub, Up, Avg, Paeth].iter() {
1027 filter_internal(filter, bpp, len, previous, current, output);
1028 let sum = sum_buffer(output);
1029 if sum <= min_sum {
1030 min_sum = sum;
1031 filter_choice = filter;
1032 }
1033 }
1034
1035 if filter_choice != Paeth {
1036 filter_internal(filter_choice, bpp, len, previous, current, output);
1037 }
1038 filter_choice
1039 }
1040 }
1041}
1042
/// Scores a filtered scanline for the adaptive heuristic.
///
/// Each byte is read as a signed (two's-complement) residual and its
/// magnitude is accumulated; saturating adds keep the total well-defined
/// regardless of row length.
fn sum_buffer(buf: &[u8]) -> u64 {
    const CHUNK_SIZE: usize = 32;

    let mut chunks = buf.chunks_exact(CHUNK_SIZE);
    let mut total = 0_u64;

    // Summing fixed-size chunks first helps the compiler unroll/vectorize.
    for chunk in &mut chunks {
        let partial: u64 = chunk
            .iter()
            .map(|&b| u64::from((b as i8).unsigned_abs()))
            .sum();
        total = total.saturating_add(partial);
    }

    let tail: u64 = chunks
        .remainder()
        .iter()
        .map(|&b| u64::from((b as i8).unsigned_abs()))
        .sum();

    total.saturating_add(tail)
}
1066
#[cfg(test)]
mod test {
    use super::*;
    use core::iter;

    /// Filtering then unfiltering must reproduce the input for every
    /// (filter, bytes-per-pixel) combination; previous row is constant 1s.
    #[test]
    fn roundtrip() {
        // A multiple of 8 so every bpp in `bpps` divides the row evenly.
        const LEN: u8 = 240;
        let previous: Vec<_> = iter::repeat(1).take(LEN.into()).collect();
        let current: Vec<_> = (0..LEN).collect();
        let expected = current.clone();
        let adaptive = AdaptiveFilterType::NonAdaptive;

        let roundtrip = |kind, bpp: BytesPerPixel| {
            let mut output = vec![0; LEN.into()];
            filter(kind, adaptive, bpp, &previous, &current, &mut output);
            unfilter(kind, bpp, &previous, &mut output);
            assert_eq!(
                output, expected,
                "Filtering {:?} with {:?} does not roundtrip",
                bpp, kind
            );
        };

        let filters = [
            FilterType::NoFilter,
            FilterType::Sub,
            FilterType::Up,
            FilterType::Avg,
            FilterType::Paeth,
        ];

        let bpps = [
            BytesPerPixel::One,
            BytesPerPixel::Two,
            BytesPerPixel::Three,
            BytesPerPixel::Four,
            BytesPerPixel::Six,
            BytesPerPixel::Eight,
        ];

        for &filter in filters.iter() {
            for &bpp in bpps.iter() {
                roundtrip(filter, bpp);
            }
        }
    }

    /// Exhaustive (256^3) cross-check of all Paeth predictor variants.
    /// Marked `#[ignore]` because it is slow; run with `cargo test -- --ignored`.
    #[test]
    #[ignore] fn paeth_impls_are_equivalent() {
        for a in 0..=255 {
            for b in 0..=255 {
                for c in 0..=255 {
                    let baseline = filter_paeth(a, b, c);
                    let fpnge = filter_paeth_fpnge(a, b, c);
                    let stbi = filter_paeth_stbi(a, b, c);
                    let stbi_i16 = filter_paeth_stbi_i16(a as i16, b as i16, c as i16);

                    assert_eq!(baseline, fpnge);
                    assert_eq!(baseline, stbi);
                    assert_eq!(baseline as i16, stbi_i16);
                }
            }
        }
    }

    /// Same roundtrip as above but with an ascending previous row, which
    /// exercises non-trivial b/c inputs in the Avg and Paeth paths.
    #[test]
    fn roundtrip_ascending_previous_line() {
        const LEN: u8 = 240;
        let previous: Vec<_> = (0..LEN).collect();
        let current: Vec<_> = (0..LEN).collect();
        let expected = current.clone();
        let adaptive = AdaptiveFilterType::NonAdaptive;

        let roundtrip = |kind, bpp: BytesPerPixel| {
            let mut output = vec![0; LEN.into()];
            filter(kind, adaptive, bpp, &previous, &current, &mut output);
            unfilter(kind, bpp, &previous, &mut output);
            assert_eq!(
                output, expected,
                "Filtering {:?} with {:?} does not roundtrip",
                bpp, kind
            );
        };

        let filters = [
            FilterType::NoFilter,
            FilterType::Sub,
            FilterType::Up,
            FilterType::Avg,
            FilterType::Paeth,
        ];

        let bpps = [
            BytesPerPixel::One,
            BytesPerPixel::Two,
            BytesPerPixel::Three,
            BytesPerPixel::Four,
            BytesPerPixel::Six,
            BytesPerPixel::Eight,
        ];

        for &filter in filters.iter() {
            for &bpp in bpps.iter() {
                roundtrip(filter, bpp);
            }
        }
    }

    /// `sum_buffer` treats bytes as signed residuals: 0..=127 map to
    /// themselves, 128..=255 map to 128 down to 1 — hence the two sums.
    #[test]
    fn sum_buffer_test() {
        let sum = (0..=128).sum::<u64>() + (1..=127).sum::<u64>();
        let buf: Vec<u8> = (0_u8..=255).collect();

        assert_eq!(sum, crate::filter::sum_buffer(&buf));
    }
}