1#![allow(clippy::excessive_precision)]
8#![allow(clippy::erasing_op)]
10#![allow(clippy::identity_op)]
11use crate::parser::Dimensions;
12use core::num::Wrapping;
13
14pub(crate) fn choose_idct_size(full_size: Dimensions, requested_size: Dimensions) -> usize {
15 fn scaled(len: u16, scale: usize) -> u16 {
16 ((len as u32 * scale as u32 - 1) / 8 + 1) as u16
17 }
18
19 for &scale in &[1, 2, 4] {
20 if scaled(full_size.width, scale) >= requested_size.width
21 || scaled(full_size.height, scale) >= requested_size.height
22 {
23 return scale;
24 }
25 }
26
27 8
28}
29
30#[test]
31fn test_choose_idct_size() {
32 assert_eq!(
33 choose_idct_size(
34 Dimensions {
35 width: 5472,
36 height: 3648
37 },
38 Dimensions {
39 width: 200,
40 height: 200
41 }
42 ),
43 1
44 );
45 assert_eq!(
46 choose_idct_size(
47 Dimensions {
48 width: 5472,
49 height: 3648
50 },
51 Dimensions {
52 width: 500,
53 height: 500
54 }
55 ),
56 1
57 );
58 assert_eq!(
59 choose_idct_size(
60 Dimensions {
61 width: 5472,
62 height: 3648
63 },
64 Dimensions {
65 width: 684,
66 height: 456
67 }
68 ),
69 1
70 );
71 assert_eq!(
72 choose_idct_size(
73 Dimensions {
74 width: 5472,
75 height: 3648
76 },
77 Dimensions {
78 width: 999,
79 height: 456
80 }
81 ),
82 1
83 );
84 assert_eq!(
85 choose_idct_size(
86 Dimensions {
87 width: 5472,
88 height: 3648
89 },
90 Dimensions {
91 width: 684,
92 height: 999
93 }
94 ),
95 1
96 );
97 assert_eq!(
98 choose_idct_size(
99 Dimensions {
100 width: 500,
101 height: 333
102 },
103 Dimensions {
104 width: 63,
105 height: 42
106 }
107 ),
108 1
109 );
110
111 assert_eq!(
112 choose_idct_size(
113 Dimensions {
114 width: 5472,
115 height: 3648
116 },
117 Dimensions {
118 width: 685,
119 height: 999
120 }
121 ),
122 2
123 );
124 assert_eq!(
125 choose_idct_size(
126 Dimensions {
127 width: 5472,
128 height: 3648
129 },
130 Dimensions {
131 width: 1000,
132 height: 1000
133 }
134 ),
135 2
136 );
137 assert_eq!(
138 choose_idct_size(
139 Dimensions {
140 width: 5472,
141 height: 3648
142 },
143 Dimensions {
144 width: 1400,
145 height: 1400
146 }
147 ),
148 4
149 );
150
151 assert_eq!(
152 choose_idct_size(
153 Dimensions {
154 width: 5472,
155 height: 3648
156 },
157 Dimensions {
158 width: 5472,
159 height: 3648
160 }
161 ),
162 8
163 );
164 assert_eq!(
165 choose_idct_size(
166 Dimensions {
167 width: 5472,
168 height: 3648
169 },
170 Dimensions {
171 width: 16384,
172 height: 16384
173 }
174 ),
175 8
176 );
177 assert_eq!(
178 choose_idct_size(
179 Dimensions {
180 width: 1,
181 height: 1
182 },
183 Dimensions {
184 width: 65535,
185 height: 65535
186 }
187 ),
188 8
189 );
190 assert_eq!(
191 choose_idct_size(
192 Dimensions {
193 width: 5472,
194 height: 3648
195 },
196 Dimensions {
197 width: 16384,
198 height: 16384
199 }
200 ),
201 8
202 );
203}
204
205pub(crate) fn dequantize_and_idct_block(
206 scale: usize,
207 coefficients: &[i16; 64],
208 quantization_table: &[u16; 64],
209 output_linestride: usize,
210 output: &mut [u8],
211) {
212 match scale {
213 8 => dequantize_and_idct_block_8x8(
214 coefficients,
215 quantization_table,
216 output_linestride,
217 output,
218 ),
219 4 => dequantize_and_idct_block_4x4(
220 coefficients,
221 quantization_table,
222 output_linestride,
223 output,
224 ),
225 2 => dequantize_and_idct_block_2x2(
226 coefficients,
227 quantization_table,
228 output_linestride,
229 output,
230 ),
231 1 => dequantize_and_idct_block_1x1(
232 coefficients,
233 quantization_table,
234 output_linestride,
235 output,
236 ),
237 _ => panic!("Unsupported IDCT scale {scale}/8"),
238 }
239}
240
241pub fn dequantize_and_idct_block_8x8(
242 coefficients: &[i16; 64],
243 quantization_table: &[u16; 64],
244 output_linestride: usize,
245 output: &mut [u8],
246) {
247 #[cfg(not(feature = "platform_independent"))]
248 if let Some(idct) = crate::arch::get_dequantize_and_idct_block_8x8() {
249 #[allow(unsafe_code)]
250 unsafe {
251 return idct(coefficients, quantization_table, output_linestride, output);
252 }
253 }
254
255 let output = output.chunks_mut(output_linestride);
256 dequantize_and_idct_block_8x8_inner(coefficients, quantization_table, output)
257}
258
259fn dequantize_and_idct_block_8x8_inner<'a, I>(
261 coefficients: &[i16; 64],
262 quantization_table: &[u16; 64],
263 output: I,
264) where
265 I: IntoIterator<Item = &'a mut [u8]>,
266 I::IntoIter: ExactSizeIterator<Item = &'a mut [u8]>,
267{
268 let output = output.into_iter();
269 debug_assert!(
270 output.len() >= 8,
271 "Output iterator has the wrong length: {}",
272 output.len()
273 );
274
275 let mut temp = [Wrapping(0); 64];
276
277 for i in 0..8 {
279 if coefficients[i + 8] == 0
280 && coefficients[i + 16] == 0
281 && coefficients[i + 24] == 0
282 && coefficients[i + 32] == 0
283 && coefficients[i + 40] == 0
284 && coefficients[i + 48] == 0
285 && coefficients[i + 56] == 0
286 {
287 let dcterm = dequantize(coefficients[i], quantization_table[i]) << 2;
288 temp[i] = dcterm;
289 temp[i + 8] = dcterm;
290 temp[i + 16] = dcterm;
291 temp[i + 24] = dcterm;
292 temp[i + 32] = dcterm;
293 temp[i + 40] = dcterm;
294 temp[i + 48] = dcterm;
295 temp[i + 56] = dcterm;
296 } else {
297 let s0 = dequantize(coefficients[i], quantization_table[i]);
298 let s1 = dequantize(coefficients[i + 8], quantization_table[i + 8]);
299 let s2 = dequantize(coefficients[i + 16], quantization_table[i + 16]);
300 let s3 = dequantize(coefficients[i + 24], quantization_table[i + 24]);
301 let s4 = dequantize(coefficients[i + 32], quantization_table[i + 32]);
302 let s5 = dequantize(coefficients[i + 40], quantization_table[i + 40]);
303 let s6 = dequantize(coefficients[i + 48], quantization_table[i + 48]);
304 let s7 = dequantize(coefficients[i + 56], quantization_table[i + 56]);
305
306 let Kernel {
307 xs: [x0, x1, x2, x3],
308 ts: [t0, t1, t2, t3],
309 } = kernel(
310 [s0, s1, s2, s3, s4, s5, s6, s7],
311 512,
314 );
315
316 temp[i] = (x0 + t3) >> 10;
317 temp[i + 56] = (x0 - t3) >> 10;
318 temp[i + 8] = (x1 + t2) >> 10;
319 temp[i + 48] = (x1 - t2) >> 10;
320 temp[i + 16] = (x2 + t1) >> 10;
321 temp[i + 40] = (x2 - t1) >> 10;
322 temp[i + 24] = (x3 + t0) >> 10;
323 temp[i + 32] = (x3 - t0) >> 10;
324 }
325 }
326
327 for (chunk, output_chunk) in temp.chunks_exact(8).zip(output) {
328 let chunk = <&[_; 8]>::try_from(chunk).unwrap();
329
330 const X_SCALE: i32 = 65536 + (128 << 17);
337
338 let output_chunk = &mut output_chunk[..8];
340
341 let (s0, rest) = chunk.split_first().unwrap();
344 if *rest == [Wrapping(0); 7] {
345 let dcterm = stbi_clamp((stbi_fsh(*s0) + Wrapping(X_SCALE)) >> 17);
346 output_chunk[0] = dcterm;
347 output_chunk[1] = dcterm;
348 output_chunk[2] = dcterm;
349 output_chunk[3] = dcterm;
350 output_chunk[4] = dcterm;
351 output_chunk[5] = dcterm;
352 output_chunk[6] = dcterm;
353 output_chunk[7] = dcterm;
354 } else {
355 let Kernel {
356 xs: [x0, x1, x2, x3],
357 ts: [t0, t1, t2, t3],
358 } = kernel(*chunk, X_SCALE);
359
360 output_chunk[0] = stbi_clamp((x0 + t3) >> 17);
361 output_chunk[7] = stbi_clamp((x0 - t3) >> 17);
362 output_chunk[1] = stbi_clamp((x1 + t2) >> 17);
363 output_chunk[6] = stbi_clamp((x1 - t2) >> 17);
364 output_chunk[2] = stbi_clamp((x2 + t1) >> 17);
365 output_chunk[5] = stbi_clamp((x2 - t1) >> 17);
366 output_chunk[3] = stbi_clamp((x3 + t0) >> 17);
367 output_chunk[4] = stbi_clamp((x3 - t0) >> 17);
368 }
369 }
370}
371
372struct Kernel {
373 xs: [Wrapping<i32>; 4],
374 ts: [Wrapping<i32>; 4],
375}
376
377#[inline]
378fn kernel_x([s0, s2, s4, s6]: [Wrapping<i32>; 4], x_scale: i32) -> [Wrapping<i32>; 4] {
379 let (t2, t3);
381 {
382 let p2 = s2;
383 let p3 = s6;
384
385 let p1 = (p2 + p3) * stbi_f2f(0.5411961);
386 t2 = p1 + p3 * stbi_f2f(-1.847759065);
387 t3 = p1 + p2 * stbi_f2f(0.765366865);
388 }
389
390 let (t0, t1);
391 {
392 let p2 = s0;
393 let p3 = s4;
394
395 t0 = stbi_fsh(p2 + p3);
396 t1 = stbi_fsh(p2 - p3);
397 }
398
399 let x0 = t0 + t3;
400 let x3 = t0 - t3;
401 let x1 = t1 + t2;
402 let x2 = t1 - t2;
403
404 let x_scale = Wrapping(x_scale);
405
406 [x0 + x_scale, x1 + x_scale, x2 + x_scale, x3 + x_scale]
407}
408
409#[inline]
410fn kernel_t([s1, s3, s5, s7]: [Wrapping<i32>; 4]) -> [Wrapping<i32>; 4] {
411 let mut t0 = s7;
413 let mut t1 = s5;
414 let mut t2 = s3;
415 let mut t3 = s1;
416
417 let p3 = t0 + t2;
418 let p4 = t1 + t3;
419 let p1 = t0 + t3;
420 let p2 = t1 + t2;
421 let p5 = (p3 + p4) * stbi_f2f(1.175875602);
422
423 t0 *= stbi_f2f(0.298631336);
424 t1 *= stbi_f2f(2.053119869);
425 t2 *= stbi_f2f(3.072711026);
426 t3 *= stbi_f2f(1.501321110);
427
428 let p1 = p5 + p1 * stbi_f2f(-0.899976223);
429 let p2 = p5 + p2 * stbi_f2f(-2.562915447);
430 let p3 = p3 * stbi_f2f(-1.961570560);
431 let p4 = p4 * stbi_f2f(-0.390180644);
432
433 t3 += p1 + p4;
434 t2 += p2 + p3;
435 t1 += p2 + p4;
436 t0 += p1 + p3;
437
438 [t0, t1, t2, t3]
439}
440
441#[inline]
442fn kernel([s0, s1, s2, s3, s4, s5, s6, s7]: [Wrapping<i32>; 8], x_scale: i32) -> Kernel {
443 Kernel {
444 xs: kernel_x([s0, s2, s4, s6], x_scale),
445 ts: kernel_t([s1, s3, s5, s7]),
446 }
447}
448
449#[inline(always)]
450fn dequantize(c: i16, q: u16) -> Wrapping<i32> {
451 Wrapping(i32::from(c) * i32::from(q))
452}
453
454fn dequantize_and_idct_block_4x4(
457 coefficients: &[i16; 64],
458 quantization_table: &[u16; 64],
459 output_linestride: usize,
460 output: &mut [u8],
461) {
462 debug_assert_eq!(coefficients.len(), 64);
463 let mut temp = [Wrapping(0i32); 4 * 4];
464
465 const CONST_BITS: usize = 12;
466 const PASS1_BITS: usize = 2;
467 const FINAL_BITS: usize = CONST_BITS + PASS1_BITS + 3;
468
469 for i in 0..4 {
471 let s0 = Wrapping(coefficients[i + 8 * 0] as i32 * quantization_table[i + 8 * 0] as i32);
472 let s1 = Wrapping(coefficients[i + 8 * 1] as i32 * quantization_table[i + 8 * 1] as i32);
473 let s2 = Wrapping(coefficients[i + 8 * 2] as i32 * quantization_table[i + 8 * 2] as i32);
474 let s3 = Wrapping(coefficients[i + 8 * 3] as i32 * quantization_table[i + 8 * 3] as i32);
475
476 let x0 = (s0 + s2) << PASS1_BITS;
477 let x2 = (s0 - s2) << PASS1_BITS;
478
479 let p1 = (s1 + s3) * stbi_f2f(0.541196100);
480 let t0 = (p1 + s3 * stbi_f2f(-1.847759065) + Wrapping(512)) >> (CONST_BITS - PASS1_BITS);
481 let t2 = (p1 + s1 * stbi_f2f(0.765366865) + Wrapping(512)) >> (CONST_BITS - PASS1_BITS);
482
483 temp[i + 4 * 0] = x0 + t2;
484 temp[i + 4 * 3] = x0 - t2;
485 temp[i + 4 * 1] = x2 + t0;
486 temp[i + 4 * 2] = x2 - t0;
487 }
488
489 for i in 0..4 {
490 let s0 = temp[i * 4 + 0];
491 let s1 = temp[i * 4 + 1];
492 let s2 = temp[i * 4 + 2];
493 let s3 = temp[i * 4 + 3];
494
495 let x0 = (s0 + s2) << CONST_BITS;
496 let x2 = (s0 - s2) << CONST_BITS;
497
498 let p1 = (s1 + s3) * stbi_f2f(0.541196100);
499 let t0 = p1 + s3 * stbi_f2f(-1.847759065);
500 let t2 = p1 + s1 * stbi_f2f(0.765366865);
501
502 let x0 = x0 + Wrapping(1 << (FINAL_BITS - 1)) + Wrapping(128 << FINAL_BITS);
509 let x2 = x2 + Wrapping(1 << (FINAL_BITS - 1)) + Wrapping(128 << FINAL_BITS);
510
511 let output = &mut output[i * output_linestride..][..4];
512 output[0] = stbi_clamp((x0 + t2) >> FINAL_BITS);
513 output[3] = stbi_clamp((x0 - t2) >> FINAL_BITS);
514 output[1] = stbi_clamp((x2 + t0) >> FINAL_BITS);
515 output[2] = stbi_clamp((x2 - t0) >> FINAL_BITS);
516 }
517}
518
519fn dequantize_and_idct_block_2x2(
520 coefficients: &[i16; 64],
521 quantization_table: &[u16; 64],
522 output_linestride: usize,
523 output: &mut [u8],
524) {
525 debug_assert_eq!(coefficients.len(), 64);
526
527 const SCALE_BITS: usize = 3;
528
529 let s00 = Wrapping(coefficients[8 * 0] as i32 * quantization_table[8 * 0] as i32);
531 let s10 = Wrapping(coefficients[8 * 1] as i32 * quantization_table[8 * 1] as i32);
532
533 let x0 = s00 + s10;
534 let x2 = s00 - s10;
535
536 let s01 = Wrapping(coefficients[8 * 0 + 1] as i32 * quantization_table[8 * 0 + 1] as i32);
538 let s11 = Wrapping(coefficients[8 * 1 + 1] as i32 * quantization_table[8 * 1 + 1] as i32);
539
540 let x1 = s01 + s11;
541 let x3 = s01 - s11;
542
543 let x0 = x0 + Wrapping(1 << (SCALE_BITS - 1)) + Wrapping(128 << SCALE_BITS);
544 let x2 = x2 + Wrapping(1 << (SCALE_BITS - 1)) + Wrapping(128 << SCALE_BITS);
545
546 output[0] = stbi_clamp((x0 + x1) >> SCALE_BITS);
548 output[1] = stbi_clamp((x0 - x1) >> SCALE_BITS);
549
550 output[output_linestride + 0] = stbi_clamp((x2 + x3) >> SCALE_BITS);
552 output[output_linestride + 1] = stbi_clamp((x2 - x3) >> SCALE_BITS);
553}
554
555fn dequantize_and_idct_block_1x1(
556 coefficients: &[i16; 64],
557 quantization_table: &[u16; 64],
558 _output_linestride: usize,
559 output: &mut [u8],
560) {
561 debug_assert_eq!(coefficients.len(), 64);
562
563 let s0 = (Wrapping(coefficients[0] as i32 * quantization_table[0] as i32) + Wrapping(128 * 8)) / Wrapping(8);
564 output[0] = stbi_clamp(s0);
565}
566
567fn stbi_clamp(x: Wrapping<i32>) -> u8 {
569 x.0.max(0).min(255) as u8
570}
571
572fn stbi_f2f(x: f32) -> Wrapping<i32> {
573 Wrapping((x * 4096.0 + 0.5) as i32)
574}
575
576fn stbi_fsh(x: Wrapping<i32>) -> Wrapping<i32> {
577 x << 12
578}
579
580#[test]
581fn test_dequantize_and_idct_block_8x8() {
582 #[rustfmt::skip]
583 let coefficients: [i16; 8 * 8] = [
584 -14, -39, 58, -2, 3, 3, 0, 1,
585 11, 27, 4, -3, 3, 0, 1, 0,
586 -6, -13, -9, -1, -2, -1, 0, 0,
587 -4, 0, -1, -2, 0, 0, 0, 0,
588 3, 0, 0, 0, 0, 0, 0, 0,
589 -3, -2, 0, 0, 0, 0, 0, 0,
590 0, 0, 0, 0, 0, 0, 0, 0,
591 0, 0, 0, 0, 0, 0, 0, 0
592 ];
593
594 #[rustfmt::skip]
595 let quantization_table: [u16; 8 * 8] = [
596 8, 6, 5, 8, 12, 20, 26, 31,
597 6, 6, 7, 10, 13, 29, 30, 28,
598 7, 7, 8, 12, 20, 29, 35, 28,
599 7, 9, 11, 15, 26, 44, 40, 31,
600 9, 11, 19, 28, 34, 55, 52, 39,
601 12, 18, 28, 32, 41, 52, 57, 46,
602 25, 32, 39, 44, 52, 61, 60, 51,
603 36, 46, 48, 49, 56, 50, 52, 50
604 ];
605 let output_linestride: usize = 8;
606 let mut output = [0u8; 8 * 8];
607 dequantize_and_idct_block_8x8(
608 &coefficients,
609 &quantization_table,
610 output_linestride,
611 &mut output,
612 );
613 #[rustfmt::skip]
614 let expected_output = [
615 118, 92, 110, 83, 77, 93, 144, 198,
616 172, 116, 114, 87, 78, 93, 146, 191,
617 194, 107, 91, 76, 71, 93, 160, 198,
618 196, 100, 80, 74, 67, 92, 174, 209,
619 182, 104, 88, 81, 68, 89, 178, 206,
620 105, 64, 59, 59, 63, 94, 183, 201,
621 35, 27, 28, 37, 72, 121, 203, 204,
622 37, 45, 41, 47, 98, 154, 223, 208
623 ];
624 for i in 0..64 {
625 assert!((output[i] as i16 - expected_output[i] as i16).abs() <= 1);
626 }
627}
628
629#[test]
630fn test_dequantize_and_idct_block_8x8_all_zero() {
631 let mut output = [0u8; 8 * 8];
632 dequantize_and_idct_block_8x8(&[0; 8 * 8], &[666; 8 * 8], 8, &mut output);
633 assert_eq!(&output[..], &[128; 8 * 8][..]);
634}
635
636#[test]
637fn test_dequantize_and_idct_block_8x8_saturated() {
638 #[cfg(not(feature = "platform_independent"))]
640 if crate::arch::get_dequantize_and_idct_block_8x8().is_some() {
641 return;
642 }
643 let mut output = [0u8; 8 * 8];
644 dequantize_and_idct_block_8x8(&[i16::MAX; 8 * 8], &[u16::MAX; 8 * 8], 8, &mut output);
645 #[rustfmt::skip]
646 let expected = [
647 0, 0, 0, 255, 255, 0, 0, 255,
648 0, 0, 215, 0, 0, 255, 255, 0,
649 255, 255, 255, 255, 255, 0, 0, 255,
650 0, 0, 255, 0, 255, 0, 255, 255,
651 0, 0, 255, 255, 0, 255, 0, 0,
652 255, 255, 0, 255, 255, 255, 170, 0,
653 0, 255, 0, 0, 0, 0, 0, 255,
654 255, 255, 0, 255, 0, 255, 0, 0
655 ];
656 assert_eq!(&output[..], &expected[..]);
657}