zune_jpeg/idct/
scalar.rs
// Constant bias added to every sum in the row pass of `idct_int` before its `>> 17`.
// `1 << 16` is half of `1 << 17` (presumably round-to-nearest) and `128 << 17`
// contributes 128 once shifted (presumably the level shift back to 0..=255).
// NOTE(review): the `1 << 9` term equals the 512 added in the column pass; confirm it
// is intended to be part of the row-pass bias as well.
const SCALE_BITS: i32 = (1 << 9) + (1 << 16) + (128 << 17);
14
15#[allow(unused_assignments)]
16#[allow(
17 clippy::too_many_lines,
18 clippy::op_ref,
19 clippy::cast_possible_truncation
20)]
21pub fn idct_int(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) {
22 let mut pos = 0;
25
26 let mut i = 0;
27 if &in_vector[1..] == &[0_i32; 63] {
30 let coeff = [(((in_vector[0] >> 3) + 128) as i16).clamp(0, 255); 8];
32
33 macro_rules! store {
34 ($index:tt) => {
35 let mcu_stride: &mut [i16; 8] = out_vector
37 .get_mut($index..$index + 8)
38 .unwrap()
39 .try_into()
40 .unwrap();
41 mcu_stride.copy_from_slice(&coeff);
43 $index += stride;
45 };
46 }
47 store!(pos);
49 store!(pos);
50 store!(pos);
51 store!(pos);
52
53 store!(pos);
54 store!(pos);
55 store!(pos);
56 store!(pos);
57 } else {
58 for ptr in 0..8 {
61 let p2 = in_vector[ptr + 16];
62 let p3 = in_vector[ptr + 48];
63
64 let p1 = (p2 + p3).wrapping_mul(2217);
65
66 let t2 = p1 + p3 * -7567;
67 let t3 = p1 + p2 * 3135;
68
69 let p2 = in_vector[ptr];
70 let p3 = in_vector[32 + ptr];
71 let t0 = fsh(p2 + p3);
72 let t1 = fsh(p2 - p3);
73
74 let x0 = t0 + t3 + 512;
75 let x3 = t0 - t3 + 512;
76 let x1 = t1 + t2 + 512;
77 let x2 = t1 - t2 + 512;
78
79 let mut t0 = in_vector[ptr + 56];
81 let mut t1 = in_vector[ptr + 40];
82 let mut t2 = in_vector[ptr + 24];
83 let mut t3 = in_vector[ptr + 8];
84
85 let p3 = t0 + t2;
86 let p4 = t1 + t3;
87 let p1 = t0 + t3;
88 let p2 = t1 + t2;
89 let p5 = (p3 + p4) * 4816;
90
91 t0 *= 1223;
92 t1 *= 8410;
93 t2 *= 12586;
94 t3 *= 6149;
95
96 let p1 = p5 + p1 * -3685;
97 let p2 = p5 + p2 * -10497;
98 let p3 = p3 * -8034;
99 let p4 = p4 * -1597;
100
101 t3 += p1 + p4;
102 t2 += p2 + p3;
103 t1 += p2 + p4;
104 t0 += p1 + p3;
105
106 in_vector[ptr] = (x0 + t3) >> 10;
109 in_vector[ptr + 8] = (x1 + t2) >> 10;
110 in_vector[ptr + 16] = (x2 + t1) >> 10;
111 in_vector[ptr + 24] = (x3 + t0) >> 10;
112 in_vector[ptr + 32] = (x3 - t0) >> 10;
113 in_vector[ptr + 40] = (x2 - t1) >> 10;
114 in_vector[ptr + 48] = (x1 - t2) >> 10;
115 in_vector[ptr + 56] = (x0 - t3) >> 10;
116 }
117
118 while i < 64 {
120 let p2 = in_vector[i + 2];
124 let p3 = in_vector[i + 6];
125
126 let p1 = (p2 + p3) * 2217;
127 let t2 = p1 + p3 * -7567;
128 let t3 = p1 + p2 * 3135;
129
130 let p2 = in_vector[i];
131 let p3 = in_vector[i + 4];
132
133 let t0 = fsh(p2 + p3);
134 let t1 = fsh(p2 - p3);
135 let x0 = t0 + t3 + SCALE_BITS;
142 let x3 = t0 - t3 + SCALE_BITS;
143 let x1 = t1 + t2 + SCALE_BITS;
144 let x2 = t1 - t2 + SCALE_BITS;
145 let mut t0 = in_vector[i + 7];
147 let mut t1 = in_vector[i + 5];
148 let mut t2 = in_vector[i + 3];
149 let mut t3 = in_vector[i + 1];
150
151 let p3 = t0 + t2;
152 let p4 = t1 + t3;
153 let p1 = t0 + t3;
154 let p2 = t1 + t2;
155 let p5 = (p3 + p4) * f2f(1.175875602);
156
157 t0 = t0.wrapping_mul(1223);
158 t1 = t1.wrapping_mul(8410);
159 t2 = t2.wrapping_mul(12586);
160 t3 = t3.wrapping_mul(6149);
161
162 let p1 = p5 + p1 * -3685;
163 let p2 = p5 + p2 * -10497;
164 let p3 = p3 * -8034;
165 let p4 = p4 * -1597;
166
167 t3 += p1 + p4;
168 t2 += p2 + p3;
169 t1 += p2 + p4;
170 t0 += p1 + p3;
171
172 let out: &mut [i16; 8] = out_vector
173 .get_mut(pos..pos + 8)
174 .unwrap()
175 .try_into()
176 .unwrap();
177
178 out[0] = clamp((x0 + t3) >> 17);
179 out[1] = clamp((x1 + t2) >> 17);
180 out[2] = clamp((x2 + t1) >> 17);
181 out[3] = clamp((x3 + t0) >> 17);
182 out[4] = clamp((x3 - t0) >> 17);
183 out[5] = clamp((x2 - t1) >> 17);
184 out[6] = clamp((x1 - t2) >> 17);
185 out[7] = clamp((x0 - t3) >> 17);
186
187 i += 8;
188
189 pos += stride;
190 }
191 }
192}
193
/// Converts a floating-point factor to fixed point with 12 fractional bits.
///
/// The value is multiplied by 4096, offset by 0.5 and then truncated toward zero by the
/// `as i32` cast, which rounds non-negative inputs to the nearest integer.
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn f2f(x: f32) -> i32 {
    // 4096 == 1 << 12, the same scale `fsh` applies to integers.
    const FIXED_ONE: f32 = 4096.0;

    let biased = x * FIXED_ONE + 0.5;
    biased as i32
}
200
/// Shifts an integer left by 12 bits, i.e. scales it to the fixed-point format produced
/// by `f2f`. Bits shifted past the top of the `i32` are discarded.
#[inline]
fn fsh(x: i32) -> i32 {
    const FRACTION_BITS: u32 = 12;

    x << FRACTION_BITS
}
206
/// Saturates `a` to the range `0..=255` and returns it as an `i16`.
#[inline]
// The cast cannot truncate: the value is already within 0..=255 when it is narrowed.
#[allow(clippy::cast_possible_truncation)]
fn clamp(a: i32) -> i16 {
    let bounded = if a < 0 {
        0
    } else if a > 255 {
        255
    } else {
        a
    };

    bounded as i16
}