//! zune_jpeg/mcu.rs
1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9use alloc::{format, vec};
10use core::cmp::min;
11
12use zune_core::bytestream::ZReaderTrait;
13use zune_core::colorspace::ColorSpace;
14use zune_core::colorspace::ColorSpace::Luma;
15use zune_core::log::{error, trace, warn};
16
17use crate::bitstream::BitStream;
18use crate::components::SampleRatios;
19use crate::decoder::MAX_COMPONENTS;
20use crate::errors::DecodeErrors;
21use crate::marker::Marker;
22use crate::misc::{calculate_padded_width, setup_component_params};
23use crate::worker::{color_convert, upsample};
24use crate::JpegDecoder;
25
/// The size of a DCT block (8x8 = 64 coefficients) for an MCU.

pub const DCT_BLOCK: usize = 64;
29
30impl<T: ZReaderTrait> JpegDecoder<T> {
31 /// Check for existence of DC and AC Huffman Tables
32 pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> {
33 // check that dc and AC tables exist outside the hot path
34 for component in &self.components {
35 let _ = &self
36 .dc_huffman_tables
37 .get(component.dc_huff_table)
38 .as_ref()
39 .ok_or_else(|| {
40 DecodeErrors::HuffmanDecode(format!(
41 "No Huffman DC table for component {:?} ",
42 component.component_id
43 ))
44 })?
45 .as_ref()
46 .ok_or_else(|| {
47 DecodeErrors::HuffmanDecode(format!(
48 "No DC table for component {:?}",
49 component.component_id
50 ))
51 })?;
52
53 let _ = &self
54 .ac_huffman_tables
55 .get(component.ac_huff_table)
56 .as_ref()
57 .ok_or_else(|| {
58 DecodeErrors::HuffmanDecode(format!(
59 "No Huffman AC table for component {:?} ",
60 component.component_id
61 ))
62 })?
63 .as_ref()
64 .ok_or_else(|| {
65 DecodeErrors::HuffmanDecode(format!(
66 "No AC table for component {:?}",
67 component.component_id
68 ))
69 })?;
70 }
71 Ok(())
72 }
73
74 /// Decode MCUs and carry out post processing.
75 ///
76 /// This is the main decoder loop for the library, the hot path.
77 ///
78 /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch
79 /// here.
80 #[allow(
81 clippy::similar_names,
82 clippy::too_many_lines,
83 clippy::cast_possible_truncation
84 )]
85 #[inline(never)]
86 pub(crate) fn decode_mcu_ycbcr_baseline(
87 &mut self, pixels: &mut [u8]
88 ) -> Result<(), DecodeErrors> {
89 setup_component_params(self)?;
90
91 // check dc and AC tables
92 self.check_tables()?;
93
94 let (mut mcu_width, mut mcu_height);
95
96 if self.is_interleaved {
97 // set upsampling functions
98 self.set_upsampling()?;
99
100 mcu_width = self.mcu_x;
101 mcu_height = self.mcu_y;
102 } else {
103 // For non-interleaved images( (1*1) subsampling)
104 // number of MCU's are the widths (+7 to account for paddings) divided bu 8.
105 mcu_width = ((self.info.width + 7) / 8) as usize;
106 mcu_height = ((self.info.height + 7) / 8) as usize;
107 }
108 if self.is_interleaved
109 && self.input_colorspace.num_components() > 1
110 && self.options.jpeg_get_out_colorspace().num_components() == 1
111 && (self.sub_sample_ratio == SampleRatios::V
112 || self.sub_sample_ratio == SampleRatios::HV)
113 {
114 // For a specific set of images, e.g interleaved,
115 // when converting from YcbCr to grayscale, we need to
116 // take into account mcu height since the MCU decoding needs to take
117 // it into account for padding purposes and the post processor
118 // parses two rows per mcu width.
119 //
120 // set coeff to be 2 to ensure that we increment two rows
121 // for every mcu processed also
122 mcu_height *= self.v_max;
123 mcu_height /= self.h_max;
124 self.coeff = 2;
125 }
126
127 if self.input_colorspace.num_components() > self.components.len() {
128 let msg = format!(
129 " Expected {} number of components but found {}",
130 self.input_colorspace.num_components(),
131 self.components.len()
132 );
133 return Err(DecodeErrors::Format(msg));
134 }
135
136 if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
137 warn!("Grayscale image with down-sampled component, resetting component details");
138
139 self.reset_params();
140
141 mcu_width = ((self.info.width + 7) / 8) as usize;
142 mcu_height = ((self.info.height + 7) / 8) as usize;
143 }
144 let width = usize::from(self.info.width);
145
146 let padded_width = calculate_padded_width(width, self.sub_sample_ratio);
147
148 let mut stream = BitStream::new();
149 let mut tmp = [0_i32; DCT_BLOCK];
150
151 let comp_len = self.components.len();
152
153 for (pos, comp) in self.components.iter_mut().enumerate() {
154 // Allocate only needed components.
155 //
156 // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
157 // components.
158 if min(
159 self.options.jpeg_get_out_colorspace().num_components() - 1,
160 pos
161 ) == pos
162 || comp_len == 4
163 // Special colorspace
164 {
165 // allocate enough space to hold a whole MCU width
166 // this means we should take into account sampling ratios
167 // `*8` is because each MCU spans 8 widths.
168 let len = comp.width_stride * comp.vertical_sample * 8;
169
170 comp.needed = true;
171 comp.raw_coeff = vec![0; len];
172 } else {
173 comp.needed = false;
174 }
175 }
176
177 let mut pixels_written = 0;
178
179 let is_hv = usize::from(self.is_interleaved);
180 let upsampler_scratch_size = is_hv * self.components[0].width_stride;
181 let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];
182
183 for i in 0..mcu_height {
184 // Report if we have no more bytes
185 // This may generate false negatives since we over-read bytes
186 // hence that why 37 is chosen(we assume if we over-read more than 37 bytes, we have a problem)
187 if stream.overread_by > 37
188 // favourite number :)
189 {
190 if self.options.get_strict_mode() {
191 return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
192 };
193
194 error!("Premature end of buffer");
195 break;
196 }
197 // decode a whole MCU width,
198 // this takes into account interleaved components.
199 self.decode_mcu_width(mcu_width, &mut tmp, &mut stream)?;
200 // process that width up until it's impossible
201 self.post_process(
202 pixels,
203 i,
204 mcu_height,
205 width,
206 padded_width,
207 &mut pixels_written,
208 &mut upsampler_scratch_space
209 )?;
210 }
211 // it may happen that some images don't have the whole buffer
212 // so we can't panic in case of that
213 // assert_eq!(pixels_written, pixels.len());
214
215 trace!("Finished decoding image");
216
217 Ok(())
218 }
    /// Decode one full horizontal band of MCUs (`mcu_width` of them).
    ///
    /// For every MCU, each component contributes
    /// `vertical_sample * horizontal_sample` 8x8 blocks; each block is
    /// entropy-decoded from `stream` into `tmp` and, when the component is
    /// needed for output, run through the IDCT into that component's
    /// `raw_coeff` buffer. Markers found in the bitstream after an MCU
    /// (RST/EOI/other) are handled here as well.
    fn decode_mcu_width(
        &mut self, mcu_width: usize, tmp: &mut [i32; 64], stream: &mut BitStream
    ) -> Result<(), DecodeErrors> {
        for j in 0..mcu_width {
            // iterate over components
            for component in &mut self.components {
                // `% MAX_COMPONENTS` bounds the index into the table arrays;
                // `unwrap` is safe since `check_tables` validated the tables
                // before the decode loop started.
                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let qt_table = &component.quantization_table;
                let channel = &mut component.raw_coeff;

                // If image is interleaved iterate over scan components,
                // otherwise if it-s non-interleaved, these routines iterate in
                // trivial scanline order(Y,Cb,Cr)
                for v_samp in 0..component.vertical_sample {
                    for h_samp in 0..component.horizontal_sample {
                        // Fill the array with zeroes, decode_mcu_block expects
                        // a zero based array.
                        tmp.fill(0);

                        stream.decode_mcu_block(
                            &mut self.stream,
                            dc_table,
                            ac_table,
                            qt_table,
                            tmp,
                            &mut component.dc_pred
                        )?;

                        if component.needed {
                            let idct_position = {
                                // derived from stb and rewritten for my tastes
                                //
                                // c2: row offset of this block inside the MCU,
                                // c3: column offset (MCU index * samples + block).
                                let c2 = v_samp * 8;
                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;

                                component.width_stride * c2 + c3
                            };

                            let idct_pos = channel.get_mut(idct_position..).unwrap();
                            // call idct.
                            (self.idct_func)(tmp, idct_pos, component.width_stride);
                        }
                    }
                }
            }
            // one MCU done; counts down toward the next expected restart marker
            self.todo = self.todo.saturating_sub(1);
            // After all interleaved components, that's an MCU
            // handle stream markers
            //
            // In some corrupt images, it may occur that header markers occur in the stream.
            // The spec EXPLICITLY FORBIDS this, specifically, in
            // routine F.2.2.5 it says
            // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
            //
            // But libjpeg-turbo allows it because of some weird reason. so I'll also
            // allow it because of some weird reason.
            if let Some(m) = stream.marker {
                if m == Marker::EOI {
                    // acknowledge and ignore EOI marker.
                    stream.marker.take();
                    trace!("Found EOI marker");
                } else if let Marker::RST(_) = m {
                    if self.todo == 0 {
                        self.handle_rst(stream)?;
                    }
                } else {
                    if self.options.get_strict_mode() {
                        return Err(DecodeErrors::Format(format!(
                            "Marker {m:?} found where not expected"
                        )));
                    }
                    error!(
                        "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
                        m
                    );

                    self.parse_marker_inner(m)?;
                }
            }
        }
        Ok(())
    }
307 // handle RST markers.
308 // No-op if not using restarts
309 // this routine is shared with mcu_prog
310 #[cold]
311 pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> {
312 self.todo = self.restart_interval;
313
314 if let Some(marker) = stream.marker {
315 // Found a marker
316 // Read stream and see what marker is stored there
317 match marker {
318 Marker::RST(_) => {
319 // reset stream
320 stream.reset();
321 // Initialize dc predictions to zero for all components
322 self.components.iter_mut().for_each(|x| x.dc_pred = 0);
323 // Start iterating again. from position.
324 }
325 Marker::EOI => {
326 // silent pass
327 }
328 _ => {
329 return Err(DecodeErrors::MCUError(format!(
330 "Marker {marker:?} found in bitstream, possibly corrupt jpeg"
331 )));
332 }
333 }
334 }
335 Ok(())
336 }
    /// Post-process one decoded MCU band: upsample (for interleaved images)
    /// and color-convert the samples into `pixels`.
    ///
    /// * `i` - index of the current MCU row; `mcu_height` is the total count.
    /// * `pixels_written` - running byte offset into `pixels`, updated on return.
    /// * `upsampler_scratch_space` - scratch row handed to the upsampler.
    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
    pub(crate) fn post_process(
        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
    ) -> Result<(), DecodeErrors> {
        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();

        let mut px = *pixels_written;
        // indicates whether image is vertically up-sampled
        let is_vertically_sampled = self
            .components
            .iter()
            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);

        let mut comp_len = self.components.len();

        // If we are moving from YCbCr-> Luma, we do not allocate storage for other components, so we
        // will panic when we are trying to read samples, so for that case,
        // hardcode it so that we don't panic when doing
        // *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
            comp_len = out_colorspace_components;
        }
        // Converts `num_iters` output rows from `samples` into the output
        // colorspace, writing into `pixels` from `px` and advancing `px`.
        let mut color_conv_function =
            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
                for (pos, output) in pixels[px..]
                    .chunks_exact_mut(width * out_colorspace_components)
                    .take(num_iters)
                    .enumerate()
                {
                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    // iterate over each line, since color-convert needs only
                    // one line
                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
                        *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
                    }
                    color_convert(
                        &raw_samples,
                        self.color_convert_16,
                        self.input_colorspace,
                        self.options.jpeg_get_out_colorspace(),
                        output,
                        width,
                        padded_width
                    )?;
                    px += width * out_colorspace_components;
                }
                Ok(())
            };

        let comps = &mut self.components[..];

        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
            {
                // duplicated so that we can check that samples match
                // Fixes bug https://github.com/etemesi254/zune-image/issues/151
                //
                // NOTE(review): this scoped block builds `samples` and drops it
                // without reading it, so it appears to have no observable
                // effect — confirm against the linked issue whether it is
                // still required.
                let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                    *samp = if component.sample_ratio == SampleRatios::None {
                        &component.raw_coeff
                    } else {
                        &component.upsample_dest
                    };
                }
            }
            for comp in comps.iter_mut() {
                upsample(
                    comp,
                    mcu_height,
                    i,
                    upsampler_scratch_space,
                    is_vertically_sampled
                );
            }

            if is_vertically_sampled {
                if i > 0 {
                    // write the last line, it wasn't up-sampled as we didn't have row_down
                    // yet
                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                        *samp = &component.first_row_upsample_dest;
                    }

                    // ensure length matches for all samples
                    let first_len = samples[0].len();
                    for samp in samples.iter().take(comp_len) {
                        assert_eq!(first_len, samp.len());
                    }
                    let num_iters = self.coeff * self.v_max;

                    color_conv_function(num_iters, samples)?;
                }

                // After up-sampling the last row, save any row that can be used for
                // a later up-sampling,
                //
                // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
                // the previous mcu, since we don't have the down row, so save it
                for component in comps.iter_mut() {
                    if component.sample_ratio != SampleRatios::H {
                        // We don't care about H sampling factors, since it's copied in the workers function

                        // copy last row to be used for the next color conversion
                        let size = component.vertical_sample
                            * component.width_stride
                            * component.sample_ratio.sample();

                        let last_bytes =
                            component.raw_coeff.rchunks_exact_mut(size).next().unwrap();

                        component
                            .first_row_upsample_dest
                            .copy_from_slice(last_bytes);
                    }
                }
            }

            // choose the buffer color-convert reads from: raw coefficients
            // for non-sampled components, the upsampled output otherwise.
            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                *samp = if component.sample_ratio == SampleRatios::None {
                    &component.raw_coeff
                } else {
                    &component.upsample_dest
                };
            }

            // we either do 7 or 8 MCU's depending on the state, this only applies to
            // vertically sampled images
            //
            // for rows up until the last MCU, we do not upsample the last stride of the MCU
            // which means that the number of iterations should take that into account is one less the
            // up-sampled size
            //
            // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
            // should sample full raw coeffs
            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));

            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;

            color_conv_function(num_iters, samples)?;
        } else {
            // Non-interleaved (or Luma output): convert straight from the raw
            // coefficient buffers, `8 * coeff` rows per MCU band.
            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];

            self.components
                .iter()
                .enumerate()
                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);

            color_conv_function(8 * self.coeff, channels_ref)?;
        }

        *pixels_written = px;
        Ok(())
    }
496}