zune_jpeg/
headers.rs

1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
//! Decode JPEG markers/segments
//!
//! This file deals with decoding header information in a JPEG file
//!
13use alloc::format;
14use alloc::string::ToString;
15use alloc::vec::Vec;
16
17use zune_core::bytestream::ZReaderTrait;
18use zune_core::colorspace::ColorSpace;
19use zune_core::log::{debug, error, trace, warn};
20
21use crate::components::Components;
22use crate::decoder::{ICCChunk, JpegDecoder, MAX_COMPONENTS};
23use crate::errors::DecodeErrors;
24use crate::huffman::HuffmanTable;
25use crate::misc::{SOFMarkers, UN_ZIGZAG};
26
27///**B.2.4.2 Huffman table-specification syntax**
28#[allow(clippy::similar_names, clippy::cast_sign_loss)]
29pub(crate) fn parse_huffman<T: ZReaderTrait>(
30    decoder: &mut JpegDecoder<T>
31) -> Result<(), DecodeErrors>
32where
33{
34    // Read the length of the Huffman table
35    let mut dht_length = i32::from(decoder.stream.get_u16_be_err()?.checked_sub(2).ok_or(
36        DecodeErrors::FormatStatic("Invalid Huffman length in image")
37    )?);
38
39    while dht_length > 16 {
40        // HT information
41        let ht_info = decoder.stream.get_u8_err()?;
42        // third bit indicates whether the huffman encoding is DC or AC type
43        let dc_or_ac = (ht_info >> 4) & 0xF;
44        // Indicate the position of this table, should be less than 4;
45        let index = (ht_info & 0xF) as usize;
46        // read the number of symbols
47        let mut num_symbols: [u8; 17] = [0; 17];
48
49        if index >= MAX_COMPONENTS {
50            return Err(DecodeErrors::HuffmanDecode(format!(
51                "Invalid DHT index {index}, expected between 0 and 3"
52            )));
53        }
54
55        if dc_or_ac > 1 {
56            return Err(DecodeErrors::HuffmanDecode(format!(
57                "Invalid DHT position {dc_or_ac}, should be 0 or 1"
58            )));
59        }
60
61        decoder
62            .stream
63            .read_exact(&mut num_symbols[1..17])
64            .map_err(|_| DecodeErrors::ExhaustedData)?;
65
66        dht_length -= 1 + 16;
67
68        let symbols_sum: i32 = num_symbols.iter().map(|f| i32::from(*f)).sum();
69
70        // The sum of the number of symbols cannot be greater than 256;
71        if symbols_sum > 256 {
72            return Err(DecodeErrors::FormatStatic(
73                "Encountered Huffman table with excessive length in DHT"
74            ));
75        }
76        if symbols_sum > dht_length {
77            return Err(DecodeErrors::HuffmanDecode(format!(
78                "Excessive Huffman table of length {symbols_sum} found when header length is {dht_length}"
79            )));
80        }
81        dht_length -= symbols_sum;
82        // A table containing symbols in increasing code length
83        let mut symbols = [0; 256];
84
85        decoder
86            .stream
87            .read_exact(&mut symbols[0..(symbols_sum as usize)])
88            .map_err(|x| {
89                DecodeErrors::Format(format!("Could not read symbols into the buffer\n{x}"))
90            })?;
91        // store
92        match dc_or_ac {
93            0 => {
94                decoder.dc_huffman_tables[index] = Some(HuffmanTable::new(
95                    &num_symbols,
96                    symbols,
97                    true,
98                    decoder.is_progressive
99                )?);
100            }
101            _ => {
102                decoder.ac_huffman_tables[index] = Some(HuffmanTable::new(
103                    &num_symbols,
104                    symbols,
105                    false,
106                    decoder.is_progressive
107                )?);
108            }
109        }
110    }
111
112    if dht_length > 0 {
113        return Err(DecodeErrors::FormatStatic("Bogus Huffman table definition"));
114    }
115
116    Ok(())
117}
118
119///**B.2.4.1 Quantization table-specification syntax**
120#[allow(clippy::cast_possible_truncation, clippy::needless_range_loop)]
121pub(crate) fn parse_dqt<T: ZReaderTrait>(img: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> {
122    // read length
123    let mut qt_length =
124        img.stream
125            .get_u16_be_err()?
126            .checked_sub(2)
127            .ok_or(DecodeErrors::FormatStatic(
128                "Invalid DQT length. Length should be greater than 2"
129            ))?;
130    // A single DQT header may have multiple QT's
131    while qt_length > 0 {
132        let qt_info = img.stream.get_u8_err()?;
133        // 0 = 8 bit otherwise 16 bit dqt
134        let precision = (qt_info >> 4) as usize;
135        // last 4 bits give us position
136        let table_position = (qt_info & 0x0f) as usize;
137        let precision_value = 64 * (precision + 1);
138
139        if (precision_value + 1) as u16 > qt_length {
140            return Err(DecodeErrors::DqtError(format!("Invalid QT table bytes left :{}. Too small to construct a valid qt table which should be {} long", qt_length, precision_value + 1)));
141        }
142
143        let dct_table = match precision {
144            0 => {
145                let mut qt_values = [0; 64];
146
147                img.stream.read_exact(&mut qt_values).map_err(|x| {
148                    DecodeErrors::Format(format!("Could not read symbols into the buffer\n{x}"))
149                })?;
150                qt_length -= (precision_value as u16) + 1 /*QT BIT*/;
151                // carry out un zig-zag here
152                un_zig_zag(&qt_values)
153            }
154            1 => {
155                // 16 bit quantization tables
156                let mut qt_values = [0_u16; 64];
157
158                for i in 0..64 {
159                    qt_values[i] = img.stream.get_u16_be_err()?;
160                }
161                qt_length -= (precision_value as u16) + 1;
162
163                un_zig_zag(&qt_values)
164            }
165            _ => {
166                return Err(DecodeErrors::DqtError(format!(
167                    "Expected QT precision value of either 0 or 1, found {precision:?}"
168                )));
169            }
170        };
171
172        if table_position >= MAX_COMPONENTS {
173            return Err(DecodeErrors::DqtError(format!(
174                "Too large table position for QT :{table_position}, expected between 0 and 3"
175            )));
176        }
177
178        img.qt_tables[table_position] = Some(dct_table);
179    }
180
181    return Ok(());
182}
183
184/// Section:`B.2.2 Frame header syntax`
185
186pub(crate) fn parse_start_of_frame<T: ZReaderTrait>(
187    sof: SOFMarkers, img: &mut JpegDecoder<T>
188) -> Result<(), DecodeErrors> {
189    if img.seen_sof {
190        return Err(DecodeErrors::SofError(
191            "Two Start of Frame Markers".to_string()
192        ));
193    }
194    // Get length of the frame header
195    let length = img.stream.get_u16_be_err()?;
196    // usually 8, but can be 12 and 16, we currently support only 8
197    // so sorry about that 12 bit images
198    let dt_precision = img.stream.get_u8_err()?;
199
200    if dt_precision != 8 {
201        return Err(DecodeErrors::SofError(format!(
202            "The library can only parse 8-bit images, the image has {dt_precision} bits of precision"
203        )));
204    }
205
206    img.info.set_density(dt_precision);
207
208    // read  and set the image height.
209    let img_height = img.stream.get_u16_be_err()?;
210    img.info.set_height(img_height);
211
212    // read and set the image width
213    let img_width = img.stream.get_u16_be_err()?;
214    img.info.set_width(img_width);
215
216    trace!("Image width  :{}", img_width);
217    trace!("Image height :{}", img_height);
218
219    if usize::from(img_width) > img.options.get_max_width() {
220        return Err(DecodeErrors::Format(format!("Image width {} greater than width limit {}. If use `set_limits` if you want to support huge images", img_width, img.options.get_max_width())));
221    }
222
223    if usize::from(img_height) > img.options.get_max_height() {
224        return Err(DecodeErrors::Format(format!("Image height {} greater than height limit {}. If use `set_limits` if you want to support huge images", img_height, img.options.get_max_height())));
225    }
226
227    // Check image width or height is zero
228    if img_width == 0 || img_height == 0 {
229        return Err(DecodeErrors::ZeroError);
230    }
231
232    // Number of components for the image.
233    let num_components = img.stream.get_u8_err()?;
234
235    if num_components == 0 {
236        return Err(DecodeErrors::SofError(
237            "Number of components cannot be zero.".to_string()
238        ));
239    }
240
241    let expected = 8 + 3 * u16::from(num_components);
242    // length should be equal to num components
243    if length != expected {
244        return Err(DecodeErrors::SofError(format!(
245            "Length of start of frame differs from expected {expected},value is {length}"
246        )));
247    }
248
249    trace!("Image components : {}", num_components);
250
251    if num_components == 1 {
252        // SOF sets the number of image components
253        // and that to us translates to setting input and output
254        // colorspaces to zero
255        img.input_colorspace = ColorSpace::Luma;
256        img.options = img.options.jpeg_set_out_colorspace(ColorSpace::Luma);
257        debug!("Overriding default colorspace set to Luma");
258    }
259    if num_components == 4 && img.input_colorspace == ColorSpace::YCbCr {
260        trace!("Input image has 4 components, defaulting to CMYK colorspace");
261        // https://entropymine.wordpress.com/2018/10/22/how-is-a-jpeg-images-color-type-determined/
262        img.input_colorspace = ColorSpace::CMYK;
263    }
264
265    // set number of components
266    img.info.components = num_components;
267
268    let mut components = Vec::with_capacity(num_components as usize);
269    let mut temp = [0; 3];
270
271    for pos in 0..num_components {
272        // read 3 bytes for each component
273        img.stream
274            .read_exact(&mut temp)
275            .map_err(|x| DecodeErrors::Format(format!("Could not read component data\n{x}")))?;
276        // create a component.
277        let component = Components::from(temp, pos)?;
278
279        components.push(component);
280    }
281    img.seen_sof = true;
282
283    img.info.set_sof_marker(sof);
284
285    img.components = components;
286
287    Ok(())
288}
289
290/// Parse a start of scan data
291pub(crate) fn parse_sos<T: ZReaderTrait>(image: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> {
292    // Scan header length
293    let ls = image.stream.get_u16_be_err()?;
294    // Number of image components in scan
295    let ns = image.stream.get_u8_err()?;
296
297    let mut seen = [-1; { MAX_COMPONENTS + 1 }];
298
299    image.num_scans = ns;
300
301    if ls != 6 + 2 * u16::from(ns) {
302        return Err(DecodeErrors::SosError(format!(
303            "Bad SOS length {ls},corrupt jpeg"
304        )));
305    }
306
307    // Check number of components.
308    if !(1..5).contains(&ns) {
309        return Err(DecodeErrors::SosError(format!(
310            "Number of components in start of scan should be less than 3 but more than 0. Found {ns}"
311        )));
312    }
313
314    if image.info.components == 0 {
315        return Err(DecodeErrors::FormatStatic(
316            "Error decoding SOF Marker, Number of components cannot be zero."
317        ));
318    }
319
320    // consume spec parameters
321    for i in 0..ns {
322        // CS_i parameter, I don't need it so I might as well delete it
323        let id = image.stream.get_u8_err()?;
324
325        if seen.contains(&i32::from(id)) {
326            return Err(DecodeErrors::SofError(format!(
327                "Duplicate ID {id} seen twice in the same component"
328            )));
329        }
330
331        seen[usize::from(i)] = i32::from(id);
332        // DC and AC huffman table position
333        // top 4 bits contain dc huffman destination table
334        // lower four bits contain ac huffman destination table
335        let y = image.stream.get_u8_err()?;
336
337        let mut j = 0;
338
339        while j < image.info.components {
340            if image.components[j as usize].id == id {
341                break;
342            }
343
344            j += 1;
345        }
346
347        if j == image.info.components {
348            return Err(DecodeErrors::SofError(format!(
349                "Invalid component id {}, expected a value between 0 and {}",
350                id,
351                image.components.len()
352            )));
353        }
354
355        image.components[usize::from(j)].dc_huff_table = usize::from((y >> 4) & 0xF);
356        image.components[usize::from(j)].ac_huff_table = usize::from(y & 0xF);
357        image.z_order[i as usize] = j as usize;
358    }
359
360    // Collect the component spec parameters
361    // This is only needed for progressive images but I'll read
362    // them in order to ensure they are correct according to the spec
363
364    // Extract progressive information
365
366    // https://www.w3.org/Graphics/JPEG/itu-t81.pdf
367    // Page 42
368
369    // Start of spectral / predictor selection. (between 0 and 63)
370    image.spec_start = image.stream.get_u8_err()?;
371    // End of spectral selection
372    image.spec_end = image.stream.get_u8_err()?;
373
374    let bit_approx = image.stream.get_u8_err()?;
375    // successive approximation bit position high
376    image.succ_high = bit_approx >> 4;
377
378    if image.spec_end > 63 {
379        return Err(DecodeErrors::SosError(format!(
380            "Invalid Se parameter {}, range should be 0-63",
381            image.spec_end
382        )));
383    }
384    if image.spec_start > 63 {
385        return Err(DecodeErrors::SosError(format!(
386            "Invalid Ss parameter {}, range should be 0-63",
387            image.spec_start
388        )));
389    }
390    if image.succ_high > 13 {
391        return Err(DecodeErrors::SosError(format!(
392            "Invalid Ah parameter {}, range should be 0-13",
393            image.succ_low
394        )));
395    }
396    // successive approximation bit position low
397    image.succ_low = bit_approx & 0xF;
398
399    if image.succ_low > 13 {
400        return Err(DecodeErrors::SosError(format!(
401            "Invalid Al parameter {}, range should be 0-13",
402            image.succ_low
403        )));
404    }
405
406    trace!(
407        "Ss={}, Se={} Ah={} Al={}",
408        image.spec_start,
409        image.spec_end,
410        image.succ_high,
411        image.succ_low
412    );
413
414    Ok(())
415}
416
417/// Parse Adobe App14 segment
418pub(crate) fn parse_app14<T: ZReaderTrait>(
419    decoder: &mut JpegDecoder<T>
420) -> Result<(), DecodeErrors> {
421    // skip length
422    let mut length = usize::from(decoder.stream.get_u16_be());
423
424    if length < 2 || !decoder.stream.has(length - 2) {
425        return Err(DecodeErrors::ExhaustedData);
426    }
427    if length < 14 {
428        return Err(DecodeErrors::FormatStatic(
429            "Too short of a length for App14 segment"
430        ));
431    }
432    if decoder.stream.peek_at(0, 5) == Ok(b"Adobe") {
433        // move stream 6 bytes to remove adobe id
434        decoder.stream.skip(6);
435        // skip version, flags0 and flags1
436        decoder.stream.skip(5);
437        // get color transform
438        let transform = decoder.stream.get_u8();
439        // https://exiftool.org/TagNames/JPEG.html#Adobe
440        match transform {
441            0 => decoder.input_colorspace = ColorSpace::CMYK,
442            1 => decoder.input_colorspace = ColorSpace::YCbCr,
443            2 => decoder.input_colorspace = ColorSpace::YCCK,
444            _ => {
445                return Err(DecodeErrors::Format(format!(
446                    "Unknown Adobe colorspace {transform}"
447                )))
448            }
449        }
450        // length   = 2
451        // adobe id = 6
452        // version =  5
453        // transform = 1
454        length = length.saturating_sub(14);
455    } else if decoder.options.get_strict_mode() {
456        return Err(DecodeErrors::FormatStatic("Corrupt Adobe App14 segment"));
457    } else {
458        length = length.saturating_sub(2);
459        error!("Not a valid Adobe APP14 Segment");
460    }
461    // skip any proceeding lengths.
462    // we do not need them
463    decoder.stream.skip(length);
464
465    Ok(())
466}
467
468/// Parse the APP1 segment
469///
470/// This contains the exif tag
471pub(crate) fn parse_app1<T: ZReaderTrait>(
472    decoder: &mut JpegDecoder<T>
473) -> Result<(), DecodeErrors> {
474    // contains exif data
475    let mut length = usize::from(decoder.stream.get_u16_be());
476
477    if length < 2 || !decoder.stream.has(length - 2) {
478        return Err(DecodeErrors::ExhaustedData);
479    }
480    // length bytes
481    length -= 2;
482
483    if length > 6 && decoder.stream.peek_at(0, 6).unwrap() == b"Exif\x00\x00" {
484        trace!("Exif segment present");
485        // skip bytes we read above
486        decoder.stream.skip(6);
487        length -= 6;
488
489        let exif_bytes = decoder.stream.peek_at(0, length).unwrap().to_vec();
490
491        decoder.exif_data = Some(exif_bytes);
492    } else {
493        warn!("Wrongly formatted exif tag");
494    }
495
496    decoder.stream.skip(length);
497    Ok(())
498}
499
500pub(crate) fn parse_app2<T: ZReaderTrait>(
501    decoder: &mut JpegDecoder<T>
502) -> Result<(), DecodeErrors> {
503    let mut length = usize::from(decoder.stream.get_u16_be());
504
505    if length < 2 || !decoder.stream.has(length - 2) {
506        return Err(DecodeErrors::ExhaustedData);
507    }
508    // length bytes
509    length -= 2;
510
511    if length > 14 && decoder.stream.peek_at(0, 12).unwrap() == *b"ICC_PROFILE\0" {
512        trace!("ICC Profile present");
513        // skip 12 bytes which indicate ICC profile
514        length -= 12;
515        decoder.stream.skip(12);
516        let seq_no = decoder.stream.get_u8();
517        let num_markers = decoder.stream.get_u8();
518        // deduct the two bytes we read above
519        length -= 2;
520
521        let data = decoder.stream.peek_at(0, length).unwrap().to_vec();
522
523        let icc_chunk = ICCChunk {
524            seq_no,
525            num_markers,
526            data
527        };
528        decoder.icc_data.push(icc_chunk);
529    }
530
531    decoder.stream.skip(length);
532
533    Ok(())
534}
535
536/// Small utility function to print Un-zig-zagged quantization tables
537
538fn un_zig_zag<T>(a: &[T]) -> [i32; 64]
539where
540    T: Default + Copy,
541    i32: core::convert::From<T>
542{
543    let mut output = [i32::default(); 64];
544
545    for i in 0..64 {
546        output[UN_ZIGZAG[i]] = i32::from(a[i]);
547    }
548
549    output
550}