png/
text_metadata.rs

1//! # Text chunks (tEXt/zTXt/iTXt) structs and functions
2//!
3//! The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#11textinfo) optionally allows for
4//! embedded text chunks in the file. They may appear either before or after the image data
5//! chunks. There are three kinds of text chunks.
6//!  -   `tEXt`: This has a `keyword` and `text` field, and is ISO 8859-1 encoded.
7//!  -   `zTXt`: This is semantically the same as `tEXt`, i.e. it has the same fields and
8//!       encoding, but the `text` field is compressed before being written into the PNG file.
9//!  -   `iTXt`: This chunk allows for its `text` field to be any valid UTF-8, and supports
10//!        compression of the text field as well.
11//!
12//!  The `ISO 8859-1` encoding technically doesn't allow any control characters
13//!  to be used, but in practice these values are encountered anyway. This can
14//!  either be the extended `ISO-8859-1` encoding with control characters or the
15//!  `Windows-1252` encoding. This crate assumes the `ISO-8859-1` encoding is
16//!  used.
17//!
18//!  ## Reading text chunks
19//!
//!  As a PNG is decoded, any text chunk encountered is appended to the
//!  [`Info`](`crate::common::Info`) struct, in the `uncompressed_latin1_text`,
//!  `compressed_latin1_text`, and the `utf8_text` fields depending on whether the encountered
//!  chunk is `tEXt`, `zTXt`, or `iTXt`.
24//!
25//!  ```
26//!  use std::fs::File;
27//!  use std::iter::FromIterator;
28//!  use std::path::PathBuf;
29//!
30//!  // Opening a png file that has a zTXt chunk
31//!  let decoder = png::Decoder::new(
32//!      File::open(PathBuf::from_iter([
33//!          "tests",
34//!          "text_chunk_examples",
35//!          "ztxt_example.png",
36//!      ]))
37//!      .unwrap(),
38//!  );
39//!  let mut reader = decoder.read_info().unwrap();
40//!  // If the text chunk is before the image data frames, `reader.info()` already contains the text.
41//!  for text_chunk in &reader.info().compressed_latin1_text {
42//!      println!("{:?}", text_chunk.keyword); // Prints the keyword
43//!      println!("{:#?}", text_chunk); // Prints out the text chunk.
44//!      // To get the uncompressed text, use the `get_text` method.
45//!      println!("{}", text_chunk.get_text().unwrap());
46//!  }
47//!  ```
48//!
49//!  ## Writing text chunks
50//!
//!  There are two ways to write text chunks. The first is to add the appropriate text structs
//!  directly to the encoder header before the header is written to file. The second is to call
//!  the `write_text_chunk` method, which adds a text chunk at any point in the stream.
53//!
54//!  ```
55//!  # use png::text_metadata::{ITXtChunk, ZTXtChunk};
56//!  # use std::env;
57//!  # use std::fs::File;
58//!  # use std::io::BufWriter;
59//!  # use std::iter::FromIterator;
60//!  # use std::path::PathBuf;
61//!  # let file = File::create(PathBuf::from_iter(["target", "text_chunk.png"])).unwrap();
62//!  # let ref mut w = BufWriter::new(file);
63//!  let mut encoder = png::Encoder::new(w, 2, 1); // Width is 2 pixels and height is 1.
64//!  encoder.set_color(png::ColorType::Rgba);
65//!  encoder.set_depth(png::BitDepth::Eight);
66//!  // Adding text chunks to the header
67//!  encoder
68//!     .add_text_chunk(
69//!         "Testing tEXt".to_string(),
70//!         "This is a tEXt chunk that will appear before the IDAT chunks.".to_string(),
71//!     )
72//!     .unwrap();
73//!  encoder
74//!      .add_ztxt_chunk(
75//!          "Testing zTXt".to_string(),
76//!          "This is a zTXt chunk that is compressed in the png file.".to_string(),
77//!      )
78//!      .unwrap();
79//!  encoder
80//!      .add_itxt_chunk(
81//!          "Testing iTXt".to_string(),
82//!          "iTXt chunks support all of UTF8. Example: हिंदी.".to_string(),
83//!      )
84//!      .unwrap();
85//!
86//!  let mut writer = encoder.write_header().unwrap();
87//!
88//!  let data = [255, 0, 0, 255, 0, 0, 0, 255]; // An array containing a RGBA sequence. First pixel is red and second pixel is black.
89//!  writer.write_image_data(&data).unwrap(); // Save
90//!
91//!  // We can add a tEXt/zTXt/iTXt at any point before the encoder is dropped from scope. These chunks will be at the end of the png file.
92//!  let tail_ztxt_chunk = ZTXtChunk::new("Comment".to_string(), "A zTXt chunk after the image data.".to_string());
93//!  writer.write_text_chunk(&tail_ztxt_chunk).unwrap();
94//!
95//!  // The fields of the text chunk are public, so they can be mutated before being written to the file.
96//!  let mut tail_itxt_chunk = ITXtChunk::new("Author".to_string(), "सायंतन खान".to_string());
97//!  tail_itxt_chunk.compressed = true;
98//!  tail_itxt_chunk.language_tag = "hi".to_string();
99//!  tail_itxt_chunk.translated_keyword = "लेखक".to_string();
100//!  writer.write_text_chunk(&tail_itxt_chunk).unwrap();
101//!  ```
102
103#![warn(missing_docs)]
104
105use crate::{chunk, encoder, DecodingError, EncodingError};
106use fdeflate::BoundedDecompressionError;
107use flate2::write::ZlibEncoder;
108use flate2::Compression;
109use std::{convert::TryFrom, io::Write};
110
/// Default upper bound, in bytes, on the decompressed size of a compressed text chunk.
///
/// Used by the `decompress_text` methods when no explicit limit is supplied.
pub const DECOMPRESSION_LIMIT: usize = 2 * 1024 * 1024; // 2 MiB
113
/// Failures that can occur while encoding a text chunk.
///
/// Values of this type are converted into the crate-wide `EncodingError` before they reach
/// the caller.
#[derive(Clone, Copy, Debug)]
pub(crate) enum TextEncodingError {
    /// The string contains characters that the target encoding cannot represent.
    Unrepresentable,
    /// The keyword is empty or longer than 79 bytes.
    InvalidKeywordSize,
    /// Compressing the text failed.
    CompressionError,
}
124
/// Failures that can occur while decoding a text chunk.
///
/// Values of this type are converted into the crate-wide `DecodingError` before they reach
/// the caller.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextDecodingError {
    /// The bytes cannot be represented in the expected encoding (for example a non-ASCII
    /// language tag, or text that is not valid UTF-8).
    Unrepresentable,
    /// Keyword longer than 79 bytes or empty
    InvalidKeywordSize,
    /// Missing null separator
    MissingNullSeparator,
    /// Compressed text cannot be uncompressed
    InflationError,
    /// The decompressed text would exceed the decompression limit
    OutOfDecompressionSpace,
    /// Using an unspecified value for the compression method (only 0 is accepted)
    InvalidCompressionMethod,
    /// Using a byte that is neither 0 (uncompressed) nor 1 (compressed) as the compression
    /// flag in an iTXt chunk
    InvalidCompressionFlag,
    /// Missing the compression flag
    MissingCompressionFlag,
}
145
146/// A generalized text chunk trait
147pub trait EncodableTextChunk {
148    /// Encode text chunk as `Vec<u8>` to a `Write`
149    fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError>;
150}
151
/// An uncompressed Latin-1 (`tEXt`) text chunk.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TEXtChunk {
    /// Keyword of the chunk. Its Latin-1 encoding must be 1 to 79 bytes long for the chunk
    /// to be encodable.
    pub keyword: String,
    /// Text of the chunk. Can be at most 2GB.
    pub text: String,
}
160
/// Decodes ISO 8859-1 (Latin-1) bytes into a `String`.
///
/// Every byte maps to the Unicode code point with the same numeric value, so this cannot fail.
fn decode_iso_8859_1(text: &[u8]) -> String {
    let mut decoded = String::with_capacity(text.len());
    for &byte in text {
        decoded.push(char::from(byte));
    }
    decoded
}
164
165pub(crate) fn encode_iso_8859_1(text: &str) -> Result<Vec<u8>, TextEncodingError> {
166    encode_iso_8859_1_iter(text).collect()
167}
168
169fn encode_iso_8859_1_into(buf: &mut Vec<u8>, text: &str) -> Result<(), TextEncodingError> {
170    for b in encode_iso_8859_1_iter(text) {
171        buf.push(b?);
172    }
173    Ok(())
174}
175
176fn encode_iso_8859_1_iter(text: &str) -> impl Iterator<Item = Result<u8, TextEncodingError>> + '_ {
177    text.chars()
178        .map(|c| u8::try_from(c as u32).map_err(|_| TextEncodingError::Unrepresentable))
179}
180
181fn decode_ascii(text: &[u8]) -> Result<&str, TextDecodingError> {
182    if text.is_ascii() {
183        // `from_utf8` cannot panic because we're already checked that `text` is ASCII-7.
184        // And this is the only safe way to get ASCII-7 string from `&[u8]`.
185        Ok(std::str::from_utf8(text).expect("unreachable"))
186    } else {
187        Err(TextDecodingError::Unrepresentable)
188    }
189}
190
191impl TEXtChunk {
192    /// Constructs a new TEXtChunk.
193    /// Not sure whether it should take &str or String.
194    pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
195        Self {
196            keyword: keyword.into(),
197            text: text.into(),
198        }
199    }
200
201    /// Decodes a slice of bytes to a String using Latin-1 decoding.
202    /// The decoder runs in strict mode, and any decoding errors are passed along to the caller.
203    pub(crate) fn decode(
204        keyword_slice: &[u8],
205        text_slice: &[u8],
206    ) -> Result<Self, TextDecodingError> {
207        if keyword_slice.is_empty() || keyword_slice.len() > 79 {
208            return Err(TextDecodingError::InvalidKeywordSize);
209        }
210
211        Ok(Self {
212            keyword: decode_iso_8859_1(keyword_slice),
213            text: decode_iso_8859_1(text_slice),
214        })
215    }
216}
217
218impl EncodableTextChunk for TEXtChunk {
219    /// Encodes TEXtChunk to a Writer. The keyword and text are separated by a byte of zeroes.
220    fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
221        let mut data = encode_iso_8859_1(&self.keyword)?;
222
223        if data.is_empty() || data.len() > 79 {
224            return Err(TextEncodingError::InvalidKeywordSize.into());
225        }
226
227        data.push(0);
228
229        encode_iso_8859_1_into(&mut data, &self.text)?;
230
231        encoder::write_chunk(w, chunk::tEXt, &data)
232    }
233}
234
235/// Struct representing a zTXt chunk
236#[derive(Clone, Debug, PartialEq, Eq)]
237pub struct ZTXtChunk {
238    /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1.
239    pub keyword: String,
240    /// Text field of zTXt chunk. It is compressed by default, but can be uncompressed if necessary.
241    text: OptCompressed,
242}
243
/// Internal storage for the text field of a zTXt/iTXt chunk, which is kept either in its
/// compressed or in its uncompressed form.
#[derive(Debug, Clone, Eq, PartialEq)]
enum OptCompressed {
    /// The text as compressed bytes. Can be at most 2GB.
    Compressed(Vec<u8>),
    /// The text as an uncompressed string.
    Uncompressed(String),
}
252
253impl ZTXtChunk {
254    /// Creates a new ZTXt chunk.
255    pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
256        Self {
257            keyword: keyword.into(),
258            text: OptCompressed::Uncompressed(text.into()),
259        }
260    }
261
262    pub(crate) fn decode(
263        keyword_slice: &[u8],
264        compression_method: u8,
265        text_slice: &[u8],
266    ) -> Result<Self, TextDecodingError> {
267        if keyword_slice.is_empty() || keyword_slice.len() > 79 {
268            return Err(TextDecodingError::InvalidKeywordSize);
269        }
270
271        if compression_method != 0 {
272            return Err(TextDecodingError::InvalidCompressionMethod);
273        }
274
275        Ok(Self {
276            keyword: decode_iso_8859_1(keyword_slice),
277            text: OptCompressed::Compressed(text_slice.to_vec()),
278        })
279    }
280
281    /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
282    pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
283        self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
284    }
285
286    /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
287    pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
288        match &self.text {
289            OptCompressed::Compressed(v) => {
290                let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(&v[..], limit) {
291                    Ok(s) => s,
292                    Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
293                        return Err(DecodingError::from(
294                            TextDecodingError::OutOfDecompressionSpace,
295                        ));
296                    }
297                    Err(_) => {
298                        return Err(DecodingError::from(TextDecodingError::InflationError));
299                    }
300                };
301                self.text = OptCompressed::Uncompressed(decode_iso_8859_1(&uncompressed_raw));
302            }
303            OptCompressed::Uncompressed(_) => {}
304        };
305        Ok(())
306    }
307
308    /// Decompresses the inner text, and returns it as a `String`.
309    /// If decompression uses more the 2MiB, first call decompress with limit, and then this method.
310    pub fn get_text(&self) -> Result<String, DecodingError> {
311        match &self.text {
312            OptCompressed::Compressed(v) => {
313                let uncompressed_raw = fdeflate::decompress_to_vec(v)
314                    .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?;
315                Ok(decode_iso_8859_1(&uncompressed_raw))
316            }
317            OptCompressed::Uncompressed(s) => Ok(s.clone()),
318        }
319    }
320
321    /// Compresses the inner text, mutating its own state.
322    pub fn compress_text(&mut self) -> Result<(), EncodingError> {
323        match &self.text {
324            OptCompressed::Uncompressed(s) => {
325                let uncompressed_raw = encode_iso_8859_1(s)?;
326                let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
327                encoder
328                    .write_all(&uncompressed_raw)
329                    .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
330                self.text = OptCompressed::Compressed(
331                    encoder
332                        .finish()
333                        .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?,
334                );
335            }
336            OptCompressed::Compressed(_) => {}
337        }
338
339        Ok(())
340    }
341}
342
343impl EncodableTextChunk for ZTXtChunk {
344    fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
345        let mut data = encode_iso_8859_1(&self.keyword)?;
346
347        if data.is_empty() || data.len() > 79 {
348            return Err(TextEncodingError::InvalidKeywordSize.into());
349        }
350
351        // Null separator
352        data.push(0);
353
354        // Compression method: the only valid value is 0, as of 2021.
355        data.push(0);
356
357        match &self.text {
358            OptCompressed::Compressed(v) => {
359                data.extend_from_slice(&v[..]);
360            }
361            OptCompressed::Uncompressed(s) => {
362                // This code may have a bug. Check for correctness.
363                let uncompressed_raw = encode_iso_8859_1(s)?;
364                let mut encoder = ZlibEncoder::new(data, Compression::fast());
365                encoder
366                    .write_all(&uncompressed_raw)
367                    .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
368                data = encoder
369                    .finish()
370                    .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
371            }
372        };
373
374        encoder::write_chunk(w, chunk::zTXt, &data)
375    }
376}
377
378/// Struct encoding an iTXt chunk
379#[derive(Clone, Debug, PartialEq, Eq)]
380pub struct ITXtChunk {
381    /// The keyword field. This needs to be between 1-79 bytes when encoded as Latin-1.
382    pub keyword: String,
383    /// Indicates whether the text will be (or was) compressed in the PNG.
384    pub compressed: bool,
385    /// A hyphen separated list of languages that the keyword is translated to. This is ASCII-7 encoded.
386    pub language_tag: String,
387    /// Translated keyword. This is UTF-8 encoded.
388    pub translated_keyword: String,
389    /// Text field of iTXt chunk. It is compressed by default, but can be uncompressed if necessary.
390    text: OptCompressed,
391}
392
393impl ITXtChunk {
394    /// Constructs a new iTXt chunk. Leaves all but keyword and text to default values.
395    pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
396        Self {
397            keyword: keyword.into(),
398            compressed: false,
399            language_tag: "".to_string(),
400            translated_keyword: "".to_string(),
401            text: OptCompressed::Uncompressed(text.into()),
402        }
403    }
404
405    pub(crate) fn decode(
406        keyword_slice: &[u8],
407        compression_flag: u8,
408        compression_method: u8,
409        language_tag_slice: &[u8],
410        translated_keyword_slice: &[u8],
411        text_slice: &[u8],
412    ) -> Result<Self, TextDecodingError> {
413        if keyword_slice.is_empty() || keyword_slice.len() > 79 {
414            return Err(TextDecodingError::InvalidKeywordSize);
415        }
416        let keyword = decode_iso_8859_1(keyword_slice);
417
418        let compressed = match compression_flag {
419            0 => false,
420            1 => true,
421            _ => return Err(TextDecodingError::InvalidCompressionFlag),
422        };
423
424        if compressed && compression_method != 0 {
425            return Err(TextDecodingError::InvalidCompressionMethod);
426        }
427
428        let language_tag = decode_ascii(language_tag_slice)?.to_owned();
429
430        let translated_keyword = std::str::from_utf8(translated_keyword_slice)
431            .map_err(|_| TextDecodingError::Unrepresentable)?
432            .to_string();
433        let text = if compressed {
434            OptCompressed::Compressed(text_slice.to_vec())
435        } else {
436            OptCompressed::Uncompressed(
437                String::from_utf8(text_slice.to_vec())
438                    .map_err(|_| TextDecodingError::Unrepresentable)?,
439            )
440        };
441
442        Ok(Self {
443            keyword,
444            compressed,
445            language_tag,
446            translated_keyword,
447            text,
448        })
449    }
450
451    /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
452    pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
453        self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
454    }
455
456    /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
457    pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
458        match &self.text {
459            OptCompressed::Compressed(v) => {
460                let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(v, limit) {
461                    Ok(s) => s,
462                    Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
463                        return Err(DecodingError::from(
464                            TextDecodingError::OutOfDecompressionSpace,
465                        ));
466                    }
467                    Err(_) => {
468                        return Err(DecodingError::from(TextDecodingError::InflationError));
469                    }
470                };
471                self.text = OptCompressed::Uncompressed(
472                    String::from_utf8(uncompressed_raw)
473                        .map_err(|_| TextDecodingError::Unrepresentable)?,
474                );
475            }
476            OptCompressed::Uncompressed(_) => {}
477        };
478        Ok(())
479    }
480
481    /// Decompresses the inner text, and returns it as a `String`.
482    /// If decompression takes more than 2 MiB, try `decompress_text_with_limit` followed by this method.
483    pub fn get_text(&self) -> Result<String, DecodingError> {
484        match &self.text {
485            OptCompressed::Compressed(v) => {
486                let uncompressed_raw = fdeflate::decompress_to_vec(v)
487                    .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?;
488                String::from_utf8(uncompressed_raw)
489                    .map_err(|_| TextDecodingError::Unrepresentable.into())
490            }
491            OptCompressed::Uncompressed(s) => Ok(s.clone()),
492        }
493    }
494
495    /// Compresses the inner text, mutating its own state.
496    pub fn compress_text(&mut self) -> Result<(), EncodingError> {
497        match &self.text {
498            OptCompressed::Uncompressed(s) => {
499                let uncompressed_raw = s.as_bytes();
500                let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
501                encoder
502                    .write_all(uncompressed_raw)
503                    .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
504                self.text = OptCompressed::Compressed(
505                    encoder
506                        .finish()
507                        .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?,
508                );
509            }
510            OptCompressed::Compressed(_) => {}
511        }
512
513        Ok(())
514    }
515}
516
517impl EncodableTextChunk for ITXtChunk {
518    fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
519        // Keyword
520        let mut data = encode_iso_8859_1(&self.keyword)?;
521
522        if data.is_empty() || data.len() > 79 {
523            return Err(TextEncodingError::InvalidKeywordSize.into());
524        }
525
526        // Null separator
527        data.push(0);
528
529        // Compression flag
530        if self.compressed {
531            data.push(1);
532        } else {
533            data.push(0);
534        }
535
536        // Compression method
537        data.push(0);
538
539        // Language tag
540        if !self.language_tag.is_ascii() {
541            return Err(EncodingError::from(TextEncodingError::Unrepresentable));
542        }
543        data.extend(self.language_tag.as_bytes());
544
545        // Null separator
546        data.push(0);
547
548        // Translated keyword
549        data.extend_from_slice(self.translated_keyword.as_bytes());
550
551        // Null separator
552        data.push(0);
553
554        // Text
555        if self.compressed {
556            match &self.text {
557                OptCompressed::Compressed(v) => {
558                    data.extend_from_slice(&v[..]);
559                }
560                OptCompressed::Uncompressed(s) => {
561                    let uncompressed_raw = s.as_bytes();
562                    let mut encoder = ZlibEncoder::new(data, Compression::fast());
563                    encoder
564                        .write_all(uncompressed_raw)
565                        .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
566                    data = encoder
567                        .finish()
568                        .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
569                }
570            }
571        } else {
572            match &self.text {
573                OptCompressed::Compressed(v) => {
574                    let uncompressed_raw = fdeflate::decompress_to_vec(v)
575                        .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
576                    data.extend_from_slice(&uncompressed_raw[..]);
577                }
578                OptCompressed::Uncompressed(s) => {
579                    data.extend_from_slice(s.as_bytes());
580                }
581            }
582        }
583
584        encoder::write_chunk(w, chunk::iTXt, &data)
585    }
586}