png/text_metadata.rs
1//! # Text chunks (tEXt/zTXt/iTXt) structs and functions
2//!
3//! The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#11textinfo) optionally allows for
4//! embedded text chunks in the file. They may appear either before or after the image data
5//! chunks. There are three kinds of text chunks.
6//! - `tEXt`: This has a `keyword` and `text` field, and is ISO 8859-1 encoded.
7//! - `zTXt`: This is semantically the same as `tEXt`, i.e. it has the same fields and
8//! encoding, but the `text` field is compressed before being written into the PNG file.
9//! - `iTXt`: This chunk allows for its `text` field to be any valid UTF-8, and supports
10//! compression of the text field as well.
11//!
12//! The `ISO 8859-1` encoding technically doesn't allow any control characters
13//! to be used, but in practice these values are encountered anyway. This can
14//! either be the extended `ISO-8859-1` encoding with control characters or the
15//! `Windows-1252` encoding. This crate assumes the `ISO-8859-1` encoding is
16//! used.
17//!
18//! ## Reading text chunks
19//!
//! As a PNG is decoded, any text chunk encountered is appended to the
21//! [`Info`](`crate::common::Info`) struct, in the `uncompressed_latin1_text`,
22//! `compressed_latin1_text`, and the `utf8_text` fields depending on whether the encountered
23//! chunk is `tEXt`, `zTXt`, or `iTXt`.
24//!
25//! ```
26//! use std::fs::File;
27//! use std::iter::FromIterator;
28//! use std::path::PathBuf;
29//!
30//! // Opening a png file that has a zTXt chunk
31//! let decoder = png::Decoder::new(
32//! File::open(PathBuf::from_iter([
33//! "tests",
34//! "text_chunk_examples",
35//! "ztxt_example.png",
36//! ]))
37//! .unwrap(),
38//! );
39//! let mut reader = decoder.read_info().unwrap();
40//! // If the text chunk is before the image data frames, `reader.info()` already contains the text.
41//! for text_chunk in &reader.info().compressed_latin1_text {
42//! println!("{:?}", text_chunk.keyword); // Prints the keyword
43//! println!("{:#?}", text_chunk); // Prints out the text chunk.
44//! // To get the uncompressed text, use the `get_text` method.
45//! println!("{}", text_chunk.get_text().unwrap());
46//! }
47//! ```
48//!
49//! ## Writing text chunks
50//!
//! There are two ways to write text chunks. The first is to add the appropriate text structs directly to the encoder header before the header is written to file.
//! The second is to call the `write_text_chunk` method, which adds a text chunk at any point in the stream.
53//!
54//! ```
55//! # use png::text_metadata::{ITXtChunk, ZTXtChunk};
56//! # use std::env;
57//! # use std::fs::File;
58//! # use std::io::BufWriter;
59//! # use std::iter::FromIterator;
60//! # use std::path::PathBuf;
61//! # let file = File::create(PathBuf::from_iter(["target", "text_chunk.png"])).unwrap();
62//! # let ref mut w = BufWriter::new(file);
63//! let mut encoder = png::Encoder::new(w, 2, 1); // Width is 2 pixels and height is 1.
64//! encoder.set_color(png::ColorType::Rgba);
65//! encoder.set_depth(png::BitDepth::Eight);
66//! // Adding text chunks to the header
67//! encoder
68//! .add_text_chunk(
69//! "Testing tEXt".to_string(),
70//! "This is a tEXt chunk that will appear before the IDAT chunks.".to_string(),
71//! )
72//! .unwrap();
73//! encoder
74//! .add_ztxt_chunk(
75//! "Testing zTXt".to_string(),
76//! "This is a zTXt chunk that is compressed in the png file.".to_string(),
77//! )
78//! .unwrap();
79//! encoder
80//! .add_itxt_chunk(
81//! "Testing iTXt".to_string(),
82//! "iTXt chunks support all of UTF8. Example: हिंदी.".to_string(),
83//! )
84//! .unwrap();
85//!
86//! let mut writer = encoder.write_header().unwrap();
87//!
88//! let data = [255, 0, 0, 255, 0, 0, 0, 255]; // An array containing a RGBA sequence. First pixel is red and second pixel is black.
89//! writer.write_image_data(&data).unwrap(); // Save
90//!
91//! // We can add a tEXt/zTXt/iTXt at any point before the encoder is dropped from scope. These chunks will be at the end of the png file.
92//! let tail_ztxt_chunk = ZTXtChunk::new("Comment".to_string(), "A zTXt chunk after the image data.".to_string());
93//! writer.write_text_chunk(&tail_ztxt_chunk).unwrap();
94//!
95//! // The fields of the text chunk are public, so they can be mutated before being written to the file.
96//! let mut tail_itxt_chunk = ITXtChunk::new("Author".to_string(), "सायंतन खान".to_string());
97//! tail_itxt_chunk.compressed = true;
98//! tail_itxt_chunk.language_tag = "hi".to_string();
99//! tail_itxt_chunk.translated_keyword = "लेखक".to_string();
100//! writer.write_text_chunk(&tail_itxt_chunk).unwrap();
101//! ```
102
103#![warn(missing_docs)]
104
105use crate::{chunk, encoder, DecodingError, EncodingError};
106use fdeflate::BoundedDecompressionError;
107use flate2::write::ZlibEncoder;
108use flate2::Compression;
109use std::{convert::TryFrom, io::Write};
110
/// Default upper bound, in bytes, on the decompressed size of a compressed text chunk (2 MiB).
pub const DECOMPRESSION_LIMIT: usize = 2 * 1024 * 1024;
113
/// Text encoding errors that are wrapped by the standard `EncodingError` type.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextEncodingError {
    /// The string contains characters that cannot be represented in the target encoding
    /// (Latin-1 for keywords and tEXt/zTXt text, ASCII for the iTXt language tag).
    Unrepresentable,
    /// Keyword longer than 79 bytes or empty
    InvalidKeywordSize,
    /// Error encountered while compressing text
    CompressionError,
}
124
/// Text decoding errors that are wrapped by the standard `DecodingError` type.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextDecodingError {
    /// Bytes that are not valid in the expected encoding (for example a non-ASCII iTXt
    /// language tag, or iTXt text that is not valid UTF-8)
    Unrepresentable,
    /// Keyword longer than 79 bytes or empty
    InvalidKeywordSize,
    /// Missing null separator
    MissingNullSeparator,
    /// Compressed text cannot be uncompressed
    InflationError,
    /// Decompressed text would be larger than the decompression limit
    OutOfDecompressionSpace,
    /// Using an unspecified value for the compression method (only 0 is accepted)
    InvalidCompressionMethod,
    /// Using a byte that is not 0 or 1 as compression flag in iTXt chunk
    InvalidCompressionFlag,
    /// Missing the compression flag
    MissingCompressionFlag,
}
145
/// A generalized text chunk trait, implemented by `TEXtChunk`, `ZTXtChunk` and `ITXtChunk`.
pub trait EncodableTextChunk {
    /// Encodes the text chunk and writes it to `w`.
    ///
    /// Returns an `EncodingError` if the chunk cannot be encoded or written.
    fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError>;
}
151
/// Struct representing a tEXt chunk: a keyword and an uncompressed text, both Latin-1 encoded
/// when written to the file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TEXtChunk {
    /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Text field of tEXt chunk. Can be at most 2GB.
    pub text: String,
}
160
/// Decodes ISO 8859-1 (Latin-1) bytes into a `String`.
///
/// Each byte is converted to the `char` with the same numeric value, so this cannot fail.
fn decode_iso_8859_1(text: &[u8]) -> String {
    let mut decoded = String::with_capacity(text.len());
    decoded.extend(text.iter().copied().map(char::from));
    decoded
}
164
165pub(crate) fn encode_iso_8859_1(text: &str) -> Result<Vec<u8>, TextEncodingError> {
166 encode_iso_8859_1_iter(text).collect()
167}
168
169fn encode_iso_8859_1_into(buf: &mut Vec<u8>, text: &str) -> Result<(), TextEncodingError> {
170 for b in encode_iso_8859_1_iter(text) {
171 buf.push(b?);
172 }
173 Ok(())
174}
175
176fn encode_iso_8859_1_iter(text: &str) -> impl Iterator<Item = Result<u8, TextEncodingError>> + '_ {
177 text.chars()
178 .map(|c| u8::try_from(c as u32).map_err(|_| TextEncodingError::Unrepresentable))
179}
180
181fn decode_ascii(text: &[u8]) -> Result<&str, TextDecodingError> {
182 if text.is_ascii() {
183 // `from_utf8` cannot panic because we're already checked that `text` is ASCII-7.
184 // And this is the only safe way to get ASCII-7 string from `&[u8]`.
185 Ok(std::str::from_utf8(text).expect("unreachable"))
186 } else {
187 Err(TextDecodingError::Unrepresentable)
188 }
189}
190
191impl TEXtChunk {
192 /// Constructs a new TEXtChunk.
193 /// Not sure whether it should take &str or String.
194 pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
195 Self {
196 keyword: keyword.into(),
197 text: text.into(),
198 }
199 }
200
201 /// Decodes a slice of bytes to a String using Latin-1 decoding.
202 /// The decoder runs in strict mode, and any decoding errors are passed along to the caller.
203 pub(crate) fn decode(
204 keyword_slice: &[u8],
205 text_slice: &[u8],
206 ) -> Result<Self, TextDecodingError> {
207 if keyword_slice.is_empty() || keyword_slice.len() > 79 {
208 return Err(TextDecodingError::InvalidKeywordSize);
209 }
210
211 Ok(Self {
212 keyword: decode_iso_8859_1(keyword_slice),
213 text: decode_iso_8859_1(text_slice),
214 })
215 }
216}
217
218impl EncodableTextChunk for TEXtChunk {
219 /// Encodes TEXtChunk to a Writer. The keyword and text are separated by a byte of zeroes.
220 fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
221 let mut data = encode_iso_8859_1(&self.keyword)?;
222
223 if data.is_empty() || data.len() > 79 {
224 return Err(TextEncodingError::InvalidKeywordSize.into());
225 }
226
227 data.push(0);
228
229 encode_iso_8859_1_into(&mut data, &self.text)?;
230
231 encoder::write_chunk(w, chunk::tEXt, &data)
232 }
233}
234
/// Struct representing a zTXt chunk
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ZTXtChunk {
    /// Keyword field of the zTXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Text field of zTXt chunk. Held either compressed (as produced by `decode`) or
    /// uncompressed (as produced by `new`); see `compress_text` and `decompress_text`.
    text: OptCompressed,
}
243
/// Private enum encoding the compressed and uncompressed states of zTXt/iTXt text field.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OptCompressed {
    /// Compressed version of text field, stored as raw bytes. Can be at most 2GB.
    Compressed(Vec<u8>),
    /// Uncompressed text field.
    Uncompressed(String),
}
252
253impl ZTXtChunk {
254 /// Creates a new ZTXt chunk.
255 pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
256 Self {
257 keyword: keyword.into(),
258 text: OptCompressed::Uncompressed(text.into()),
259 }
260 }
261
262 pub(crate) fn decode(
263 keyword_slice: &[u8],
264 compression_method: u8,
265 text_slice: &[u8],
266 ) -> Result<Self, TextDecodingError> {
267 if keyword_slice.is_empty() || keyword_slice.len() > 79 {
268 return Err(TextDecodingError::InvalidKeywordSize);
269 }
270
271 if compression_method != 0 {
272 return Err(TextDecodingError::InvalidCompressionMethod);
273 }
274
275 Ok(Self {
276 keyword: decode_iso_8859_1(keyword_slice),
277 text: OptCompressed::Compressed(text_slice.to_vec()),
278 })
279 }
280
281 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
282 pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
283 self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
284 }
285
286 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
287 pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
288 match &self.text {
289 OptCompressed::Compressed(v) => {
290 let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(&v[..], limit) {
291 Ok(s) => s,
292 Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
293 return Err(DecodingError::from(
294 TextDecodingError::OutOfDecompressionSpace,
295 ));
296 }
297 Err(_) => {
298 return Err(DecodingError::from(TextDecodingError::InflationError));
299 }
300 };
301 self.text = OptCompressed::Uncompressed(decode_iso_8859_1(&uncompressed_raw));
302 }
303 OptCompressed::Uncompressed(_) => {}
304 };
305 Ok(())
306 }
307
308 /// Decompresses the inner text, and returns it as a `String`.
309 /// If decompression uses more the 2MiB, first call decompress with limit, and then this method.
310 pub fn get_text(&self) -> Result<String, DecodingError> {
311 match &self.text {
312 OptCompressed::Compressed(v) => {
313 let uncompressed_raw = fdeflate::decompress_to_vec(v)
314 .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?;
315 Ok(decode_iso_8859_1(&uncompressed_raw))
316 }
317 OptCompressed::Uncompressed(s) => Ok(s.clone()),
318 }
319 }
320
321 /// Compresses the inner text, mutating its own state.
322 pub fn compress_text(&mut self) -> Result<(), EncodingError> {
323 match &self.text {
324 OptCompressed::Uncompressed(s) => {
325 let uncompressed_raw = encode_iso_8859_1(s)?;
326 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
327 encoder
328 .write_all(&uncompressed_raw)
329 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
330 self.text = OptCompressed::Compressed(
331 encoder
332 .finish()
333 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?,
334 );
335 }
336 OptCompressed::Compressed(_) => {}
337 }
338
339 Ok(())
340 }
341}
342
343impl EncodableTextChunk for ZTXtChunk {
344 fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
345 let mut data = encode_iso_8859_1(&self.keyword)?;
346
347 if data.is_empty() || data.len() > 79 {
348 return Err(TextEncodingError::InvalidKeywordSize.into());
349 }
350
351 // Null separator
352 data.push(0);
353
354 // Compression method: the only valid value is 0, as of 2021.
355 data.push(0);
356
357 match &self.text {
358 OptCompressed::Compressed(v) => {
359 data.extend_from_slice(&v[..]);
360 }
361 OptCompressed::Uncompressed(s) => {
362 // This code may have a bug. Check for correctness.
363 let uncompressed_raw = encode_iso_8859_1(s)?;
364 let mut encoder = ZlibEncoder::new(data, Compression::fast());
365 encoder
366 .write_all(&uncompressed_raw)
367 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
368 data = encoder
369 .finish()
370 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
371 }
372 };
373
374 encoder::write_chunk(w, chunk::zTXt, &data)
375 }
376}
377
/// Struct encoding an iTXt chunk
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ITXtChunk {
    /// The keyword field. This needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Indicates whether the text will be (or was) compressed in the PNG.
    pub compressed: bool,
    /// A hyphen separated list of languages that the keyword is translated to. This is ASCII-7 encoded.
    pub language_tag: String,
    /// Translated keyword. This is UTF-8 encoded.
    pub translated_keyword: String,
    /// Text field of iTXt chunk. Held either compressed or uncompressed, independently of the
    /// `compressed` flag; see `compress_text` and `decompress_text`.
    text: OptCompressed,
}
392
393impl ITXtChunk {
394 /// Constructs a new iTXt chunk. Leaves all but keyword and text to default values.
395 pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
396 Self {
397 keyword: keyword.into(),
398 compressed: false,
399 language_tag: "".to_string(),
400 translated_keyword: "".to_string(),
401 text: OptCompressed::Uncompressed(text.into()),
402 }
403 }
404
405 pub(crate) fn decode(
406 keyword_slice: &[u8],
407 compression_flag: u8,
408 compression_method: u8,
409 language_tag_slice: &[u8],
410 translated_keyword_slice: &[u8],
411 text_slice: &[u8],
412 ) -> Result<Self, TextDecodingError> {
413 if keyword_slice.is_empty() || keyword_slice.len() > 79 {
414 return Err(TextDecodingError::InvalidKeywordSize);
415 }
416 let keyword = decode_iso_8859_1(keyword_slice);
417
418 let compressed = match compression_flag {
419 0 => false,
420 1 => true,
421 _ => return Err(TextDecodingError::InvalidCompressionFlag),
422 };
423
424 if compressed && compression_method != 0 {
425 return Err(TextDecodingError::InvalidCompressionMethod);
426 }
427
428 let language_tag = decode_ascii(language_tag_slice)?.to_owned();
429
430 let translated_keyword = std::str::from_utf8(translated_keyword_slice)
431 .map_err(|_| TextDecodingError::Unrepresentable)?
432 .to_string();
433 let text = if compressed {
434 OptCompressed::Compressed(text_slice.to_vec())
435 } else {
436 OptCompressed::Uncompressed(
437 String::from_utf8(text_slice.to_vec())
438 .map_err(|_| TextDecodingError::Unrepresentable)?,
439 )
440 };
441
442 Ok(Self {
443 keyword,
444 compressed,
445 language_tag,
446 translated_keyword,
447 text,
448 })
449 }
450
451 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
452 pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
453 self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
454 }
455
456 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
457 pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
458 match &self.text {
459 OptCompressed::Compressed(v) => {
460 let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(v, limit) {
461 Ok(s) => s,
462 Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
463 return Err(DecodingError::from(
464 TextDecodingError::OutOfDecompressionSpace,
465 ));
466 }
467 Err(_) => {
468 return Err(DecodingError::from(TextDecodingError::InflationError));
469 }
470 };
471 self.text = OptCompressed::Uncompressed(
472 String::from_utf8(uncompressed_raw)
473 .map_err(|_| TextDecodingError::Unrepresentable)?,
474 );
475 }
476 OptCompressed::Uncompressed(_) => {}
477 };
478 Ok(())
479 }
480
481 /// Decompresses the inner text, and returns it as a `String`.
482 /// If decompression takes more than 2 MiB, try `decompress_text_with_limit` followed by this method.
483 pub fn get_text(&self) -> Result<String, DecodingError> {
484 match &self.text {
485 OptCompressed::Compressed(v) => {
486 let uncompressed_raw = fdeflate::decompress_to_vec(v)
487 .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?;
488 String::from_utf8(uncompressed_raw)
489 .map_err(|_| TextDecodingError::Unrepresentable.into())
490 }
491 OptCompressed::Uncompressed(s) => Ok(s.clone()),
492 }
493 }
494
495 /// Compresses the inner text, mutating its own state.
496 pub fn compress_text(&mut self) -> Result<(), EncodingError> {
497 match &self.text {
498 OptCompressed::Uncompressed(s) => {
499 let uncompressed_raw = s.as_bytes();
500 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
501 encoder
502 .write_all(uncompressed_raw)
503 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
504 self.text = OptCompressed::Compressed(
505 encoder
506 .finish()
507 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?,
508 );
509 }
510 OptCompressed::Compressed(_) => {}
511 }
512
513 Ok(())
514 }
515}
516
517impl EncodableTextChunk for ITXtChunk {
518 fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
519 // Keyword
520 let mut data = encode_iso_8859_1(&self.keyword)?;
521
522 if data.is_empty() || data.len() > 79 {
523 return Err(TextEncodingError::InvalidKeywordSize.into());
524 }
525
526 // Null separator
527 data.push(0);
528
529 // Compression flag
530 if self.compressed {
531 data.push(1);
532 } else {
533 data.push(0);
534 }
535
536 // Compression method
537 data.push(0);
538
539 // Language tag
540 if !self.language_tag.is_ascii() {
541 return Err(EncodingError::from(TextEncodingError::Unrepresentable));
542 }
543 data.extend(self.language_tag.as_bytes());
544
545 // Null separator
546 data.push(0);
547
548 // Translated keyword
549 data.extend_from_slice(self.translated_keyword.as_bytes());
550
551 // Null separator
552 data.push(0);
553
554 // Text
555 if self.compressed {
556 match &self.text {
557 OptCompressed::Compressed(v) => {
558 data.extend_from_slice(&v[..]);
559 }
560 OptCompressed::Uncompressed(s) => {
561 let uncompressed_raw = s.as_bytes();
562 let mut encoder = ZlibEncoder::new(data, Compression::fast());
563 encoder
564 .write_all(uncompressed_raw)
565 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
566 data = encoder
567 .finish()
568 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
569 }
570 }
571 } else {
572 match &self.text {
573 OptCompressed::Compressed(v) => {
574 let uncompressed_raw = fdeflate::decompress_to_vec(v)
575 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
576 data.extend_from_slice(&uncompressed_raw[..]);
577 }
578 OptCompressed::Uncompressed(s) => {
579 data.extend_from_slice(s.as_bytes());
580 }
581 }
582 }
583
584 encoder::write_chunk(w, chunk::iTXt, &data)
585 }
586}