png/text_metadata.rs
1//! # Text chunks (tEXt/zTXt/iTXt) structs and functions
2//!
3//! The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#11textinfo) optionally allows for
4//! embedded text chunks in the file. They may appear either before or after the image data
5//! chunks. There are three kinds of text chunks.
6//! - `tEXt`: This has a `keyword` and `text` field, and is ISO 8859-1 encoded.
7//! - `zTXt`: This is semantically the same as `tEXt`, i.e. it has the same fields and
8//! encoding, but the `text` field is compressed before being written into the PNG file.
9//! - `iTXt`: This chunk allows for its `text` field to be any valid UTF-8, and supports
10//! compression of the text field as well.
11//!
12//! The `ISO 8859-1` encoding technically doesn't allow any control characters
13//! to be used, but in practice these values are encountered anyway. This can
14//! either be the extended `ISO-8859-1` encoding with control characters or the
15//! `Windows-1252` encoding. This crate assumes the `ISO-8859-1` encoding is
16//! used.
17//!
18//! ## Reading text chunks
19//!
//! As a PNG is decoded, any text chunk encountered is appended to the
21//! [`Info`](`crate::common::Info`) struct, in the `uncompressed_latin1_text`,
22//! `compressed_latin1_text`, and the `utf8_text` fields depending on whether the encountered
23//! chunk is `tEXt`, `zTXt`, or `iTXt`.
24//!
25//! ```
26//! use std::fs::File;
27//! use std::io::BufReader;
28//! use std::iter::FromIterator;
29//! use std::path::PathBuf;
30//!
31//! // Opening a png file that has a zTXt chunk
32//! let decoder = png::Decoder::new(
33//! BufReader::new(File::open("tests/text_chunk_examples/ztxt_example.png").unwrap())
34//! );
35//! let mut reader = decoder.read_info().unwrap();
36//! // If the text chunk is before the image data frames, `reader.info()` already contains the text.
37//! for text_chunk in &reader.info().compressed_latin1_text {
38//! println!("{:?}", text_chunk.keyword); // Prints the keyword
39//! println!("{:#?}", text_chunk); // Prints out the text chunk.
40//! // To get the uncompressed text, use the `get_text` method.
41//! println!("{}", text_chunk.get_text().unwrap());
42//! }
43//! ```
44//!
45//! ## Writing text chunks
46//!
47//! There are two ways to write text chunks: the first is to add the appropriate text structs directly to the encoder header before the header is written to file.
48//! To add a text chunk at any point in the stream, use the `write_text_chunk` method.
49//!
50//! ```
51//! # use png::text_metadata::{ITXtChunk, ZTXtChunk};
52//! # use std::env;
53//! # use std::fs::File;
54//! # use std::io::BufWriter;
55//! # use std::iter::FromIterator;
56//! # use std::path::PathBuf;
57//! # let file = File::create(PathBuf::from_iter(["target", "text_chunk.png"])).unwrap();
58//! # let ref mut w = BufWriter::new(file);
59//! let mut encoder = png::Encoder::new(w, 2, 1); // Width is 2 pixels and height is 1.
60//! encoder.set_color(png::ColorType::Rgba);
61//! encoder.set_depth(png::BitDepth::Eight);
62//! // Adding text chunks to the header
63//! encoder
64//! .add_text_chunk(
65//! "Testing tEXt".to_string(),
66//! "This is a tEXt chunk that will appear before the IDAT chunks.".to_string(),
67//! )
68//! .unwrap();
69//! encoder
70//! .add_ztxt_chunk(
71//! "Testing zTXt".to_string(),
72//! "This is a zTXt chunk that is compressed in the png file.".to_string(),
73//! )
74//! .unwrap();
75//! encoder
76//! .add_itxt_chunk(
77//! "Testing iTXt".to_string(),
78//! "iTXt chunks support all of UTF8. Example: हिंदी.".to_string(),
79//! )
80//! .unwrap();
81//!
82//! let mut writer = encoder.write_header().unwrap();
83//!
84//! let data = [255, 0, 0, 255, 0, 0, 0, 255]; // An array containing a RGBA sequence. First pixel is red and second pixel is black.
85//! writer.write_image_data(&data).unwrap(); // Save
86//!
87//! // We can add a tEXt/zTXt/iTXt at any point before the encoder is dropped from scope. These chunks will be at the end of the png file.
88//! let tail_ztxt_chunk = ZTXtChunk::new("Comment".to_string(), "A zTXt chunk after the image data.".to_string());
89//! writer.write_text_chunk(&tail_ztxt_chunk).unwrap();
90//!
91//! // The fields of the text chunk are public, so they can be mutated before being written to the file.
92//! let mut tail_itxt_chunk = ITXtChunk::new("Author".to_string(), "सायंतन खान".to_string());
93//! tail_itxt_chunk.compressed = true;
94//! tail_itxt_chunk.language_tag = "hi".to_string();
95//! tail_itxt_chunk.translated_keyword = "लेखक".to_string();
96//! writer.write_text_chunk(&tail_itxt_chunk).unwrap();
97//! ```
98
99#![warn(missing_docs)]
100
101use crate::{chunk, encoder, DecodingError, EncodingError};
102use fdeflate::BoundedDecompressionError;
103use flate2::write::ZlibEncoder;
104use flate2::Compression;
105use std::{convert::TryFrom, io::Write};
106
/// Default upper bound, in bytes, on the decompressed size of a compressed
/// text chunk: 2 MiB.
pub const DECOMPRESSION_LIMIT: usize = 2 * 1024 * 1024;
109
/// Text encoding errors that are wrapped by the standard `EncodingError` type.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextEncodingError {
    /// The string contains characters that cannot be represented in the
    /// target encoding (e.g. a character above U+00FF in a Latin-1 field).
    Unrepresentable,
    /// Keyword longer than 79 bytes or empty
    InvalidKeywordSize,
    /// Error encountered while compressing text
    CompressionError,
}
120
/// Text decoding errors that are wrapped by the standard `DecodingError` type.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextDecodingError {
    /// Unrepresentable characters in string
    Unrepresentable,
    /// Keyword longer than 79 bytes or empty
    InvalidKeywordSize,
    /// Missing null separator
    MissingNullSeparator,
    /// Compressed text cannot be uncompressed
    InflationError,
    /// Needs more space to decompress
    OutOfDecompressionSpace,
    /// Using an unspecified value for the compression method
    InvalidCompressionMethod,
    /// Using a byte that is not 0 or 1 as compression flag in iTXt chunk
    InvalidCompressionFlag,
    /// Missing the compression flag
    MissingCompressionFlag,
}
141
/// A generalized text chunk trait, implemented by every text chunk type that
/// can be written into a PNG stream.
pub trait EncodableTextChunk {
    /// Serializes the text chunk and writes it to `w`.
    ///
    /// Returns an `EncodingError` if the chunk cannot be encoded or written.
    fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError>;
}
147
/// Struct representing a tEXt chunk
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TEXtChunk {
    /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Text field of tEXt chunk. Can be at most 2GB.
    /// Encoding fails if it contains characters that are not representable in Latin-1.
    pub text: String,
}
156
/// Decodes ISO 8859-1 (Latin-1) bytes into a `String`.
///
/// Each byte maps to the Unicode code point with the same numeric value,
/// so the conversion cannot fail.
fn decode_iso_8859_1(text: &[u8]) -> String {
    let mut decoded = String::new();
    for &byte in text {
        decoded.push(char::from(byte));
    }
    decoded
}
160
161pub(crate) fn encode_iso_8859_1(text: &str) -> Result<Vec<u8>, TextEncodingError> {
162 encode_iso_8859_1_iter(text).collect()
163}
164
165fn encode_iso_8859_1_into(buf: &mut Vec<u8>, text: &str) -> Result<(), TextEncodingError> {
166 for b in encode_iso_8859_1_iter(text) {
167 buf.push(b?);
168 }
169 Ok(())
170}
171
172fn encode_iso_8859_1_iter(text: &str) -> impl Iterator<Item = Result<u8, TextEncodingError>> + '_ {
173 text.chars()
174 .map(|c| u8::try_from(c as u32).map_err(|_| TextEncodingError::Unrepresentable))
175}
176
177fn decode_ascii(text: &[u8]) -> Result<&str, TextDecodingError> {
178 if text.is_ascii() {
179 // `from_utf8` cannot panic because we're already checked that `text` is ASCII-7.
180 // And this is the only safe way to get ASCII-7 string from `&[u8]`.
181 Ok(std::str::from_utf8(text).expect("unreachable"))
182 } else {
183 Err(TextDecodingError::Unrepresentable)
184 }
185}
186
187impl TEXtChunk {
188 /// Constructs a new TEXtChunk.
189 /// Not sure whether it should take &str or String.
190 pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
191 Self {
192 keyword: keyword.into(),
193 text: text.into(),
194 }
195 }
196
197 /// Decodes a slice of bytes to a String using Latin-1 decoding.
198 /// The decoder runs in strict mode, and any decoding errors are passed along to the caller.
199 pub(crate) fn decode(
200 keyword_slice: &[u8],
201 text_slice: &[u8],
202 ) -> Result<Self, TextDecodingError> {
203 if keyword_slice.is_empty() || keyword_slice.len() > 79 {
204 return Err(TextDecodingError::InvalidKeywordSize);
205 }
206
207 Ok(Self {
208 keyword: decode_iso_8859_1(keyword_slice),
209 text: decode_iso_8859_1(text_slice),
210 })
211 }
212}
213
214impl EncodableTextChunk for TEXtChunk {
215 /// Encodes TEXtChunk to a Writer. The keyword and text are separated by a byte of zeroes.
216 fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
217 let mut data = encode_iso_8859_1(&self.keyword)?;
218
219 if data.is_empty() || data.len() > 79 {
220 return Err(TextEncodingError::InvalidKeywordSize.into());
221 }
222
223 data.push(0);
224
225 encode_iso_8859_1_into(&mut data, &self.text)?;
226
227 encoder::write_chunk(w, chunk::tEXt, &data)
228 }
229}
230
/// Struct representing a zTXt chunk
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ZTXtChunk {
    /// Keyword field of the zTXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Text field of zTXt chunk, held either in compressed or in uncompressed form.
    /// Chunks decoded from a file start out compressed; chunks built with `new` start out uncompressed.
    text: OptCompressed,
}
239
/// Private enum encoding the compressed and uncompressed states of zTXt/iTXt text field.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OptCompressed {
    /// Compressed version of text field, as raw bytes. Can be at most 2GB.
    Compressed(Vec<u8>),
    /// Uncompressed text field, already decoded into a `String`.
    Uncompressed(String),
}
248
249impl ZTXtChunk {
250 /// Creates a new ZTXt chunk.
251 pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
252 Self {
253 keyword: keyword.into(),
254 text: OptCompressed::Uncompressed(text.into()),
255 }
256 }
257
258 pub(crate) fn decode(
259 keyword_slice: &[u8],
260 compression_method: u8,
261 text_slice: &[u8],
262 ) -> Result<Self, TextDecodingError> {
263 if keyword_slice.is_empty() || keyword_slice.len() > 79 {
264 return Err(TextDecodingError::InvalidKeywordSize);
265 }
266
267 if compression_method != 0 {
268 return Err(TextDecodingError::InvalidCompressionMethod);
269 }
270
271 Ok(Self {
272 keyword: decode_iso_8859_1(keyword_slice),
273 text: OptCompressed::Compressed(text_slice.to_vec()),
274 })
275 }
276
277 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
278 pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
279 self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
280 }
281
282 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
283 pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
284 match &self.text {
285 OptCompressed::Compressed(v) => {
286 let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(&v[..], limit) {
287 Ok(s) => s,
288 Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
289 return Err(DecodingError::from(
290 TextDecodingError::OutOfDecompressionSpace,
291 ));
292 }
293 Err(_) => {
294 return Err(DecodingError::from(TextDecodingError::InflationError));
295 }
296 };
297 self.text = OptCompressed::Uncompressed(decode_iso_8859_1(&uncompressed_raw));
298 }
299 OptCompressed::Uncompressed(_) => {}
300 };
301 Ok(())
302 }
303
304 /// Decompresses the inner text, and returns it as a `String`.
305 /// If decompression uses more the 2MiB, first call decompress with limit, and then this method.
306 pub fn get_text(&self) -> Result<String, DecodingError> {
307 match &self.text {
308 OptCompressed::Compressed(v) => {
309 let uncompressed_raw = fdeflate::decompress_to_vec(v)
310 .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?;
311 Ok(decode_iso_8859_1(&uncompressed_raw))
312 }
313 OptCompressed::Uncompressed(s) => Ok(s.clone()),
314 }
315 }
316
317 /// Compresses the inner text, mutating its own state.
318 pub fn compress_text(&mut self) -> Result<(), EncodingError> {
319 match &self.text {
320 OptCompressed::Uncompressed(s) => {
321 let uncompressed_raw = encode_iso_8859_1(s)?;
322 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
323 encoder
324 .write_all(&uncompressed_raw)
325 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
326 self.text = OptCompressed::Compressed(
327 encoder
328 .finish()
329 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?,
330 );
331 }
332 OptCompressed::Compressed(_) => {}
333 }
334
335 Ok(())
336 }
337}
338
339impl EncodableTextChunk for ZTXtChunk {
340 fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
341 let mut data = encode_iso_8859_1(&self.keyword)?;
342
343 if data.is_empty() || data.len() > 79 {
344 return Err(TextEncodingError::InvalidKeywordSize.into());
345 }
346
347 // Null separator
348 data.push(0);
349
350 // Compression method: the only valid value is 0, as of 2021.
351 data.push(0);
352
353 match &self.text {
354 OptCompressed::Compressed(v) => {
355 data.extend_from_slice(&v[..]);
356 }
357 OptCompressed::Uncompressed(s) => {
358 // This code may have a bug. Check for correctness.
359 let uncompressed_raw = encode_iso_8859_1(s)?;
360 let mut encoder = ZlibEncoder::new(data, Compression::fast());
361 encoder
362 .write_all(&uncompressed_raw)
363 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
364 data = encoder
365 .finish()
366 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
367 }
368 };
369
370 encoder::write_chunk(w, chunk::zTXt, &data)
371 }
372}
373
/// Struct encoding an iTXt chunk
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ITXtChunk {
    /// The keyword field. This needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Indicates whether the text will be (or was) compressed in the PNG.
    pub compressed: bool,
    /// A hyphen separated list of languages that the keyword is translated to. This is ASCII-7 encoded.
    pub language_tag: String,
    /// Translated keyword. This is UTF-8 encoded.
    pub translated_keyword: String,
    /// Text field of iTXt chunk, held either in compressed or in uncompressed form.
    /// Chunks built with `new` start out uncompressed.
    text: OptCompressed,
}
388
389impl ITXtChunk {
390 /// Constructs a new iTXt chunk. Leaves all but keyword and text to default values.
391 pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
392 Self {
393 keyword: keyword.into(),
394 compressed: false,
395 language_tag: "".to_string(),
396 translated_keyword: "".to_string(),
397 text: OptCompressed::Uncompressed(text.into()),
398 }
399 }
400
401 pub(crate) fn decode(
402 keyword_slice: &[u8],
403 compression_flag: u8,
404 compression_method: u8,
405 language_tag_slice: &[u8],
406 translated_keyword_slice: &[u8],
407 text_slice: &[u8],
408 ) -> Result<Self, TextDecodingError> {
409 if keyword_slice.is_empty() || keyword_slice.len() > 79 {
410 return Err(TextDecodingError::InvalidKeywordSize);
411 }
412 let keyword = decode_iso_8859_1(keyword_slice);
413
414 let compressed = match compression_flag {
415 0 => false,
416 1 => true,
417 _ => return Err(TextDecodingError::InvalidCompressionFlag),
418 };
419
420 if compressed && compression_method != 0 {
421 return Err(TextDecodingError::InvalidCompressionMethod);
422 }
423
424 let language_tag = decode_ascii(language_tag_slice)?.to_owned();
425
426 let translated_keyword = std::str::from_utf8(translated_keyword_slice)
427 .map_err(|_| TextDecodingError::Unrepresentable)?
428 .to_string();
429 let text = if compressed {
430 OptCompressed::Compressed(text_slice.to_vec())
431 } else {
432 OptCompressed::Uncompressed(
433 String::from_utf8(text_slice.to_vec())
434 .map_err(|_| TextDecodingError::Unrepresentable)?,
435 )
436 };
437
438 Ok(Self {
439 keyword,
440 compressed,
441 language_tag,
442 translated_keyword,
443 text,
444 })
445 }
446
447 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
448 pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
449 self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
450 }
451
452 /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
453 pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
454 match &self.text {
455 OptCompressed::Compressed(v) => {
456 let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(v, limit) {
457 Ok(s) => s,
458 Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
459 return Err(DecodingError::from(
460 TextDecodingError::OutOfDecompressionSpace,
461 ));
462 }
463 Err(_) => {
464 return Err(DecodingError::from(TextDecodingError::InflationError));
465 }
466 };
467 self.text = OptCompressed::Uncompressed(
468 String::from_utf8(uncompressed_raw)
469 .map_err(|_| TextDecodingError::Unrepresentable)?,
470 );
471 }
472 OptCompressed::Uncompressed(_) => {}
473 };
474 Ok(())
475 }
476
477 /// Decompresses the inner text, and returns it as a `String`.
478 /// If decompression takes more than 2 MiB, try `decompress_text_with_limit` followed by this method.
479 pub fn get_text(&self) -> Result<String, DecodingError> {
480 match &self.text {
481 OptCompressed::Compressed(v) => {
482 let uncompressed_raw = fdeflate::decompress_to_vec(v)
483 .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?;
484 String::from_utf8(uncompressed_raw)
485 .map_err(|_| TextDecodingError::Unrepresentable.into())
486 }
487 OptCompressed::Uncompressed(s) => Ok(s.clone()),
488 }
489 }
490
491 /// Compresses the inner text, mutating its own state.
492 pub fn compress_text(&mut self) -> Result<(), EncodingError> {
493 match &self.text {
494 OptCompressed::Uncompressed(s) => {
495 let uncompressed_raw = s.as_bytes();
496 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
497 encoder
498 .write_all(uncompressed_raw)
499 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
500 self.text = OptCompressed::Compressed(
501 encoder
502 .finish()
503 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?,
504 );
505 }
506 OptCompressed::Compressed(_) => {}
507 }
508
509 Ok(())
510 }
511}
512
513impl EncodableTextChunk for ITXtChunk {
514 fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
515 // Keyword
516 let mut data = encode_iso_8859_1(&self.keyword)?;
517
518 if data.is_empty() || data.len() > 79 {
519 return Err(TextEncodingError::InvalidKeywordSize.into());
520 }
521
522 // Null separator
523 data.push(0);
524
525 // Compression flag
526 if self.compressed {
527 data.push(1);
528 } else {
529 data.push(0);
530 }
531
532 // Compression method
533 data.push(0);
534
535 // Language tag
536 if !self.language_tag.is_ascii() {
537 return Err(EncodingError::from(TextEncodingError::Unrepresentable));
538 }
539 data.extend(self.language_tag.as_bytes());
540
541 // Null separator
542 data.push(0);
543
544 // Translated keyword
545 data.extend_from_slice(self.translated_keyword.as_bytes());
546
547 // Null separator
548 data.push(0);
549
550 // Text
551 if self.compressed {
552 match &self.text {
553 OptCompressed::Compressed(v) => {
554 data.extend_from_slice(&v[..]);
555 }
556 OptCompressed::Uncompressed(s) => {
557 let uncompressed_raw = s.as_bytes();
558 let mut encoder = ZlibEncoder::new(data, Compression::fast());
559 encoder
560 .write_all(uncompressed_raw)
561 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
562 data = encoder
563 .finish()
564 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
565 }
566 }
567 } else {
568 match &self.text {
569 OptCompressed::Compressed(v) => {
570 let uncompressed_raw = fdeflate::decompress_to_vec(v)
571 .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?;
572 data.extend_from_slice(&uncompressed_raw[..]);
573 }
574 OptCompressed::Uncompressed(s) => {
575 data.extend_from_slice(s.as_bytes());
576 }
577 }
578 }
579
580 encoder::write_chunk(w, chunk::iTXt, &data)
581 }
582}