exif/
isobmff.rs

1//
2// Copyright (c) 2020 KAMADA Ken'ichi.
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions
7// are met:
8// 1. Redistributions of source code must retain the above copyright
9//    notice, this list of conditions and the following disclaimer.
10// 2. Redistributions in binary form must reproduce the above copyright
11//    notice, this list of conditions and the following disclaimer in the
12//    documentation and/or other materials provided with the distribution.
13//
14// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17// ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24// SUCH DAMAGE.
25//
26
27use std::convert::{TryFrom as _, TryInto as _};
28use std::io::{BufRead, ErrorKind, Seek, SeekFrom};
29
30use crate::endian::{Endian, BigEndian};
31use crate::error::Error;
32use crate::util::{read64, BufReadExt as _, ReadExt as _};
33
// Checking "mif1" in the compatible brands should be enough, because
// the "heic", "heix", "heim", and "heis" files shall include "mif1"
// among the compatible brands [ISO23008-12 B.4.1] [ISO23008-12 B.4.3].
// Same for "msf1" [ISO23008-12 B.4.2] [ISO23008-12 B.4.4].
static HEIF_BRANDS: &[[u8; 4]] = &[*b"mif1", *b"msf1"];

// Upper bound, in bytes, on the item data assembled by
// Parser::parse_meta.  Anything longer is rejected with
// "Exif data too large".
const MAX_EXIF_SIZE: usize = 65535;
41
42// Most errors in this file are Error::InvalidFormat.
43impl From<&'static str> for Error {
44    fn from(err: &'static str) -> Error {
45        Error::InvalidFormat(err)
46    }
47}
48
49pub fn get_exif_attr<R>(reader: &mut R) -> Result<Vec<u8>, Error>
50where R: BufRead + Seek {
51    let mut parser = Parser::new(reader);
52    match parser.parse() {
53        Err(Error::Io(ref e)) if e.kind() == ErrorKind::UnexpectedEof =>
54            Err("Broken HEIF file".into()),
55        Err(e) => Err(e),
56        Ok(mut buf) => {
57            if buf.len() < 4 {
58                return Err("ExifDataBlock too small".into());
59            }
60            let offset = BigEndian::loadu32(&buf, 0) as usize;
61            if buf.len() - 4 < offset {
62                return Err("Invalid Exif header offset".into());
63            }
64            buf.drain(.. 4 + offset);
65            Ok(buf)
66        },
67    }
68}
69
// State carried while walking the file-level boxes of an ISO base
// media file in search of the Exif item.
#[derive(Debug)]
struct Parser<R> {
    // The input being parsed (BufRead + Seek in the impl below).
    reader: R,
    // Whether the file type box has been checked.
    ftyp_checked: bool,
    // The item where Exif data is stored.
    // Set by parse_infe when an entry of type "Exif" is seen.
    item_id: Option<u32>,
    // The location of the item_id.
    // Set by parse_iloc when it meets the entry for item_id.
    item_location: Option<Location>,
}
80
// Where the bytes of one item live, as read from the ItemLocationBox.
#[derive(Debug)]
struct Location {
    // How the extents are resolved by parse_meta: 0 reads from the
    // file itself, 1 reads from the ItemDataBox, 2 is rejected as
    // not supported, and anything else is invalid.
    construction_method: u8,
    // index, offset, length
    // A length of 0 means "up to the end" of the file or ItemDataBox.
    extents: Vec<(u64, u64, u64)>,
    // Added to each extent offset before it is used.
    base_offset: u64,
}
88
89impl<R> Parser<R> where R: BufRead + Seek {
90    fn new(reader: R) -> Self {
91        Self {
92            reader: reader,
93            ftyp_checked: false,
94            item_id: None,
95            item_location: None,
96        }
97    }
98
99    fn parse(&mut self) -> Result<Vec<u8>, Error> {
100        while let Some((size, boxtype)) = self.read_box_header()? {
101            match &boxtype {
102                b"ftyp" => {
103                    let buf = self.read_file_level_box(size)?;
104                    self.parse_ftyp(BoxSplitter::new(&buf))?;
105                    self.ftyp_checked = true;
106                },
107                b"meta" => {
108                    if !self.ftyp_checked {
109                        return Err("MetaBox found before FileTypeBox".into());
110                    }
111                    let buf = self.read_file_level_box(size)?;
112                    let exif = self.parse_meta(BoxSplitter::new(&buf))?;
113                    return Ok(exif);
114                },
115                _ => self.skip_file_level_box(size)?,
116            }
117        }
118        Err(Error::NotFound("HEIF"))
119    }
120
121    // Reads size, type, and largesize,
122    // and returns body size and type.
123    // If no byte can be read due to EOF, None is returned.
124    fn read_box_header(&mut self) -> Result<Option<(u64, [u8; 4])>, Error> {
125        if self.reader.is_eof()? {
126            return Ok(None);
127        }
128        let mut buf = [0; 8];
129        self.reader.read_exact(&mut buf)?;
130        let size = match BigEndian::loadu32(&buf, 0) {
131            0 => Some(std::u64::MAX),
132            1 => read64(&mut self.reader)?.checked_sub(16),
133            x => u64::from(x).checked_sub(8),
134        }.ok_or("Invalid box size")?;
135        let boxtype = buf[4..8].try_into().expect("never fails");
136        Ok(Some((size, boxtype)))
137    }
138
139    fn read_file_level_box(&mut self, size: u64) -> Result<Vec<u8>, Error> {
140        let mut buf;
141        match size {
142            std::u64::MAX => {
143                buf = Vec::new();
144                self.reader.read_to_end(&mut buf)?;
145            },
146            _ => {
147                let size = size.try_into()
148                    .or(Err("Box is larger than the address space"))?;
149                buf = Vec::new();
150                self.reader.read_exact_len(&mut buf, size)?;
151            },
152        }
153        Ok(buf)
154    }
155
156    fn skip_file_level_box(&mut self, size: u64) -> Result<(), Error> {
157        match size {
158            std::u64::MAX => self.reader.seek(SeekFrom::End(0))?,
159            _ => self.reader.seek(SeekFrom::Current(
160                size.try_into().or(Err("Large seek not supported"))?))?,
161        };
162        Ok(())
163    }
164
165    fn parse_ftyp(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
166        let head = boxp.slice(8)?;
167        let _major_brand = &head[0..4];
168        let _minor_version = BigEndian::loadu32(&head, 4);
169        while let Ok(compat_brand) = boxp.array4() {
170            if HEIF_BRANDS.contains(&compat_brand) {
171                return Ok(());
172            }
173        }
174        Err("No compatible brand recognized in ISO base media file".into())
175    }
176
177    fn parse_meta(&mut self, mut boxp: BoxSplitter) -> Result<Vec<u8>, Error> {
178        let (version, _flags) = boxp.fullbox_header()?;
179        if version != 0 {
180            return Err("Unsupported MetaBox".into());
181        }
182        let mut idat = None;
183        let mut iloc = None;
184        while !boxp.is_empty() {
185            let (boxtype, mut body) = boxp.child_box()?;
186            match boxtype {
187                b"idat" => idat = Some(body.slice(body.len())?),
188                b"iinf" => self.parse_iinf(body)?,
189                b"iloc" => iloc = Some(body),
190                _ => {},
191            }
192        }
193
194        self.item_id.ok_or(Error::NotFound("HEIF"))?;
195        self.parse_iloc(iloc.ok_or("No ItemLocationBox")?)?;
196        let location = self.item_location.as_ref()
197            .ok_or("No matching item in ItemLocationBox")?;
198        let mut buf = Vec::new();
199        match location.construction_method {
200            0 => {
201                for &(_, off, len) in &location.extents {
202                    let off = location.base_offset.checked_add(off)
203                        .ok_or("Invalid offset")?;
204                    // Seeking beyond the EOF is allowed and
205                    // implementation-defined, but the subsequent read
206                    // should fail.
207                    self.reader.seek(SeekFrom::Start(off))?;
208                    match len {
209                        0 => { self.reader.read_to_end(&mut buf)?; },
210                        _ => {
211                            let len = len.try_into()
212                                .or(Err("Extent too large"))?;
213                            self.reader.read_exact_len(&mut buf, len)?;
214                        },
215                    }
216                    if buf.len() > MAX_EXIF_SIZE {
217                        return Err("Exif data too large".into());
218                    }
219                }
220            },
221            1 => {
222                let idat = idat.ok_or("No ItemDataBox")?;
223                for &(_, off, len) in &location.extents {
224                    let off = location.base_offset.checked_add(off)
225                        .ok_or("Invalid offset")?;
226                    let end = off.checked_add(len).ok_or("Invalid length")?;
227                    let off = off.try_into().or(Err("Offset too large"))?;
228                    let end = end.try_into().or(Err("Length too large"))?;
229                    buf.extend_from_slice(match len {
230                        0 => idat.get(off..),
231                        _ => idat.get(off..end),
232                    }.ok_or("Out of ItemDataBox")?);
233                    if buf.len() > MAX_EXIF_SIZE {
234                        return Err("Exif data too large".into());
235                    }
236                }
237            },
238            2 => return Err(Error::NotSupported(
239                "Construction by item offset is not supported")),
240            _ => return Err("Invalid construction_method".into()),
241        }
242        Ok(buf)
243    }
244
245    fn parse_iloc(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
246        let (version, _flags) = boxp.fullbox_header()?;
247        let tmp = boxp.uint16().map(usize::from)?;
248        let (offset_size, length_size, base_offset_size) =
249            (tmp >> 12, tmp >> 8 & 0xf, tmp >> 4 & 0xf);
250        let index_size = match version { 1 | 2 => tmp & 0xf, _ => 0 };
251        let item_count = match version {
252            0 | 1 => boxp.uint16()?.into(),
253            2 => boxp.uint32()?,
254            _ => return Err("Unsupported ItemLocationBox".into()),
255        };
256        for _ in 0..item_count {
257            let item_id = match version {
258                0 | 1 => boxp.uint16()?.into(),
259                2 => boxp.uint32()?,
260                _ => unreachable!(),
261            };
262            let construction_method = match version {
263                0 => 0,
264                1 | 2 => boxp.slice(2).map(|x| x[1] & 0xf)?,
265                _ => unreachable!(),
266            };
267            let data_ref_index = boxp.uint16()?;
268            if construction_method == 0 && data_ref_index != 0 {
269                return Err(Error::NotSupported(
270                    "External data reference is not supported"));
271            }
272            let base_offset = boxp.size048(base_offset_size)?
273                .ok_or("Invalid base_offset_size")?;
274            let extent_count = boxp.uint16()?.into();
275            if self.item_id == Some(item_id) {
276                let mut extents = Vec::with_capacity(extent_count);
277                for _ in 0..extent_count {
278                    let index = boxp.size048(index_size)?
279                        .ok_or("Invalid index_size")?;
280                    let offset = boxp.size048(offset_size)?
281                        .ok_or("Invalid offset_size")?;
282                    let length = boxp.size048(length_size)?
283                        .ok_or("Invalid length_size")?;
284                    extents.push((index, offset, length));
285                }
286                self.item_location = Some(Location {
287                    construction_method, extents, base_offset });
288            } else {
289                // (15 + 15 + 15) * u16::MAX never overflows.
290                boxp.slice((index_size + offset_size + length_size) *
291                           extent_count)?;
292            }
293        }
294        Ok(())
295    }
296
297    fn parse_iinf(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
298        let (version, _flags) = boxp.fullbox_header()?;
299        let entry_count = match version {
300            0 => boxp.uint16()?.into(),
301            _ => boxp.uint32()?,
302        };
303        for _ in 0..entry_count {
304            let (boxtype, body) = boxp.child_box()?;
305            match boxtype {
306                b"infe" => self.parse_infe(body)?,
307                _ => {},
308            }
309        }
310        Ok(())
311    }
312
313    fn parse_infe(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
314        let (version, _flags) = boxp.fullbox_header()?;
315        let item_id = match version {
316            2 => boxp.uint16()?.into(),
317            3 => boxp.uint32()?,
318            _ => return Err("Unsupported ItemInfoEntry".into()),
319        };
320        let _item_protection_index = boxp.slice(2)?;
321        let item_type = boxp.slice(4)?;
322        if item_type == b"Exif" {
323            self.item_id = Some(item_id);
324        }
325        Ok(())
326    }
327}
328
329pub fn is_heif(buf: &[u8]) -> bool {
330    let mut boxp = BoxSplitter::new(buf);
331    while let Ok((boxtype, mut body)) = boxp.child_box() {
332        if boxtype == b"ftyp" {
333            let _major_brand_minor_version = if body.slice(8).is_err() {
334                return false;
335            };
336            while let Ok(compat_brand) = body.array4() {
337                if HEIF_BRANDS.contains(&compat_brand) {
338                    return true;
339                }
340            }
341            return false;
342        }
343    }
344    false
345}
346
347struct BoxSplitter<'a> {
348    inner: &'a [u8],
349}
350
351impl<'a> BoxSplitter<'a> {
352    fn new(slice: &'a [u8]) -> BoxSplitter<'a> {
353        Self { inner: slice }
354    }
355
356    fn is_empty(&self) -> bool {
357        self.inner.is_empty()
358    }
359
360    fn len(&self) -> usize {
361        self.inner.len()
362    }
363
364    // Returns type and body.
365    fn child_box(&mut self) -> Result<(&'a [u8], BoxSplitter<'a>), Error> {
366        let size = self.uint32()? as usize;
367        let boxtype = self.slice(4)?;
368        let body_len = match size {
369            0 => Some(self.len()),
370            1 => usize::try_from(self.uint64()?)
371                .or(Err("Box is larger than the address space"))?
372                .checked_sub(16),
373            _ => size.checked_sub(8),
374        }.ok_or("Invalid box size")?;
375        let body = self.slice(body_len)?;
376        Ok((boxtype, BoxSplitter::new(body)))
377    }
378
379    // Returns 0-, 4-, or 8-byte unsigned integer.
380    fn size048(&mut self, size: usize) -> Result<Option<u64>, Error> {
381        match size {
382            0 => Ok(Some(0)),
383            4 => self.uint32().map(u64::from).map(Some),
384            8 => self.uint64().map(Some),
385            _ => Ok(None),
386        }
387    }
388
389    // Returns version and flags.
390    fn fullbox_header(&mut self) -> Result<(u32, u32), Error> {
391        let tmp = self.uint32()?;
392        Ok((tmp >> 24, tmp & 0xffffff))
393    }
394
395    fn uint16(&mut self) -> Result<u16, Error> {
396        self.slice(2).map(|num| BigEndian::loadu16(num, 0))
397    }
398
399    fn uint32(&mut self) -> Result<u32, Error> {
400        self.slice(4).map(|num| BigEndian::loadu32(num, 0))
401    }
402
403    fn uint64(&mut self) -> Result<u64, Error> {
404        self.slice(8).map(|num| BigEndian::loadu64(num, 0))
405    }
406
407    fn array4(&mut self) -> Result<[u8; 4], Error> {
408        self.slice(4).map(|x| x.try_into().expect("never fails"))
409    }
410
411    fn slice(&mut self, at: usize) -> Result<&'a [u8], Error> {
412        let slice = self.inner.get(..at).ok_or("Box too small")?;
413        self.inner = &self.inner[at..];
414        Ok(slice)
415    }
416}
417
// Unit tests for the HEIF parser and the BoxSplitter helper.
#[cfg(test)]
mod tests {
    use std::io::Cursor;
    use super::*;

    // Extracts the Exif data from the sample file tests/exif.heic.
    #[test]
    fn extract() {
        let file = std::fs::File::open("tests/exif.heic").unwrap();
        let buf = get_exif_attr(
            &mut std::io::BufReader::new(&file)).unwrap();
        assert_eq!(buf.len(), 79);
        assert!(buf.starts_with(b"MM\x00\x2a"));
        assert!(buf.ends_with(b"xif\0"));
    }

    // A box of an unknown type in front of the FileTypeBox is skipped.
    // The item is stored in the ItemDataBox (construction method 1),
    // which holds an offset field of 1, one skipped byte, and "abcd".
    #[test]
    fn unknown_before_ftyp() {
        let data =
            b"\0\0\0\x09XXXXx\
              \0\0\0\x14ftypmif1\0\0\0\0mif1\
              \0\0\0\x57meta\0\0\0\0\
                  \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\
                  \0\0\0\x22iinf\0\0\0\0\0\x01\
                      \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\
                  \0\0\0\x11idat\0\0\0\x01xabcd";
        assert!(is_heif(data));
        let exif = get_exif_attr(&mut Cursor::new(&data[..])).unwrap();
        assert_eq!(exif, b"abcd");
    }

    // The item data is rejected when its offset field points past
    // the end, and when it is too short to hold the offset field.
    #[test]
    fn bad_exif_data_block() {
        // The offset is 1 but nothing follows the offset field.
        let data =
            b"\0\0\0\x14ftypmif1\0\0\0\0mif1\
              \0\0\0\x52meta\0\0\0\0\
                  \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\
                  \0\0\0\x22iinf\0\0\0\0\0\x01\
                      \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\
                  \0\0\0\x0cidat\0\0\0\x01";
        assert_err_pat!(get_exif_attr(&mut Cursor::new(&data[..])),
                        Error::InvalidFormat("Invalid Exif header offset"));

        // The ItemDataBox holds only three bytes.
        let data =
            b"\0\0\0\x14ftypmif1\0\0\0\0mif1\
              \0\0\0\x51meta\0\0\0\0\
                  \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\
                  \0\0\0\x22iinf\0\0\0\0\0\x01\
                      \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\
                  \0\0\0\x0bidat\0\0\0";
        assert_err_pat!(get_exif_attr(&mut Cursor::new(&data[..])),
                        Error::InvalidFormat("ExifDataBlock too small"));
    }

    // Covers the three size encodings understood by read_box_header
    // (32-bit size, "to the end of the file", 64-bit largesize),
    // including truncated input and sizes smaller than the header.
    #[test]
    fn parser_box_header() {
        // size
        let mut p = Parser::new(Cursor::new(b"\0\0\0\x08abcd"));
        assert_eq!(p.read_box_header().unwrap(), Some((0, *b"abcd")));
        let mut p = Parser::new(Cursor::new(b"\0\0\0\x08abc"));
        assert_err_pat!(p.read_box_header(), Error::Io(_));
        let mut p = Parser::new(Cursor::new(b"\0\0\0\x07abcd"));
        assert_err_pat!(p.read_box_header(), Error::InvalidFormat(_));
        // max size
        let mut p = Parser::new(Cursor::new(b"\xff\xff\xff\xffabcd"));
        assert_eq!(p.read_box_header().unwrap(),
                   Some((0xffffffff - 8, *b"abcd")));
        // to the end of the file
        let mut p = Parser::new(Cursor::new(b"\0\0\0\0abcd"));
        assert_eq!(p.read_box_header().unwrap(),
                   Some((std::u64::MAX, *b"abcd")));
        // largesize
        let mut p = Parser::new(Cursor::new(
            b"\0\0\0\x01abcd\0\0\0\0\0\0\0\x10"));
        assert_eq!(p.read_box_header().unwrap(), Some((0, *b"abcd")));
        let mut p = Parser::new(Cursor::new(
            b"\0\0\0\x01abcd\0\0\0\0\0\0\0"));
        assert_err_pat!(p.read_box_header(), Error::Io(_));
        let mut p = Parser::new(Cursor::new(
            b"\0\0\0\x01abcd\0\0\0\0\0\0\0\x0f"));
        assert_err_pat!(p.read_box_header(), Error::InvalidFormat(_));
        // max largesize
        let mut p = Parser::new(Cursor::new(
            b"\0\0\0\x01abcd\xff\xff\xff\xff\xff\xff\xff\xff"));
        assert_eq!(p.read_box_header().unwrap(),
                   Some((std::u64::MAX.wrapping_sub(16), *b"abcd")));
    }

    // Brand detection by is_heif on well-formed and malformed
    // FileTypeBoxes.
    #[test]
    fn is_heif_test() {
        // HEIF (with any coding format)
        assert!(is_heif(b"\0\0\0\x14ftypmif1\0\0\0\0mif1"));
        // HEIC
        assert!(is_heif(b"\0\0\0\x18ftypheic\0\0\0\0heicmif1"));
        // HEIC image sequence
        assert!(is_heif(b"\0\0\0\x18ftyphevc\0\0\0\0msf1hevc"));
        // unknown major brand but compatible with HEIF
        assert!(is_heif(b"\0\0\0\x18ftypXXXX\0\0\0\0XXXXmif1"));
        // incomplete brand (OK to ignore?)
        assert!(is_heif(b"\0\0\0\x15ftypmif1\0\0\0\0mif1h"));
        assert!(is_heif(b"\0\0\0\x16ftypmif1\0\0\0\0mif1he"));
        assert!(is_heif(b"\0\0\0\x17ftypmif1\0\0\0\0mif1hei"));
        // ISO base media file but not a HEIF
        assert!(!is_heif(b"\0\0\0\x14ftypmp41\0\0\0\0mp41"));
        // missing compatible brands (what should we do?)
        assert!(!is_heif(b"\0\0\0\x10ftypmif1\0\0\0\0"));
        // truncated box
        // Every proper prefix of a valid file must be rejected.
        let mut data: &[u8] = b"\0\0\0\x14ftypmif1\0\0\0\0mif1";
        while let Some((_, rest)) = data.split_last() {
            data = rest;
            assert!(!is_heif(data));
        }
        // short box size
        assert!(!is_heif(b"\0\0\0\x13ftypmif1\0\0\0\0mif1"));
    }

    // Bounds checking in BoxSplitter::slice and the big-endian
    // integer readers built on it.
    #[test]
    fn box_splitter() {
        let buf = b"0123456789abcdef";
        let mut boxp = BoxSplitter::new(buf);
        assert_err_pat!(boxp.slice(17), Error::InvalidFormat(_));
        assert_eq!(boxp.slice(16).unwrap(), buf);
        assert_err_pat!(boxp.slice(std::usize::MAX), Error::InvalidFormat(_));

        let mut boxp = BoxSplitter::new(buf);
        assert_eq!(boxp.slice(1).unwrap(), b"0");
        assert_eq!(boxp.uint16().unwrap(), 0x3132);
        assert_eq!(boxp.uint32().unwrap(), 0x33343536);
        assert_eq!(boxp.uint64().unwrap(), 0x3738396162636465);
    }
}