ttf_parser/tables/cff/
dict.rs

1use core::convert::TryFrom;
2use core::ops::Range;
3
4use crate::Stream;
5
// Limits according to the Adobe Technical Note #5176, chapter 4 DICT Data.

// First byte of a two-byte operator; the operator's second byte follows it.
const TWO_BYTE_OPERATOR_MARK: u8 = 12;
// Capacity, in ASCII characters, of the scratch buffer a real number
// is unpacked into before being parsed.
const FLOAT_STACK_LEN: usize = 64;
// Nibble value that terminates a packed real number.
const END_OF_FLOAT_FLAG: u8 = 0x0F;
10
/// A DICT operator code.
///
/// As produced by `DictionaryParser::parse_next`, a one-byte operator keeps
/// its byte value, while a two-byte operator is stored as `1200 + second byte`
/// (so `12 3` becomes `1203`).
#[derive(Clone, Copy, Debug)]
pub struct Operator(pub u16);

impl Operator {
    /// Returns the wrapped operator code.
    #[inline]
    pub fn get(self) -> u16 {
        let Operator(code) = self;
        code
    }
}
20
/// An incremental parser over raw DICT bytes.
///
/// Operators are located first; the operands that precede an operator are
/// decoded only on request, into a caller-provided buffer.
pub struct DictionaryParser<'a> {
    // Raw DICT bytes being parsed.
    data: &'a [u8],
    // Position in `data` just past the most recently returned operator.
    offset: usize,
    // Position in `data` where the operands of the most recently
    // searched-for operator begin.
    operands_offset: usize,
    // Caller-provided storage for decoded operands.
    //
    // CFF operands are either i32 or f32. f32 cannot hold every i32 exactly,
    // so the options are f64 or an enum of i32/f32. Both take 8 bytes,
    // which makes plain f64 the simpler choice.
    operands: &'a mut [f64],
    // How many leading entries of `operands` currently hold valid values.
    operands_len: u16,
}
37
38impl<'a> DictionaryParser<'a> {
39    #[inline]
40    pub fn new(data: &'a [u8], operands_buffer: &'a mut [f64]) -> Self {
41        DictionaryParser {
42            data,
43            offset: 0,
44            operands_offset: 0,
45            operands: operands_buffer,
46            operands_len: 0,
47        }
48    }
49
50    #[inline(never)]
51    pub fn parse_next(&mut self) -> Option<Operator> {
52        let mut s = Stream::new_at(self.data, self.offset)?;
53        self.operands_offset = self.offset;
54        while !s.at_end() {
55            let b = s.read::<u8>()?;
56            // 0..=21 bytes are operators.
57            if is_dict_one_byte_op(b) {
58                let mut operator = u16::from(b);
59
60                // Check that operator is two byte long.
61                if b == TWO_BYTE_OPERATOR_MARK {
62                    // Use a 1200 'prefix' to make two byte operators more readable.
63                    // 12 3 => 1203
64                    operator = 1200 + u16::from(s.read::<u8>()?);
65                }
66
67                self.offset = s.offset();
68                return Some(Operator(operator));
69            } else {
70                skip_number(b, &mut s)?;
71            }
72        }
73
74        None
75    }
76
77    /// Parses operands of the current operator.
78    ///
79    /// In the DICT structure, operands are defined before an operator.
80    /// So we are trying to find an operator first and the we can actually parse the operands.
81    ///
82    /// Since this methods is pretty expensive and we do not care about most of the operators,
83    /// we can speed up parsing by parsing operands only for required operators.
84    ///
85    /// We still have to "skip" operands during operators search (see `skip_number()`),
86    /// but it's still faster that a naive method.
87    pub fn parse_operands(&mut self) -> Option<()> {
88        let mut s = Stream::new_at(self.data, self.operands_offset)?;
89        self.operands_len = 0;
90        while !s.at_end() {
91            let b = s.read::<u8>()?;
92            // 0..=21 bytes are operators.
93            if is_dict_one_byte_op(b) {
94                break;
95            } else {
96                let op = parse_number(b, &mut s)?;
97                self.operands[usize::from(self.operands_len)] = op;
98                self.operands_len += 1;
99
100                if usize::from(self.operands_len) >= self.operands.len() {
101                    break;
102                }
103            }
104        }
105
106        Some(())
107    }
108
109    #[inline]
110    pub fn operands(&self) -> &[f64] {
111        &self.operands[..usize::from(self.operands_len)]
112    }
113
114    #[inline]
115    pub fn parse_number(&mut self) -> Option<f64> {
116        self.parse_operands()?;
117        self.operands().get(0).cloned()
118    }
119
120    #[inline]
121    pub fn parse_offset(&mut self) -> Option<usize> {
122        self.parse_operands()?;
123        let operands = self.operands();
124        if operands.len() == 1 {
125            usize::try_from(operands[0] as i32).ok()
126        } else {
127            None
128        }
129    }
130
131    #[inline]
132    pub fn parse_range(&mut self) -> Option<Range<usize>> {
133        self.parse_operands()?;
134        let operands = self.operands();
135        if operands.len() == 2 {
136            let len = usize::try_from(operands[0] as i32).ok()?;
137            let start = usize::try_from(operands[1] as i32).ok()?;
138            let end = start.checked_add(len)?;
139            Some(start..end)
140        } else {
141            None
142        }
143    }
144}
145
// Classifies the first byte of a DICT token according to the
// Adobe Technical Note #5176, Appendix H CFF DICT Encoding.
//
// Returns `false` for bytes that start a number (28..=30 and 32..=254)
// and `true` for everything else, which covers the reserved
// values 31 and 255 in addition to 0..=27.
pub fn is_dict_one_byte_op(b: u8) -> bool {
    match b {
        28..=30 | 32..=254 => false, // numbers
        _ => true,                   // operators and reserved bytes
    }
}
157
158// Adobe Technical Note #5177, Table 3 Operand Encoding
159pub fn parse_number(b0: u8, s: &mut Stream) -> Option<f64> {
160    match b0 {
161        28 => {
162            let n = i32::from(s.read::<i16>()?);
163            Some(f64::from(n))
164        }
165        29 => {
166            let n = s.read::<i32>()?;
167            Some(f64::from(n))
168        }
169        30 => parse_float(s),
170        32..=246 => {
171            let n = i32::from(b0) - 139;
172            Some(f64::from(n))
173        }
174        247..=250 => {
175            let b1 = i32::from(s.read::<u8>()?);
176            let n = (i32::from(b0) - 247) * 256 + b1 + 108;
177            Some(f64::from(n))
178        }
179        251..=254 => {
180            let b1 = i32::from(s.read::<u8>()?);
181            let n = -(i32::from(b0) - 251) * 256 - b1 - 108;
182            Some(f64::from(n))
183        }
184        _ => None,
185    }
186}
187
188fn parse_float(s: &mut Stream) -> Option<f64> {
189    let mut data = [0u8; FLOAT_STACK_LEN];
190    let mut idx = 0;
191
192    loop {
193        let b1: u8 = s.read()?;
194        let nibble1 = b1 >> 4;
195        let nibble2 = b1 & 15;
196
197        if nibble1 == END_OF_FLOAT_FLAG {
198            break;
199        }
200
201        idx = parse_float_nibble(nibble1, idx, &mut data)?;
202
203        if nibble2 == END_OF_FLOAT_FLAG {
204            break;
205        }
206
207        idx = parse_float_nibble(nibble2, idx, &mut data)?;
208    }
209
210    let s = core::str::from_utf8(&data[..idx]).ok()?;
211    let n = s.parse().ok()?;
212    Some(n)
213}
214
215// Adobe Technical Note #5176, Table 5 Nibble Definitions
216fn parse_float_nibble(nibble: u8, mut idx: usize, data: &mut [u8]) -> Option<usize> {
217    if idx == FLOAT_STACK_LEN {
218        return None;
219    }
220
221    match nibble {
222        0..=9 => {
223            data[idx] = b'0' + nibble;
224        }
225        10 => {
226            data[idx] = b'.';
227        }
228        11 => {
229            data[idx] = b'E';
230        }
231        12 => {
232            if idx + 1 == FLOAT_STACK_LEN {
233                return None;
234            }
235
236            data[idx] = b'E';
237            idx += 1;
238            data[idx] = b'-';
239        }
240        13 => {
241            return None;
242        }
243        14 => {
244            data[idx] = b'-';
245        }
246        _ => {
247            return None;
248        }
249    }
250
251    idx += 1;
252    Some(idx)
253}
254
255// Just like `parse_number`, but doesn't actually parses the data.
256pub fn skip_number(b0: u8, s: &mut Stream) -> Option<()> {
257    match b0 {
258        28 => s.skip::<u16>(),
259        29 => s.skip::<u32>(),
260        30 => {
261            while !s.at_end() {
262                let b1 = s.read::<u8>()?;
263                let nibble1 = b1 >> 4;
264                let nibble2 = b1 & 15;
265                if nibble1 == END_OF_FLOAT_FLAG || nibble2 == END_OF_FLOAT_FLAG {
266                    break;
267                }
268            }
269        }
270        32..=246 => {}
271        247..=250 => s.skip::<u8>(),
272        251..=254 => s.skip::<u8>(),
273        _ => return None,
274    }
275
276    Some(())
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    // Checks `parse_number` against one positive and one negative value for
    // the two-byte, three-byte and five-byte integer encodings.
    #[test]
    fn parse_dict_number() {
        // (first byte, remaining bytes, expected value)
        let cases: [(u8, &[u8], f64); 6] = [
            (0xFA, &[0x7C], 1000.0),
            (0xFE, &[0x7C], -1000.0),
            (0x1C, &[0x27, 0x10], 10000.0),
            (0x1C, &[0xD8, 0xF0], -10000.0),
            (0x1D, &[0x00, 0x01, 0x86, 0xA0], 100000.0),
            (0x1D, &[0xFF, 0xFE, 0x79, 0x60], -100000.0),
        ];

        for &(b0, rest, expected) in cases.iter() {
            let mut stream = Stream::new(rest);
            assert_eq!(parse_number(b0, &mut stream).unwrap(), expected);
        }
    }
}