read_fonts/tables/
cmap.rs

1//! The [cmap](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap) table
2
3include!("../../generated/generated_cmap.rs");
4
5#[cfg(feature = "std")]
6use crate::collections::IntSet;
7use crate::{FontRef, TableProvider};
8use std::ops::Range;
9
// Encoding IDs for the Windows platform (platform ID 3).
// See <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#windows-platform-platform-id--3>
/// Windows platform: symbol encoding.
const WINDOWS_SYMBOL_ENCODING: u16 = 0;
/// Windows platform: Unicode, basic multilingual plane only.
const WINDOWS_UNICODE_BMP_ENCODING: u16 = 1;
/// Windows platform: Unicode, full repertoire.
const WINDOWS_UNICODE_FULL_ENCODING: u16 = 10;

// Encoding IDs for the Unicode platform (platform ID 0).
// See <https://docs.microsoft.com/en-us/typography/opentype/spec/name#platform-specific-encoding-and-language-ids-unicode-platform-platform-id--0>
/// Unicode platform: Unicode 1.0 semantics.
const UNICODE_1_0_ENCODING: u16 = 0;
/// Unicode platform: Unicode 1.1 semantics.
const UNICODE_1_1_ENCODING: u16 = 1;
/// Unicode platform: ISO encoding (presumably ISO/IEC 10646 -- confirm against the spec).
const UNICODE_ISO_ENCODING: u16 = 2;
/// Unicode platform: Unicode 2.0, basic multilingual plane only.
const UNICODE_2_0_BMP_ENCODING: u16 = 3;
/// Unicode platform: Unicode 2.0, full repertoire.
const UNICODE_2_0_FULL_ENCODING: u16 = 4;
/// Unicode platform: full repertoire.
const UNICODE_FULL_ENCODING: u16 = 6;
22
/// Result of mapping a codepoint with a variation selector.
///
/// Produced by [`Cmap14::map_variant`] and yielded as the third element of
/// the triples returned by [`Cmap14Iter`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum MapVariant {
    /// The variation selector should be ignored and the default mapping
    /// of the character should be used.
    ///
    /// The caller is expected to resolve the glyph through a regular
    /// (non-variation) lookup of the base codepoint.
    UseDefault,
    /// The variant glyph mapped by a codepoint and associated variation
    /// selector.
    Variant(GlyphId),
}
33
34impl<'a> Cmap<'a> {
35    /// Map a codepoint to a nominal glyph identifier
36    ///
37    /// This uses the first available subtable that provides a valid mapping.
38    ///
39    /// # Note:
40    ///
41    /// Mapping logic is currently only implemented for the most common subtable
42    /// formats.
43    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
44        let codepoint = codepoint.into();
45        for record in self.encoding_records() {
46            if let Ok(subtable) = record.subtable(self.offset_data()) {
47                if let Some(gid) = subtable.map_codepoint(codepoint) {
48                    return Some(gid);
49                }
50            }
51        }
52        None
53    }
54
55    /// Returns the index, encoding record and subtable for the most
56    /// comprehensive mapping available.
57    ///
58    /// Comprehensive means that tables capable of mapping the Unicode full
59    /// repertoire are chosen over those that only support the basic
60    /// multilingual plane. The exception is that symbol mappings are
61    /// preferred above all others
62    /// (see <https://github.com/harfbuzz/harfbuzz/issues/1918>).
63    pub fn best_subtable(&self) -> Option<(u16, EncodingRecord, CmapSubtable<'a>)> {
64        // Follows the HarfBuzz approach
65        // See <https://github.com/harfbuzz/harfbuzz/blob/a9a78e1bff9d4a62429d22277fea4e0e76e9ac7e/src/hb-ot-cmap-table.hh#L1962>
66        let offset_data = self.offset_data();
67        let records = self.encoding_records();
68        let find = |platform_id, encoding_id| {
69            for (index, record) in records.iter().enumerate() {
70                if record.platform_id() != platform_id || record.encoding_id() != encoding_id {
71                    continue;
72                }
73                if let Ok(subtable) = record.subtable(offset_data) {
74                    match subtable {
75                        CmapSubtable::Format0(_)
76                        | CmapSubtable::Format4(_)
77                        | CmapSubtable::Format6(_)
78                        | CmapSubtable::Format10(_)
79                        | CmapSubtable::Format12(_)
80                        | CmapSubtable::Format13(_) => {
81                            return Some((index as u16, *record, subtable))
82                        }
83                        _ => {}
84                    }
85                }
86            }
87            None
88        };
89        // Symbol subtable.
90        // Prefer symbol if available.
91        // https://github.com/harfbuzz/harfbuzz/issues/1918
92        find(PlatformId::Windows, WINDOWS_SYMBOL_ENCODING)
93            // 32-bit subtables:
94            .or_else(|| find(PlatformId::Windows, WINDOWS_UNICODE_FULL_ENCODING))
95            .or_else(|| find(PlatformId::Unicode, UNICODE_FULL_ENCODING))
96            .or_else(|| find(PlatformId::Unicode, UNICODE_2_0_FULL_ENCODING))
97            // 16-bit subtables:
98            .or_else(|| find(PlatformId::Windows, WINDOWS_UNICODE_BMP_ENCODING))
99            .or_else(|| find(PlatformId::Unicode, UNICODE_2_0_BMP_ENCODING))
100            .or_else(|| find(PlatformId::Unicode, UNICODE_ISO_ENCODING))
101            .or_else(|| find(PlatformId::Unicode, UNICODE_1_1_ENCODING))
102            .or_else(|| find(PlatformId::Unicode, UNICODE_1_0_ENCODING))
103            // MacRoman subtable:
104            .or_else(|| find(PlatformId::Macintosh, 0))
105    }
106
107    /// Returns the index and subtable for the first mapping capable of
108    /// handling Unicode variation sequences.
109    ///
110    /// This is always a [format 14](https://learn.microsoft.com/en-us/typography/opentype/spec/cmap#format-14-unicode-variation-sequences)
111    /// subtable.
112    pub fn uvs_subtable(&self) -> Option<(u16, Cmap14<'a>)> {
113        let offset_data = self.offset_data();
114        for (index, record) in self.encoding_records().iter().enumerate() {
115            if let Ok(CmapSubtable::Format14(cmap14)) = record.subtable(offset_data) {
116                return Some((index as u16, cmap14));
117            };
118        }
119        None
120    }
121
122    /// Returns the subtable at the given index.
123    pub fn subtable(&self, index: u16) -> Result<CmapSubtable<'a>, ReadError> {
124        self.encoding_records()
125            .get(index as usize)
126            .ok_or(ReadError::OutOfBounds)
127            .and_then(|encoding| encoding.subtable(self.offset_data()))
128    }
129
130    #[cfg(feature = "std")]
131    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
132        for record in self.encoding_records() {
133            if let Ok(subtable) = record.subtable(self.offset_data()) {
134                match subtable {
135                    CmapSubtable::Format14(format14) => {
136                        format14.closure_glyphs(unicodes, glyph_set);
137                        return;
138                    }
139                    _ => {
140                        continue;
141                    }
142                }
143            }
144        }
145    }
146}
147
148impl EncodingRecord {
149    pub fn is_symbol(&self) -> bool {
150        self.platform_id() == PlatformId::Windows && self.encoding_id() == WINDOWS_SYMBOL_ENCODING
151    }
152
153    pub fn is_mac_roman(&self) -> bool {
154        self.platform_id() == PlatformId::Macintosh && self.encoding_id() == 0
155    }
156}
157
158impl<'a> CmapSubtable<'a> {
159    pub fn language(&self) -> u32 {
160        match self {
161            Self::Format0(item) => item.language() as u32,
162            Self::Format2(item) => item.language() as u32,
163            Self::Format4(item) => item.language() as u32,
164            Self::Format6(item) => item.language() as u32,
165            Self::Format10(item) => item.language(),
166            Self::Format12(item) => item.language(),
167            Self::Format13(item) => item.language(),
168            _ => 0,
169        }
170    }
171
172    /// Attempts to map the given codepoint to a nominal glyph identifier using
173    /// the underlying subtable.
174    #[inline]
175    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
176        match self {
177            Self::Format0(item) => item.map_codepoint(codepoint),
178            Self::Format4(item) => item.map_codepoint(codepoint),
179            Self::Format6(item) => item.map_codepoint(codepoint),
180            Self::Format10(item) => item.map_codepoint(codepoint),
181            Self::Format12(item) => item.map_codepoint(codepoint),
182            Self::Format13(item) => item.map_codepoint(codepoint),
183            _ => None,
184        }
185    }
186
187    /// Returns an iterator over all (codepoint, glyph identifier) pairs
188    /// in the subtable.
189    ///
190    /// Malicious and malformed fonts can produce a large number of invalid
191    /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
192    /// that is limited to reasonable values.
193    pub fn iter(&self) -> CmapSubtableIter<'a> {
194        let limits = CmapIterLimits {
195            max_char: u32::MAX,
196            glyph_count: u32::MAX,
197        };
198        self.iter_with_limits(limits)
199    }
200
201    /// Returns an iterator over all (codepoint, glyph identifier) pairs
202    /// in the subtable within the given limits.    
203    pub fn iter_with_limits(&self, limits: CmapIterLimits) -> CmapSubtableIter<'a> {
204        match self {
205            Self::Format4(item) => CmapSubtableIter::Format4(item.iter()),
206            Self::Format6(item) => CmapSubtableIter::Format6(item.iter()),
207            Self::Format10(item) => CmapSubtableIter::Format10(item.iter()),
208            Self::Format12(item) => CmapSubtableIter::Format12(item.iter_with_limits(limits)),
209            Self::Format13(item) => CmapSubtableIter::Format13(item.iter_with_limits(limits)),
210            _ => CmapSubtableIter::None,
211        }
212    }
213}
214
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Each variant wraps the format-specific iterator that does the actual
/// work; see [`CmapSubtable::iter_with_limits`] for how a variant is chosen.
#[derive(Clone)]
#[non_exhaustive]
pub enum CmapSubtableIter<'a> {
    /// Empty iterator, used for subtable formats without iteration support.
    None,
    /// Iterator over a format 4 subtable.
    Format4(Cmap4Iter<'a>),
    /// Iterator over a format 6 subtable.
    Format6(Cmap6Iter<'a>),
    /// Iterator over a format 10 subtable.
    Format10(Cmap10Iter<'a>),
    /// Iterator over a format 12 subtable.
    Format12(Cmap12Iter<'a>),
    /// Iterator over a format 13 subtable.
    Format13(Cmap13Iter<'a>),
}
227
228impl<'a> Iterator for CmapSubtableIter<'a> {
229    type Item = (u32, GlyphId);
230
231    #[inline]
232    fn next(&mut self) -> Option<Self::Item> {
233        match self {
234            Self::None => None,
235            Self::Format4(iter) => iter.next(),
236            Self::Format6(iter) => iter.next(),
237            Self::Format10(iter) => iter.next(),
238            Self::Format12(iter) => iter.next(),
239            Self::Format13(iter) => iter.next(),
240        }
241    }
242}
243
244impl Cmap0<'_> {
245    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
246        let codepoint = codepoint.into();
247
248        self.glyph_id_array()
249            .get(codepoint as usize)
250            .map(|g| GlyphId::new(*g as u32))
251    }
252}
253
254impl<'a> Cmap4<'a> {
255    /// Maps a codepoint to a nominal glyph identifier.
256    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
257        let codepoint = codepoint.into();
258        if codepoint > 0xFFFF {
259            return None;
260        }
261        let codepoint = codepoint as u16;
262        let mut lo = 0;
263        let mut hi = self.seg_count_x2() as usize / 2;
264        let start_codes = self.start_code();
265        let end_codes = self.end_code();
266        while lo < hi {
267            let i = (lo + hi) / 2;
268            let start_code = start_codes.get(i)?.get();
269            if codepoint < start_code {
270                hi = i;
271            } else if codepoint > end_codes.get(i)?.get() {
272                lo = i + 1;
273            } else {
274                return self.lookup_glyph_id(codepoint, i, start_code);
275            }
276        }
277        None
278    }
279
280    /// Returns an iterator over all (codepoint, glyph identifier) pairs
281    /// in the subtable.
282    pub fn iter(&self) -> Cmap4Iter<'a> {
283        Cmap4Iter::new(self.clone())
284    }
285
286    /// Does the final phase of glyph id lookup.
287    ///
288    /// Shared between Self::map and Cmap4Iter.
289    fn lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId> {
290        let deltas = self.id_delta();
291        let range_offsets = self.id_range_offsets();
292        let delta = deltas.get(index)?.get() as i32;
293        let range_offset = range_offsets.get(index)?.get() as usize;
294        if range_offset == 0 {
295            return Some(GlyphId::from((codepoint as i32 + delta) as u16));
296        }
297        let mut offset = range_offset / 2 + (codepoint - start_code) as usize;
298        offset = offset.saturating_sub(range_offsets.len() - index);
299        let gid = self.glyph_id_array().get(offset)?.get();
300        (gid != 0).then_some(GlyphId::from((gid as i32 + delta) as u16))
301    }
302
303    /// Returns the [start_code, end_code] range at the given index.
304    fn code_range(&self, index: usize) -> Option<Range<u32>> {
305        // Extend to u32 to ensure we don't overflow on the end + 1 bound
306        // below.
307        let start = self.start_code().get(index)?.get() as u32;
308        let end = self.end_code().get(index)?.get() as u32;
309        // Use end + 1 here because the range in the table is inclusive
310        Some(start..end + 1)
311    }
312}
313
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
#[derive(Clone)]
pub struct Cmap4Iter<'a> {
    /// The format 4 subtable being iterated.
    subtable: Cmap4<'a>,
    /// Codepoints of the current segment that are still to be visited
    /// (exclusive end).
    cur_range: Range<u32>,
    /// Start code passed to the glyph lookup for the current segment.
    cur_start_code: u16,
    /// Index of the current segment.
    cur_range_ix: usize,
}
323
324impl<'a> Cmap4Iter<'a> {
325    fn new(subtable: Cmap4<'a>) -> Self {
326        let cur_range = subtable.code_range(0).unwrap_or_default();
327        let cur_start_code = cur_range.start as u16;
328        Self {
329            subtable,
330            cur_range,
331            cur_start_code,
332            cur_range_ix: 0,
333        }
334    }
335}
336
337impl Iterator for Cmap4Iter<'_> {
338    type Item = (u32, GlyphId);
339
340    fn next(&mut self) -> Option<Self::Item> {
341        loop {
342            if let Some(codepoint) = self.cur_range.next() {
343                let Some(glyph_id) = self.subtable.lookup_glyph_id(
344                    codepoint as u16,
345                    self.cur_range_ix,
346                    self.cur_start_code,
347                ) else {
348                    continue;
349                };
350                return Some((codepoint, glyph_id));
351            } else {
352                self.cur_range_ix += 1;
353                let next_range = self.subtable.code_range(self.cur_range_ix)?;
354                // Groups should be in order and non-overlapping so make sure
355                // that the start code of next group is at least current_end + 1.
356                // Also avoid start sliding backwards if we see data where end < start by taking the max
357                // of next.end and curr.end as the new end.
358                // This prevents timeout and bizarre results in the face of numerous overlapping ranges
359                // https://github.com/googlefonts/fontations/issues/1100
360                // cmap4 ranges are u16 so no need to stress about values past char::MAX
361                self.cur_range = next_range.start.max(self.cur_range.end)
362                    ..next_range.end.max(self.cur_range.end);
363                self.cur_start_code = self.cur_range.start as u16;
364            }
365        }
366    }
367}
368
369impl<'a> Cmap6<'a> {
370    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
371        let codepoint = codepoint.into();
372
373        let first = self.first_code() as u32;
374        let idx = codepoint.checked_sub(first)?;
375        self.glyph_id_array()
376            .get(idx as usize)
377            .map(|g| GlyphId::new(g.get() as u32))
378    }
379
380    /// Returns an iterator over all (codepoint, glyph identifier) pairs
381    /// in the subtable.    
382    pub fn iter(&self) -> Cmap6Iter<'a> {
383        Cmap6Iter {
384            first: self.first_code() as u32,
385            glyph_ids: self.glyph_id_array(),
386            pos: 0,
387        }
388    }
389}
390
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
#[derive(Clone)]
pub struct Cmap6Iter<'a> {
    /// Codepoint that maps to the first entry of `glyph_ids`.
    first: u32,
    /// Glyph ids for consecutive codepoints starting at `first`.
    glyph_ids: &'a [BigEndian<u16>],
    /// Index of the next entry of `glyph_ids` to yield.
    pos: u32,
}
399
400impl Iterator for Cmap6Iter<'_> {
401    type Item = (u32, GlyphId);
402
403    fn next(&mut self) -> Option<Self::Item> {
404        let gid = self.glyph_ids.get(self.pos as usize)?.get().into();
405        let codepoint = self.first + self.pos;
406        self.pos += 1;
407        Some((codepoint, gid))
408    }
409}
410
411impl<'a> Cmap10<'a> {
412    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
413        let codepoint = codepoint.into();
414        let idx = codepoint.checked_sub(self.start_char_code())?;
415        self.glyph_id_array()
416            .get(idx as usize)
417            .map(|g| GlyphId::new(g.get() as u32))
418    }
419
420    /// Returns an iterator over all (codepoint, glyph identifier) pairs
421    /// in the subtable.    
422    pub fn iter(&self) -> Cmap10Iter<'a> {
423        Cmap10Iter {
424            first: self.start_char_code(),
425            glyph_ids: self.glyph_id_array(),
426            pos: 0,
427        }
428    }
429}
430
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
#[derive(Clone)]
pub struct Cmap10Iter<'a> {
    /// Codepoint that maps to the first entry of `glyph_ids`.
    first: u32,
    /// Glyph ids for consecutive codepoints starting at `first`.
    glyph_ids: &'a [BigEndian<u16>],
    /// Index of the next entry of `glyph_ids` to yield.
    pos: u32,
}
439
440impl Iterator for Cmap10Iter<'_> {
441    type Item = (u32, GlyphId);
442
443    fn next(&mut self) -> Option<Self::Item> {
444        let gid = self.glyph_ids.get(self.pos as usize)?.get().into();
445        let codepoint = self.first + self.pos;
446        self.pos += 1;
447        Some((codepoint, gid))
448    }
449}
450
451/// Trait to unify constant and sequential map groups.
452trait AnyMapGroup {
453    const IS_CONSTANT: bool;
454
455    fn start_char_code(&self) -> u32;
456    fn end_char_code(&self) -> u32;
457    /// Either start glyph id for a sequential group or just glyph id
458    /// for a constant group.
459    fn ref_glyph_id(&self) -> u32;
460
461    fn compute_glyph_id(codepoint: u32, start_char_code: u32, ref_glyph_id: u32) -> GlyphId {
462        if Self::IS_CONSTANT {
463            GlyphId::new(ref_glyph_id)
464        } else {
465            GlyphId::new(ref_glyph_id.wrapping_add(codepoint.wrapping_sub(start_char_code)))
466        }
467    }
468}
469
/// Constant groups map every covered codepoint to a single glyph.
///
/// NOTE(review): each method body calls a same-named accessor on `self`.
/// This presumably resolves to the inherent (generated) accessor on
/// `ConstantMapGroup`, which takes precedence over the trait method --
/// confirm against the generated code.
impl AnyMapGroup for ConstantMapGroup {
    const IS_CONSTANT: bool = true;

    fn start_char_code(&self) -> u32 {
        self.start_char_code()
    }

    fn end_char_code(&self) -> u32 {
        self.end_char_code()
    }

    // The reference glyph of a constant group is its one and only glyph.
    fn ref_glyph_id(&self) -> u32 {
        self.glyph_id()
    }
}
485
/// Sequential groups map consecutive codepoints to consecutive glyphs.
///
/// NOTE(review): each method body calls a same-named accessor on `self`.
/// This presumably resolves to the inherent (generated) accessor on
/// `SequentialMapGroup`, which takes precedence over the trait method --
/// confirm against the generated code.
impl AnyMapGroup for SequentialMapGroup {
    const IS_CONSTANT: bool = false;

    fn start_char_code(&self) -> u32 {
        self.start_char_code()
    }

    fn end_char_code(&self) -> u32 {
        self.end_char_code()
    }

    // The reference glyph of a sequential group is the glyph of its first
    // codepoint.
    fn ref_glyph_id(&self) -> u32 {
        self.start_glyph_id()
    }
}
501
502/// Shared codepoint mapping code for cmap 12/13.
503fn cmap1213_map_codepoint<T: AnyMapGroup>(
504    groups: &[T],
505    codepoint: impl Into<u32>,
506) -> Option<GlyphId> {
507    let codepoint = codepoint.into();
508    let mut lo = 0;
509    let mut hi = groups.len();
510    while lo < hi {
511        let i = (lo + hi) / 2;
512        let group = groups.get(i)?;
513        if codepoint < group.start_char_code() {
514            hi = i;
515        } else if codepoint > group.end_char_code() {
516            lo = i + 1;
517        } else {
518            return Some(T::compute_glyph_id(
519                codepoint,
520                group.start_char_code(),
521                group.ref_glyph_id(),
522            ));
523        }
524    }
525    None
526}
527
/// Character and glyph limits for iterating format 12 and 13 subtables.
///
/// Use [`CmapIterLimits::default_for_font`] to derive limits from a font, or
/// [`Default::default`] for limits that don't depend on a font.
#[derive(Copy, Clone, Debug)]
pub struct CmapIterLimits {
    /// The maximum valid character.
    ///
    /// Group ranges are clamped against this value when iterating.
    pub max_char: u32,
    /// The number of glyphs in the font.
    ///
    /// Only consulted for sequential (format 12) groups, where it bounds how
    /// far a group may extend past its start glyph.
    pub glyph_count: u32,
}
536
537impl CmapIterLimits {
538    /// Returns the default limits for the given font.
539    ///
540    /// This will limit pairs to `char::MAX` and the number of glyphs contained
541    /// in the font. If the font is missing a `maxp` table, the number of
542    /// glyphs will be limited to `u16::MAX`.
543    pub fn default_for_font(font: &FontRef) -> Self {
544        let glyph_count = font
545            .maxp()
546            .map(|maxp| maxp.num_glyphs())
547            .unwrap_or(u16::MAX) as u32;
548        Self {
549            // Limit to the valid range of Unicode characters
550            // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
551            max_char: char::MAX as u32,
552            glyph_count,
553        }
554    }
555}
556
557impl Default for CmapIterLimits {
558    fn default() -> Self {
559        Self {
560            max_char: char::MAX as u32,
561            // Revisit this when we actually support big glyph ids
562            glyph_count: u16::MAX as u32,
563        }
564    }
565}
566
/// Remapped groups for iterating cmap12/13.
///
/// Built by `cmap1213_iter_group` from a raw map group, with any iteration
/// limits already applied to the range.
#[derive(Clone, Debug)]
struct Cmap1213IterGroup {
    /// Codepoints still to be visited, with an exclusive end. Stored as
    /// `u64` so that an end code of `u32::MAX` can be represented
    /// exclusively.
    range: Range<u64>,
    /// The group's original start code, used to compute glyph offsets.
    start_code: u32,
    /// Start glyph id (sequential groups) or the sole glyph id (constant
    /// groups).
    ref_glyph_id: u32,
}
574
575/// Shared group resolution code for cmap 12/13.
576fn cmap1213_iter_group<T: AnyMapGroup>(
577    groups: &[T],
578    index: usize,
579    limits: &Option<CmapIterLimits>,
580) -> Option<Cmap1213IterGroup> {
581    let group = groups.get(index)?;
582    let start_code = group.start_char_code();
583    // Change to exclusive range. This can never overflow since the source
584    // is a 32-bit value
585    let end_code = group.end_char_code() as u64 + 1;
586    let start_glyph_id = group.ref_glyph_id();
587    let end_code = if let Some(limits) = limits {
588        // Set our end code to the minimum of our character and glyph
589        // count limit
590        if T::IS_CONSTANT {
591            end_code.min(limits.max_char as u64)
592        } else {
593            (limits.glyph_count as u64)
594                .saturating_sub(start_glyph_id as u64)
595                .saturating_add(start_code as u64)
596                .min(end_code.min(limits.max_char as u64))
597        }
598    } else {
599        end_code
600    };
601    Some(Cmap1213IterGroup {
602        range: start_code as u64..end_code,
603        start_code,
604        ref_glyph_id: start_glyph_id,
605    })
606}
607
/// Shared iterator for cmap 12/13.
#[derive(Clone)]
struct Cmap1213Iter<'a, T> {
    /// All map groups of the subtable.
    groups: &'a [T],
    /// The group currently being iterated, or `None` if the subtable had no
    /// first group.
    cur_group: Option<Cmap1213IterGroup>,
    /// Index into `groups` of the current group.
    cur_group_ix: usize,
    /// Optional limits applied to each group as it is loaded.
    limits: Option<CmapIterLimits>,
}
616
617impl<'a, T> Cmap1213Iter<'a, T>
618where
619    T: AnyMapGroup,
620{
621    fn new(groups: &'a [T], limits: Option<CmapIterLimits>) -> Self {
622        let cur_group = cmap1213_iter_group(groups, 0, &limits);
623        Self {
624            groups,
625            cur_group,
626            cur_group_ix: 0,
627            limits,
628        }
629    }
630}
631
632impl<T> Iterator for Cmap1213Iter<'_, T>
633where
634    T: AnyMapGroup,
635{
636    type Item = (u32, GlyphId);
637
638    fn next(&mut self) -> Option<Self::Item> {
639        loop {
640            let group = self.cur_group.as_mut()?;
641            if let Some(codepoint) = group.range.next() {
642                let codepoint = codepoint as u32;
643                let glyph_id = T::compute_glyph_id(codepoint, group.start_code, group.ref_glyph_id);
644                return Some((codepoint, glyph_id));
645            } else {
646                self.cur_group_ix += 1;
647                let mut next_group =
648                    cmap1213_iter_group(self.groups, self.cur_group_ix, &self.limits)?;
649                // Groups should be in order and non-overlapping so make sure
650                // that the start code of next group is at least
651                // current_end.
652                if next_group.range.start < group.range.end {
653                    next_group.range = group.range.end..next_group.range.end;
654                }
655                self.cur_group = Some(next_group);
656            }
657        }
658    }
659}
660
impl<'a> Cmap12<'a> {
    /// Maps a codepoint to a nominal glyph identifier.
    ///
    /// Delegates to the lookup shared with format 13, treating the groups
    /// as sequential: the glyph is the group's start glyph plus the
    /// codepoint's offset into the group.
    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
        cmap1213_map_codepoint(self.groups(), codepoint)
    }

    /// Returns an iterator over all (codepoint, glyph identifier) pairs
    /// in the subtable.
    ///
    /// Malicious and malformed fonts can produce a large number of invalid
    /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
    /// that is limited to reasonable values.
    pub fn iter(&self) -> Cmap12Iter<'a> {
        // `None` means no character or glyph limits are applied.
        Cmap12Iter::new(self.clone(), None)
    }

    /// Returns an iterator over all (codepoint, glyph identifier) pairs
    /// in the subtable within the given limits.
    ///
    /// Both the character limit and the glyph count limit are applied to
    /// each group.
    pub fn iter_with_limits(&self, limits: CmapIterLimits) -> Cmap12Iter<'a> {
        Cmap12Iter::new(self.clone(), Some(limits))
    }
}
683
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Thin wrapper around the iterator shared by formats 12 and 13,
/// specialized for sequential map groups.
#[derive(Clone)]
pub struct Cmap12Iter<'a>(Cmap1213Iter<'a, SequentialMapGroup>);
688
impl<'a> Cmap12Iter<'a> {
    /// Creates an iterator over the groups of `subtable`, optionally
    /// constrained by `limits`.
    fn new(subtable: Cmap12<'a>, limits: Option<CmapIterLimits>) -> Self {
        Self(Cmap1213Iter::new(subtable.groups(), limits))
    }
}
694
impl Iterator for Cmap12Iter<'_> {
    type Item = (u32, GlyphId);

    /// Forwards to the wrapped shared format 12/13 iterator.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }
}
702
impl<'a> Cmap13<'a> {
    /// Maps a codepoint to a nominal glyph identifier.
    ///
    /// Delegates to the lookup shared with format 12, treating the groups
    /// as constant: every codepoint in a group maps to the group's single
    /// glyph.
    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
        cmap1213_map_codepoint(self.groups(), codepoint)
    }

    /// Returns an iterator over all (codepoint, glyph identifier) pairs
    /// in the subtable.
    ///
    /// Malicious and malformed fonts can produce a large number of invalid
    /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
    /// that is limited to reasonable values.
    pub fn iter(&self) -> Cmap13Iter<'a> {
        // `None` means no character limit is applied.
        Cmap13Iter::new(self.clone(), None)
    }

    /// Returns an iterator over all (codepoint, glyph identifier) pairs
    /// in the subtable within the given limits.
    ///
    /// Only the character limit affects constant groups; the glyph count
    /// limit is not consulted for them.
    pub fn iter_with_limits(&self, limits: CmapIterLimits) -> Cmap13Iter<'a> {
        Cmap13Iter::new(self.clone(), Some(limits))
    }
}
725
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Thin wrapper around the iterator shared by formats 12 and 13,
/// specialized for constant map groups.
#[derive(Clone)]
pub struct Cmap13Iter<'a>(Cmap1213Iter<'a, ConstantMapGroup>);
730
impl<'a> Cmap13Iter<'a> {
    /// Creates an iterator over the groups of `subtable`, optionally
    /// constrained by `limits`.
    fn new(subtable: Cmap13<'a>, limits: Option<CmapIterLimits>) -> Self {
        Self(Cmap1213Iter::new(subtable.groups(), limits))
    }
}
736
impl Iterator for Cmap13Iter<'_> {
    type Item = (u32, GlyphId);

    /// Forwards to the wrapped shared format 12/13 iterator.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }
}
744
745impl<'a> Cmap14<'a> {
746    /// Maps a codepoint and variation selector to a nominal glyph identifier.
747    pub fn map_variant(
748        &self,
749        codepoint: impl Into<u32>,
750        selector: impl Into<u32>,
751    ) -> Option<MapVariant> {
752        let codepoint = codepoint.into();
753        let selector = selector.into();
754        let selector_records = self.var_selector();
755        // Variation selector records are sorted in order of var_selector. Binary search to find
756        // the appropriate record.
757        let selector_record = selector_records
758            .binary_search_by(|rec| {
759                let rec_selector: u32 = rec.var_selector().into();
760                rec_selector.cmp(&selector)
761            })
762            .ok()
763            .and_then(|idx| selector_records.get(idx))?;
764        // If a default UVS table is present in this selector record, binary search on the ranges
765        // (start_unicode_value, start_unicode_value + additional_count) to find the requested codepoint.
766        // If found, ignore the selector and return a value indicating that the default cmap mapping
767        // should be used.
768        if let Some(Ok(default_uvs)) = selector_record.default_uvs(self.offset_data()) {
769            use core::cmp::Ordering;
770            let found_default_uvs = default_uvs
771                .ranges()
772                .binary_search_by(|range| {
773                    let start = range.start_unicode_value().into();
774                    if codepoint < start {
775                        Ordering::Greater
776                    } else if codepoint > (start + range.additional_count() as u32) {
777                        Ordering::Less
778                    } else {
779                        Ordering::Equal
780                    }
781                })
782                .is_ok();
783            if found_default_uvs {
784                return Some(MapVariant::UseDefault);
785            }
786        }
787        // Binary search the non-default UVS table if present. This maps codepoint+selector to a variant glyph.
788        let non_default_uvs = selector_record.non_default_uvs(self.offset_data())?.ok()?;
789        let mapping = non_default_uvs.uvs_mapping();
790        let ix = mapping
791            .binary_search_by(|map| {
792                let map_codepoint: u32 = map.unicode_value().into();
793                map_codepoint.cmp(&codepoint)
794            })
795            .ok()?;
796        Some(MapVariant::Variant(GlyphId::from(
797            mapping.get(ix)?.glyph_id(),
798        )))
799    }
800
801    /// Returns an iterator over all (codepoint, selector, mapping variant)
802    /// triples in the subtable.
803    pub fn iter(&self) -> Cmap14Iter<'a> {
804        Cmap14Iter::new(self.clone())
805    }
806
807    fn selector(
808        &self,
809        index: usize,
810    ) -> (
811        Option<VariationSelector>,
812        Option<DefaultUvs<'a>>,
813        Option<NonDefaultUvs<'a>>,
814    ) {
815        let selector = self.var_selector().get(index).cloned();
816        let default_uvs = selector.as_ref().and_then(|selector| {
817            selector
818                .default_uvs(self.offset_data())
819                .transpose()
820                .ok()
821                .flatten()
822        });
823        let non_default_uvs = selector.as_ref().and_then(|selector| {
824            selector
825                .non_default_uvs(self.offset_data())
826                .transpose()
827                .ok()
828                .flatten()
829        });
830        (selector, default_uvs, non_default_uvs)
831    }
832
833    #[cfg(feature = "std")]
834    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
835        for selector in self.var_selector() {
836            if !unicodes.contains(selector.var_selector().to_u32()) {
837                continue;
838            }
839            if let Some(non_default_uvs) = selector
840                .non_default_uvs(self.offset_data())
841                .transpose()
842                .ok()
843                .flatten()
844            {
845                glyph_set.extend(
846                    non_default_uvs
847                        .uvs_mapping()
848                        .iter()
849                        .filter(|m| unicodes.contains(m.unicode_value().to_u32()))
850                        .map(|m| m.glyph_id().into()),
851                );
852            }
853        }
854    }
855}
856
/// Iterator over all (codepoint, selector, mapping variant) triples
/// in the subtable.
///
/// For each variation selector record, the default UVS entries are yielded
/// first, followed by the non-default UVS entries.
#[derive(Clone)]
pub struct Cmap14Iter<'a> {
    /// The format 14 subtable being iterated.
    subtable: Cmap14<'a>,
    /// The current variation selector record, or `None` once all records
    /// have been visited.
    selector_record: Option<VariationSelector>,
    /// Iterator over the current record's default UVS table, if it has one.
    default_uvs: Option<DefaultUvsIter<'a>>,
    /// Iterator over the current record's non-default UVS table, if it has
    /// one.
    non_default_uvs: Option<NonDefaultUvsIter<'a>>,
    /// Index of the current variation selector record.
    cur_selector_ix: usize,
}
867
868impl<'a> Cmap14Iter<'a> {
869    fn new(subtable: Cmap14<'a>) -> Self {
870        let (selector_record, default_uvs, non_default_uvs) = subtable.selector(0);
871        Self {
872            subtable,
873            selector_record,
874            default_uvs: default_uvs.map(DefaultUvsIter::new),
875            non_default_uvs: non_default_uvs.map(NonDefaultUvsIter::new),
876            cur_selector_ix: 0,
877        }
878    }
879}
880
881impl Iterator for Cmap14Iter<'_> {
882    type Item = (u32, u32, MapVariant);
883
884    fn next(&mut self) -> Option<Self::Item> {
885        loop {
886            let selector_record = self.selector_record.as_ref()?;
887            let selector: u32 = selector_record.var_selector().into();
888            if let Some(default_uvs) = self.default_uvs.as_mut() {
889                if let Some(codepoint) = default_uvs.next() {
890                    return Some((codepoint, selector, MapVariant::UseDefault));
891                }
892            }
893            if let Some(non_default_uvs) = self.non_default_uvs.as_mut() {
894                if let Some((codepoint, variant)) = non_default_uvs.next() {
895                    return Some((codepoint, selector, MapVariant::Variant(variant.into())));
896                }
897            }
898            self.cur_selector_ix += 1;
899            let (selector_record, default_uvs, non_default_uvs) =
900                self.subtable.selector(self.cur_selector_ix);
901            self.selector_record = selector_record;
902            self.default_uvs = default_uvs.map(DefaultUvsIter::new);
903            self.non_default_uvs = non_default_uvs.map(NonDefaultUvsIter::new);
904        }
905    }
906}
907
908#[derive(Clone)]
909struct DefaultUvsIter<'a> {
910    ranges: std::slice::Iter<'a, UnicodeRange>,
911    cur_range: Range<u32>,
912}
913
914impl<'a> DefaultUvsIter<'a> {
915    fn new(ranges: DefaultUvs<'a>) -> Self {
916        let mut ranges = ranges.ranges().iter();
917        let cur_range = if let Some(range) = ranges.next() {
918            let start: u32 = range.start_unicode_value().into();
919            let end = start + range.additional_count() as u32 + 1;
920            start..end
921        } else {
922            0..0
923        };
924        Self { ranges, cur_range }
925    }
926}
927
928impl Iterator for DefaultUvsIter<'_> {
929    type Item = u32;
930
931    fn next(&mut self) -> Option<Self::Item> {
932        loop {
933            if let Some(codepoint) = self.cur_range.next() {
934                return Some(codepoint);
935            }
936            let range = self.ranges.next()?;
937            let start: u32 = range.start_unicode_value().into();
938            let end = start + range.additional_count() as u32 + 1;
939            self.cur_range = start..end;
940        }
941    }
942}
943
944#[derive(Clone)]
945struct NonDefaultUvsIter<'a> {
946    iter: std::slice::Iter<'a, UvsMapping>,
947}
948
949impl<'a> NonDefaultUvsIter<'a> {
950    fn new(uvs: NonDefaultUvs<'a>) -> Self {
951        Self {
952            iter: uvs.uvs_mapping().iter(),
953        }
954    }
955}
956
957impl Iterator for NonDefaultUvsIter<'_> {
958    type Item = (u32, GlyphId16);
959
960    fn next(&mut self) -> Option<Self::Item> {
961        let mapping = self.iter.next()?;
962        let codepoint: u32 = mapping.unicode_value().into();
963        let glyph_id = GlyphId16::new(mapping.glyph_id());
964        Some((codepoint, glyph_id))
965    }
966}
967
968#[cfg(test)]
969mod tests {
970    use font_test_data::{be_buffer, bebuffer::BeBuffer};
971
972    use super::*;
973    use crate::{FontRef, GlyphId, TableProvider};
974
975    #[test]
976    fn map_codepoints() {
977        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
978        let cmap = font.cmap().unwrap();
979        assert_eq!(cmap.map_codepoint('A'), Some(GlyphId::new(1)));
980        assert_eq!(cmap.map_codepoint('À'), Some(GlyphId::new(2)));
981        assert_eq!(cmap.map_codepoint('`'), Some(GlyphId::new(3)));
982        assert_eq!(cmap.map_codepoint('B'), None);
983
984        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
985        let cmap = font.cmap().unwrap();
986        assert_eq!(cmap.map_codepoint(' '), Some(GlyphId::new(1)));
987        assert_eq!(cmap.map_codepoint(0xE_u32), Some(GlyphId::new(2)));
988        assert_eq!(cmap.map_codepoint('B'), None);
989
990        let cmap0_data = cmap0_data();
991        let cmap = Cmap::read(FontData::new(cmap0_data.data())).unwrap();
992
993        assert_eq!(cmap.map_codepoint(0u8), Some(GlyphId::new(0)));
994        assert_eq!(cmap.map_codepoint(b' '), Some(GlyphId::new(178)));
995        assert_eq!(cmap.map_codepoint(b'r'), Some(GlyphId::new(193)));
996        assert_eq!(cmap.map_codepoint(b'X'), Some(GlyphId::new(13)));
997        assert_eq!(cmap.map_codepoint(255u8), Some(GlyphId::new(3)));
998
999        let cmap6_data = be_buffer! {
1000            // version
1001            0u16,
1002            // numTables
1003            1u16,
1004            // platformID
1005            1u16,
1006            // encodingID
1007            0u16,
1008            // subtableOffset
1009            12u32,
1010            // format
1011            6u16,
1012            // length
1013            32u16,
1014            // language
1015            0u16,
1016            // firstCode
1017            32u16,
1018            // entryCount
1019            5u16,
1020            // glyphIDArray
1021            [10u16, 15, 7, 20, 4]
1022        };
1023
1024        let cmap = Cmap::read(FontData::new(cmap6_data.data())).unwrap();
1025
1026        assert_eq!(cmap.map_codepoint(0u8), None);
1027        assert_eq!(cmap.map_codepoint(31u8), None);
1028        assert_eq!(cmap.map_codepoint(33u8), Some(GlyphId::new(15)));
1029        assert_eq!(cmap.map_codepoint(35u8), Some(GlyphId::new(20)));
1030        assert_eq!(cmap.map_codepoint(36u8), Some(GlyphId::new(4)));
1031        assert_eq!(cmap.map_codepoint(50u8), None);
1032    }
1033
1034    #[test]
1035    fn map_variants() {
1036        use super::MapVariant::*;
1037        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
1038        let cmap = font.cmap().unwrap();
1039        let cmap14 = find_cmap14(&cmap).unwrap();
1040        let selector = '\u{e0100}';
1041        assert_eq!(cmap14.map_variant('a', selector), None);
1042        assert_eq!(cmap14.map_variant('\u{4e00}', selector), Some(UseDefault));
1043        assert_eq!(cmap14.map_variant('\u{4e06}', selector), Some(UseDefault));
1044        assert_eq!(
1045            cmap14.map_variant('\u{4e08}', selector),
1046            Some(Variant(GlyphId::new(25)))
1047        );
1048        assert_eq!(
1049            cmap14.map_variant('\u{4e09}', selector),
1050            Some(Variant(GlyphId::new(26)))
1051        );
1052    }
1053
1054    #[test]
1055    #[cfg(feature = "std")]
1056    fn cmap14_closure_glyphs() {
1057        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
1058        let cmap = font.cmap().unwrap();
1059        let mut unicodes = IntSet::empty();
1060        unicodes.insert(0x4e08_u32);
1061        unicodes.insert(0xe0100_u32);
1062
1063        let mut glyph_set = IntSet::empty();
1064        glyph_set.insert(GlyphId::new(18));
1065        cmap.closure_glyphs(&unicodes, &mut glyph_set);
1066
1067        assert_eq!(glyph_set.len(), 2);
1068        assert!(glyph_set.contains(GlyphId::new(18)));
1069        assert!(glyph_set.contains(GlyphId::new(25)));
1070    }
1071
1072    #[test]
1073    fn cmap4_iter() {
1074        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
1075        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
1076        let mut count = 0;
1077        for (codepoint, glyph_id) in cmap4.iter() {
1078            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
1079            count += 1;
1080        }
1081        assert_eq!(count, 4);
1082        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
1083        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
1084        let mut count = 0;
1085        for (codepoint, glyph_id) in cmap4.iter() {
1086            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
1087            count += 1;
1088        }
1089        assert_eq!(count, 3);
1090    }
1091
1092    #[test]
1093    fn cmap4_iter_explicit_notdef() {
1094        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
1095        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
1096        let mut notdef_count = 0;
1097        for (_, glyph_id) in cmap4.iter() {
1098            notdef_count += (glyph_id == GlyphId::NOTDEF) as i32;
1099        }
1100        assert!(notdef_count > 0);
1101        assert_eq!(cmap4.map_codepoint(0xFFFF_u32), Some(GlyphId::NOTDEF));
1102    }
1103
1104    // Make sure we don't bail early when iterating ranges with holes.
1105    // Encountered with Gentium Basic and Gentium Basic Book.
1106    // See <https://github.com/googlefonts/fontations/issues/897>
1107    #[test]
1108    fn cmap4_iter_sparse_range() {
1109        #[rustfmt::skip]
1110        let cmap4_data: &[u16] = &[
1111            // format, length, lang
1112            4, 0, 0,
1113            // segCountX2
1114            4,
1115            // bin search data
1116            0, 0, 0,
1117            // end code
1118            262, 0xFFFF, 
1119            // reserved pad
1120            0,
1121            // start code
1122            259, 0xFFFF,
1123            // id delta
1124            0, 1, 
1125            // id range offset
1126            4, 0,
1127            // glyph ids
1128            236, 0, 0, 326,
1129        ];
1130        let mut buf = BeBuffer::new();
1131        for &word in cmap4_data {
1132            buf = buf.push(word);
1133        }
1134        let cmap4 = Cmap4::read(FontData::new(&buf)).unwrap();
1135        let mappings = cmap4
1136            .iter()
1137            .map(|(ch, gid)| (ch, gid.to_u32()))
1138            .collect::<Vec<_>>();
1139        assert_eq!(mappings, &[(259, 236), (262, 326), (65535, 0)]);
1140    }
1141
    /// Expected (codepoint, glyph id) pairs for the format 6 subtable of
    /// `font_test_data::CMAP6`, in ascending codepoint order.
    const CMAP6_PAIRS: &[(u32, u32)] = &[
        (0x1723, 1),
        (0x1724, 2),
        (0x1725, 3),
        (0x1726, 4),
        (0x1727, 5),
    ];
1149
1150    #[test]
1151    fn cmap6_map() {
1152        let font = FontRef::new(font_test_data::CMAP6).unwrap();
1153        let cmap = font.cmap().unwrap();
1154        let CmapSubtable::Format6(cmap6) = cmap.subtable(0).unwrap() else {
1155            panic!("should be a format 6 subtable");
1156        };
1157        for (ch, gid) in CMAP6_PAIRS {
1158            assert_eq!(cmap6.map_codepoint(*ch).unwrap().to_u32(), *gid);
1159        }
1160        // Check out of bounds codepoints
1161        assert!(cmap6.map_codepoint(CMAP6_PAIRS[0].0 - 1).is_none());
1162        assert!(cmap6
1163            .map_codepoint(CMAP6_PAIRS.last().copied().unwrap().0 + 1)
1164            .is_none());
1165    }
1166
1167    #[test]
1168    fn cmap6_iter() {
1169        let font = FontRef::new(font_test_data::CMAP6).unwrap();
1170        let cmap = font.cmap().unwrap();
1171        let CmapSubtable::Format6(cmap6) = cmap.subtable(0).unwrap() else {
1172            panic!("should be a format 6 subtable");
1173        };
1174        let pairs = cmap6
1175            .iter()
1176            .map(|(ch, gid)| (ch, gid.to_u32()))
1177            .collect::<Vec<_>>();
1178        assert_eq!(pairs, CMAP6_PAIRS);
1179    }
1180
    /// Expected (codepoint, glyph id) pairs for the format 10 subtable of
    /// `font_test_data::CMAP10`, in ascending codepoint order.
    const CMAP10_PAIRS: &[(u32, u32)] = &[(0x109423, 26), (0x109424, 27), (0x109425, 32)];
1182
1183    #[test]
1184    fn cmap10_map() {
1185        let font = FontRef::new(font_test_data::CMAP10).unwrap();
1186        let cmap = font.cmap().unwrap();
1187        let CmapSubtable::Format10(cmap10) = cmap.subtable(0).unwrap() else {
1188            panic!("should be a format 10 subtable");
1189        };
1190        for (ch, gid) in CMAP10_PAIRS {
1191            assert_eq!(cmap10.map_codepoint(*ch).unwrap().to_u32(), *gid);
1192        }
1193        // Check out of bounds codepoints
1194        assert!(cmap10.map_codepoint(CMAP10_PAIRS[0].0 - 1).is_none());
1195        assert!(cmap10
1196            .map_codepoint(CMAP10_PAIRS.last().copied().unwrap().0 + 1)
1197            .is_none());
1198    }
1199
1200    #[test]
1201    fn cmap10_iter() {
1202        let font = FontRef::new(font_test_data::CMAP10).unwrap();
1203        let cmap = font.cmap().unwrap();
1204        let CmapSubtable::Format10(cmap10) = cmap.subtable(0).unwrap() else {
1205            panic!("should be a format 10 subtable");
1206        };
1207        let pairs = cmap10
1208            .iter()
1209            .map(|(ch, gid)| (ch, gid.to_u32()))
1210            .collect::<Vec<_>>();
1211        assert_eq!(pairs, CMAP10_PAIRS);
1212    }
1213
1214    #[test]
1215    fn cmap12_iter() {
1216        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
1217        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
1218        let mut count = 0;
1219        for (codepoint, glyph_id) in cmap12.iter() {
1220            assert_eq!(cmap12.map_codepoint(codepoint), Some(glyph_id));
1221            count += 1;
1222        }
1223        assert_eq!(count, 10);
1224    }
1225
1226    // oss-fuzz: detected integer addition overflow in Cmap12::group()
1227    // ref: https://oss-fuzz.com/testcase-detail/5141969742397440
1228    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69547
1229    #[test]
1230    fn cmap12_iter_avoid_overflow() {
1231        // reconstructed cmap from <https://oss-fuzz.com/testcase-detail/5141969742397440>
1232        let data = be_buffer! {
1233            12u16,      // format
1234            0u16,       // reserved, set to 0
1235            0u32,       // length, ignored
1236            0u32,       // language, ignored
1237            2u32,       // numGroups
1238            // groups: [startCode, endCode, startGlyphID]
1239            [0xFFFFFFFA_u32, 0xFFFFFFFC, 0], // group 0
1240            [0xFFFFFFFB_u32, 0xFFFFFFFF, 0] // group 1
1241        };
1242        let cmap12 = Cmap12::read(data.data().into()).unwrap();
1243        let _ = cmap12.iter().count();
1244    }
1245
1246    // oss-fuzz: timeout in Cmap12Iter
1247    // ref: https://oss-fuzz.com/testcase-detail/4628971063934976
1248    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69540
1249    #[test]
1250    fn cmap12_iter_avoid_timeout() {
1251        // ranges: [SequentialMapGroup { start_char_code: 170, end_char_code: 1330926671, start_glyph_id: 328960 }]
1252        let cmap12_data = be_buffer! {
1253            12u16,      // format
1254            0u16,       // reserved, set to 0
1255            0u32,       // length, ignored
1256            0u32,       // language, ignored
1257            1u32,       // numGroups
1258            // groups: [startCode, endCode, startGlyphID]
1259            [170u32, 1330926671, 328960] // group 0
1260        };
1261        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
1262        assert!(
1263            cmap12.iter_with_limits(CmapIterLimits::default()).count() <= char::MAX as usize + 1
1264        );
1265    }
1266
1267    // oss-fuzz: timeout in outlines, caused by cmap 12 iter
1268    // ref: <https://issues.oss-fuzz.com/issues/394638728>
1269    #[test]
1270    fn cmap12_iter_avoid_timeout2() {
1271        let cmap12_data = be_buffer! {
1272            12u16,      // format
1273            0u16,       // reserved, set to 0
1274            0u32,       // length, ignored
1275            0u32,       // language, ignored
1276            3u32,       // numGroups
1277            // groups: [startCode, endCode, startGlyphID]
1278            [199u32, 16777271, 2],
1279            [262u32, 262, 3],
1280            [268u32, 268, 4]
1281        };
1282        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
1283        // In the test case, maxp.numGlyphs = 8
1284        const MAX_GLYPHS: u32 = 8;
1285        let limits = CmapIterLimits {
1286            glyph_count: MAX_GLYPHS,
1287            ..Default::default()
1288        };
1289        assert_eq!(cmap12.iter_with_limits(limits).count(), MAX_GLYPHS as usize);
1290    }
1291
1292    #[test]
1293    fn cmap12_iter_glyph_limit() {
1294        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
1295        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
1296        let mut limits = CmapIterLimits::default_for_font(&font);
1297        // Ensure we obey the glyph count limit.
1298        // This font has 11 glyphs
1299        for glyph_count in 0..=11 {
1300            limits.glyph_count = glyph_count;
1301            assert_eq!(
1302                cmap12.iter_with_limits(limits).count(),
1303                // We always return one less than glyph count limit because
1304                // notdef is not mapped
1305                (glyph_count as usize).saturating_sub(1)
1306            );
1307        }
1308    }
1309
1310    #[test]
1311    fn cmap12_iter_range_clamping() {
1312        let data = be_buffer! {
1313            12u16,      // format
1314            0u16,       // reserved, set to 0
1315            0u32,       // length, ignored
1316            0u32,       // language, ignored
1317            2u32,       // numGroups
1318            // groups: [startCode, endCode, startGlyphID]
1319            [0u32, 16777215, 0], // group 0
1320            [255u32, 0xFFFFFFFF, 0] // group 1
1321        };
1322        let cmap12 = Cmap12::read(data.data().into()).unwrap();
1323        let ranges = cmap12
1324            .groups()
1325            .iter()
1326            .map(|group| (group.start_char_code(), group.end_char_code()))
1327            .collect::<Vec<_>>();
1328        // These groups overlap and extend to the whole u32 range
1329        assert_eq!(ranges, &[(0, 16777215), (255, u32::MAX)]);
1330        // But we produce at most char::MAX + 1 results
1331        let limits = CmapIterLimits {
1332            glyph_count: u32::MAX,
1333            ..Default::default()
1334        };
1335        assert!(cmap12.iter_with_limits(limits).count() <= char::MAX as usize + 1);
1336    }
1337
1338    #[test]
1339    fn cmap12_iter_explicit_notdef() {
1340        let data = be_buffer! {
1341            12u16,      // format
1342            0u16,       // reserved, set to 0
1343            0u32,       // length, ignored
1344            0u32,       // language, ignored
1345            1u32,       // numGroups
1346            // groups: [startCode, endCode, startGlyphID]
1347            [0_u32, 1_u32, 0] // group 0
1348        };
1349        let cmap12 = Cmap12::read(data.data().into()).unwrap();
1350        for (i, (codepoint, glyph_id)) in cmap12.iter().enumerate() {
1351            assert_eq!(codepoint as usize, i);
1352            assert_eq!(glyph_id.to_u32() as usize, i);
1353        }
1354        assert_eq!(cmap12.iter().next().unwrap().1, GlyphId::NOTDEF);
1355    }
1356
1357    fn cmap13_data() -> Vec<u8> {
1358        let data = be_buffer! {
1359            13u16,      // format
1360            0u16,       // reserved, set to 0
1361            0u32,       // length, ignored
1362            0u32,       // language, ignored
1363            2u32,       // numGroups
1364            // groups: [startCode, endCode, startGlyphID]
1365            [0u32, 8, 20], // group 0
1366            [42u32, 46u32, 30] // group 1
1367        };
1368        data.to_vec()
1369    }
1370
1371    #[test]
1372    fn cmap13_map() {
1373        let data = cmap13_data();
1374        let cmap13 = Cmap13::read(FontData::new(&data)).unwrap();
1375        for ch in 0u32..=8 {
1376            assert_eq!(cmap13.map_codepoint(ch), Some(GlyphId::new(20)));
1377        }
1378        for ch in 9u32..42 {
1379            assert_eq!(cmap13.map_codepoint(ch), None);
1380        }
1381        for ch in 42u32..=46 {
1382            assert_eq!(cmap13.map_codepoint(ch), Some(GlyphId::new(30)));
1383        }
1384        for ch in 47u32..1024 {
1385            assert_eq!(cmap13.map_codepoint(ch), None);
1386        }
1387    }
1388
1389    #[test]
1390    fn cmap13_iter() {
1391        let data = cmap13_data();
1392        let cmap13 = Cmap13::read(FontData::new(&data)).unwrap();
1393        for (ch, gid) in cmap13.iter() {
1394            assert_eq!(cmap13.map_codepoint(ch), Some(gid));
1395        }
1396    }
1397
1398    #[test]
1399    fn cmap14_iter() {
1400        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
1401        let cmap14 = find_cmap14(&font.cmap().unwrap()).unwrap();
1402        let mut count = 0;
1403        for (codepoint, selector, mapping) in cmap14.iter() {
1404            assert_eq!(cmap14.map_variant(codepoint, selector), Some(mapping));
1405            count += 1;
1406        }
1407        assert_eq!(count, 7);
1408    }
1409
1410    fn find_cmap4<'a>(cmap: &Cmap<'a>) -> Option<Cmap4<'a>> {
1411        cmap.encoding_records()
1412            .iter()
1413            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
1414            .find_map(|subtable| match subtable {
1415                CmapSubtable::Format4(cmap4) => Some(cmap4),
1416                _ => None,
1417            })
1418    }
1419
1420    fn find_cmap12<'a>(cmap: &Cmap<'a>) -> Option<Cmap12<'a>> {
1421        cmap.encoding_records()
1422            .iter()
1423            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
1424            .find_map(|subtable| match subtable {
1425                CmapSubtable::Format12(cmap12) => Some(cmap12),
1426                _ => None,
1427            })
1428    }
1429
1430    fn find_cmap14<'a>(cmap: &Cmap<'a>) -> Option<Cmap14<'a>> {
1431        cmap.encoding_records()
1432            .iter()
1433            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
1434            .find_map(|subtable| match subtable {
1435                CmapSubtable::Format14(cmap14) => Some(cmap14),
1436                _ => None,
1437            })
1438    }
1439
1440    /// <https://github.com/googlefonts/fontations/issues/1100>
1441    ///
1442    /// Note that this doesn't demonstrate the timeout, merely that we've eliminated the underlying
1443    /// enthusiasm for non-ascending ranges that enabled it
1444    #[test]
1445    fn cmap4_bad_data() {
1446        let buf = font_test_data::cmap::repetitive_cmap4();
1447        let cmap4 = Cmap4::read(FontData::new(buf.as_slice())).unwrap();
1448
1449        // we should have unique, ascending codepoints, not duplicates and overlaps
1450        assert_eq!(
1451            (6..=64).collect::<Vec<_>>(),
1452            cmap4.iter().map(|(cp, _)| cp).collect::<Vec<_>>()
1453        );
1454    }
1455
    /// Builds a `cmap` table with one encoding record whose subtable is
    /// format 0 with a 256-entry `glyphIDArray`.
    fn cmap0_data() -> BeBuffer {
        be_buffer! {
            // version
            0u16,
            // numTables
            1u16,
            // platformID
            1u16,
            // encodingID
            0u16,
            // subtableOffset
            12u32,
            // format
            0u16,
            // length
            274u16,
            // language
            0u16,
            // glyphIDArray
            [0u8, 249, 32, 2, 198, 23, 1, 4, 26, 36,
            171, 168, 69, 151, 208, 238, 226, 153, 161, 138,
            160, 130, 169, 223, 162, 207, 146, 227, 111, 248,
            163, 79, 178, 27, 50, 234, 213, 57, 45, 63,
            103, 186, 30, 105, 131, 118, 35, 140, 51, 211,
            75, 172, 56, 71, 137, 99, 22, 76, 61, 125,
            39, 8, 177, 117, 108, 97, 202, 92, 49, 134,
            93, 43, 80, 66, 84, 54, 180, 113, 11, 176,
            229, 48, 47, 17, 124, 40, 119, 21, 13, 133,
            181, 224, 33, 128, 44, 46, 38, 24, 65, 152,
            197, 225, 102, 251, 157, 126, 182, 242, 28, 184,
            90, 170, 201, 144, 193, 189, 250, 142, 77, 221,
            81, 164, 154, 60, 37, 200, 12, 53, 219, 89,
            31, 209, 188, 179, 253, 220, 127, 18, 19, 64,
            20, 141, 98, 173, 55, 194, 70, 107, 228, 104,
            10, 9, 15, 217, 255, 222, 196, 236, 67, 165,
            5, 143, 149, 100, 91, 95, 135, 235, 145, 204,
            72, 114, 246, 82, 245, 233, 106, 158, 185, 212,
            86, 243, 16, 195, 123, 190, 120, 187, 132, 139,
            192, 239, 110, 183, 240, 214, 166, 41, 59, 231,
            42, 94, 244, 83, 121, 25, 215, 96, 73, 87,
            174, 136, 62, 206, 156, 175, 230, 150, 116, 147,
            68, 122, 78, 112, 6, 167, 232, 254, 52, 34,
            191, 85, 241, 14, 216, 155, 29, 101, 115, 210,
            252, 218, 129, 247, 203, 159, 109, 74, 7, 58,
            237, 199, 88, 205, 148, 3]
        }
    }
1503
1504    #[test]
1505    fn best_subtable_full() {
1506        let font = FontRef::new(font_test_data::VORG).unwrap();
1507        let cmap = font.cmap().unwrap();
1508        let (index, record, _) = cmap.best_subtable().unwrap();
1509        assert_eq!(
1510            (index, record.platform_id(), record.encoding_id()),
1511            (3, PlatformId::Windows, WINDOWS_UNICODE_FULL_ENCODING)
1512        );
1513    }
1514
1515    #[test]
1516    fn best_subtable_bmp() {
1517        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
1518        let cmap = font.cmap().unwrap();
1519        let (index, record, _) = cmap.best_subtable().unwrap();
1520        assert_eq!(
1521            (index, record.platform_id(), record.encoding_id()),
1522            (0, PlatformId::Windows, WINDOWS_UNICODE_BMP_ENCODING)
1523        );
1524    }
1525
1526    #[test]
1527    fn best_subtable_symbol() {
1528        let font = FontRef::new(font_test_data::CMAP4_SYMBOL_PUA).unwrap();
1529        let cmap = font.cmap().unwrap();
1530        let (index, record, _) = cmap.best_subtable().unwrap();
1531        assert!(record.is_symbol());
1532        assert_eq!(
1533            (index, record.platform_id(), record.encoding_id()),
1534            (0, PlatformId::Windows, WINDOWS_SYMBOL_ENCODING)
1535        );
1536    }
1537
1538    #[test]
1539    fn uvs_subtable() {
1540        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
1541        let cmap = font.cmap().unwrap();
1542        let (index, _) = cmap.uvs_subtable().unwrap();
1543        assert_eq!(index, 0);
1544    }
1545}