read_fonts/tables/
cmap.rs

1//! The [cmap](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap) table
2
3include!("../../generated/generated_cmap.rs");
4
5#[cfg(feature = "std")]
6use crate::collections::IntSet;
7use crate::{FontRef, TableProvider};
8use std::ops::Range;
9
/// Result of mapping a codepoint with a variation selector.
///
/// Returned by [`Cmap14::map_variant`] and yielded (as the third tuple
/// element) by [`Cmap14Iter`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum MapVariant {
    /// The variation selector should be ignored and the default mapping
    /// of the character should be used.
    UseDefault,
    /// The variant glyph mapped by a codepoint and associated variation
    /// selector.
    Variant(GlyphId),
}
20
21impl Cmap<'_> {
22    /// Map a codepoint to a nominal glyph identifier
23    ///
24    /// This uses the first available subtable that provides a valid mapping.
25    ///
26    /// # Note:
27    ///
28    /// Mapping logic is currently only implemented for the most common subtable
29    /// formats.
30    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
31        let codepoint = codepoint.into();
32        for record in self.encoding_records() {
33            if let Ok(subtable) = record.subtable(self.offset_data()) {
34                if let Some(gid) = match subtable {
35                    CmapSubtable::Format0(format0) => format0.map_codepoint(codepoint),
36                    CmapSubtable::Format4(format4) => format4.map_codepoint(codepoint),
37                    CmapSubtable::Format6(format6) => format6.map_codepoint(codepoint),
38                    CmapSubtable::Format12(format12) => format12.map_codepoint(codepoint),
39                    CmapSubtable::Format13(format13) => format13.map_codepoint(codepoint),
40                    _ => None,
41                } {
42                    return Some(gid);
43                }
44            }
45        }
46        None
47    }
48
49    #[cfg(feature = "std")]
50    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
51        for record in self.encoding_records() {
52            if let Ok(subtable) = record.subtable(self.offset_data()) {
53                match subtable {
54                    CmapSubtable::Format14(format14) => {
55                        format14.closure_glyphs(unicodes, glyph_set);
56                        return;
57                    }
58                    _ => {
59                        continue;
60                    }
61                }
62            }
63        }
64    }
65}
66
67impl CmapSubtable<'_> {
68    pub fn language(&self) -> u32 {
69        match self {
70            Self::Format0(item) => item.language() as u32,
71            Self::Format2(item) => item.language() as u32,
72            Self::Format4(item) => item.language() as u32,
73            Self::Format6(item) => item.language() as u32,
74            Self::Format10(item) => item.language(),
75            Self::Format12(item) => item.language(),
76            Self::Format13(item) => item.language(),
77            _ => 0,
78        }
79    }
80}
81
82impl Cmap0<'_> {
83    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
84        let codepoint = codepoint.into();
85
86        self.glyph_id_array()
87            .get(codepoint as usize)
88            .map(|g| GlyphId::new(*g as u32))
89    }
90}
91
92impl<'a> Cmap4<'a> {
93    /// Maps a codepoint to a nominal glyph identifier.
94    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
95        let codepoint = codepoint.into();
96        if codepoint > 0xFFFF {
97            return None;
98        }
99        let codepoint = codepoint as u16;
100        let mut lo = 0;
101        let mut hi = self.seg_count_x2() as usize / 2;
102        let start_codes = self.start_code();
103        let end_codes = self.end_code();
104        while lo < hi {
105            let i = (lo + hi) / 2;
106            let start_code = start_codes.get(i)?.get();
107            if codepoint < start_code {
108                hi = i;
109            } else if codepoint > end_codes.get(i)?.get() {
110                lo = i + 1;
111            } else {
112                return self.lookup_glyph_id(codepoint, i, start_code);
113            }
114        }
115        None
116    }
117
118    /// Returns an iterator over all (codepoint, glyph identifier) pairs
119    /// in the subtable.
120    pub fn iter(&self) -> Cmap4Iter<'a> {
121        Cmap4Iter::new(self.clone())
122    }
123
124    /// Does the final phase of glyph id lookup.
125    ///
126    /// Shared between Self::map and Cmap4Iter.
127    fn lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId> {
128        let deltas = self.id_delta();
129        let range_offsets = self.id_range_offsets();
130        let delta = deltas.get(index)?.get() as i32;
131        let range_offset = range_offsets.get(index)?.get() as usize;
132        if range_offset == 0 {
133            return Some(GlyphId::from((codepoint as i32 + delta) as u16));
134        }
135        let mut offset = range_offset / 2 + (codepoint - start_code) as usize;
136        offset = offset.saturating_sub(range_offsets.len() - index);
137        let gid = self.glyph_id_array().get(offset)?.get();
138        (gid != 0).then_some(GlyphId::from((gid as i32 + delta) as u16))
139    }
140
141    /// Returns the [start_code, end_code] range at the given index.
142    fn code_range(&self, index: usize) -> Option<Range<u32>> {
143        // Extend to u32 to ensure we don't overflow on the end + 1 bound
144        // below.
145        let start = self.start_code().get(index)?.get() as u32;
146        let end = self.end_code().get(index)?.get() as u32;
147        // Use end + 1 here because the range in the table is inclusive
148        Some(start..end + 1)
149    }
150}
151
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Created by [`Cmap4::iter`].
#[derive(Clone)]
pub struct Cmap4Iter<'a> {
    // The subtable being iterated.
    subtable: Cmap4<'a>,
    // Remaining codepoints (half-open) of the segment currently being walked.
    cur_range: Range<u32>,
    // Start code passed to the glyph lookup for the current segment.
    cur_start_code: u16,
    // Index of the current segment.
    cur_range_ix: usize,
}
161
162impl<'a> Cmap4Iter<'a> {
163    fn new(subtable: Cmap4<'a>) -> Self {
164        let cur_range = subtable.code_range(0).unwrap_or_default();
165        let cur_start_code = cur_range.start as u16;
166        Self {
167            subtable,
168            cur_range,
169            cur_start_code,
170            cur_range_ix: 0,
171        }
172    }
173}
174
175impl Iterator for Cmap4Iter<'_> {
176    type Item = (u32, GlyphId);
177
178    fn next(&mut self) -> Option<Self::Item> {
179        loop {
180            if let Some(codepoint) = self.cur_range.next() {
181                let Some(glyph_id) = self.subtable.lookup_glyph_id(
182                    codepoint as u16,
183                    self.cur_range_ix,
184                    self.cur_start_code,
185                ) else {
186                    continue;
187                };
188                return Some((codepoint, glyph_id));
189            } else {
190                self.cur_range_ix += 1;
191                let next_range = self.subtable.code_range(self.cur_range_ix)?;
192                // Groups should be in order and non-overlapping so make sure
193                // that the start code of next group is at least current_end + 1.
194                // Also avoid start sliding backwards if we see data where end < start by taking the max
195                // of next.end and curr.end as the new end.
196                // This prevents timeout and bizarre results in the face of numerous overlapping ranges
197                // https://github.com/googlefonts/fontations/issues/1100
198                // cmap4 ranges are u16 so no need to stress about values past char::MAX
199                self.cur_range = next_range.start.max(self.cur_range.end)
200                    ..next_range.end.max(self.cur_range.end);
201                self.cur_start_code = self.cur_range.start as u16;
202            }
203        }
204    }
205}
206
207impl Cmap6<'_> {
208    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
209        let codepoint = codepoint.into();
210
211        let first = self.first_code() as u32;
212        let idx = codepoint.checked_sub(first)?;
213        self.glyph_id_array()
214            .get(idx as usize)
215            .map(|g| GlyphId::new(g.get() as u32))
216    }
217}
218
219/// Trait to unify constant and sequential map groups.
220trait AnyMapGroup {
221    const IS_CONSTANT: bool;
222
223    fn start_char_code(&self) -> u32;
224    fn end_char_code(&self) -> u32;
225    /// Either start glyph id for a sequential group or just glyph id
226    /// for a constant group.
227    fn ref_glyph_id(&self) -> u32;
228
229    fn compute_glyph_id(codepoint: u32, start_char_code: u32, ref_glyph_id: u32) -> GlyphId {
230        if Self::IS_CONSTANT {
231            GlyphId::new(ref_glyph_id)
232        } else {
233            GlyphId::new(ref_glyph_id.wrapping_add(codepoint.wrapping_sub(start_char_code)))
234        }
235    }
236}
237
238impl AnyMapGroup for ConstantMapGroup {
239    const IS_CONSTANT: bool = true;
240
241    fn start_char_code(&self) -> u32 {
242        self.start_char_code()
243    }
244
245    fn end_char_code(&self) -> u32 {
246        self.end_char_code()
247    }
248
249    fn ref_glyph_id(&self) -> u32 {
250        self.glyph_id()
251    }
252}
253
254impl AnyMapGroup for SequentialMapGroup {
255    const IS_CONSTANT: bool = false;
256
257    fn start_char_code(&self) -> u32 {
258        self.start_char_code()
259    }
260
261    fn end_char_code(&self) -> u32 {
262        self.end_char_code()
263    }
264
265    fn ref_glyph_id(&self) -> u32 {
266        self.start_glyph_id()
267    }
268}
269
270/// Shared codepoint mapping code for cmap 12/13.
271fn cmap1213_map_codepoint<T: AnyMapGroup>(
272    groups: &[T],
273    codepoint: impl Into<u32>,
274) -> Option<GlyphId> {
275    let codepoint = codepoint.into();
276    let mut lo = 0;
277    let mut hi = groups.len();
278    while lo < hi {
279        let i = (lo + hi) / 2;
280        let group = groups.get(i)?;
281        if codepoint < group.start_char_code() {
282            hi = i;
283        } else if codepoint > group.end_char_code() {
284            lo = i + 1;
285        } else {
286            return Some(T::compute_glyph_id(
287                codepoint,
288                group.start_char_code(),
289                group.ref_glyph_id(),
290            ));
291        }
292    }
293    None
294}
295
/// Character and glyph limits for iterating format 12 and 13 subtables.
///
/// Pass to [`Cmap12::iter_with_limits`] or [`Cmap13::iter_with_limits`]
/// to prune the pairs produced for malformed or malicious fonts.
#[derive(Copy, Clone, Debug)]
pub struct CmapIterLimits {
    /// The maximum valid character.
    pub max_char: u32,
    /// The number of glyphs in the font.
    pub glyph_count: u32,
}
304
305impl CmapIterLimits {
306    /// Returns the default limits for the given font.
307    ///
308    /// This will limit pairs to `char::MAX` and the number of glyphs contained
309    /// in the font. If the font is missing a `maxp` table, the number of
310    /// glyphs will be limited to `u16::MAX`.
311    pub fn default_for_font(font: &FontRef) -> Self {
312        let glyph_count = font
313            .maxp()
314            .map(|maxp| maxp.num_glyphs())
315            .unwrap_or(u16::MAX) as u32;
316        Self {
317            // Limit to the valid range of Unicode characters
318            // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
319            max_char: char::MAX as u32,
320            glyph_count,
321        }
322    }
323}
324
325impl Default for CmapIterLimits {
326    fn default() -> Self {
327        Self {
328            max_char: char::MAX as u32,
329            // Revisit this when we actually support big glyph ids
330            glyph_count: u16::MAX as u32,
331        }
332    }
333}
334
/// Remapped groups for iterating cmap12/13.
///
/// Built by `cmap1213_iter_group` from a raw map group and optional limits.
#[derive(Clone, Debug)]
struct Cmap1213IterGroup {
    // Half-open codepoint range still to be produced. Stored as u64 so that
    // an exclusive end of u32::MAX + 1 can be represented.
    range: Range<u64>,
    // Start character code of the source group.
    start_code: u32,
    // Start glyph id (sequential group) or the glyph id (constant group).
    ref_glyph_id: u32,
}
342
343/// Shared group resolution code for cmap 12/13.
344fn cmap1213_iter_group<T: AnyMapGroup>(
345    groups: &[T],
346    index: usize,
347    limits: &Option<CmapIterLimits>,
348) -> Option<Cmap1213IterGroup> {
349    let group = groups.get(index)?;
350    let start_code = group.start_char_code();
351    // Change to exclusive range. This can never overflow since the source
352    // is a 32-bit value
353    let end_code = group.end_char_code() as u64 + 1;
354    let start_glyph_id = group.ref_glyph_id();
355    let end_code = if let Some(limits) = limits {
356        // Set our end code to the minimum of our character and glyph
357        // count limit
358        if T::IS_CONSTANT {
359            end_code.min(limits.max_char as u64)
360        } else {
361            (limits.glyph_count as u64)
362                .saturating_sub(start_glyph_id as u64)
363                .saturating_add(start_code as u64)
364                .min(end_code.min(limits.max_char as u64))
365        }
366    } else {
367        end_code
368    };
369    Some(Cmap1213IterGroup {
370        range: start_code as u64..end_code,
371        start_code,
372        ref_glyph_id: start_glyph_id,
373    })
374}
375
/// Shared iterator for cmap 12/13.
///
/// Wrapped by the public [`Cmap12Iter`] and [`Cmap13Iter`] types.
#[derive(Clone)]
struct Cmap1213Iter<'a, T> {
    // All map groups of the subtable.
    groups: &'a [T],
    // Resolved state of the group currently being walked; `None` when the
    // first group could not be resolved.
    cur_group: Option<Cmap1213IterGroup>,
    // Index of the current group in `groups`.
    cur_group_ix: usize,
    // Optional limits applied to each group's range.
    limits: Option<CmapIterLimits>,
}
384
385impl<'a, T> Cmap1213Iter<'a, T>
386where
387    T: AnyMapGroup,
388{
389    fn new(groups: &'a [T], limits: Option<CmapIterLimits>) -> Self {
390        let cur_group = cmap1213_iter_group(groups, 0, &limits);
391        Self {
392            groups,
393            cur_group,
394            cur_group_ix: 0,
395            limits,
396        }
397    }
398}
399
400impl<T> Iterator for Cmap1213Iter<'_, T>
401where
402    T: AnyMapGroup,
403{
404    type Item = (u32, GlyphId);
405
406    fn next(&mut self) -> Option<Self::Item> {
407        loop {
408            let group = self.cur_group.as_mut()?;
409            if let Some(codepoint) = group.range.next() {
410                let codepoint = codepoint as u32;
411                let glyph_id = T::compute_glyph_id(codepoint, group.start_code, group.ref_glyph_id);
412                return Some((codepoint, glyph_id));
413            } else {
414                self.cur_group_ix += 1;
415                let mut next_group =
416                    cmap1213_iter_group(self.groups, self.cur_group_ix, &self.limits)?;
417                // Groups should be in order and non-overlapping so make sure
418                // that the start code of next group is at least
419                // current_end.
420                if next_group.range.start < group.range.end {
421                    next_group.range = group.range.end..next_group.range.end;
422                }
423                self.cur_group = Some(next_group);
424            }
425        }
426    }
427}
428
429impl<'a> Cmap12<'a> {
430    /// Maps a codepoint to a nominal glyph identifier.
431    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
432        cmap1213_map_codepoint(self.groups(), codepoint)
433    }
434
435    /// Returns an iterator over all (codepoint, glyph identifier) pairs
436    /// in the subtable.
437    ///
438    /// Malicious and malformed fonts can produce a large number of invalid
439    /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
440    /// that is limited to reasonable values.
441    pub fn iter(&self) -> Cmap12Iter<'a> {
442        Cmap12Iter::new(self.clone(), None)
443    }
444
445    /// Returns an iterator over all (codepoint, glyph identifier) pairs
446    /// in the subtable within the given limits.
447    pub fn iter_with_limits(&self, limits: CmapIterLimits) -> Cmap12Iter<'a> {
448        Cmap12Iter::new(self.clone(), Some(limits))
449    }
450}
451
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Created by [`Cmap12::iter`] and [`Cmap12::iter_with_limits`]. This is a
/// thin wrapper around the shared format 12/13 iterator.
#[derive(Clone)]
pub struct Cmap12Iter<'a>(Cmap1213Iter<'a, SequentialMapGroup>);
456
457impl<'a> Cmap12Iter<'a> {
458    fn new(subtable: Cmap12<'a>, limits: Option<CmapIterLimits>) -> Self {
459        Self(Cmap1213Iter::new(subtable.groups(), limits))
460    }
461}
462
463impl Iterator for Cmap12Iter<'_> {
464    type Item = (u32, GlyphId);
465
466    fn next(&mut self) -> Option<Self::Item> {
467        self.0.next()
468    }
469}
470
471impl<'a> Cmap13<'a> {
472    /// Maps a codepoint to a nominal glyph identifier.
473    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
474        cmap1213_map_codepoint(self.groups(), codepoint)
475    }
476
477    /// Returns an iterator over all (codepoint, glyph identifier) pairs
478    /// in the subtable.
479    ///
480    /// Malicious and malformed fonts can produce a large number of invalid
481    /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
482    /// that is limited to reasonable values.
483    pub fn iter(&self) -> Cmap13Iter<'a> {
484        Cmap13Iter::new(self.clone(), None)
485    }
486
487    /// Returns an iterator over all (codepoint, glyph identifier) pairs
488    /// in the subtable within the given limits.
489    pub fn iter_with_limits(&self, limits: CmapIterLimits) -> Cmap13Iter<'a> {
490        Cmap13Iter::new(self.clone(), Some(limits))
491    }
492}
493
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Created by [`Cmap13::iter`] and [`Cmap13::iter_with_limits`]. This is a
/// thin wrapper around the shared format 12/13 iterator.
#[derive(Clone)]
pub struct Cmap13Iter<'a>(Cmap1213Iter<'a, ConstantMapGroup>);
498
499impl<'a> Cmap13Iter<'a> {
500    fn new(subtable: Cmap13<'a>, limits: Option<CmapIterLimits>) -> Self {
501        Self(Cmap1213Iter::new(subtable.groups(), limits))
502    }
503}
504
505impl Iterator for Cmap13Iter<'_> {
506    type Item = (u32, GlyphId);
507
508    fn next(&mut self) -> Option<Self::Item> {
509        self.0.next()
510    }
511}
512
513impl<'a> Cmap14<'a> {
514    /// Maps a codepoint and variation selector to a nominal glyph identifier.
515    pub fn map_variant(
516        &self,
517        codepoint: impl Into<u32>,
518        selector: impl Into<u32>,
519    ) -> Option<MapVariant> {
520        let codepoint = codepoint.into();
521        let selector = selector.into();
522        let selector_records = self.var_selector();
523        // Variation selector records are sorted in order of var_selector. Binary search to find
524        // the appropriate record.
525        let selector_record = selector_records
526            .binary_search_by(|rec| {
527                let rec_selector: u32 = rec.var_selector().into();
528                rec_selector.cmp(&selector)
529            })
530            .ok()
531            .and_then(|idx| selector_records.get(idx))?;
532        // If a default UVS table is present in this selector record, binary search on the ranges
533        // (start_unicode_value, start_unicode_value + additional_count) to find the requested codepoint.
534        // If found, ignore the selector and return a value indicating that the default cmap mapping
535        // should be used.
536        if let Some(Ok(default_uvs)) = selector_record.default_uvs(self.offset_data()) {
537            use core::cmp::Ordering;
538            let found_default_uvs = default_uvs
539                .ranges()
540                .binary_search_by(|range| {
541                    let start = range.start_unicode_value().into();
542                    if codepoint < start {
543                        Ordering::Greater
544                    } else if codepoint > (start + range.additional_count() as u32) {
545                        Ordering::Less
546                    } else {
547                        Ordering::Equal
548                    }
549                })
550                .is_ok();
551            if found_default_uvs {
552                return Some(MapVariant::UseDefault);
553            }
554        }
555        // Binary search the non-default UVS table if present. This maps codepoint+selector to a variant glyph.
556        let non_default_uvs = selector_record.non_default_uvs(self.offset_data())?.ok()?;
557        let mapping = non_default_uvs.uvs_mapping();
558        let ix = mapping
559            .binary_search_by(|map| {
560                let map_codepoint: u32 = map.unicode_value().into();
561                map_codepoint.cmp(&codepoint)
562            })
563            .ok()?;
564        Some(MapVariant::Variant(GlyphId::from(
565            mapping.get(ix)?.glyph_id(),
566        )))
567    }
568
569    /// Returns an iterator over all (codepoint, selector, mapping variant)
570    /// triples in the subtable.
571    pub fn iter(&self) -> Cmap14Iter<'a> {
572        Cmap14Iter::new(self.clone())
573    }
574
575    fn selector(
576        &self,
577        index: usize,
578    ) -> (
579        Option<VariationSelector>,
580        Option<DefaultUvs<'a>>,
581        Option<NonDefaultUvs<'a>>,
582    ) {
583        let selector = self.var_selector().get(index).cloned();
584        let default_uvs = selector.as_ref().and_then(|selector| {
585            selector
586                .default_uvs(self.offset_data())
587                .transpose()
588                .ok()
589                .flatten()
590        });
591        let non_default_uvs = selector.as_ref().and_then(|selector| {
592            selector
593                .non_default_uvs(self.offset_data())
594                .transpose()
595                .ok()
596                .flatten()
597        });
598        (selector, default_uvs, non_default_uvs)
599    }
600
601    #[cfg(feature = "std")]
602    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
603        for selector in self.var_selector() {
604            if !unicodes.contains(selector.var_selector().to_u32()) {
605                continue;
606            }
607            if let Some(non_default_uvs) = selector
608                .non_default_uvs(self.offset_data())
609                .transpose()
610                .ok()
611                .flatten()
612            {
613                glyph_set.extend(
614                    non_default_uvs
615                        .uvs_mapping()
616                        .iter()
617                        .filter(|m| unicodes.contains(m.unicode_value().to_u32()))
618                        .map(|m| m.glyph_id().into()),
619                );
620            }
621        }
622    }
623}
624
/// Iterator over all (codepoint, selector, mapping variant) triples
/// in the subtable.
///
/// Created by [`Cmap14::iter`]. For each variation selector record, the
/// default UVS entries are produced before the non-default ones.
#[derive(Clone)]
pub struct Cmap14Iter<'a> {
    // The subtable being iterated.
    subtable: Cmap14<'a>,
    // Current variation selector record; `None` once all records are consumed.
    selector_record: Option<VariationSelector>,
    // Remaining default UVS codepoints of the current record, if any.
    default_uvs: Option<DefaultUvsIter<'a>>,
    // Remaining non-default UVS mappings of the current record, if any.
    non_default_uvs: Option<NonDefaultUvsIter<'a>>,
    // Index of the current variation selector record.
    cur_selector_ix: usize,
}
635
636impl<'a> Cmap14Iter<'a> {
637    fn new(subtable: Cmap14<'a>) -> Self {
638        let (selector_record, default_uvs, non_default_uvs) = subtable.selector(0);
639        Self {
640            subtable,
641            selector_record,
642            default_uvs: default_uvs.map(DefaultUvsIter::new),
643            non_default_uvs: non_default_uvs.map(NonDefaultUvsIter::new),
644            cur_selector_ix: 0,
645        }
646    }
647}
648
649impl Iterator for Cmap14Iter<'_> {
650    type Item = (u32, u32, MapVariant);
651
652    fn next(&mut self) -> Option<Self::Item> {
653        loop {
654            let selector_record = self.selector_record.as_ref()?;
655            let selector: u32 = selector_record.var_selector().into();
656            if let Some(default_uvs) = self.default_uvs.as_mut() {
657                if let Some(codepoint) = default_uvs.next() {
658                    return Some((codepoint, selector, MapVariant::UseDefault));
659                }
660            }
661            if let Some(non_default_uvs) = self.non_default_uvs.as_mut() {
662                if let Some((codepoint, variant)) = non_default_uvs.next() {
663                    return Some((codepoint, selector, MapVariant::Variant(variant.into())));
664                }
665            }
666            self.cur_selector_ix += 1;
667            let (selector_record, default_uvs, non_default_uvs) =
668                self.subtable.selector(self.cur_selector_ix);
669            self.selector_record = selector_record;
670            self.default_uvs = default_uvs.map(DefaultUvsIter::new);
671            self.non_default_uvs = non_default_uvs.map(NonDefaultUvsIter::new);
672        }
673    }
674}
675
/// Iterator over every codepoint covered by the ranges of a default UVS
/// table.
#[derive(Clone)]
struct DefaultUvsIter<'a> {
    // Ranges that have not been started yet.
    ranges: std::slice::Iter<'a, UnicodeRange>,
    // Remaining codepoints (half-open) of the range currently being walked.
    cur_range: Range<u32>,
}
681
682impl<'a> DefaultUvsIter<'a> {
683    fn new(ranges: DefaultUvs<'a>) -> Self {
684        let mut ranges = ranges.ranges().iter();
685        let cur_range = if let Some(range) = ranges.next() {
686            let start: u32 = range.start_unicode_value().into();
687            let end = start + range.additional_count() as u32 + 1;
688            start..end
689        } else {
690            0..0
691        };
692        Self { ranges, cur_range }
693    }
694}
695
696impl Iterator for DefaultUvsIter<'_> {
697    type Item = u32;
698
699    fn next(&mut self) -> Option<Self::Item> {
700        loop {
701            if let Some(codepoint) = self.cur_range.next() {
702                return Some(codepoint);
703            }
704            let range = self.ranges.next()?;
705            let start: u32 = range.start_unicode_value().into();
706            let end = start + range.additional_count() as u32 + 1;
707            self.cur_range = start..end;
708        }
709    }
710}
711
/// Iterator over the (codepoint, glyph identifier) mappings of a
/// non-default UVS table.
#[derive(Clone)]
struct NonDefaultUvsIter<'a> {
    // Mappings that have not been produced yet.
    iter: std::slice::Iter<'a, UvsMapping>,
}
716
717impl<'a> NonDefaultUvsIter<'a> {
718    fn new(uvs: NonDefaultUvs<'a>) -> Self {
719        Self {
720            iter: uvs.uvs_mapping().iter(),
721        }
722    }
723}
724
725impl Iterator for NonDefaultUvsIter<'_> {
726    type Item = (u32, GlyphId16);
727
728    fn next(&mut self) -> Option<Self::Item> {
729        let mapping = self.iter.next()?;
730        let codepoint: u32 = mapping.unicode_value().into();
731        let glyph_id = GlyphId16::new(mapping.glyph_id());
732        Some((codepoint, glyph_id))
733    }
734}
735
736#[cfg(test)]
737mod tests {
738    use font_test_data::{be_buffer, bebuffer::BeBuffer};
739
740    use super::*;
741    use crate::{FontRef, GlyphId, TableProvider};
742
    // Exercises `Cmap::map_codepoint` against two test fonts and against
    // hand-built format 0 and format 6 tables.
    #[test]
    fn map_codepoints() {
        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
        let cmap = font.cmap().unwrap();
        assert_eq!(cmap.map_codepoint('A'), Some(GlyphId::new(1)));
        assert_eq!(cmap.map_codepoint('À'), Some(GlyphId::new(2)));
        assert_eq!(cmap.map_codepoint('`'), Some(GlyphId::new(3)));
        assert_eq!(cmap.map_codepoint('B'), None);

        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
        let cmap = font.cmap().unwrap();
        assert_eq!(cmap.map_codepoint(' '), Some(GlyphId::new(1)));
        assert_eq!(cmap.map_codepoint(0xE_u32), Some(GlyphId::new(2)));
        assert_eq!(cmap.map_codepoint('B'), None);

        // Format 0: the codepoint indexes the glyph id array directly.
        let cmap0_data = cmap0_data();
        let cmap = Cmap::read(FontData::new(cmap0_data.data())).unwrap();

        assert_eq!(cmap.map_codepoint(0u8), Some(GlyphId::new(0)));
        assert_eq!(cmap.map_codepoint(b' '), Some(GlyphId::new(178)));
        assert_eq!(cmap.map_codepoint(b'r'), Some(GlyphId::new(193)));
        assert_eq!(cmap.map_codepoint(b'X'), Some(GlyphId::new(13)));
        assert_eq!(cmap.map_codepoint(255u8), Some(GlyphId::new(3)));

        // Format 6: five entries starting at code 32, so codes 32..=36 are
        // covered.
        let cmap6_data = be_buffer! {
            // version
            0u16,
            // numTables
            1u16,
            // platformID
            1u16,
            // encodingID
            0u16,
            // subtableOffset
            12u32,
            // format
            6u16,
            // length
            32u16,
            // language
            0u16,
            // firstCode
            32u16,
            // entryCount
            5u16,
            // glyphIDArray
            [10u16, 15, 7, 20, 4]
        };

        let cmap = Cmap::read(FontData::new(cmap6_data.data())).unwrap();

        // Codes below firstCode and past the last entry are unmapped.
        assert_eq!(cmap.map_codepoint(0u8), None);
        assert_eq!(cmap.map_codepoint(31u8), None);
        assert_eq!(cmap.map_codepoint(33u8), Some(GlyphId::new(15)));
        assert_eq!(cmap.map_codepoint(35u8), Some(GlyphId::new(20)));
        assert_eq!(cmap.map_codepoint(36u8), Some(GlyphId::new(4)));
        assert_eq!(cmap.map_codepoint(50u8), None);
    }
801
    // Exercises `Cmap14::map_variant` for all three outcomes: no mapping,
    // default mapping and explicit variant glyph.
    #[test]
    fn map_variants() {
        use super::MapVariant::*;
        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
        let cmap = font.cmap().unwrap();
        let cmap14 = find_cmap14(&cmap).unwrap();
        let selector = '\u{e0100}';
        // Not covered by either UVS table of the selector.
        assert_eq!(cmap14.map_variant('a', selector), None);
        // Covered by the default UVS table.
        assert_eq!(cmap14.map_variant('\u{4e00}', selector), Some(UseDefault));
        assert_eq!(cmap14.map_variant('\u{4e06}', selector), Some(UseDefault));
        // Mapped to explicit glyphs by the non-default UVS table.
        assert_eq!(
            cmap14.map_variant('\u{4e08}', selector),
            Some(Variant(GlyphId::new(25)))
        );
        assert_eq!(
            cmap14.map_variant('\u{4e09}', selector),
            Some(Variant(GlyphId::new(26)))
        );
    }
821
    // Checks that `Cmap::closure_glyphs` adds the variant glyph for a
    // codepoint + selector pair and keeps glyphs already in the set.
    #[test]
    #[cfg(feature = "std")]
    fn cmap14_closure_glyphs() {
        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
        let cmap = font.cmap().unwrap();
        // Both the base codepoint and the variation selector must be present.
        let mut unicodes = IntSet::empty();
        unicodes.insert(0x4e08_u32);
        unicodes.insert(0xe0100_u32);

        let mut glyph_set = IntSet::empty();
        glyph_set.insert(GlyphId::new(18));
        cmap.closure_glyphs(&unicodes, &mut glyph_set);

        // The pre-existing glyph 18 plus the variant glyph 25.
        assert_eq!(glyph_set.len(), 2);
        assert!(glyph_set.contains(GlyphId::new(18)));
        assert!(glyph_set.contains(GlyphId::new(25)));
    }
839
    // Checks that every pair produced by `Cmap4::iter` agrees with
    // `Cmap4::map_codepoint`, and that the expected number of pairs is
    // produced for two test fonts.
    #[test]
    fn cmap4_iter() {
        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
        let mut count = 0;
        for (codepoint, glyph_id) in cmap4.iter() {
            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
            count += 1;
        }
        assert_eq!(count, 4);
        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
        let mut count = 0;
        for (codepoint, glyph_id) in cmap4.iter() {
            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
            count += 1;
        }
        assert_eq!(count, 3);
    }
859
    // Checks that mappings to the notdef glyph are still produced by the
    // iterator and by `map_codepoint`.
    #[test]
    fn cmap4_iter_explicit_notdef() {
        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
        let mut notdef_count = 0;
        for (_, glyph_id) in cmap4.iter() {
            notdef_count += (glyph_id == GlyphId::NOTDEF) as i32;
        }
        assert!(notdef_count > 0);
        assert_eq!(cmap4.map_codepoint(0xFFFF_u32), Some(GlyphId::NOTDEF));
    }
871
    // Make sure we don't bail early when iterating ranges with holes.
    // Encountered with Gentium Basic and Gentium Basic Book.
    // See <https://github.com/googlefonts/fontations/issues/897>
    //
    // The first segment covers 259..=262 through the glyph id array, whose
    // two middle entries are 0. Those codepoints must be skipped without
    // ending iteration.
    #[test]
    fn cmap4_iter_sparse_range() {
        #[rustfmt::skip]
        let cmap4_data: &[u16] = &[
            // format, length, lang
            4, 0, 0,
            // segCountX2
            4,
            // bin search data
            0, 0, 0,
            // end code
            262, 0xFFFF, 
            // reserved pad
            0,
            // start code
            259, 0xFFFF,
            // id delta
            0, 1, 
            // id range offset
            4, 0,
            // glyph ids
            236, 0, 0, 326,
        ];
        let mut buf = BeBuffer::new();
        for &word in cmap4_data {
            buf = buf.push(word);
        }
        let cmap4 = Cmap4::read(FontData::new(&buf)).unwrap();
        let mappings = cmap4
            .iter()
            .map(|(ch, gid)| (ch, gid.to_u32()))
            .collect::<Vec<_>>();
        // 260 and 261 are holes; 0xFFFF + delta 1 wraps around to glyph 0.
        assert_eq!(mappings, &[(259, 236), (262, 326), (65535, 0)]);
    }
909
910    #[test]
911    fn cmap12_iter() {
912        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
913        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
914        let mut count = 0;
915        for (codepoint, glyph_id) in cmap12.iter() {
916            assert_eq!(cmap12.map_codepoint(codepoint), Some(glyph_id));
917            count += 1;
918        }
919        assert_eq!(count, 10);
920    }
921
922    // oss-fuzz: detected integer addition overflow in Cmap12::group()
923    // ref: https://oss-fuzz.com/testcase-detail/5141969742397440
924    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69547
925    #[test]
926    fn cmap12_iter_avoid_overflow() {
927        // reconstructed cmap from <https://oss-fuzz.com/testcase-detail/5141969742397440>
928        let data = be_buffer! {
929            12u16,      // format
930            0u16,       // reserved, set to 0
931            0u32,       // length, ignored
932            0u32,       // language, ignored
933            2u32,       // numGroups
934            // groups: [startCode, endCode, startGlyphID]
935            [0xFFFFFFFA_u32, 0xFFFFFFFC, 0], // group 0
936            [0xFFFFFFFB_u32, 0xFFFFFFFF, 0] // group 1
937        };
938        let cmap12 = Cmap12::read(data.data().into()).unwrap();
939        let _ = cmap12.iter().count();
940    }
941
942    // oss-fuzz: timeout in Cmap12Iter
943    // ref: https://oss-fuzz.com/testcase-detail/4628971063934976
944    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69540
945    #[test]
946    fn cmap12_iter_avoid_timeout() {
947        // ranges: [SequentialMapGroup { start_char_code: 170, end_char_code: 1330926671, start_glyph_id: 328960 }]
948        let cmap12_data = be_buffer! {
949            12u16,      // format
950            0u16,       // reserved, set to 0
951            0u32,       // length, ignored
952            0u32,       // language, ignored
953            1u32,       // numGroups
954            // groups: [startCode, endCode, startGlyphID]
955            [170u32, 1330926671, 328960] // group 0
956        };
957        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
958        assert!(
959            cmap12.iter_with_limits(CmapIterLimits::default()).count() <= char::MAX as usize + 1
960        );
961    }
962
963    // oss-fuzz: timeout in outlines, caused by cmap 12 iter
964    // ref: <https://issues.oss-fuzz.com/issues/394638728>
965    #[test]
966    fn cmap12_iter_avoid_timeout2() {
967        let cmap12_data = be_buffer! {
968            12u16,      // format
969            0u16,       // reserved, set to 0
970            0u32,       // length, ignored
971            0u32,       // language, ignored
972            3u32,       // numGroups
973            // groups: [startCode, endCode, startGlyphID]
974            [199u32, 16777271, 2],
975            [262u32, 262, 3],
976            [268u32, 268, 4]
977        };
978        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
979        // In the test case, maxp.numGlyphs = 8
980        const MAX_GLYPHS: u32 = 8;
981        let limits = CmapIterLimits {
982            glyph_count: MAX_GLYPHS,
983            ..Default::default()
984        };
985        assert_eq!(cmap12.iter_with_limits(limits).count(), MAX_GLYPHS as usize);
986    }
987
988    #[test]
989    fn cmap12_iter_glyph_limit() {
990        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
991        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
992        let mut limits = CmapIterLimits::default_for_font(&font);
993        // Ensure we obey the glyph count limit.
994        // This font has 11 glyphs
995        for glyph_count in 0..=11 {
996            limits.glyph_count = glyph_count;
997            assert_eq!(
998                cmap12.iter_with_limits(limits).count(),
999                // We always return one less than glyph count limit because
1000                // notdef is not mapped
1001                (glyph_count as usize).saturating_sub(1)
1002            );
1003        }
1004    }
1005
1006    #[test]
1007    fn cmap12_iter_range_clamping() {
1008        let data = be_buffer! {
1009            12u16,      // format
1010            0u16,       // reserved, set to 0
1011            0u32,       // length, ignored
1012            0u32,       // language, ignored
1013            2u32,       // numGroups
1014            // groups: [startCode, endCode, startGlyphID]
1015            [0u32, 16777215, 0], // group 0
1016            [255u32, 0xFFFFFFFF, 0] // group 1
1017        };
1018        let cmap12 = Cmap12::read(data.data().into()).unwrap();
1019        let ranges = cmap12
1020            .groups()
1021            .iter()
1022            .map(|group| (group.start_char_code(), group.end_char_code()))
1023            .collect::<Vec<_>>();
1024        // These groups overlap and extend to the whole u32 range
1025        assert_eq!(ranges, &[(0, 16777215), (255, u32::MAX)]);
1026        // But we produce at most char::MAX + 1 results
1027        let limits = CmapIterLimits {
1028            glyph_count: u32::MAX,
1029            ..Default::default()
1030        };
1031        assert!(cmap12.iter_with_limits(limits).count() <= char::MAX as usize + 1);
1032    }
1033
1034    #[test]
1035    fn cmap12_iter_explicit_notdef() {
1036        let data = be_buffer! {
1037            12u16,      // format
1038            0u16,       // reserved, set to 0
1039            0u32,       // length, ignored
1040            0u32,       // language, ignored
1041            1u32,       // numGroups
1042            // groups: [startCode, endCode, startGlyphID]
1043            [0_u32, 1_u32, 0] // group 0
1044        };
1045        let cmap12 = Cmap12::read(data.data().into()).unwrap();
1046        for (i, (codepoint, glyph_id)) in cmap12.iter().enumerate() {
1047            assert_eq!(codepoint as usize, i);
1048            assert_eq!(glyph_id.to_u32() as usize, i);
1049        }
1050        assert_eq!(cmap12.iter().next().unwrap().1, GlyphId::NOTDEF);
1051    }
1052
1053    fn cmap13_data() -> Vec<u8> {
1054        let data = be_buffer! {
1055            13u16,      // format
1056            0u16,       // reserved, set to 0
1057            0u32,       // length, ignored
1058            0u32,       // language, ignored
1059            2u32,       // numGroups
1060            // groups: [startCode, endCode, startGlyphID]
1061            [0u32, 8, 20], // group 0
1062            [42u32, 46u32, 30] // group 1
1063        };
1064        data.to_vec()
1065    }
1066
1067    #[test]
1068    fn cmap13_map() {
1069        let data = cmap13_data();
1070        let cmap13 = Cmap13::read(FontData::new(&data)).unwrap();
1071        for ch in 0u32..=8 {
1072            assert_eq!(cmap13.map_codepoint(ch), Some(GlyphId::new(20)));
1073        }
1074        for ch in 9u32..42 {
1075            assert_eq!(cmap13.map_codepoint(ch), None);
1076        }
1077        for ch in 42u32..=46 {
1078            assert_eq!(cmap13.map_codepoint(ch), Some(GlyphId::new(30)));
1079        }
1080        for ch in 47u32..1024 {
1081            assert_eq!(cmap13.map_codepoint(ch), None);
1082        }
1083    }
1084
1085    #[test]
1086    fn cmap13_iter() {
1087        let data = cmap13_data();
1088        let cmap13 = Cmap13::read(FontData::new(&data)).unwrap();
1089        for (ch, gid) in cmap13.iter() {
1090            assert_eq!(cmap13.map_codepoint(ch), Some(gid));
1091        }
1092    }
1093
1094    #[test]
1095    fn cmap14_iter() {
1096        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
1097        let cmap14 = find_cmap14(&font.cmap().unwrap()).unwrap();
1098        let mut count = 0;
1099        for (codepoint, selector, mapping) in cmap14.iter() {
1100            assert_eq!(cmap14.map_variant(codepoint, selector), Some(mapping));
1101            count += 1;
1102        }
1103        assert_eq!(count, 7);
1104    }
1105
1106    fn find_cmap4<'a>(cmap: &Cmap<'a>) -> Option<Cmap4<'a>> {
1107        cmap.encoding_records()
1108            .iter()
1109            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
1110            .find_map(|subtable| match subtable {
1111                CmapSubtable::Format4(cmap4) => Some(cmap4),
1112                _ => None,
1113            })
1114    }
1115
1116    fn find_cmap12<'a>(cmap: &Cmap<'a>) -> Option<Cmap12<'a>> {
1117        cmap.encoding_records()
1118            .iter()
1119            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
1120            .find_map(|subtable| match subtable {
1121                CmapSubtable::Format12(cmap12) => Some(cmap12),
1122                _ => None,
1123            })
1124    }
1125
1126    fn find_cmap14<'a>(cmap: &Cmap<'a>) -> Option<Cmap14<'a>> {
1127        cmap.encoding_records()
1128            .iter()
1129            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
1130            .find_map(|subtable| match subtable {
1131                CmapSubtable::Format14(cmap14) => Some(cmap14),
1132                _ => None,
1133            })
1134    }
1135
1136    /// <https://github.com/googlefonts/fontations/issues/1100>
1137    ///
1138    /// Note that this doesn't demonstrate the timeout, merely that we've eliminated the underlying
1139    /// enthusiasm for non-ascending ranges that enabled it
1140    #[test]
1141    fn cmap4_bad_data() {
1142        let buf = font_test_data::cmap::repetitive_cmap4();
1143        let cmap4 = Cmap4::read(FontData::new(buf.as_slice())).unwrap();
1144
1145        // we should have unique, ascending codepoints, not duplicates and overlaps
1146        assert_eq!(
1147            (6..=64).collect::<Vec<_>>(),
1148            cmap4.iter().map(|(cp, _)| cp).collect::<Vec<_>>()
1149        );
1150    }
1151
    /// Builds the raw bytes of a `cmap` table that contains a single
    /// format 0 subtable.
    ///
    /// Layout: a 4-byte table header (version, numTables), one 8-byte
    /// encoding record (platform 1, encoding 0) whose offset of 12 points
    /// just past the record, and then the format 0 subtable with a
    /// 256-entry glyph ID array.
    ///
    /// NOTE(review): the subtable `length` field is written as 274, which
    /// is the size of the whole buffer (12 + 262) rather than the 262 bytes
    /// of the format 0 subtable alone; confirm whether this is intentional.
    fn cmap0_data() -> BeBuffer {
        be_buffer! {
            // version
            0u16,
            // numTables
            1u16,
            // platformID
            1u16,
            // encodingID
            0u16,
            // subtableOffset
            12u32,
            // format
            0u16,
            // length
            274u16,
            // language
            0u16,
            // glyphIDArray
            // (256 one-byte entries, laid out ten per row)
            [0u8, 249, 32, 2, 198, 23, 1, 4, 26, 36,
            171, 168, 69, 151, 208, 238, 226, 153, 161, 138,
            160, 130, 169, 223, 162, 207, 146, 227, 111, 248,
            163, 79, 178, 27, 50, 234, 213, 57, 45, 63,
            103, 186, 30, 105, 131, 118, 35, 140, 51, 211,
            75, 172, 56, 71, 137, 99, 22, 76, 61, 125,
            39, 8, 177, 117, 108, 97, 202, 92, 49, 134,
            93, 43, 80, 66, 84, 54, 180, 113, 11, 176,
            229, 48, 47, 17, 124, 40, 119, 21, 13, 133,
            181, 224, 33, 128, 44, 46, 38, 24, 65, 152,
            197, 225, 102, 251, 157, 126, 182, 242, 28, 184,
            90, 170, 201, 144, 193, 189, 250, 142, 77, 221,
            81, 164, 154, 60, 37, 200, 12, 53, 219, 89,
            31, 209, 188, 179, 253, 220, 127, 18, 19, 64,
            20, 141, 98, 173, 55, 194, 70, 107, 228, 104,
            10, 9, 15, 217, 255, 222, 196, 236, 67, 165,
            5, 143, 149, 100, 91, 95, 135, 235, 145, 204,
            72, 114, 246, 82, 245, 233, 106, 158, 185, 212,
            86, 243, 16, 195, 123, 190, 120, 187, 132, 139,
            192, 239, 110, 183, 240, 214, 166, 41, 59, 231,
            42, 94, 244, 83, 121, 25, 215, 96, 73, 87,
            174, 136, 62, 206, 156, 175, 230, 150, 116, 147,
            68, 122, 78, 112, 6, 167, 232, 254, 52, 34,
            191, 85, 241, 14, 216, 155, 29, 101, 115, 210,
            252, 218, 129, 247, 203, 159, 109, 74, 7, 58,
            237, 199, 88, 205, 148, 3]
        }
    }
1199}