swash/internal/
cmap.rs

1//! Character to glyph mapping table.
2
3use super::{raw_tag, Array, Bytes, RawFont, RawTag, Stream};
4
/// Raw tag built from the bytes `cmap`; used to look up the character map
/// table's offset in a font.
pub const CMAP: RawTag = raw_tag(b"cmap");
6
7/// Finds a suitable character map subtable for the specified font.
8pub fn subtable<'a>(font: impl RawFont<'a>) -> Option<(u32, u8, bool)> {
9    let cmap = font.table_offset(CMAP);
10    if cmap == 0 {
11        return None;
12    }
13    let mut s = Stream::with_offset(font.data(), cmap as usize)?;
14    s.skip(2)?;
15    let len = s.read_u16()? as usize;
16    let b = Bytes::new(s.data());
17    let mut best = None;
18    for _ in 0..len {
19        let platform = s.read_u16()?;
20        let encoding = s.read_u16()?;
21        let offset = s.read_u32()?;
22        let format = b.read_u16(offset as usize)? as u8;
23        if format != 4 && format != 12 {
24            continue;
25        }
26        let offset = cmap.checked_add(offset)?;
27        if is_symbol(platform, encoding) {
28            return Some((offset, format, true));
29        } else if (format == 12 && is_unicode(platform, encoding))
30            || (best.is_none() && is_unicode(platform, encoding))
31        {
32            best = Some((offset, format, false));
33        }
34    }
35    best
36}
37
38/// Maps a codepoint to a glyph identifier.
39pub fn map(data: &[u8], subtable: u32, format: u8, codepoint: u32) -> Option<u16> {
40    if subtable == 0 {
41        return None;
42    }
43    let b = Bytes::with_offset(data, subtable as usize)?;
44    if format == 4 {
45        if codepoint >= 65535 {
46            return None;
47        }
48        let c = codepoint as u16;
49        let segcount_x2 = b.read_u16(6)? as usize;
50        let segcount = segcount_x2 / 2;
51        b.ensure_range(0, 16 + segcount_x2 * 4)?;
52        let end_codes_offset = 14;
53        let start_codes_offset = end_codes_offset + segcount_x2 + 2;
54        let mut l = 0;
55        let mut h = segcount;
56        while l < h {
57            let i = (l + h) / 2;
58            let i2 = i * 2;
59            let start = unsafe { b.read_unchecked::<u16>(start_codes_offset + i2) };
60            if c < start {
61                h = i;
62            } else if c > unsafe { b.read_unchecked::<u16>(end_codes_offset + i2) } {
63                l = i + 1;
64            } else {
65                let deltas_offset = start_codes_offset + segcount_x2;
66                let ranges_offset = deltas_offset + segcount_x2;
67                let mut range_base = ranges_offset + i2;
68                let range = unsafe { b.read_unchecked::<u16>(range_base) as usize };
69                let delta = unsafe { b.read_unchecked::<i16>(deltas_offset + i2) as i32 };
70                if range == 0 {
71                    return Some((codepoint as i32 + delta) as u16);
72                }
73                range_base += range;
74                let diff = (c - start) as usize * 2;
75                let id = b.read::<u16>(range_base + diff).unwrap_or(0);
76                return if id != 0 {
77                    Some((id as i32 + delta) as u16)
78                } else {
79                    Some(0)
80                };
81            }
82        }
83    } else if format == 12 {
84        let base = 16;
85        let len = b.read::<u32>(base - 4).unwrap_or(0) as usize;
86        b.ensure_range(base, len * 12)?;
87        let mut l = 0;
88        let mut h = len;
89        while l < h {
90            let i = (l + h) / 2;
91            let rec = base + i * 12;
92            let start = unsafe { b.read_unchecked::<u32>(rec) };
93            if codepoint < start {
94                h = i;
95            } else if codepoint > unsafe { b.read_unchecked::<u32>(rec + 4) } {
96                l = i + 1;
97            } else {
98                let delta = unsafe { b.read_unchecked::<u32>(rec + 8) };
99                return Some((codepoint - start + delta) as u16);
100            }
101        }
102    }
103    None
104}
105
106/// Enumerates all codepoint/glyph pairs in the table.
107pub fn enumerate(data: &[u8], subtable: u32, mut f: impl FnMut(u32, u16)) {
108    if subtable == 0 {
109        return;
110    }
111    let b = if let Some(b) = Bytes::with_offset(data, subtable as usize) {
112        b
113    } else {
114        return;
115    };
116    let format = b.read_or_default::<u16>(0);
117    if format == 4 {
118        let segcount_x2 = b.read::<u16>(6).unwrap_or(0) as usize;
119        let segcount = segcount_x2 / 2;
120        if !b.check_range(0, 16 + segcount_x2 * 4) {
121            return;
122        }
123        let end_code_offset = 14;
124        let start_code_offset = end_code_offset + segcount_x2 + 2;
125        let deltas_offset = start_code_offset + segcount_x2;
126        let ranges_offset = deltas_offset + segcount_x2;
127        let start_codes = b
128            .read_array::<u16>(start_code_offset, segcount)
129            .unwrap_or_else(|| Array::new(&[]));
130        let end_codes = b
131            .read_array::<u16>(end_code_offset, segcount)
132            .unwrap_or_else(|| Array::new(&[]));
133        let deltas = b
134            .read_array::<i16>(deltas_offset, segcount)
135            .unwrap_or_else(|| Array::new(&[]));
136        for (i, ((start, end), delta)) in start_codes
137            .iter()
138            .zip(end_codes.iter())
139            .zip(deltas.iter())
140            .enumerate()
141        {
142            let mut range_base = ranges_offset + i * 2;
143            if let Some(range) = b.read_u16(range_base) {
144                if range == 0 {
145                    for codepoint in start..=end {
146                        let id = (codepoint as i32 + delta as i32) as u16;
147                        if id != 0 {
148                            f(codepoint as u32, id);
149                        }
150                    }
151                } else {
152                    range_base += range as usize;
153                    for codepoint in start..=end {
154                        let diff = (codepoint - start) as usize * 2;
155                        if let Some(mut id) = b.read::<u16>(range_base + diff) {
156                            if id != 0 {
157                                id = (id as i32 + delta as i32) as u16;
158                                f(codepoint as u32, id);
159                            }
160                        }
161                    }
162                }
163            }
164        }
165    } else if format == 12 {
166        let base = 16;
167        let len = b.read::<u32>(base - 4).unwrap_or(0) as usize;
168        if !b.check_range(base, len * 12) {
169            return;
170        }
171        for i in 0..len {
172            let rec = base + i * 12;
173            let (start, end, offset) = unsafe {
174                (
175                    b.read_unchecked::<u32>(rec),
176                    b.read_unchecked::<u32>(rec + 4),
177                    b.read_unchecked::<u32>(rec + 8),
178                )
179            };
180            for codepoint in start..=end {
181                let id = (offset + codepoint - start) as u16;
182                if id != 0 {
183                    f(codepoint, id);
184                }
185            }
186        }
187    }
188}
189
/// Returns `true` when the platform/encoding pair is treated as Unicode:
/// any encoding on platform 0, or encoding 1 or 10 on platform 3.
fn is_unicode(platform: u16, encoding: u16) -> bool {
    match platform {
        0 => true,
        3 => encoding == 1 || encoding == 10,
        _ => false,
    }
}
193
/// Returns `true` only for the platform 3, encoding 0 pair, which is treated
/// as a symbol encoding.
fn is_symbol(platform: u16, encoding: u16) -> bool {
    matches!((platform, encoding), (3, 0))
}
197
/// Outcome of mapping a codepoint together with a variation selector.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum MapVariant {
    /// The sequence has no dedicated glyph; fall back to the default glyph
    /// mapping for the codepoint.
    UseDefault,
    /// The sequence maps to the contained glyph identifier.
    Variant(u16),
}
206
207/// Maps a codepoint with variation selector to a glyph identifier using the
208/// format 14 subtable at the specified offset in data.
209///
210/// <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-14-unicode-variation-sequences>
211pub fn map_variant(
212    data: &[u8],
213    offset: u32,
214    codepoint: u32,
215    variation_selector: u32,
216) -> Option<MapVariant> {
217    use core::cmp::Ordering;
218    let b = Bytes::with_offset(data, offset as usize)?;
219    let len = b.read_u32(6)? as usize;
220    let base = 10;
221    let mut lo = 0;
222    let mut hi = len;
223    let mut default_uvs_offset = 0;
224    let mut non_default_uvs_offset = 0;
225    while lo < hi {
226        let i = (lo + hi) / 2;
227        let rec = base + i * 11;
228        let vs = b.read_u24(rec)?;
229        match variation_selector.cmp(&vs) {
230            Ordering::Less => hi = i,
231            Ordering::Greater => lo = i + 1,
232            Ordering::Equal => {
233                default_uvs_offset = b.read_u32(rec + 3)? as usize;
234                non_default_uvs_offset = b.read_u32(rec + 7)? as usize;
235                break;
236            }
237        }
238    }
239    if default_uvs_offset != 0 {
240        let base = default_uvs_offset;
241        let len = b.read_u32(base)? as usize;
242        let mut lo = 0;
243        let mut hi = len;
244        while lo < hi {
245            let i = (lo + hi) / 2;
246            let rec = base + 4 + i * 4;
247            let start = b.read_u24(rec)?;
248            if codepoint < start {
249                hi = i;
250            } else if codepoint > (start + b.read_u8(rec + 3)? as u32) {
251                lo = i + 1;
252            } else {
253                // Fallback to standard mapping.
254                return Some(MapVariant::UseDefault);
255            }
256        }
257    }
258    if non_default_uvs_offset != 0 {
259        let base = non_default_uvs_offset;
260        let len = b.read_u32(base)? as usize;
261        let mut lo = 0;
262        let mut hi = len;
263        while lo < hi {
264            let i = (lo + hi) / 2;
265            let rec = base + 4 + i * 5;
266            let value = b.read_u24(rec)?;
267            match codepoint.cmp(&value) {
268                Ordering::Less => hi = i,
269                Ordering::Greater => lo = i + 1,
270                Ordering::Equal => return Some(MapVariant::Variant(b.read_u16(rec + 3)?)),
271            }
272        }
273    }
274    None
275}