swash/
string.rs

1/*!
2Localized names and other metadata.
3*/
4
5use core::fmt::Write;
6
7use super::internal::*;
8use super::FontRef;
9
/// Tag of the `name` table; passed to `FontRef::table_data` to locate the
/// data that holds the localized strings.
const NAME: RawTag = raw_tag(b"name");
11
/// Identifier for well-known localized strings in a font.
///
/// Each named variant corresponds to one fixed raw `u16` value; every other
/// raw value is carried verbatim by [`StringId::Other`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum StringId {
    /// Raw identifier 0.
    Copyright,
    /// Raw identifier 1.
    Family,
    /// Raw identifier 2.
    SubFamily,
    /// Raw identifier 3.
    UniqueId,
    /// Raw identifier 4.
    Full,
    /// Raw identifier 5.
    Version,
    /// Raw identifier 6.
    PostScript,
    /// Raw identifier 7.
    Trademark,
    /// Raw identifier 8.
    Manufacturer,
    /// Raw identifier 9.
    Designer,
    /// Raw identifier 10.
    Description,
    /// Raw identifier 11.
    VendorUrl,
    /// Raw identifier 12.
    DesignerUrl,
    /// Raw identifier 13.
    License,
    /// Raw identifier 14.
    LicenseUrl,
    /// Raw identifier 16.
    TypographicFamily,
    /// Raw identifier 17.
    TypographicSubFamily,
    /// Raw identifier 18.
    CompatibleFull,
    /// Raw identifier 19.
    SampleText,
    /// Raw identifier 20.
    PostScriptCid,
    /// Raw identifier 21.
    WwsFamily,
    /// Raw identifier 22.
    WwsSubFamily,
    /// Raw identifier 23.
    LightBackgroundPalette,
    /// Raw identifier 24.
    DarkBackgroundPalette,
    /// Raw identifier 25.
    VariationsPostScriptNamePrefix,
    /// Any raw identifier without a dedicated variant (including 15).
    Other(u16),
}

impl StringId {
    /// Converts a raw identifier into a `StringId`.
    ///
    /// Values 0 through 14 and 16 through 25 map to named variants; anything
    /// else (15 included) is wrapped in [`StringId::Other`].
    pub fn from_raw(value: u16) -> Self {
        match value {
            0 => Self::Copyright,
            1 => Self::Family,
            2 => Self::SubFamily,
            3 => Self::UniqueId,
            4 => Self::Full,
            5 => Self::Version,
            6 => Self::PostScript,
            7 => Self::Trademark,
            8 => Self::Manufacturer,
            9 => Self::Designer,
            10 => Self::Description,
            11 => Self::VendorUrl,
            12 => Self::DesignerUrl,
            13 => Self::License,
            14 => Self::LicenseUrl,
            16 => Self::TypographicFamily,
            17 => Self::TypographicSubFamily,
            18 => Self::CompatibleFull,
            19 => Self::SampleText,
            20 => Self::PostScriptCid,
            21 => Self::WwsFamily,
            22 => Self::WwsSubFamily,
            23 => Self::LightBackgroundPalette,
            24 => Self::DarkBackgroundPalette,
            25 => Self::VariationsPostScriptNamePrefix,
            unknown => Self::Other(unknown),
        }
    }

    /// Returns the raw identifier for this `StringId`.
    ///
    /// This is the inverse of [`StringId::from_raw`]; `Other` yields the value
    /// it wraps.
    pub fn to_raw(self) -> u16 {
        match self {
            Self::Copyright => 0,
            Self::Family => 1,
            Self::SubFamily => 2,
            Self::UniqueId => 3,
            Self::Full => 4,
            Self::Version => 5,
            Self::PostScript => 6,
            Self::Trademark => 7,
            Self::Manufacturer => 8,
            Self::Designer => 9,
            Self::Description => 10,
            Self::VendorUrl => 11,
            Self::DesignerUrl => 12,
            Self::License => 13,
            Self::LicenseUrl => 14,
            Self::TypographicFamily => 16,
            Self::TypographicSubFamily => 17,
            Self::CompatibleFull => 18,
            Self::SampleText => 19,
            Self::PostScriptCid => 20,
            Self::WwsFamily => 21,
            Self::WwsSubFamily => 22,
            Self::LightBackgroundPalette => 23,
            Self::DarkBackgroundPalette => 24,
            Self::VariationsPostScriptNamePrefix => 25,
            Self::Other(raw) => raw,
        }
    }
}
108
/// Iterator over a collection of localized strings.
#[derive(Copy, Clone)]
pub struct LocalizedStrings<'a> {
    /// Raw bytes of the table the string records are read from.
    data: Bytes<'a>,
    /// Number of string records, read as a `u16` at byte offset 2 of `data`.
    len: usize,
    /// Initialized to 0 and not otherwise touched by the code in this file.
    /// NOTE(review): presumably the iteration cursor maintained by
    /// `impl_iter!` — confirm against the macro definition.
    pos: usize,
}
116
117impl<'a> LocalizedStrings<'a> {
118    pub(crate) fn new(data: &'a [u8]) -> Self {
119        let data = Bytes::new(data);
120        let len = data.read_or_default::<u16>(2) as usize;
121        Self { data, len, pos: 0 }
122    }
123
124    pub(crate) fn from_font(font: &FontRef<'a>) -> Self {
125        Self::new(font.table_data(NAME).unwrap_or(&[]))
126    }
127
128    /// Searches for a string with the specified identifier, and if specified,
129    /// language.
130    ///
131    /// ## Iteration behavior
132    /// This function searches the entire string collection without regard
133    /// for the current state of the iterator.
134    pub fn find_by_id(&self, id: StringId, language: Option<&str>) -> Option<LocalizedString<'a>> {
135        let mut first = None;
136        let mut best = None;
137        let raw_id = id.to_raw();
138        for i in 0..self.len() {
139            let rec = match self.get(i) {
140                Some(rec) => rec,
141                _ => continue,
142            };
143            if rec.raw_id() != raw_id {
144                continue;
145            }
146            if first.is_none() {
147                first = Some(rec);
148            }
149            let encoding = rec.encoding();
150            if let Some(lang) = language {
151                if rec.language().starts_with(lang) {
152                    if encoding == Encoding::Unicode {
153                        return Some(rec);
154                    } else if encoding.is_decodable() {
155                        best = Some(rec);
156                    }
157                }
158            } else if rec.language() == "" {
159                if encoding == Encoding::Unicode {
160                    return Some(rec);
161                } else if encoding.is_decodable() {
162                    best = Some(rec);
163                }
164            }
165        }
166        if best.is_some() {
167            best
168        } else if language.is_none() {
169            first
170        } else {
171            None
172        }
173    }
174
175    /// Returns the string at the specified index.
176    fn get(&self, index: usize) -> Option<LocalizedString<'a>> {
177        if index >= self.len {
178            return None;
179        }
180        let b = &self.data;
181        let offset = 6 + index * 12;
182        b.ensure_range(offset, 12)?;
183        Some(LocalizedString {
184            data: *b,
185            storage: b.read_or_default::<u16>(4) as usize,
186            offset,
187        })
188    }
189}
190
// NOTE(review): `impl_iter!` is defined outside this file. It presumably
// implements `Iterator` for `LocalizedStrings` with `LocalizedString` items,
// built on the `get` method and the `len`/`pos` fields — confirm.
impl_iter!(LocalizedStrings, LocalizedString);
192
/// Represents a single localized string in a font.
///
/// Localized strings contain an [identifier](StringId) that describes the
/// content of the string (such as family name, copyright notice, sample text, etc),
/// a language that specifies the audience for which the string is intended and
/// some encoded data containing the value of the string. A string with a
/// particular identifier can appear multiple times in a font with various
/// languages and encodings.
#[derive(Copy, Clone)]
pub struct LocalizedString<'a> {
    /// Raw bytes of the whole table this record belongs to.
    data: Bytes<'a>,
    /// Base offset added to the record's own string offset to locate the
    /// string bytes; read as a `u16` at byte offset 4 of `data`.
    storage: usize,
    /// Byte offset of this string's 12-byte record within `data`.
    offset: usize,
}
207
208impl<'a> LocalizedString<'a> {
209    /// Returns the string identifier.
210    pub fn id(&self) -> StringId {
211        StringId::from_raw(self.raw_id())
212    }
213
214    /// Returns the language of the string.
215    pub fn language(&self) -> &str {
216        get_language(self.platform_id(), self.language_id())
217    }
218
219    /// Returns true if the encoding for the string is unicode.
220    pub fn is_unicode(&self) -> bool {
221        self.encoding() == Encoding::Unicode
222    }
223
224    /// Returns true if the string can be decoded.
225    pub fn is_decodable(&self) -> bool {
226        self.encoding().is_decodable()
227    }
228
229    /// Returns an iterator over the sequence of characters representing the
230    /// decoded string if the encoding is known. Will generate an empty string
231    /// otherwise.
232    pub fn chars(&self) -> Chars<'a> {
233        let encoding = self.encoding();
234        if !encoding.is_decodable() {
235            return Chars {
236                record: *self,
237                bytes: &[],
238                encoding,
239                offset: 0,
240                len: 0,
241                cur: 0,
242            };
243        }
244        let len = self.data.read_or_default::<u16>(self.offset + 8) as usize;
245        let offset = self.data.read_or_default::<u16>(self.offset + 10) as usize + self.storage;
246        Chars {
247            record: *self,
248            bytes: if encoding == Encoding::MacRoman {
249                self.bytes().unwrap_or(&[])
250            } else {
251                &[]
252            },
253            encoding,
254            offset,
255            len,
256            cur: 0,
257        }
258    }
259
260    fn raw_id(&self) -> u16 {
261        self.data.read::<u16>(self.offset + 6).unwrap_or(0xFFFF)
262    }
263
264    fn platform_id(&self) -> u16 {
265        self.data.read_or_default::<u16>(self.offset)
266    }
267
268    fn encoding_id(&self) -> u16 {
269        self.data.read_or_default::<u16>(self.offset + 2)
270    }
271
272    fn language_id(&self) -> u16 {
273        self.data.read_or_default::<u16>(self.offset + 4)
274    }
275
276    fn encoding(&self) -> Encoding {
277        Encoding::from_raw_parts(self.platform_id(), self.encoding_id())
278    }
279
280    fn bytes(&self) -> Option<&'a [u8]> {
281        let len = self.data.read::<u16>(self.offset + 8)? as usize;
282        let offset = self.data.read::<u16>(self.offset + 10)? as usize + self.storage;
283        self.data.read_bytes(offset, len)
284    }
285}
286
287impl<'a> core::fmt::Display for LocalizedString<'a> {
288    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
289        for c in self.chars() {
290            f.write_char(c)?;
291        }
292        Ok(())
293    }
294}
295
/// Iterator over the characters in a localized string.
///
/// Created by [`LocalizedString::chars`].
#[derive(Copy, Clone)]
pub struct Chars<'a> {
    /// Record whose table data is read when decoding `Encoding::Unicode`.
    record: LocalizedString<'a>,
    /// Encoded string bytes, populated only for `Encoding::MacRoman`; empty
    /// for every other encoding.
    bytes: &'a [u8],
    /// Encoding that selects the decoding branch in `next`.
    encoding: Encoding,
    /// Byte offset of the string within the table data.
    offset: usize,
    /// Length of the encoded string in bytes.
    len: usize,
    /// Number of bytes consumed so far, relative to `offset`.
    cur: usize,
}
306
307impl<'a> Iterator for Chars<'a> {
308    type Item = char;
309
310    fn next(&mut self) -> Option<Self::Item> {
311        if self.cur >= self.len {
312            return None;
313        }
314        use core::char::from_u32;
315        let rep = core::char::REPLACEMENT_CHARACTER;
316        let d = &self.record.data;
317        match self.encoding {
318            Encoding::Unicode => {
319                let mut c = d.read::<u16>(self.offset + self.cur)? as u32;
320                self.cur += 2;
321                if (0xD800..0xDC00).contains(&c) {
322                    let c2 = d.read::<u16>(self.offset + self.cur)? as u32;
323                    self.cur += 2;
324                    c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000;
325                }
326                Some(from_u32(c).unwrap_or(rep))
327            }
328            Encoding::MacRoman => {
329                let c = self.bytes[self.cur] as u32;
330                self.cur += 1;
331                if c > 127 {
332                    let idx = c as usize - 128;
333                    Some(from_u32(MAC_ROMAN[idx] as u32).unwrap_or(rep))
334                } else {
335                    Some(from_u32(c).unwrap_or(rep))
336                }
337            }
338            _ => None,
339        }
340    }
341}
342
343impl<'a> IntoIterator for LocalizedString<'a> {
344    type IntoIter = Chars<'a>;
345    type Item = char;
346
347    fn into_iter(self) -> Self::IntoIter {
348        self.chars()
349    }
350}
351
/// Encoding of a localized string.
///
/// Fonts can contain a variety of platform specific and legacy encodings.
/// Only the ones we decode are listed here; everything else is kept as a raw
/// platform/encoding identifier pair.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Encoding {
    /// Decoded as 16-bit Unicode code units.
    Unicode,
    /// Decoded as single bytes using the Mac Roman table.
    MacRoman,
    /// An encoding that is not decoded, identified by its raw identifiers.
    Other { platform_id: u16, encoding_id: u16 },
}

impl Encoding {
    /// Classifies a raw platform/encoding identifier pair.
    ///
    /// Platform 0 (any encoding) and platform 3 with encoding 0, 1 or 10 are
    /// `Unicode`; platform 1 with encoding 0 is `MacRoman`; every other pair
    /// is preserved in `Other`.
    pub(crate) fn from_raw_parts(platform_id: u16, encoding_id: u16) -> Self {
        match platform_id {
            0 => Self::Unicode,
            1 if encoding_id == 0 => Self::MacRoman,
            3 if matches!(encoding_id, 0 | 1 | 10) => Self::Unicode,
            _ => Self::Other {
                platform_id,
                encoding_id,
            },
        }
    }

    /// Returns true if this encoding can be turned into a string.
    pub fn is_decodable(&self) -> bool {
        match *self {
            Self::Unicode | Self::MacRoman => true,
            Self::Other { .. } => false,
        }
    }
}
383
/// Code points for the upper half of the Mac Roman encoding.
///
/// `Chars::next` indexes this table with `byte - 128` for every MacRoman byte
/// greater than 127, so entry 0 corresponds to byte 128 and entry 127 to
/// byte 255. Values are stored in decimal.
#[rustfmt::skip]
const MAC_ROMAN: [u16; 128] = [
    196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233,
    232, 234, 235, 237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249,
    251, 252, 8224, 176, 162, 163, 167, 8226, 182, 223, 174, 169, 8482, 180,
    168, 8800, 198, 216, 8734, 177, 8804, 8805, 165, 181, 8706, 8721, 8719,
    960, 8747, 170, 186, 937, 230, 248, 191, 161, 172, 8730, 402, 8776, 8710,
    171, 187, 8230, 160, 192, 195, 213, 338, 339, 8211, 8212, 8220, 8221, 8216,
    8217, 247, 9674, 255, 376, 8260, 8364, 8249, 8250, 64257, 64258, 8225, 183,
    8218, 8222, 8240, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212,
    63743, 210, 218, 219, 217, 305, 710, 732, 175, 728, 729, 730, 184, 733,
    731, 711,
];
397
/// Maps a combined `(platform_id << 16) | language_id` key to a language tag.
///
/// `get_language` looks entries up with a binary search, so the keys must
/// stay in ascending order. Keys whose upper half is `0x1` belong to
/// platform 1 and keys whose upper half is `0x3` belong to platform 3.
///
/// NOTE(review): a few keys appear more than once (`0x10031`, `0x10039` and
/// `0x30851`). The first two repeat the same tag, but `0x30851` maps to both
/// "bo" and "dz", so the tag returned for it depends on where the binary
/// search lands — confirm the intended value.
#[rustfmt::skip]
const LANGUAGES: [(u32, &str); 334] = [
    (0x10000, "en"), (0x10001, "fr"), (0x10002, "de"), (0x10003, "it"), (0x10004, "nl"),
    (0x10005, "sv"), (0x10006, "es"), (0x10007, "da"), (0x10008, "pt"), (0x10009, "no"),
    (0x1000A, "he"), (0x1000B, "ja"), (0x1000C, "ar"), (0x1000D, "fi"), (0x1000E, "el"),
    (0x1000F, "is"), (0x10010, "mt"), (0x10011, "tr"), (0x10012, "hr"), (0x10013, "zh-tw"),
    (0x10014, "ur"), (0x10015, "hi"), (0x10016, "th"), (0x10017, "ko"), (0x10018, "lt"),
    (0x10019, "pl"), (0x1001A, "hu"), (0x1001B, "et"), (0x1001C, "lv"), (0x1001E, "fo"),
    (0x1001F, "fa"), (0x10020, "ru"), (0x10021, "zh-cn"), (0x10022, "nl"), (0x10023, "ga"),
    (0x10024, "sq"), (0x10025, "ro"), (0x10026, "cs"), (0x10027, "sk"), (0x10028, "sl"),
    (0x10029, "yi"), (0x1002A, "sr"), (0x1002B, "mk"), (0x1002C, "bg"), (0x1002D, "uk"),
    (0x1002E, "be"), (0x1002F, "uz"), (0x10030, "kk"), (0x10031, "az"), (0x10031, "az"),
    (0x10032, "ar"), (0x10033, "hy"), (0x10034, "ka"), (0x10035, "mo"), (0x10036, "ky"),
    (0x10037, "tg"), (0x10038, "tk"), (0x10039, "mn"), (0x10039, "mn"), (0x1003A, "mn"),
    (0x1003B, "ps"), (0x1003C, "ku"), (0x1003D, "ks"), (0x1003E, "sd"), (0x1003F, "bo"),
    (0x10040, "ne"), (0x10041, "sa"), (0x10042, "mr"), (0x10043, "bn"), (0x10044, "as"),
    (0x10045, "gu"), (0x10046, "pa"), (0x10047, "or"), (0x10048, "ml"), (0x10049, "kn"),
    (0x1004A, "ta"), (0x1004B, "te"), (0x1004C, "si"), (0x1004D, "my"), (0x1004E, "km"),
    (0x1004F, "lo"), (0x10050, "vi"), (0x10051, "id"), (0x10052, "tl"), (0x10053, "ms"),
    (0x10054, "ms"), (0x10055, "am"), (0x10056, "ti"), (0x10057, "om"), (0x10058, "so"),
    (0x10059, "sw"), (0x1005A, "rw"), (0x1005B, "rn"), (0x1005C, "ny"), (0x1005D, "mg"),
    (0x1005E, "eo"), (0x10080, "cy"), (0x10081, "eu"), (0x10082, "ca"), (0x10083, "la"),
    (0x10084, "qu"), (0x10085, "gn"), (0x10086, "ay"), (0x10087, "tt"), (0x10088, "ug"),
    (0x10089, "dz"), (0x1008A, "jw"), (0x1008B, "su"), (0x1008C, "gl"), (0x1008D, "af"),
    (0x1008E, "br"), (0x1008F, "iu"), (0x10090, "gd"), (0x10091, "gv"), (0x10092, "ga"),
    (0x10093, "to"), (0x10094, "el"), (0x10095, "ik"), (0x10096, "az"), (0x30001, "ar"),
    (0x30004, "zh"), (0x30009, "en"), (0x30401, "ar"), (0x30402, "bg"), (0x30403, "ca"),
    (0x30404, "zh-tw"), (0x30405, "cs"), (0x30406, "da"), (0x30407, "de"), (0x30408, "el"),
    (0x30409, "en"), (0x3040A, "es"), (0x3040B, "fi"), (0x3040C, "fr"), (0x3040D, "he"),
    (0x3040E, "hu"), (0x3040F, "is"), (0x30410, "it"), (0x30411, "ja"), (0x30412, "ko"),
    (0x30413, "nl"), (0x30414, "no"), (0x30415, "pl"), (0x30416, "pt"), (0x30417, "rm"),
    (0x30418, "ro"), (0x30419, "ru"), (0x3041A, "hr"), (0x3041B, "sk"), (0x3041C, "sq"),
    (0x3041D, "sv"), (0x3041E, "th"), (0x3041F, "tr"), (0x30420, "ur"), (0x30421, "id"),
    (0x30422, "uk"), (0x30423, "be"), (0x30424, "sl"), (0x30425, "et"), (0x30426, "lv"),
    (0x30427, "lt"), (0x30428, "tg"), (0x30429, "fa"), (0x3042A, "vi"), (0x3042B, "hy"),
    (0x3042C, "az"), (0x3042D, "eu"), (0x3042E, "wen"), (0x3042F, "mk"), (0x30430, "st"),
    (0x30431, "ts"), (0x30432, "tn"), (0x30433, "ven"), (0x30434, "xh"), (0x30435, "zu"),
    (0x30436, "af"), (0x30437, "ka"), (0x30438, "fo"), (0x30439, "hi"), (0x3043A, "mt"),
    (0x3043B, "se"), (0x3043C, "ga"), (0x3043D, "yi"), (0x3043E, "ms"), (0x3043F, "kk"),
    (0x30440, "ky"), (0x30441, "sw"), (0x30442, "tk"), (0x30443, "uz"), (0x30444, "tt"),
    (0x30445, "bn"), (0x30446, "pa"), (0x30447, "gu"), (0x30448, "or"), (0x30449, "ta"),
    (0x3044A, "te"), (0x3044B, "kn"), (0x3044C, "ml"), (0x3044D, "as"), (0x3044E, "mr"),
    (0x3044F, "sa"), (0x30450, "mn"), (0x30451, "bo"), (0x30452, "cy"), (0x30453, "km"),
    (0x30454, "lo"), (0x30455, "my"), (0x30456, "gl"), (0x30457, "kok"), (0x30458, "mni"),
    (0x30459, "sd"), (0x3045A, "syr"), (0x3045B, "si"), (0x3045C, "chr"), (0x3045D, "iu"),
    (0x3045E, "am"), (0x30460, "ks"), (0x30461, "ne"), (0x30462, "fy"), (0x30463, "ps"),
    (0x30464, "phi"), (0x30465, "div"), (0x30468, "ha"), (0x3046A, "yo"), (0x30470, "ibo"),
    (0x30471, "kau"), (0x30472, "om"), (0x30473, "ti"), (0x30474, "gn"), (0x30475, "haw"),
    (0x30476, "la"), (0x30477, "so"), (0x30479, "pap"), (0x30481, "mi"), (0x30801, "ar"),
    (0x30804, "zh-cn"), (0x30807, "de"), (0x30809, "en"), (0x3080A, "es"), (0x3080C, "fr"),
    (0x30810, "it"), (0x30812, "ko"), (0x30813, "nl"), (0x30814, "nn"), (0x30816, "pt"),
    (0x30818, "mo"), (0x30819, "ru"), (0x3081A, "sr"), (0x3081D, "sv"), (0x30820, "ur"),
    (0x30827, "lt"), (0x3082C, "az"), (0x3083C, "gd"), (0x3083E, "ms"), (0x30843, "uz"),
    (0x30845, "bn"), (0x30846, "ar"), (0x30850, "mn"), (0x30851, "bo"), (0x30851, "dz"),
    (0x30860, "ks"), (0x30861, "ne"), (0x30873, "ti"), (0x30C01, "ar"), (0x30C04, "zh-hk"),
    (0x30C07, "de"), (0x30C09, "en"), (0x30C0A, "es"), (0x30C0C, "fr"), (0x30C1A, "sr"),
    (0x31001, "ar"), (0x31004, "zh-sg"), (0x31007, "de"), (0x31009, "en"), (0x3100A, "es"),
    (0x3100C, "fr"), (0x31401, "ar"), (0x31404, "zh-mo"), (0x31407, "de"), (0x31409, "en"),
    (0x3140A, "es"), (0x3140C, "fr"), (0x3141A, "bs"), (0x31801, "ar"), (0x31809, "en"),
    (0x3180A, "es"), (0x3180C, "fr"), (0x31C01, "ar"), (0x31C09, "en"), (0x31C0A, "es"),
    (0x31C0C, "fr"), (0x32001, "ar"), (0x32009, "en"), (0x3200A, "es"), (0x3200C, "fr"),
    (0x32401, "ar"), (0x32409, "en"), (0x3240A, "es"), (0x3240C, "fr"), (0x32801, "ar"),
    (0x32809, "en"), (0x3280A, "es"), (0x3280C, "fr"), (0x32C01, "ar"), (0x32C09, "en"),
    (0x32C0A, "es"), (0x32C0C, "fr"), (0x33001, "ar"), (0x33009, "en"), (0x3300A, "es"),
    (0x3300C, "fr"), (0x33401, "ar"), (0x33409, "en"), (0x3340A, "es"), (0x3340C, "fr"),
    (0x33801, "ar"), (0x3380A, "es"), (0x3380C, "fr"), (0x33C01, "ar"), (0x33C09, "en"),
    (0x33C0A, "es"), (0x33C0C, "fr"), (0x34001, "ar"), (0x34009, "en"), (0x3400A, "es"),
    (0x34409, "en"), (0x3440A, "es"), (0x34809, "en"), (0x3480A, "es"), (0x34C0A, "es"),
    (0x3500A, "es"), (0x3540A, "es"), (0x3E40A, "es"), (0x3E40C, "fr"),
];
468
469fn get_language(platform_id: u16, language_id: u16) -> &'static str {
470    match platform_id {
471        0 => "",
472        1 | 3 => {
473            let key = (platform_id as u32) << 16 | language_id as u32;
474            if let Ok(idx) = LANGUAGES.binary_search_by(|x| x.0.cmp(&key)) {
475                LANGUAGES[idx].1
476            } else {
477                "zz"
478            }
479        }
480        _ => "zz",
481    }
482}