rustybuzz/hb/
tag.rs

1use core::str::FromStr;
2
3use smallvec::SmallVec;
4
5use super::common::TagExt;
6use super::{hb_tag_t, script, tag_table, Language, Script};
7
8type ThreeTags = SmallVec<[hb_tag_t; 3]>;
9
/// Capacity helpers for `SmallVec`, measured against its inline storage size.
trait SmallVecExt {
    /// Number of elements that can still be added before the length reaches
    /// the inline storage size.
    fn left(&self) -> usize;
    /// Whether the length has reached the inline storage size.
    fn is_full(&self) -> bool;
}
14
15impl<A: smallvec::Array> SmallVecExt for SmallVec<A> {
16    fn left(&self) -> usize {
17        self.inline_size() - self.len()
18    }
19
20    fn is_full(&self) -> bool {
21        self.len() == self.inline_size()
22    }
23}
24
25/// Converts an `Script` and an `Language` to script and language tags.
26pub fn tags_from_script_and_language(
27    script: Option<Script>,
28    language: Option<&Language>,
29) -> (ThreeTags, ThreeTags) {
30    let mut needs_script = true;
31    let mut scripts = SmallVec::new();
32    let mut languages = SmallVec::new();
33
34    let mut private_use_subtag = None;
35    let mut prefix = "";
36    if let Some(language) = language {
37        let language = language.as_str();
38        if language.starts_with("x-") {
39            private_use_subtag = Some(language);
40        } else {
41            let bytes = language.as_bytes();
42            let mut i = 1;
43            while i < bytes.len() {
44                if bytes.get(i - 1) == Some(&b'-') && bytes.get(i + 1) == Some(&b'-') {
45                    if bytes[i] == b'x' {
46                        private_use_subtag = Some(&language[i..]);
47                        if prefix.is_empty() {
48                            prefix = &language[..i - 1];
49                        }
50
51                        break;
52                    } else {
53                        prefix = &language[..i - 1];
54                    }
55                }
56
57                i += 1;
58            }
59
60            if prefix.is_empty() {
61                prefix = &language[..i];
62            }
63        }
64
65        needs_script = !parse_private_use_subtag(
66            private_use_subtag,
67            "-hbsc",
68            u8::to_ascii_lowercase,
69            &mut scripts,
70        );
71
72        let needs_language = !parse_private_use_subtag(
73            private_use_subtag,
74            "-hbot",
75            u8::to_ascii_uppercase,
76            &mut languages,
77        );
78
79        if needs_language {
80            if let Ok(prefix) = Language::from_str(prefix) {
81                tags_from_language(&prefix, &mut languages);
82            }
83        }
84    }
85
86    if needs_script {
87        all_tags_from_script(script, &mut scripts);
88    }
89
90    (scripts, languages)
91}
92
93fn parse_private_use_subtag(
94    private_use_subtag: Option<&str>,
95    prefix: &str,
96    normalize: fn(&u8) -> u8,
97    tags: &mut ThreeTags,
98) -> bool {
99    let private_use_subtag = match private_use_subtag {
100        Some(v) => v,
101        None => return false,
102    };
103
104    let private_use_subtag = match private_use_subtag.find(prefix) {
105        Some(idx) => &private_use_subtag[idx + prefix.len()..],
106        None => return false,
107    };
108
109    let mut tag = SmallVec::<[u8; 4]>::new();
110    for c in private_use_subtag.bytes().take(4) {
111        if c.is_ascii_alphanumeric() {
112            tag.push((normalize)(&c));
113        } else {
114            break;
115        }
116    }
117
118    if tag.is_empty() {
119        return false;
120    }
121
122    let mut tag = hb_tag_t::from_bytes_lossy(tag.as_slice());
123
124    // Some bits magic from HarfBuzz...
125    if tag.as_u32() & 0xDFDFDFDF == hb_tag_t::default_script().as_u32() {
126        tag = hb_tag_t(tag.as_u32() ^ !0xDFDFDFDF);
127    }
128
129    tags.push(tag);
130
131    true
132}
133
/// Orders two language strings by their leading subtag.
///
/// Both strings are cut to the same byte length, the longer of the two
/// first subtags (text before the first `-`, or the whole string), or to
/// their own length if shorter, and the cut pieces are compared.
fn lang_cmp(s1: &str, s2: &str) -> core::cmp::Ordering {
    let first_subtag_len = |s: &str| s.find('-').unwrap_or(s.len());

    let span = first_subtag_len(s1).max(first_subtag_len(s2));
    let lhs = &s1[..span.min(s1.len())];
    let rhs = &s2[..span.min(s2.len())];

    lhs.cmp(rhs)
}
142
143fn tags_from_language(language: &Language, tags: &mut ThreeTags) {
144    let language = language.as_str();
145
146    // Check for matches of multiple subtags.
147    if tag_table::tags_from_complex_language(language, tags) {
148        return;
149    }
150
151    let mut sublang = language;
152
153    // Find a language matching in the first component.
154    if let Some(i) = language.find('-') {
155        // If there is an extended language tag, use it.
156        if language.len() >= 6 {
157            let extlang = match language[i + 1..].find('-') {
158                Some(idx) => idx == 3,
159                None => language.len() - i - 1 == 3,
160            };
161
162            if extlang && language.as_bytes()[i + 1].is_ascii_alphabetic() {
163                sublang = &language[i + 1..];
164            }
165        }
166    }
167
168    use tag_table::OPEN_TYPE_LANGUAGES as LANGUAGES;
169
170    if let Ok(mut idx) = LANGUAGES.binary_search_by(|v| lang_cmp(v.language, sublang)) {
171        while idx != 0 && LANGUAGES[idx].language == LANGUAGES[idx - 1].language {
172            idx -= 1;
173        }
174
175        let len = core::cmp::min(tags.left(), LANGUAGES.len() - idx - 1);
176        for i in 0..len {
177            if LANGUAGES[idx + i].language != LANGUAGES[idx].language {
178                break;
179            }
180
181            if LANGUAGES[idx + i].tag.is_null() {
182                break;
183            }
184
185            if tags.is_full() {
186                break;
187            }
188
189            tags.push(LANGUAGES[idx + i].tag);
190        }
191
192        return;
193    }
194
195    if language.len() == 3 {
196        tags.push(hb_tag_t::from_bytes_lossy(language.as_bytes()).to_uppercase());
197    }
198}
199
200fn all_tags_from_script(script: Option<Script>, tags: &mut ThreeTags) {
201    if let Some(script) = script {
202        if let Some(tag) = new_tag_from_script(script) {
203            // Script::Myanmar maps to 'mym2', but there is no 'mym3'.
204            if tag != hb_tag_t::from_bytes(b"mym2") {
205                let mut tag3 = tag.to_bytes();
206                tag3[3] = b'3';
207                tags.push(hb_tag_t::from_bytes(&tag3));
208            }
209
210            if !tags.is_full() {
211                tags.push(tag);
212            }
213        }
214
215        if !tags.is_full() {
216            tags.push(old_tag_from_script(script));
217        }
218    }
219}
220
221fn new_tag_from_script(script: Script) -> Option<hb_tag_t> {
222    match script {
223        script::BENGALI => Some(hb_tag_t::from_bytes(b"bng2")),
224        script::DEVANAGARI => Some(hb_tag_t::from_bytes(b"dev2")),
225        script::GUJARATI => Some(hb_tag_t::from_bytes(b"gjr2")),
226        script::GURMUKHI => Some(hb_tag_t::from_bytes(b"gur2")),
227        script::KANNADA => Some(hb_tag_t::from_bytes(b"knd2")),
228        script::MALAYALAM => Some(hb_tag_t::from_bytes(b"mlm2")),
229        script::ORIYA => Some(hb_tag_t::from_bytes(b"ory2")),
230        script::TAMIL => Some(hb_tag_t::from_bytes(b"tml2")),
231        script::TELUGU => Some(hb_tag_t::from_bytes(b"tel2")),
232        script::MYANMAR => Some(hb_tag_t::from_bytes(b"mym2")),
233        _ => None,
234    }
235}
236
237fn old_tag_from_script(script: Script) -> hb_tag_t {
238    // This seems to be accurate as of end of 2012.
239    match script {
240        // Katakana and Hiragana both map to 'kana'.
241        script::HIRAGANA => hb_tag_t::from_bytes(b"kana"),
242
243        // Spaces at the end are preserved, unlike ISO 15924.
244        script::LAO => hb_tag_t::from_bytes(b"lao "),
245        script::YI => hb_tag_t::from_bytes(b"yi  "),
246        // Unicode-5.0 additions.
247        script::NKO => hb_tag_t::from_bytes(b"nko "),
248        // Unicode-5.1 additions.
249        script::VAI => hb_tag_t::from_bytes(b"vai "),
250
251        // Else, just change first char to lowercase and return.
252        _ => hb_tag_t(script.tag().as_u32() | 0x20000000),
253    }
254}
255
256#[rustfmt::skip]
257#[cfg(test)]
258mod tests {
259    #![allow(non_snake_case)]
260
261    use super::*;
262    use core::str::FromStr;
263    use alloc::vec::Vec;
264
265    fn new_tag_to_script(tag: hb_tag_t) -> Option<Script> {
266        match &tag.to_bytes() {
267            b"bng2" => Some(script::BENGALI),
268            b"dev2" => Some(script::DEVANAGARI),
269            b"gjr2" => Some(script::GUJARATI),
270            b"gur2" => Some(script::GURMUKHI),
271            b"knd2" => Some(script::KANNADA),
272            b"mlm2" => Some(script::MALAYALAM),
273            b"ory2" => Some(script::ORIYA),
274            b"tml2" => Some(script::TAMIL),
275            b"tel2" => Some(script::TELUGU),
276            b"mym2" => Some(script::MYANMAR),
277            _ => Some(script::UNKNOWN),
278        }
279    }
280
281    fn old_tag_to_script(tag: hb_tag_t) -> Option<Script> {
282        if tag == hb_tag_t::default_script() {
283            return None;
284        }
285
286        let mut bytes = tag.to_bytes();
287
288        // This side of the conversion is fully algorithmic.
289
290        // Any spaces at the end of the tag are replaced by repeating the last
291        // letter.  Eg 'nko ' -> 'Nkoo'
292        if bytes[2] == b' ' {
293            bytes[2] = bytes[1];
294        }
295        if bytes[3] == b' ' {
296            bytes[3] = bytes[2];
297        }
298
299        // Change first char to uppercase.
300        bytes[0] = bytes[0].to_ascii_uppercase();
301
302        Some(Script(hb_tag_t::from_bytes(&bytes)))
303    }
304
305    fn tag_to_script(tag: hb_tag_t) -> Option<Script> {
306        let bytes = tag.to_bytes();
307        if bytes[3] == b'2' || bytes[3] == b'3' {
308            let mut tag2 = bytes;
309            tag2[3] = b'2';
310            return new_tag_to_script(hb_tag_t::from_bytes(&tag2));
311        }
312
313        old_tag_to_script(tag)
314    }
315
316    fn test_simple_tags(tag: &str, script: Script) {
317        let tag = hb_tag_t::from_bytes_lossy(tag.as_bytes());
318
319        let (scripts, _) = tags_from_script_and_language(Some(script), None);
320        if !scripts.is_empty() {
321            assert_eq!(tag, scripts[0]);
322        } else {
323            assert_eq!(tag, hb_tag_t::default_script());
324        }
325
326        assert_eq!(tag_to_script(tag), Some(script));
327    }
328
329    #[test]
330    fn tag_to_uppercase() {
331        assert_eq!(hb_tag_t::from_bytes(b"abcd").to_uppercase(), hb_tag_t::from_bytes(b"ABCD"));
332        assert_eq!(hb_tag_t::from_bytes(b"abc ").to_uppercase(), hb_tag_t::from_bytes(b"ABC "));
333        assert_eq!(hb_tag_t::from_bytes(b"ABCD").to_uppercase(), hb_tag_t::from_bytes(b"ABCD"));
334    }
335
336    #[test]
337    fn tag_to_lowercase() {
338        assert_eq!(hb_tag_t::from_bytes(b"abcd").to_lowercase(), hb_tag_t::from_bytes(b"abcd"));
339        assert_eq!(hb_tag_t::from_bytes(b"abc ").to_lowercase(), hb_tag_t::from_bytes(b"abc "));
340        assert_eq!(hb_tag_t::from_bytes(b"ABCD").to_lowercase(), hb_tag_t::from_bytes(b"abcd"));
341    }
342
343    #[test]
344    fn script_degenerate() {
345        assert_eq!(hb_tag_t::from_bytes(b"DFLT"), hb_tag_t::default_script());
346
347        // Hiragana and Katakana both map to 'kana'.
348        test_simple_tags("kana", script::KATAKANA);
349
350        let (scripts, _) = tags_from_script_and_language(Some(script::HIRAGANA), None);
351        assert_eq!(scripts.as_slice(), &[hb_tag_t::from_bytes(b"kana")]);
352
353        // Spaces are replaced
354        assert_eq!(tag_to_script(hb_tag_t::from_bytes(b"be  ")), Script::from_iso15924_tag(hb_tag_t::from_bytes(b"Beee")));
355    }
356
357    #[test]
358    fn script_simple() {
359        // Arbitrary non-existent script.
360        test_simple_tags("wwyz", Script::from_iso15924_tag(hb_tag_t::from_bytes(b"wWyZ")).unwrap());
361
362        // These we don't really care about.
363        test_simple_tags("zyyy", script::COMMON);
364        test_simple_tags("zinh", script::INHERITED);
365        test_simple_tags("zzzz", script::UNKNOWN);
366
367        test_simple_tags("arab", script::ARABIC);
368        test_simple_tags("copt", script::COPTIC);
369        test_simple_tags("kana", script::KATAKANA);
370        test_simple_tags("latn", script::LATIN);
371
372        // These are trickier since their OT script tags have space.
373        test_simple_tags("lao ", script::LAO);
374        test_simple_tags("yi  ", script::YI);
375        // Unicode-5.0 additions.
376        test_simple_tags("nko ", script::NKO);
377        // Unicode-5.1 additions.
378        test_simple_tags("vai ", script::VAI);
379
380        // https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
381
382        // Unicode-5.2 additions.
383        test_simple_tags("mtei", script::MEETEI_MAYEK);
384        // Unicode-6.0 additions.
385        test_simple_tags("mand", script::MANDAIC);
386    }
387
388    macro_rules! test_script_from_language {
389        ($name:ident, $tag:expr, $lang:expr, $script:expr) => {
390            #[test]
391            fn $name() {
392                let tag = hb_tag_t::from_bytes_lossy($tag.as_bytes());
393                let (scripts, _) = tags_from_script_and_language(
394                    $script, Language::from_str($lang).ok().as_ref(),
395                );
396                if !scripts.is_empty() {
397                    assert_eq!(scripts.as_slice(), &[tag]);
398                }
399            }
400        };
401    }
402
403    test_script_from_language!(script_from_language_01, "", "", None);
404    test_script_from_language!(script_from_language_02, "", "en", None);
405    test_script_from_language!(script_from_language_03, "copt", "en", Some(script::COPTIC));
406    test_script_from_language!(script_from_language_04, "", "x-hbsc", None);
407    test_script_from_language!(script_from_language_05, "copt", "x-hbsc", Some(script::COPTIC));
408    test_script_from_language!(script_from_language_06, "abc ", "x-hbscabc", None);
409    test_script_from_language!(script_from_language_07, "deva", "x-hbscdeva", None);
410    test_script_from_language!(script_from_language_08, "dev2", "x-hbscdev2", None);
411    test_script_from_language!(script_from_language_09, "dev3", "x-hbscdev3", None);
412    test_script_from_language!(script_from_language_10, "copt", "x-hbotpap0-hbsccopt", None);
413    test_script_from_language!(script_from_language_11, "", "en-x-hbsc", None);
414    test_script_from_language!(script_from_language_12, "copt", "en-x-hbsc", Some(script::COPTIC));
415    test_script_from_language!(script_from_language_13, "abc ", "en-x-hbscabc", None);
416    test_script_from_language!(script_from_language_14, "deva", "en-x-hbscdeva", None);
417    test_script_from_language!(script_from_language_15, "dev2", "en-x-hbscdev2", None);
418    test_script_from_language!(script_from_language_16, "dev3", "en-x-hbscdev3", None);
419    test_script_from_language!(script_from_language_17, "copt", "en-x-hbotpap0-hbsccopt", None);
420
421    #[test]
422    fn script_indic() {
423        fn check(tag1: &str, tag2: &str, tag3: &str, script: Script) {
424            let tag1 = hb_tag_t::from_bytes_lossy(tag1.as_bytes());
425            let tag2 = hb_tag_t::from_bytes_lossy(tag2.as_bytes());
426            let tag3 = hb_tag_t::from_bytes_lossy(tag3.as_bytes());
427
428            let (scripts, _) = tags_from_script_and_language(Some(script), None);
429            assert_eq!(scripts.as_slice(), &[tag1, tag2, tag3]);
430            assert_eq!(tag_to_script(tag1), Some(script));
431            assert_eq!(tag_to_script(tag2), Some(script));
432            assert_eq!(tag_to_script(tag3), Some(script));
433        }
434
435        check("bng3", "bng2", "beng", script::BENGALI);
436        check("dev3", "dev2", "deva", script::DEVANAGARI);
437        check("gjr3", "gjr2", "gujr", script::GUJARATI);
438        check("gur3", "gur2", "guru", script::GURMUKHI);
439        check("knd3", "knd2", "knda", script::KANNADA);
440        check("mlm3", "mlm2", "mlym", script::MALAYALAM);
441        check("ory3", "ory2", "orya", script::ORIYA);
442        check("tml3", "tml2", "taml", script::TAMIL);
443        check("tel3", "tel2", "telu", script::TELUGU);
444    }
445
446    // TODO: swap tag and lang
447    macro_rules! test_tag_from_language {
448        ($name:ident, $tag:expr, $lang:expr) => {
449            #[test]
450            fn $name() {
451                let tag = hb_tag_t::from_bytes_lossy($tag.as_bytes());
452                let (_, languages) = tags_from_script_and_language(
453                    None, Language::from_str(&$lang.to_lowercase()).ok().as_ref(),
454                );
455                if !languages.is_empty() {
456                    assert_eq!(languages[0], tag);
457                }
458            }
459        };
460    }
461
462    test_tag_from_language!(tag_from_language_dflt, "dflt", "");
463    test_tag_from_language!(tag_from_language_ALT, "ALT", "alt");
464    test_tag_from_language!(tag_from_language_ARA, "ARA", "ar");
465    test_tag_from_language!(tag_from_language_AZE, "AZE", "az");
466    test_tag_from_language!(tag_from_language_az_ir, "AZE", "az-ir");
467    test_tag_from_language!(tag_from_language_az_az, "AZE", "az-az");
468    test_tag_from_language!(tag_from_language_ENG, "ENG", "en");
469    test_tag_from_language!(tag_from_language_en_US, "ENG", "en_US");
470    test_tag_from_language!(tag_from_language_CJA, "CJA", "cja"); /* Western Cham */
471    test_tag_from_language!(tag_from_language_CJM, "CJM", "cjm"); /* Eastern Cham */
472    test_tag_from_language!(tag_from_language_ENV, "EVN", "eve");
473    test_tag_from_language!(tag_from_language_HAL, "HAL", "cfm"); /* BCP47 and current ISO639-3 code for Halam/Falam Chin */
474    test_tag_from_language!(tag_from_language_flm, "HAL", "flm"); /* Retired ISO639-3 code for Halam/Falam Chin */
475    test_tag_from_language!(tag_from_language_hy, "HYE0", "hy");
476    test_tag_from_language!(tag_from_language_hyw, "HYE", "hyw");
477    test_tag_from_language!(tag_from_language_bgr, "QIN", "bgr"); /* Bawm Chin */
478    test_tag_from_language!(tag_from_language_cbl, "QIN", "cbl"); /* Bualkhaw Chin */
479    test_tag_from_language!(tag_from_language_cka, "QIN", "cka"); /* Khumi Awa Chin */
480    test_tag_from_language!(tag_from_language_cmr, "QIN", "cmr"); /* Mro-Khimi Chin */
481    test_tag_from_language!(tag_from_language_cnb, "QIN", "cnb"); /* Chinbon Chin */
482    test_tag_from_language!(tag_from_language_cnh, "QIN", "cnh"); /* Hakha Chin */
483    test_tag_from_language!(tag_from_language_cnk, "QIN", "cnk"); /* Khumi Chin */
484    test_tag_from_language!(tag_from_language_cnw, "QIN", "cnw"); /* Ngawn Chin */
485    test_tag_from_language!(tag_from_language_csh, "QIN", "csh"); /* Asho Chin */
486    test_tag_from_language!(tag_from_language_csy, "QIN", "csy"); /* Siyin Chin */
487    test_tag_from_language!(tag_from_language_ctd, "QIN", "ctd"); /* Tedim Chin */
488    test_tag_from_language!(tag_from_language_czt, "QIN", "czt"); /* Zotung Chin */
489    test_tag_from_language!(tag_from_language_dao, "QIN", "dao"); /* Daai Chin */
490    test_tag_from_language!(tag_from_language_htl, "QIN", "hlt"); /* Matu Chin */
491    test_tag_from_language!(tag_from_language_mrh, "QIN", "mrh"); /* Mara Chin */
492    test_tag_from_language!(tag_from_language_pck, "QIN", "pck"); /* Paite Chin */
493    test_tag_from_language!(tag_from_language_sez, "QIN", "sez"); /* Senthang Chin */
494    test_tag_from_language!(tag_from_language_tcp, "QIN", "tcp"); /* Tawr Chin */
495    test_tag_from_language!(tag_from_language_tcz, "QIN", "tcz"); /* Thado Chin */
496    test_tag_from_language!(tag_from_language_yos, "QIN", "yos"); /* Yos, deprecated by IANA in favor of Zou [zom] */
497    test_tag_from_language!(tag_from_language_zom, "QIN", "zom"); /* Zou */
498    test_tag_from_language!(tag_from_language_FAR, "FAR", "fa");
499    test_tag_from_language!(tag_from_language_fa_IR, "FAR", "fa_IR");
500    test_tag_from_language!(tag_from_language_man, "MNK", "man");
501    test_tag_from_language!(tag_from_language_SWA, "SWA", "aii"); /* Swadaya Aramaic */
502    test_tag_from_language!(tag_from_language_SYR, "SYR", "syr"); /* Syriac [macrolanguage] */
503    test_tag_from_language!(tag_from_language_amw, "SYR", "amw"); /* Western Neo-Aramaic */
504    test_tag_from_language!(tag_from_language_cld, "SYR", "cld"); /* Chaldean Neo-Aramaic */
505    test_tag_from_language!(tag_from_language_syc, "SYR", "syc"); /* Classical Syriac */
506    test_tag_from_language!(tag_from_language_TUA, "TUA", "tru"); /* Turoyo Aramaic */
507    test_tag_from_language!(tag_from_language_zh, "ZHS", "zh"); /* Chinese */
508    test_tag_from_language!(tag_from_language_zh_cn, "ZHS", "zh-cn"); /* Chinese (China) */
509    test_tag_from_language!(tag_from_language_zh_sg, "ZHS", "zh-sg"); /* Chinese (Singapore) */
510    test_tag_from_language!(tag_from_language_zh_mo, "ZHTM", "zh-mo"); /* Chinese (Macao) */
511    test_tag_from_language!(tag_from_language_zh_hant_mo, "ZHTM", "zh-hant-mo"); /* Chinese (Macao) */
512    test_tag_from_language!(tag_from_language_zh_hans_mo, "ZHS", "zh-hans-mo"); /* Chinese (Simplified, Macao) */
513    test_tag_from_language!(tag_from_language_ZHH, "ZHH", "zh-HK"); /* Chinese (Hong Kong) */
514    test_tag_from_language!(tag_from_language_zh_HanT_hK, "ZHH", "zH-HanT-hK"); /* Chinese (Hong Kong) */
515    test_tag_from_language!(tag_from_language_zh_HanS_hK, "ZHS", "zH-HanS-hK"); /* Chinese (Simplified, Hong Kong) */
516    test_tag_from_language!(tag_from_language_zh_tw, "ZHT", "zh-tw"); /* Chinese (Taiwan) */
517    test_tag_from_language!(tag_from_language_ZHS, "ZHS", "zh-Hans"); /* Chinese (Simplified) */
518    test_tag_from_language!(tag_from_language_ZHT, "ZHT", "zh-Hant"); /* Chinese (Traditional) */
519    test_tag_from_language!(tag_from_language_zh_xx, "ZHS", "zh-xx"); /* Chinese (Other) */
520    test_tag_from_language!(tag_from_language_zh_Hans_TW, "ZHS", "zh-Hans-TW");
521    test_tag_from_language!(tag_from_language_yue, "ZHH", "yue");
522    test_tag_from_language!(tag_from_language_yue_Hant, "ZHH", "yue-Hant");
523    test_tag_from_language!(tag_from_language_yue_Hans, "ZHS", "yue-Hans");
524    test_tag_from_language!(tag_from_language_ABC, "ABC", "abc");
525    test_tag_from_language!(tag_from_language_ABCD, "ABCD", "x-hbotabcd");
526    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabc_zxc, "ABC", "asdf-asdf-wer-x-hbotabc-zxc");
527    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabc, "ABC", "asdf-asdf-wer-x-hbotabc");
528    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabcd, "ABCD", "asdf-asdf-wer-x-hbotabcd");
529    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbot_zxc, "dflt", "asdf-asdf-wer-x-hbot-zxc");
530    test_tag_from_language!(tag_from_language_xy, "dflt", "xy");
531    test_tag_from_language!(tag_from_language_xyz, "XYZ", "xyz"); /* Unknown ISO 639-3 */
532    test_tag_from_language!(tag_from_language_xyz_qw, "XYZ", "xyz-qw"); /* Unknown ISO 639-3 */
533
534    /*
535     * Invalid input. The precise answer does not matter, as long as it
536     * does not crash or get into an infinite loop.
537     */
538    test_tag_from_language!(tag_from_language__fonipa, "IPPH", "-fonipa");
539
540    /*
541     * Tags that contain "-fonipa" as a substring but which do not contain
542     * the subtag "fonipa".
543     */
544    test_tag_from_language!(tag_from_language_en_fonipax, "ENG", "en-fonipax");
545    test_tag_from_language!(tag_from_language_en_x_fonipa, "ENG", "en-x-fonipa");
546    test_tag_from_language!(tag_from_language_en_a_fonipa, "ENG", "en-a-fonipa");
547    test_tag_from_language!(tag_from_language_en_a_qwe_b_fonipa, "ENG", "en-a-qwe-b-fonipa");
548
549    /* International Phonetic Alphabet */
550    test_tag_from_language!(tag_from_language_en_fonipa, "IPPH", "en-fonipa");
551    test_tag_from_language!(tag_from_language_en_fonipax_fonipa, "IPPH", "en-fonipax-fonipa");
552    test_tag_from_language!(tag_from_language_rm_ch_fonipa_sursilv_x_foobar, "IPPH", "rm-CH-fonipa-sursilv-x-foobar");
553    test_tag_from_language!(tag_from_language_IPPH, "IPPH", "und-fonipa");
554    test_tag_from_language!(tag_from_language_zh_fonipa, "IPPH", "zh-fonipa");
555
556    /* North American Phonetic Alphabet (Americanist Phonetic Notation) */
557    test_tag_from_language!(tag_from_language_en_fonnapa, "APPH", "en-fonnapa");
558    test_tag_from_language!(tag_from_language_chr_fonnapa, "APPH", "chr-fonnapa");
559    test_tag_from_language!(tag_from_language_APPH, "APPH", "und-fonnapa");
560
561    /* Khutsuri Georgian */
562    test_tag_from_language!(tag_from_language_ka_geok, "KGE", "ka-Geok");
563    test_tag_from_language!(tag_from_language_KGE, "KGE", "und-Geok");
564
565    /* Irish Traditional */
566    test_tag_from_language!(tag_from_language_IRT, "IRT", "ga-Latg");
567
568    /* Moldavian */
569    test_tag_from_language!(tag_from_language_MOL, "MOL", "ro-MD");
570
571    /* Polytonic Greek */
572    test_tag_from_language!(tag_from_language_PGR, "PGR", "el-polyton");
573    test_tag_from_language!(tag_from_language_el_CY_polyton, "PGR", "el-CY-polyton");
574
575    /* Estrangela Syriac */
576    test_tag_from_language!(tag_from_language_aii_Syre, "SYRE", "aii-Syre");
577    test_tag_from_language!(tag_from_language_de_Syre, "SYRE", "de-Syre");
578    test_tag_from_language!(tag_from_language_syr_Syre, "SYRE", "syr-Syre");
579    test_tag_from_language!(tag_from_language_und_Syre, "SYRE", "und-Syre");
580
581    /* Western Syriac */
582    test_tag_from_language!(tag_from_language_aii_Syrj, "SYRJ", "aii-Syrj");
583    test_tag_from_language!(tag_from_language_de_Syrj, "SYRJ", "de-Syrj");
584    test_tag_from_language!(tag_from_language_syr_Syrj, "SYRJ", "syr-Syrj");
585    test_tag_from_language!(tag_from_language_SYRJ, "SYRJ", "und-Syrj");
586
587    /* Eastern Syriac */
588    test_tag_from_language!(tag_from_language_aii_Syrn, "SYRN", "aii-Syrn");
589    test_tag_from_language!(tag_from_language_de_Syrn, "SYRN", "de-Syrn");
590    test_tag_from_language!(tag_from_language_syr_Syrn, "SYRN", "syr-Syrn");
591    test_tag_from_language!(tag_from_language_SYRN, "SYRN", "und-Syrn");
592
593    /* Test that x-hbot overrides the base language */
594    test_tag_from_language!(tag_from_language_fa_x_hbotabc_zxc, "ABC", "fa-x-hbotabc-zxc");
595    test_tag_from_language!(tag_from_language_fa_ir_x_hbotabc_zxc, "ABC", "fa-ir-x-hbotabc-zxc");
596    test_tag_from_language!(tag_from_language_zh_x_hbotabc_zxc, "ABC", "zh-x-hbotabc-zxc");
597    test_tag_from_language!(tag_from_language_zh_cn_x_hbotabc_zxc, "ABC", "zh-cn-x-hbotabc-zxc");
598    test_tag_from_language!(tag_from_language_zh_xy_x_hbotabc_zxc, "ABC", "zh-xy-x-hbotabc-zxc");
599    test_tag_from_language!(tag_from_language_xyz_xy_x_hbotabc_zxc, "ABC", "xyz-xy-x-hbotabc-zxc");
600
601    /* Unnormalized BCP 47 tags */
602    test_tag_from_language!(tag_from_language_ar_aao, "ARA", "ar-aao");
603    test_tag_from_language!(tag_from_language_art_lojban, "JBO", "art-lojban");
604    test_tag_from_language!(tag_from_language_kok_gom, "KOK", "kok-gom");
605    test_tag_from_language!(tag_from_language_i_lux, "LTZ", "i-lux");
606    test_tag_from_language!(tag_from_language_drh, "MNG", "drh");
607    test_tag_from_language!(tag_from_language_ar_ary1, "MOR", "ar-ary");
608    test_tag_from_language!(tag_from_language_ar_ary_DZ, "MOR", "ar-ary-DZ");
609    test_tag_from_language!(tag_from_language_no_bok, "NOR", "no-bok");
610    test_tag_from_language!(tag_from_language_no_nyn, "NYN", "no-nyn");
611    test_tag_from_language!(tag_from_language_i_hak, "ZHS", "i-hak");
612    test_tag_from_language!(tag_from_language_zh_guoyu, "ZHS", "zh-guoyu");
613    test_tag_from_language!(tag_from_language_zh_min, "ZHS", "zh-min");
614    test_tag_from_language!(tag_from_language_zh_min_nan, "ZHS", "zh-min-nan");
615    test_tag_from_language!(tag_from_language_zh_xiang, "ZHS", "zh-xiang");
616
617    /* BCP 47 tags that look similar to unrelated language system tags */
618    test_tag_from_language!(tag_from_language_als, "SQI", "als");
619    test_tag_from_language!(tag_from_language_far, "dflt", "far");
620
621    /* A UN M.49 region code, not an extended language subtag */
622    test_tag_from_language!(tag_from_language_ar_001, "ARA", "ar-001");
623
624    /* An invalid tag */
625    test_tag_from_language!(tag_from_language_invalid, "TRK", "tr@foo=bar");
626
627    macro_rules! test_tags {
628        ($name:ident, $script:expr, $lang:expr, $scripts:expr, $langs:expr) => {
629            #[test]
630            fn $name() {
631                let (scripts, languages) = tags_from_script_and_language(
632                    $script, Language::from_str($lang).ok().as_ref(),
633                );
634
635                let exp_scripts: Vec<hb_tag_t> = $scripts.iter().map(|v| hb_tag_t::from_bytes_lossy(*v)).collect();
636                let exp_langs: Vec<hb_tag_t> = $langs.iter().map(|v| hb_tag_t::from_bytes_lossy(*v)).collect();
637
638                assert_eq!(exp_scripts, scripts.as_slice());
639                assert_eq!(exp_langs, languages.as_slice());
640            }
641        };
642    }
643
644    test_tags!(tag_full_en, None, "en", &[], &[b"ENG"]);
645    test_tags!(tag_full_en_x_hbscdflt, None, "en-x-hbscdflt", &[b"DFLT"], &[b"ENG"]);
646    test_tags!(tag_full_en_latin, Some(script::LATIN), "en", &[b"latn"], &[b"ENG"]);
647    test_tags!(tag_full_und_fonnapa, None, "und-fonnapa", &[], &[b"APPH"]);
648    test_tags!(tag_full_en_fonnapa, None, "en-fonnapa", &[], &[b"APPH"]);
649    test_tags!(tag_full_x_hbot1234_hbsc5678, None, "x-hbot1234-hbsc5678", &[b"5678"], &[b"1234"]);
650    test_tags!(tag_full_x_hbsc5678_hbot1234, None, "x-hbsc5678-hbot1234", &[b"5678"], &[b"1234"]);
651    test_tags!(tag_full_ml, Some(script::MALAYALAM), "ml", &[b"mlm3", b"mlm2", b"mlym"], &[b"MAL", b"MLR"]);
652    test_tags!(tag_full_xyz, None, "xyz", &[], &[b"XYZ"]);
653    test_tags!(tag_full_xy, None, "xy", &[], &[]);
654}