swash/text/
lang.rs

1use crate::{tag_from_bytes, Tag};
2use core::fmt;
3
4use super::lang_data::*;
5
/// Classification of a language as Chinese, Japanese or Korean (CJK).
///
/// The enum is stored as a single byte (`#[repr(u8)]`) and every variant has
/// an explicit discriminant.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Cjk {
    /// The language is not one of the CJK languages.
    None = 0,
    /// Traditional Chinese.
    Traditional = 1,
    /// Simplified Chinese.
    Simplified = 2,
    /// Japanese.
    Japanese = 3,
    /// Korean.
    Korean = 4,
}
16
17/// Representation of a language and its associated script and region.
18#[derive(Copy, Clone, PartialEq, Eq)]
19pub struct Language {
20    language: [u8; 3],
21    script: [u8; 4],
22    region: [u8; 2],
23    lang_len: u8,
24    script_len: u8,
25    region_len: u8,
26    cjk: Cjk,
27    name_index: u16,
28    tag: Option<Tag>,
29}
30
31impl Language {
32    /// Parses a language tag.
33    pub fn parse(tag: &str) -> Option<Self> {
34        let mut lang = Self {
35            language: [0; 3],
36            region: [0; 2],
37            script: [0; 4],
38            lang_len: 0,
39            region_len: 0,
40            script_len: 0,
41            cjk: Cjk::None,
42            name_index: 0xFFFF,
43            tag: None,
44        };
45        let mut has_region = false;
46        let mut zh = false;
47        let mut lang_index = 0xFFFF;
48        for (i, part) in tag.split('-').enumerate() {
49            let bytes = part.as_bytes();
50            let len = bytes.len();
51            match i {
52                0 => {
53                    match len {
54                        2 => {
55                            let a = bytes[0].to_ascii_lowercase();
56                            let b = bytes[1].to_ascii_lowercase();
57                            match (a, b) {
58                                (b'z', b'h') => zh = true,
59                                (b'j', b'a') => lang.cjk = Cjk::Japanese,
60                                (b'k', b'o') => lang.cjk = Cjk::Korean,
61                                _ => {}
62                            };
63                            lang.language[0] = a;
64                            lang.language[1] = b;
65                            lang.lang_len = 2;
66                            let key = tag2(&[a, b]);
67                            if let Ok(index) = LANG_BY_TAG2.binary_search_by(|x| x.0.cmp(&key)) {
68                                lang_index = LANG_BY_TAG2.get(index)?.1;
69                            }
70                        }
71                        3 => {
72                            let a = bytes[0].to_ascii_lowercase();
73                            let b = bytes[1].to_ascii_lowercase();
74                            let c = bytes[2].to_ascii_lowercase();
75                            zh = a == b'z' && b == b'h' && c == b'o';
76                            lang.language[0] = a;
77                            lang.language[1] = b;
78                            lang.language[2] = c;
79                            lang.lang_len = 3;
80                            let key = tag3(&[a, b, c]);
81                            if let Ok(index) = LANG_BY_TAG3.binary_search_by(|x| x.0.cmp(&key)) {
82                                lang_index = LANG_BY_TAG3.get(index)?.1 as u16;
83                            }
84                        }
85                        _ => return None,
86                    };
87                }
88                1 => match len {
89                    2 => {
90                        let a = bytes[0].to_ascii_uppercase();
91                        let b = bytes[1].to_ascii_uppercase();
92                        lang.region[0] = a;
93                        lang.region[1] = b;
94                        lang.region_len = 2;
95                        has_region = true;
96                    }
97                    4 => {
98                        let a = bytes[0].to_ascii_uppercase();
99                        let b = bytes[1].to_ascii_lowercase();
100                        let c = bytes[2].to_ascii_lowercase();
101                        let d = bytes[3].to_ascii_lowercase();
102                        lang.script[0] = a;
103                        lang.script[1] = b;
104                        lang.script[2] = c;
105                        lang.script[3] = d;
106                        lang.script_len = 4;
107                    }
108                    _ => break,
109                },
110                2 => {
111                    if has_region || len != 2 {
112                        break;
113                    }
114                    let a = bytes[0].to_ascii_uppercase();
115                    let b = bytes[1].to_ascii_uppercase();
116                    lang.region[0] = a;
117                    lang.region[1] = b;
118                    lang.region_len = 2;
119                    has_region = true;
120                }
121                _ => break,
122            }
123        }
124        lang.name_index = lang_index;
125        if lang_index != 0xFFFF {
126            lang.tag = Some(*LANG_TAGS.get(lang_index as usize)?);
127        } else if zh {
128            let (tag, cjk) = match lang.script().unwrap_or("") {
129                "Hant" => (tag_from_bytes(b"ZHT "), Cjk::Traditional),
130                "Hans" => (tag_from_bytes(b"ZHS "), Cjk::Simplified),
131                _ => (tag_from_bytes(b"ZHT "), Cjk::Traditional),
132            };
133            lang.tag = Some(tag);
134            lang.cjk = cjk;
135            lang.name_index = match LANG_TAGS.binary_search_by(|x| x.cmp(&tag)) {
136                Ok(index) => index as u16,
137                _ => 0xFFFF,
138            };
139        }
140        Some(lang)
141    }
142
143    /// Returns the language associated with the specified OpenType language
144    /// tag.
145    pub fn from_opentype(tag: Tag) -> Option<Self> {
146        if tag == tag_from_bytes(b"ZHT ") {
147            return Self::parse("zh-Hant");
148        } else if tag == tag_from_bytes(b"ZHS ") {
149            return Self::parse("zh-Hans");
150        }
151        let name_index = match LANG_TAGS.binary_search_by(|x| x.cmp(&tag)) {
152            Ok(index) => index,
153            _ => return None,
154        };
155        Self::parse(LANG_ENTRIES.get(name_index)?.1)
156    }
157
158    /// Returns the language component.
159    pub fn language(&self) -> &str {
160        unsafe { core::str::from_utf8_unchecked(&self.language[..self.lang_len as usize]) }
161    }
162
163    /// Returns the script component.
164    pub fn script(&self) -> Option<&str> {
165        Some(if self.script_len == 4 {
166            unsafe { core::str::from_utf8_unchecked(&self.script) }
167        } else {
168            return None;
169        })
170    }
171
172    /// Returns the region component.
173    pub fn region(&self) -> Option<&str> {
174        Some(if self.region_len == 2 {
175            unsafe { core::str::from_utf8_unchecked(&self.region) }
176        } else {
177            return None;
178        })
179    }
180
181    /// Returns the CJK language.
182    pub fn cjk(&self) -> Cjk {
183        self.cjk
184    }
185
186    /// Returns the name of the language.
187    pub fn name(&self) -> Option<&'static str> {
188        LANG_ENTRIES.get(self.name_index as usize).map(|e| e.0)
189    }
190
191    /// Returns the associated OpenType language tag.
192    pub fn to_opentype(self) -> Option<Tag> {
193        self.tag
194    }
195}
196
197impl fmt::Display for Language {
198    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
199        write!(f, "{}", self.language())?;
200        if let Some(script) = self.script() {
201            write!(f, "-{}", script)?;
202        }
203        if let Some(region) = self.region() {
204            write!(f, "-{}", region)?;
205        }
206        if let Some(name) = self.name() {
207            write!(f, " ({})", name)?;
208        }
209        Ok(())
210    }
211}
212
213impl fmt::Debug for Language {
214    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215        write!(f, "{}", self.language())?;
216        if let Some(script) = self.script() {
217            write!(f, "-{}", script)?;
218        }
219        if let Some(region) = self.region() {
220            write!(f, "-{}", region)?;
221        }
222        if let Some(tag) = self.tag {
223            let tag = tag.to_be_bytes();
224            if let Ok(s) = core::str::from_utf8(&tag) {
225                write!(f, " ({})", s)?;
226            }
227        }
228        if let Some(name) = self.name() {
229            write!(f, " \"{}\"", name)?;
230        }
231        Ok(())
232    }
233}