swash/text/
lang.rs
1use crate::{tag_from_bytes, Tag};
2use core::fmt;
3
4use super::lang_data::*;
5
/// Identifies which CJK language, if any, a [`Language`] refers to.
///
/// The discriminants are explicit and the enum is `repr(u8)`, so a value can
/// be converted to its numeric code with `as u8`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Cjk {
    /// Not a CJK language.
    None = 0,
    /// Chinese written with traditional characters.
    Traditional = 1,
    /// Chinese written with simplified characters.
    Simplified = 2,
    /// Japanese.
    Japanese = 3,
    /// Korean.
    Korean = 4,
}
16
17#[derive(Copy, Clone, PartialEq, Eq)]
19pub struct Language {
20 language: [u8; 3],
21 script: [u8; 4],
22 region: [u8; 2],
23 lang_len: u8,
24 script_len: u8,
25 region_len: u8,
26 cjk: Cjk,
27 name_index: u16,
28 tag: Option<Tag>,
29}
30
31impl Language {
32 pub fn parse(tag: &str) -> Option<Self> {
34 let mut lang = Self {
35 language: [0; 3],
36 region: [0; 2],
37 script: [0; 4],
38 lang_len: 0,
39 region_len: 0,
40 script_len: 0,
41 cjk: Cjk::None,
42 name_index: 0xFFFF,
43 tag: None,
44 };
45 let mut has_region = false;
46 let mut zh = false;
47 let mut lang_index = 0xFFFF;
48 for (i, part) in tag.split('-').enumerate() {
49 let bytes = part.as_bytes();
50 let len = bytes.len();
51 match i {
52 0 => {
53 match len {
54 2 => {
55 let a = bytes[0].to_ascii_lowercase();
56 let b = bytes[1].to_ascii_lowercase();
57 match (a, b) {
58 (b'z', b'h') => zh = true,
59 (b'j', b'a') => lang.cjk = Cjk::Japanese,
60 (b'k', b'o') => lang.cjk = Cjk::Korean,
61 _ => {}
62 };
63 lang.language[0] = a;
64 lang.language[1] = b;
65 lang.lang_len = 2;
66 let key = tag2(&[a, b]);
67 if let Ok(index) = LANG_BY_TAG2.binary_search_by(|x| x.0.cmp(&key)) {
68 lang_index = LANG_BY_TAG2.get(index)?.1;
69 }
70 }
71 3 => {
72 let a = bytes[0].to_ascii_lowercase();
73 let b = bytes[1].to_ascii_lowercase();
74 let c = bytes[2].to_ascii_lowercase();
75 zh = a == b'z' && b == b'h' && c == b'o';
76 lang.language[0] = a;
77 lang.language[1] = b;
78 lang.language[2] = c;
79 lang.lang_len = 3;
80 let key = tag3(&[a, b, c]);
81 if let Ok(index) = LANG_BY_TAG3.binary_search_by(|x| x.0.cmp(&key)) {
82 lang_index = LANG_BY_TAG3.get(index)?.1 as u16;
83 }
84 }
85 _ => return None,
86 };
87 }
88 1 => match len {
89 2 => {
90 let a = bytes[0].to_ascii_uppercase();
91 let b = bytes[1].to_ascii_uppercase();
92 lang.region[0] = a;
93 lang.region[1] = b;
94 lang.region_len = 2;
95 has_region = true;
96 }
97 4 => {
98 let a = bytes[0].to_ascii_uppercase();
99 let b = bytes[1].to_ascii_lowercase();
100 let c = bytes[2].to_ascii_lowercase();
101 let d = bytes[3].to_ascii_lowercase();
102 lang.script[0] = a;
103 lang.script[1] = b;
104 lang.script[2] = c;
105 lang.script[3] = d;
106 lang.script_len = 4;
107 }
108 _ => break,
109 },
110 2 => {
111 if has_region || len != 2 {
112 break;
113 }
114 let a = bytes[0].to_ascii_uppercase();
115 let b = bytes[1].to_ascii_uppercase();
116 lang.region[0] = a;
117 lang.region[1] = b;
118 lang.region_len = 2;
119 has_region = true;
120 }
121 _ => break,
122 }
123 }
124 lang.name_index = lang_index;
125 if lang_index != 0xFFFF {
126 lang.tag = Some(*LANG_TAGS.get(lang_index as usize)?);
127 } else if zh {
128 let (tag, cjk) = match lang.script().unwrap_or("") {
129 "Hant" => (tag_from_bytes(b"ZHT "), Cjk::Traditional),
130 "Hans" => (tag_from_bytes(b"ZHS "), Cjk::Simplified),
131 _ => (tag_from_bytes(b"ZHT "), Cjk::Traditional),
132 };
133 lang.tag = Some(tag);
134 lang.cjk = cjk;
135 lang.name_index = match LANG_TAGS.binary_search_by(|x| x.cmp(&tag)) {
136 Ok(index) => index as u16,
137 _ => 0xFFFF,
138 };
139 }
140 Some(lang)
141 }
142
143 pub fn from_opentype(tag: Tag) -> Option<Self> {
146 if tag == tag_from_bytes(b"ZHT ") {
147 return Self::parse("zh-Hant");
148 } else if tag == tag_from_bytes(b"ZHS ") {
149 return Self::parse("zh-Hans");
150 }
151 let name_index = match LANG_TAGS.binary_search_by(|x| x.cmp(&tag)) {
152 Ok(index) => index,
153 _ => return None,
154 };
155 Self::parse(LANG_ENTRIES.get(name_index)?.1)
156 }
157
158 pub fn language(&self) -> &str {
160 unsafe { core::str::from_utf8_unchecked(&self.language[..self.lang_len as usize]) }
161 }
162
163 pub fn script(&self) -> Option<&str> {
165 Some(if self.script_len == 4 {
166 unsafe { core::str::from_utf8_unchecked(&self.script) }
167 } else {
168 return None;
169 })
170 }
171
172 pub fn region(&self) -> Option<&str> {
174 Some(if self.region_len == 2 {
175 unsafe { core::str::from_utf8_unchecked(&self.region) }
176 } else {
177 return None;
178 })
179 }
180
181 pub fn cjk(&self) -> Cjk {
183 self.cjk
184 }
185
186 pub fn name(&self) -> Option<&'static str> {
188 LANG_ENTRIES.get(self.name_index as usize).map(|e| e.0)
189 }
190
191 pub fn to_opentype(self) -> Option<Tag> {
193 self.tag
194 }
195}
196
197impl fmt::Display for Language {
198 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
199 write!(f, "{}", self.language())?;
200 if let Some(script) = self.script() {
201 write!(f, "-{}", script)?;
202 }
203 if let Some(region) = self.region() {
204 write!(f, "-{}", region)?;
205 }
206 if let Some(name) = self.name() {
207 write!(f, " ({})", name)?;
208 }
209 Ok(())
210 }
211}
212
213impl fmt::Debug for Language {
214 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215 write!(f, "{}", self.language())?;
216 if let Some(script) = self.script() {
217 write!(f, "-{}", script)?;
218 }
219 if let Some(region) = self.region() {
220 write!(f, "-{}", region)?;
221 }
222 if let Some(tag) = self.tag {
223 let tag = tag.to_be_bytes();
224 if let Ok(s) = core::str::from_utf8(&tag) {
225 write!(f, " ({})", s)?;
226 }
227 }
228 if let Some(name) = self.name() {
229 write!(f, " \"{}\"", name)?;
230 }
231 Ok(())
232 }
233}