1use core::convert::TryFrom;
2
3pub use unicode_ccc::CanonicalCombiningClass;
4pub use unicode_properties::GeneralCategory as hb_unicode_general_category_t;
6
7use crate::Script;
8
/// Space classification codes returned by `CharExt::space_fallback`.
pub mod hb_unicode_funcs_t {
    /// Numeric code identifying the kind of space a character represents.
    pub type space_t = u8;

    /// The character is not one of the recognised spaces.
    pub const NOT_SPACE: space_t = 0;

    // Codes 1..=16 are named `SPACE_EM_<n>`; the numeric value equals `<n>`
    // (with `SPACE_EM` standing for 1).
    pub const SPACE_EM: space_t = 1;
    pub const SPACE_EM_2: space_t = 2;
    pub const SPACE_EM_3: space_t = 3;
    pub const SPACE_EM_4: space_t = 4;
    pub const SPACE_EM_5: space_t = 5;
    pub const SPACE_EM_6: space_t = 6;
    pub const SPACE_EM_16: space_t = 16;

    // The remaining codes are plain identifiers, not divisors.
    pub const SPACE_4_EM_18: space_t = 17;
    pub const SPACE: space_t = 18;
    pub const SPACE_FIGURE: space_t = 19;
    pub const SPACE_PUNCTUATION: space_t = 20;
    pub const SPACE_NARROW: space_t = 21;
}
28
/// Replacement values for selected canonical combining classes.
///
/// `CCC<n>` is the value stored at index `<n>` of `MODIFIED_COMBINING_CLASS`,
/// i.e. the class that a character with canonical combining class `<n>` is
/// given by `CharExt::modified_combining_class`.
#[allow(dead_code)]
pub mod modified_combining_class {
    // Classes 10..=26: a permutation of the values 10..=26.
    pub const CCC10: u8 = 22;
    pub const CCC11: u8 = 15;
    pub const CCC12: u8 = 16;
    pub const CCC13: u8 = 17;
    pub const CCC14: u8 = 23;
    pub const CCC15: u8 = 18;
    pub const CCC16: u8 = 19;
    pub const CCC17: u8 = 20;
    pub const CCC18: u8 = 21;
    pub const CCC19: u8 = 14;
    pub const CCC20: u8 = 24;
    pub const CCC21: u8 = 12;
    pub const CCC22: u8 = 25;
    pub const CCC23: u8 = 13;
    pub const CCC24: u8 = 10;
    pub const CCC25: u8 = 11;
    pub const CCC26: u8 = 26;

    // Classes 27..=35: class 33 is moved to the front (27) and classes
    // 27..=32 are shifted up by one; 34 and 35 keep their value.
    pub const CCC27: u8 = 28;
    pub const CCC28: u8 = 29;
    pub const CCC29: u8 = 30;
    pub const CCC30: u8 = 31;
    pub const CCC31: u8 = 32;
    pub const CCC32: u8 = 33;
    pub const CCC33: u8 = 27;
    pub const CCC34: u8 = 34;
    pub const CCC35: u8 = 35;

    // Class 36 keeps its value.
    pub const CCC36: u8 = 36;

    // Classes 84 and 91 are zeroed.
    pub const CCC84: u8 = 0;
    pub const CCC91: u8 = 0;

    // Class 103 is remapped to 3; 107 keeps its value.
    pub const CCC103: u8 = 3;
    pub const CCC107: u8 = 107;

    // Classes 118 and 122 keep their value.
    pub const CCC118: u8 = 118;
    pub const CCC122: u8 = 122;

    // Class 129 keeps its value; 130 and 132 are reordered relative to each
    // other (130 -> 132, 132 -> 131).
    pub const CCC129: u8 = 129;
    pub const CCC130: u8 = 132;
    pub const CCC132: u8 = 131;
}
108
/// Lookup table indexed by a canonical combining class value (0..=255).
///
/// `CharExt::modified_combining_class` indexes this table with the class
/// reported by `unicode_ccc` and returns the stored entry. Numeric literal
/// entries equal their own index; entries written as
/// `modified_combining_class::CCC<n>` sit at index `<n>` and take whatever
/// value that module assigns.
// NOTE(review): the named `CanonicalCombiningClass::* as u8` entries are
// presumably equal to their index as well (they fill the gaps left by the
// literals) -- confirm against `unicode_ccc`.
#[rustfmt::skip]
const MODIFIED_COMBINING_CLASS: &[u8; 256] = &[
    // Indices 0..=9.
    CanonicalCombiningClass::NotReordered as u8,
    CanonicalCombiningClass::Overlay as u8,
    2, 3, 4, 5, 6,
    CanonicalCombiningClass::Nukta as u8,
    CanonicalCombiningClass::KanaVoicing as u8,
    CanonicalCombiningClass::Virama as u8,

    // Indices 10..=26 (the Hebrew fixed-position classes in the Unicode data).
    modified_combining_class::CCC10,
    modified_combining_class::CCC11,
    modified_combining_class::CCC12,
    modified_combining_class::CCC13,
    modified_combining_class::CCC14,
    modified_combining_class::CCC15,
    modified_combining_class::CCC16,
    modified_combining_class::CCC17,
    modified_combining_class::CCC18,
    modified_combining_class::CCC19,
    modified_combining_class::CCC20,
    modified_combining_class::CCC21,
    modified_combining_class::CCC22,
    modified_combining_class::CCC23,
    modified_combining_class::CCC24,
    modified_combining_class::CCC25,
    modified_combining_class::CCC26,

    // Indices 27..=35 (the Arabic fixed-position classes).
    modified_combining_class::CCC27,
    modified_combining_class::CCC28,
    modified_combining_class::CCC29,
    modified_combining_class::CCC30,
    modified_combining_class::CCC31,
    modified_combining_class::CCC32,
    modified_combining_class::CCC33,
    modified_combining_class::CCC34,
    modified_combining_class::CCC35,

    // Index 36 (the Syriac fixed-position class).
    modified_combining_class::CCC36,

    // Indices 37..=83: identity.
    37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83,

    // Indices 84..=102 (84 and 91 are the Telugu fixed-position classes).
    modified_combining_class::CCC84,
    85, 86, 87, 88, 89, 90,
    modified_combining_class::CCC91,
    92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,

    // Indices 103..=117 (103 and 107 are the Thai fixed-position classes).
    modified_combining_class::CCC103,
    104, 105, 106,
    modified_combining_class::CCC107,
    108, 109, 110, 111, 112, 113, 114, 115, 116, 117,

    // Indices 118..=128 (118 and 122 are the Lao fixed-position classes).
    modified_combining_class::CCC118,
    119, 120, 121,
    modified_combining_class::CCC122,
    123, 124, 125, 126, 127, 128,

    // Indices 129..=139 (129, 130 and 132 are the Tibetan fixed-position classes).
    modified_combining_class::CCC129,
    modified_combining_class::CCC130,
    131,
    modified_combining_class::CCC132,
    133, 134, 135, 136, 137, 138, 139,


    // Indices 140..=199: identity.
    140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
    170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
    190, 191, 192, 193, 194, 195, 196, 197, 198, 199,

    // Indices 200..=255: literals interleaved with the named positional classes.
    CanonicalCombiningClass::AttachedBelowLeft as u8,
    201,
    CanonicalCombiningClass::AttachedBelow as u8,
    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
    CanonicalCombiningClass::AttachedAbove as u8,
    215,
    CanonicalCombiningClass::AttachedAboveRight as u8,
    217,
    CanonicalCombiningClass::BelowLeft as u8,
    219,
    CanonicalCombiningClass::Below as u8,
    221,
    CanonicalCombiningClass::BelowRight as u8,
    223,
    CanonicalCombiningClass::Left as u8,
    225,
    CanonicalCombiningClass::Right as u8,
    227,
    CanonicalCombiningClass::AboveLeft as u8,
    229,
    CanonicalCombiningClass::Above as u8,
    231,
    CanonicalCombiningClass::AboveRight as u8,
    CanonicalCombiningClass::DoubleBelow as u8,
    CanonicalCombiningClass::DoubleAbove as u8,
    235, 236, 237, 238, 239,
    CanonicalCombiningClass::IotaSubscript as u8,
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255,
];
219
/// Extension methods for [`hb_unicode_general_category_t`].
pub trait GeneralCategoryExt {
    /// Converts the category into its numeric
    /// `hb_gc::RB_UNICODE_GENERAL_CATEGORY_*` code.
    fn to_rb(&self) -> u32;
    /// Converts a numeric `hb_gc::RB_UNICODE_GENERAL_CATEGORY_*` code back
    /// into a category.
    ///
    /// The implementation in this file reaches `unreachable!()` (and thus
    /// panics) when `gc` is not one of those codes.
    fn from_rb(gc: u32) -> Self;
    /// Returns `true` for the spacing, enclosing and nonspacing mark
    /// categories (as implemented in this file).
    fn is_mark(&self) -> bool;
    /// Returns `true` for the lowercase, modifier, other, titlecase and
    /// uppercase letter categories (as implemented in this file).
    fn is_letter(&self) -> bool;
}
226
227#[rustfmt::skip]
228impl GeneralCategoryExt for hb_unicode_general_category_t {
229 fn to_rb(&self) -> u32 {
230 match *self {
231 hb_unicode_general_category_t::ClosePunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,
232 hb_unicode_general_category_t::ConnectorPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,
233 hb_unicode_general_category_t::Control => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL,
234 hb_unicode_general_category_t::CurrencySymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,
235 hb_unicode_general_category_t::DashPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,
236 hb_unicode_general_category_t::DecimalNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,
237 hb_unicode_general_category_t::EnclosingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,
238 hb_unicode_general_category_t::FinalPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,
239 hb_unicode_general_category_t::Format => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
240 hb_unicode_general_category_t::InitialPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,
241 hb_unicode_general_category_t::LetterNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,
242 hb_unicode_general_category_t::LineSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,
243 hb_unicode_general_category_t::LowercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,
244 hb_unicode_general_category_t::MathSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,
245 hb_unicode_general_category_t::ModifierLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,
246 hb_unicode_general_category_t::ModifierSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,
247 hb_unicode_general_category_t::NonspacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
248 hb_unicode_general_category_t::OpenPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,
249 hb_unicode_general_category_t::OtherLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,
250 hb_unicode_general_category_t::OtherNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,
251 hb_unicode_general_category_t::OtherPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,
252 hb_unicode_general_category_t::OtherSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,
253 hb_unicode_general_category_t::ParagraphSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,
254 hb_unicode_general_category_t::PrivateUse => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,
255 hb_unicode_general_category_t::SpaceSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR,
256 hb_unicode_general_category_t::SpacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,
257 hb_unicode_general_category_t::Surrogate => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE,
258 hb_unicode_general_category_t::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,
259 hb_unicode_general_category_t::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,
260 hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,
261 }
262 }
263
264 fn from_rb(gc: u32) -> Self {
265 match gc {
266 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION => hb_unicode_general_category_t::ClosePunctuation,
267 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION => hb_unicode_general_category_t::ConnectorPunctuation,
268 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL => hb_unicode_general_category_t::Control,
269 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL => hb_unicode_general_category_t::CurrencySymbol,
270 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION => hb_unicode_general_category_t::DashPunctuation,
271 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER => hb_unicode_general_category_t::DecimalNumber,
272 hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK => hb_unicode_general_category_t::EnclosingMark,
273 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION => hb_unicode_general_category_t::FinalPunctuation,
274 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT => hb_unicode_general_category_t::Format,
275 hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION => hb_unicode_general_category_t::InitialPunctuation,
276 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER => hb_unicode_general_category_t::LetterNumber,
277 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR => hb_unicode_general_category_t::LineSeparator,
278 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER => hb_unicode_general_category_t::LowercaseLetter,
279 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL => hb_unicode_general_category_t::MathSymbol,
280 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER => hb_unicode_general_category_t::ModifierLetter,
281 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL => hb_unicode_general_category_t::ModifierSymbol,
282 hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK => hb_unicode_general_category_t::NonspacingMark,
283 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION => hb_unicode_general_category_t::OpenPunctuation,
284 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER => hb_unicode_general_category_t::OtherLetter,
285 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER => hb_unicode_general_category_t::OtherNumber,
286 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION => hb_unicode_general_category_t::OtherPunctuation,
287 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL => hb_unicode_general_category_t::OtherSymbol,
288 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR => hb_unicode_general_category_t::ParagraphSeparator,
289 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE => hb_unicode_general_category_t::PrivateUse,
290 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR => hb_unicode_general_category_t::SpaceSeparator,
291 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK => hb_unicode_general_category_t::SpacingMark,
292 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE => hb_unicode_general_category_t::Surrogate,
293 hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => hb_unicode_general_category_t::TitlecaseLetter,
294 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => hb_unicode_general_category_t::Unassigned,
295 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => hb_unicode_general_category_t::UppercaseLetter,
296 _ => unreachable!(),
297 }
298 }
299
300 fn is_mark(&self) -> bool {
301 match *self {
302 hb_unicode_general_category_t::SpacingMark |
303 hb_unicode_general_category_t::EnclosingMark |
304 hb_unicode_general_category_t::NonspacingMark => true,
305 _ => false,
306 }
307 }
308
309 fn is_letter(&self) -> bool {
310 match *self {
311 hb_unicode_general_category_t::LowercaseLetter |
312 hb_unicode_general_category_t::ModifierLetter |
313 hb_unicode_general_category_t::OtherLetter |
314 hb_unicode_general_category_t::TitlecaseLetter |
315 hb_unicode_general_category_t::UppercaseLetter => true,
316 _ => false,
317 }
318 }
319}
320
/// Unicode property lookups used on individual characters.
///
/// The notes below describe the `char` implementation in this file.
pub trait CharExt {
    /// Script of the character as reported by `unicode_script`, translated to
    /// this crate's `Script` constants; scripts without an explicit mapping
    /// yield `script::UNKNOWN`.
    fn script(self) -> Script;
    /// General category as reported by `unicode_properties`.
    fn general_category(self) -> hb_unicode_general_category_t;
    /// Canonical combining class as reported by `unicode_ccc`.
    fn combining_class(self) -> CanonicalCombiningClass;
    /// Space classification code (`hb_unicode_funcs_t::*`), or `NOT_SPACE`
    /// when the character is not one of the listed spaces.
    fn space_fallback(self) -> hb_unicode_funcs_t::space_t;
    /// Combining class after the remapping in `MODIFIED_COMBINING_CLASS`
    /// and a handful of per-character overrides.
    fn modified_combining_class(self) -> u8;
    /// Mirrored counterpart as reported by `unicode_bidi_mirroring`, if any.
    fn mirrored(self) -> Option<char>;
    /// Whether the code point falls in the hard-coded list of
    /// extended-pictographic ranges.
    fn is_emoji_extended_pictographic(self) -> bool;
    /// Whether the code point falls in the hard-coded list of
    /// default-ignorable ranges.
    fn is_default_ignorable(self) -> bool;
    /// Whether the code point is in U+FE00..=U+FE0F or U+E0100..=U+E01EF.
    fn is_variation_selector(self) -> bool;
    /// Replacement character from the hard-coded vertical-form table, or
    /// `None` when the character has no entry.
    fn vertical(self) -> Option<char>;
}
333
334impl CharExt for char {
335 fn script(self) -> Script {
336 use crate::script;
337 use unicode_script as us;
338
339 match unicode_script::UnicodeScript::script(&self) {
340 us::Script::Common => script::COMMON,
341 us::Script::Inherited => script::INHERITED,
342 us::Script::Adlam => script::ADLAM,
343 us::Script::Ahom => script::AHOM,
344 us::Script::Anatolian_Hieroglyphs => script::ANATOLIAN_HIEROGLYPHS,
345 us::Script::Arabic => script::ARABIC,
346 us::Script::Armenian => script::ARMENIAN,
347 us::Script::Avestan => script::AVESTAN,
348 us::Script::Balinese => script::BALINESE,
349 us::Script::Bamum => script::BAMUM,
350 us::Script::Bassa_Vah => script::BASSA_VAH,
351 us::Script::Batak => script::BATAK,
352 us::Script::Bengali => script::BENGALI,
353 us::Script::Bhaiksuki => script::BHAIKSUKI,
354 us::Script::Bopomofo => script::BOPOMOFO,
355 us::Script::Brahmi => script::BRAHMI,
356 us::Script::Braille => script::BRAILLE,
357 us::Script::Buginese => script::BUGINESE,
358 us::Script::Buhid => script::BUHID,
359 us::Script::Canadian_Aboriginal => script::CANADIAN_SYLLABICS,
360 us::Script::Carian => script::CARIAN,
361 us::Script::Caucasian_Albanian => script::CAUCASIAN_ALBANIAN,
362 us::Script::Chakma => script::CHAKMA,
363 us::Script::Cham => script::CHAM,
364 us::Script::Cherokee => script::CHEROKEE,
365 us::Script::Chorasmian => script::CHORASMIAN,
366 us::Script::Coptic => script::COPTIC,
367 us::Script::Cuneiform => script::CUNEIFORM,
368 us::Script::Cypriot => script::CYPRIOT,
369 us::Script::Cyrillic => script::CYRILLIC,
370 us::Script::Deseret => script::DESERET,
371 us::Script::Devanagari => script::DEVANAGARI,
372 us::Script::Dives_Akuru => script::DIVES_AKURU,
373 us::Script::Dogra => script::DOGRA,
374 us::Script::Duployan => script::DUPLOYAN,
375 us::Script::Egyptian_Hieroglyphs => script::EGYPTIAN_HIEROGLYPHS,
376 us::Script::Elbasan => script::ELBASAN,
377 us::Script::Elymaic => script::ELYMAIC,
378 us::Script::Ethiopic => script::ETHIOPIC,
379 us::Script::Georgian => script::GEORGIAN,
380 us::Script::Glagolitic => script::GLAGOLITIC,
381 us::Script::Gothic => script::GOTHIC,
382 us::Script::Grantha => script::GRANTHA,
383 us::Script::Greek => script::GREEK,
384 us::Script::Gujarati => script::GUJARATI,
385 us::Script::Gunjala_Gondi => script::GUNJALA_GONDI,
386 us::Script::Gurmukhi => script::GURMUKHI,
387 us::Script::Han => script::HAN,
388 us::Script::Hangul => script::HANGUL,
389 us::Script::Hanifi_Rohingya => script::HANIFI_ROHINGYA,
390 us::Script::Hanunoo => script::HANUNOO,
391 us::Script::Hatran => script::HATRAN,
392 us::Script::Hebrew => script::HEBREW,
393 us::Script::Hiragana => script::HIRAGANA,
394 us::Script::Imperial_Aramaic => script::IMPERIAL_ARAMAIC,
395 us::Script::Inscriptional_Pahlavi => script::INSCRIPTIONAL_PAHLAVI,
396 us::Script::Inscriptional_Parthian => script::INSCRIPTIONAL_PARTHIAN,
397 us::Script::Javanese => script::JAVANESE,
398 us::Script::Kaithi => script::KAITHI,
399 us::Script::Kannada => script::KANNADA,
400 us::Script::Katakana => script::KATAKANA,
401 us::Script::Kayah_Li => script::KAYAH_LI,
402 us::Script::Kharoshthi => script::KHAROSHTHI,
403 us::Script::Khitan_Small_Script => script::KHITAN_SMALL_SCRIPT,
404 us::Script::Khmer => script::KHMER,
405 us::Script::Khojki => script::KHOJKI,
406 us::Script::Khudawadi => script::KHUDAWADI,
407 us::Script::Lao => script::LAO,
408 us::Script::Latin => script::LATIN,
409 us::Script::Lepcha => script::LEPCHA,
410 us::Script::Limbu => script::LIMBU,
411 us::Script::Linear_A => script::LINEAR_A,
412 us::Script::Linear_B => script::LINEAR_B,
413 us::Script::Lisu => script::LISU,
414 us::Script::Lycian => script::LYCIAN,
415 us::Script::Lydian => script::LYDIAN,
416 us::Script::Mahajani => script::MAHAJANI,
417 us::Script::Makasar => script::MAKASAR,
418 us::Script::Malayalam => script::MALAYALAM,
419 us::Script::Mandaic => script::MANDAIC,
420 us::Script::Manichaean => script::MANICHAEAN,
421 us::Script::Marchen => script::MARCHEN,
422 us::Script::Masaram_Gondi => script::MASARAM_GONDI,
423 us::Script::Medefaidrin => script::MEDEFAIDRIN,
424 us::Script::Meetei_Mayek => script::MEETEI_MAYEK,
425 us::Script::Mende_Kikakui => script::MENDE_KIKAKUI,
426 us::Script::Meroitic_Cursive => script::MEROITIC_CURSIVE,
427 us::Script::Meroitic_Hieroglyphs => script::MEROITIC_HIEROGLYPHS,
428 us::Script::Miao => script::MIAO,
429 us::Script::Modi => script::MODI,
430 us::Script::Mongolian => script::MONGOLIAN,
431 us::Script::Mro => script::MRO,
432 us::Script::Multani => script::MULTANI,
433 us::Script::Myanmar => script::MYANMAR,
434 us::Script::Nabataean => script::NABATAEAN,
435 us::Script::Nandinagari => script::NANDINAGARI,
436 us::Script::New_Tai_Lue => script::NEW_TAI_LUE,
437 us::Script::Newa => script::NEWA,
438 us::Script::Nko => script::NKO,
439 us::Script::Nushu => script::NUSHU,
440 us::Script::Nyiakeng_Puachue_Hmong => script::NYIAKENG_PUACHUE_HMONG,
441 us::Script::Ogham => script::OGHAM,
442 us::Script::Ol_Chiki => script::OL_CHIKI,
443 us::Script::Old_Hungarian => script::OLD_HUNGARIAN,
444 us::Script::Old_Italic => script::OLD_ITALIC,
445 us::Script::Old_North_Arabian => script::OLD_NORTH_ARABIAN,
446 us::Script::Old_Permic => script::OLD_PERMIC,
447 us::Script::Old_Persian => script::OLD_PERSIAN,
448 us::Script::Old_Sogdian => script::OLD_SOGDIAN,
449 us::Script::Old_South_Arabian => script::OLD_SOUTH_ARABIAN,
450 us::Script::Old_Turkic => script::OLD_TURKIC,
451 us::Script::Oriya => script::ORIYA,
452 us::Script::Osage => script::OSAGE,
453 us::Script::Osmanya => script::OSMANYA,
454 us::Script::Pahawh_Hmong => script::PAHAWH_HMONG,
455 us::Script::Palmyrene => script::PALMYRENE,
456 us::Script::Pau_Cin_Hau => script::PAU_CIN_HAU,
457 us::Script::Phags_Pa => script::PHAGS_PA,
458 us::Script::Phoenician => script::PHOENICIAN,
459 us::Script::Psalter_Pahlavi => script::PSALTER_PAHLAVI,
460 us::Script::Rejang => script::REJANG,
461 us::Script::Runic => script::RUNIC,
462 us::Script::Samaritan => script::SAMARITAN,
463 us::Script::Saurashtra => script::SAURASHTRA,
464 us::Script::Sharada => script::SHARADA,
465 us::Script::Shavian => script::SHAVIAN,
466 us::Script::Siddham => script::SIDDHAM,
467 us::Script::SignWriting => script::SIGNWRITING,
468 us::Script::Sinhala => script::SINHALA,
469 us::Script::Sogdian => script::SOGDIAN,
470 us::Script::Sora_Sompeng => script::SORA_SOMPENG,
471 us::Script::Soyombo => script::SOYOMBO,
472 us::Script::Sundanese => script::SUNDANESE,
473 us::Script::Syloti_Nagri => script::SYLOTI_NAGRI,
474 us::Script::Syriac => script::SYRIAC,
475 us::Script::Tagalog => script::TAGALOG,
476 us::Script::Tagbanwa => script::TAGBANWA,
477 us::Script::Tai_Le => script::TAI_LE,
478 us::Script::Tai_Tham => script::TAI_THAM,
479 us::Script::Tai_Viet => script::TAI_VIET,
480 us::Script::Takri => script::TAKRI,
481 us::Script::Tamil => script::TAMIL,
482 us::Script::Tangut => script::TANGUT,
483 us::Script::Telugu => script::TELUGU,
484 us::Script::Thaana => script::THAANA,
485 us::Script::Thai => script::THAI,
486 us::Script::Tibetan => script::TIBETAN,
487 us::Script::Tifinagh => script::TIFINAGH,
488 us::Script::Tirhuta => script::TIRHUTA,
489 us::Script::Ugaritic => script::UGARITIC,
490 us::Script::Vai => script::VAI,
491 us::Script::Wancho => script::WANCHO,
492 us::Script::Warang_Citi => script::WARANG_CITI,
493 us::Script::Yezidi => script::YEZIDI,
494 us::Script::Yi => script::YI,
495 us::Script::Zanabazar_Square => script::ZANABAZAR_SQUARE,
496 _ => script::UNKNOWN,
497 }
498 }
499
500 fn general_category(self) -> hb_unicode_general_category_t {
501 unicode_properties::general_category::UnicodeGeneralCategory::general_category(self)
502 }
503
504 fn combining_class(self) -> CanonicalCombiningClass {
505 unicode_ccc::get_canonical_combining_class(self)
506 }
507
508 fn space_fallback(self) -> hb_unicode_funcs_t::space_t {
509 use hb_unicode_funcs_t::*;
510
511 match self {
513 '\u{0020}' => SPACE, '\u{00A0}' => SPACE, '\u{2000}' => SPACE_EM_2, '\u{2001}' => SPACE_EM, '\u{2002}' => SPACE_EM_2, '\u{2003}' => SPACE_EM, '\u{2004}' => SPACE_EM_3, '\u{2005}' => SPACE_EM_4, '\u{2006}' => SPACE_EM_6, '\u{2007}' => SPACE_FIGURE, '\u{2008}' => SPACE_PUNCTUATION, '\u{2009}' => SPACE_EM_5, '\u{200A}' => SPACE_EM_16, '\u{202F}' => SPACE_NARROW, '\u{205F}' => SPACE_4_EM_18, '\u{3000}' => SPACE_EM, _ => NOT_SPACE, }
531 }
532
533 fn modified_combining_class(self) -> u8 {
534 let mut u = self;
535
536 if u == '\u{1037}' {
538 u = '\u{103A}';
539 }
540
541 if u == '\u{1A60}' {
544 return 254;
545 }
546
547 if u == '\u{0FC6}' {
550 return 254;
551 }
552
553 if u == '\u{0F39}' {
555 return 127;
556 }
557
558 let k = unicode_ccc::get_canonical_combining_class(u);
559 MODIFIED_COMBINING_CLASS[k as usize]
560 }
561
562 fn mirrored(self) -> Option<char> {
563 unicode_bidi_mirroring::get_mirrored(self)
564 }
565
566 fn is_emoji_extended_pictographic(self) -> bool {
567 match self as u32 {
569 0x00A9 => true,
570 0x00AE => true,
571 0x203C => true,
572 0x2049 => true,
573 0x2122 => true,
574 0x2139 => true,
575 0x2194..=0x2199 => true,
576 0x21A9..=0x21AA => true,
577 0x231A..=0x231B => true,
578 0x2328 => true,
579 0x2388 => true,
580 0x23CF => true,
581 0x23E9..=0x23F3 => true,
582 0x23F8..=0x23FA => true,
583 0x24C2 => true,
584 0x25AA..=0x25AB => true,
585 0x25B6 => true,
586 0x25C0 => true,
587 0x25FB..=0x25FE => true,
588 0x2600..=0x2605 => true,
589 0x2607..=0x2612 => true,
590 0x2614..=0x2685 => true,
591 0x2690..=0x2705 => true,
592 0x2708..=0x2712 => true,
593 0x2714 => true,
594 0x2716 => true,
595 0x271D => true,
596 0x2721 => true,
597 0x2728 => true,
598 0x2733..=0x2734 => true,
599 0x2744 => true,
600 0x2747 => true,
601 0x274C => true,
602 0x274E => true,
603 0x2753..=0x2755 => true,
604 0x2757 => true,
605 0x2763..=0x2767 => true,
606 0x2795..=0x2797 => true,
607 0x27A1 => true,
608 0x27B0 => true,
609 0x27BF => true,
610 0x2934..=0x2935 => true,
611 0x2B05..=0x2B07 => true,
612 0x2B1B..=0x2B1C => true,
613 0x2B50 => true,
614 0x2B55 => true,
615 0x3030 => true,
616 0x303D => true,
617 0x3297 => true,
618 0x3299 => true,
619 0x1F000..=0x1F0FF => true,
620 0x1F10D..=0x1F10F => true,
621 0x1F12F => true,
622 0x1F16C..=0x1F171 => true,
623 0x1F17E..=0x1F17F => true,
624 0x1F18E => true,
625 0x1F191..=0x1F19A => true,
626 0x1F1AD..=0x1F1E5 => true,
627 0x1F201..=0x1F20F => true,
628 0x1F21A => true,
629 0x1F22F => true,
630 0x1F232..=0x1F23A => true,
631 0x1F23C..=0x1F23F => true,
632 0x1F249..=0x1F3FA => true,
633 0x1F400..=0x1F53D => true,
634 0x1F546..=0x1F64F => true,
635 0x1F680..=0x1F6FF => true,
636 0x1F774..=0x1F77F => true,
637 0x1F7D5..=0x1F7FF => true,
638 0x1F80C..=0x1F80F => true,
639 0x1F848..=0x1F84F => true,
640 0x1F85A..=0x1F85F => true,
641 0x1F888..=0x1F88F => true,
642 0x1F8AE..=0x1F8FF => true,
643 0x1F90C..=0x1F93A => true,
644 0x1F93C..=0x1F945 => true,
645 0x1F947..=0x1FFFD => true,
646 _ => false,
647 }
648 }
649
650 fn is_default_ignorable(self) -> bool {
688 let ch = u32::from(self);
689 let plane = ch >> 16;
690 if plane == 0 {
691 let page = ch >> 8;
693 match page {
694 0x00 => ch == 0x00AD,
695 0x03 => ch == 0x034F,
696 0x06 => ch == 0x061C,
697 0x17 => (0x17B4..=0x17B5).contains(&ch),
698 0x18 => (0x180B..=0x180E).contains(&ch),
699 0x20 => {
700 (0x200B..=0x200F).contains(&ch)
701 || (0x202A..=0x202E).contains(&ch)
702 || (0x2060..=0x206F).contains(&ch)
703 }
704 0xFE => (0xFE00..=0xFE0F).contains(&ch) || ch == 0xFEFF,
705 0xFF => (0xFFF0..=0xFFF8).contains(&ch),
706 _ => false,
707 }
708 } else {
709 match plane {
711 0x01 => (0x1D173..=0x1D17A).contains(&ch),
712 0x0E => (0xE0000..=0xE0FFF).contains(&ch),
713 _ => false,
714 }
715 }
716 }
717
718 fn is_variation_selector(self) -> bool {
719 let ch = u32::from(self);
722 (0x0FE00..=0x0FE0F).contains(&ch) || (0xE0100..=0xE01EF).contains(&ch) }
725
726 fn vertical(self) -> Option<char> {
727 Some(match u32::from(self) >> 8 {
728 0x20 => match self {
729 '\u{2013}' => '\u{fe32}', '\u{2014}' => '\u{fe31}', '\u{2025}' => '\u{fe30}', '\u{2026}' => '\u{fe19}', _ => return None,
734 },
735 0x30 => match self {
736 '\u{3001}' => '\u{fe11}', '\u{3002}' => '\u{fe12}', '\u{3008}' => '\u{fe3f}', '\u{3009}' => '\u{fe40}', '\u{300a}' => '\u{fe3d}', '\u{300b}' => '\u{fe3e}', '\u{300c}' => '\u{fe41}', '\u{300d}' => '\u{fe42}', '\u{300e}' => '\u{fe43}', '\u{300f}' => '\u{fe44}', '\u{3010}' => '\u{fe3b}', '\u{3011}' => '\u{fe3c}', '\u{3014}' => '\u{fe39}', '\u{3015}' => '\u{fe3a}', '\u{3016}' => '\u{fe17}', '\u{3017}' => '\u{fe18}', _ => return None,
753 },
754 0xfe => match self {
755 '\u{fe4f}' => '\u{fe34}', _ => return None,
757 },
758 0xff => match self {
759 '\u{ff01}' => '\u{fe15}', '\u{ff08}' => '\u{fe35}', '\u{ff09}' => '\u{fe36}', '\u{ff0c}' => '\u{fe10}', '\u{ff1a}' => '\u{fe13}', '\u{ff1b}' => '\u{fe14}', '\u{ff1f}' => '\u{fe16}', '\u{ff3b}' => '\u{fe47}', '\u{ff3d}' => '\u{fe48}', '\u{ff3f}' => '\u{fe33}', '\u{ff5b}' => '\u{fe37}', '\u{ff5d}' => '\u{fe38}', _ => return None,
772 },
773 _ => return None,
774 })
775 }
776}
777
// Constants for algorithmic Hangul composition/decomposition. The names
// follow the SBase/LBase/VBase/TBase/...Count constants of the Unicode
// Standard's Hangul syllable algorithm.

/// First code point of the precomposed syllable range handled here.
const S_BASE: u32 = 0xAC00;
/// First leading-consonant code point.
const L_BASE: u32 = 0x1100;
/// First vowel code point.
const V_BASE: u32 = 0x1161;
/// Base for trailing consonants. A syllable's trailing index is relative to
/// this value, and index 0 means "no trailing consonant" (see
/// `decompose_hangul`, which only emits a trailing part when the index is
/// non-zero).
const T_BASE: u32 = 0x11A7;
/// Number of leading consonants.
const L_COUNT: u32 = 19;
/// Number of vowels.
const V_COUNT: u32 = 21;
/// Number of trailing-consonant slots, including the "none" slot.
const T_COUNT: u32 = 28;
/// Number of syllables per leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
787
788pub fn compose(a: char, b: char) -> Option<char> {
789 if let Some(ab) = compose_hangul(a, b) {
790 return Some(ab);
791 }
792
793 let needle = (a as u64) << 32 | (b as u64);
794 super::unicode_norm::COMPOSITION_TABLE
795 .binary_search_by(|item| item.0.cmp(&needle))
796 .map(|idx| super::unicode_norm::COMPOSITION_TABLE[idx].1)
797 .ok()
798}
799
800fn compose_hangul(a: char, b: char) -> Option<char> {
801 let l = u32::from(a);
802 let v = u32::from(b);
803 if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
804 let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
805 Some(char::try_from(r).unwrap())
806 } else if S_BASE <= l
807 && l <= (S_BASE + S_COUNT - T_COUNT)
808 && T_BASE <= v
809 && v < (T_BASE + T_COUNT)
810 && (l - S_BASE) % T_COUNT == 0
811 {
812 let r = l + (v - T_BASE);
813 Some(char::try_from(r).unwrap())
814 } else {
815 None
816 }
817}
818
819pub fn decompose(ab: char) -> Option<(char, char)> {
820 if let Some(ab) = decompose_hangul(ab) {
821 return Some(ab);
822 }
823
824 super::unicode_norm::DECOMPOSITION_TABLE
825 .binary_search_by(|item| item.0.cmp(&ab))
826 .map(|idx| {
827 let chars = &super::unicode_norm::DECOMPOSITION_TABLE[idx];
828 (chars.1, chars.2.unwrap_or('\0'))
829 })
830 .ok()
831}
832
833pub fn decompose_hangul(ab: char) -> Option<(char, char)> {
834 let si = u32::from(ab).wrapping_sub(S_BASE);
835 if si >= S_COUNT {
836 return None;
837 }
838
839 let (a, b) = if si % T_COUNT != 0 {
840 (S_BASE + (si / T_COUNT) * T_COUNT, T_BASE + (si % T_COUNT))
842 } else {
843 (L_BASE + (si / N_COUNT), V_BASE + (si % N_COUNT) / T_COUNT)
845 };
846
847 Some((char::try_from(a).unwrap(), char::try_from(b).unwrap()))
848}
849
#[cfg(test)]
mod tests {
    // Pins the `UNICODE_VERSION` reported by each Unicode data source this
    // module relies on, so a dependency bump that changes one of them fails
    // here and the expected tuple has to be updated deliberately.
    // NOTE(review): the sources are pinned to different Unicode versions
    // (14.0, 15.0 and 15.1) -- presumably intentional; confirm.
    #[test]
    fn check_unicode_version() {
        assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (14, 0, 0));
        assert_eq!(unicode_ccc::UNICODE_VERSION, (14, 0, 0));
        assert_eq!(unicode_properties::UNICODE_VERSION, (15, 0, 0));
        assert_eq!(unicode_script::UNICODE_VERSION, (15, 1, 0));
        assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (14, 0, 0));
    }
}
861
/// Numeric codes for Unicode general categories.
///
/// These are the values produced by `GeneralCategoryExt::to_rb` and accepted
/// by `GeneralCategoryExt::from_rb`. They form a dense range `0..=29`.
pub mod hb_gc {
    // Other (control, format, unassigned, private use, surrogate)
    pub const RB_UNICODE_GENERAL_CATEGORY_CONTROL: u32 = 0;
    pub const RB_UNICODE_GENERAL_CATEGORY_FORMAT: u32 = 1;
    pub const RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED: u32 = 2;
    pub const RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE: u32 = 3;
    pub const RB_UNICODE_GENERAL_CATEGORY_SURROGATE: u32 = 4;
    // Letters
    pub const RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER: u32 = 5;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER: u32 = 6;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER: u32 = 7;
    pub const RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER: u32 = 8;
    pub const RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER: u32 = 9;
    // Marks
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK: u32 = 10;
    pub const RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK: u32 = 11;
    pub const RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK: u32 = 12;
    // Numbers
    pub const RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER: u32 = 13;
    pub const RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER: u32 = 14;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER: u32 = 15;
    // Punctuation
    pub const RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION: u32 = 16;
    pub const RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION: u32 = 17;
    pub const RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION: u32 = 18;
    pub const RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION: u32 = 19;
    pub const RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION: u32 = 20;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION: u32 = 21;
    pub const RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION: u32 = 22;
    // Symbols
    pub const RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL: u32 = 23;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL: u32 = 24;
    pub const RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL: u32 = 25;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL: u32 = 26;
    // Separators
    pub const RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR: u32 = 27;
    pub const RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR: u32 = 28;
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR: u32 = 29;
}