swash/text/
unicode.rs
1pub use super::compose::Decompose;
6#[doc(inline)]
7pub use super::unicode_data::{
8 BidiClass, Block, Category, ClusterBreak, JoiningType, LineBreak, Script, WordBreak,
9 UNICODE_VERSION,
10};
11
12use super::compose::{compose_pair, decompose, decompose_compat};
13use super::unicode_data::{
14 get_record_index, MyanmarClass, Record, UseClass, BRACKETS, MIRRORS, RECORDS, SCRIPTS_BY_TAG,
15 SCRIPT_COMPLEXITY, SCRIPT_NAMES, SCRIPT_TAGS,
16};
17use crate::Tag;
18
19use core::char::from_u32_unchecked;
20
/// Bit position at which the boundary value starts inside a packed
/// [`Properties`] word; everything below it is the record index.
const BOUNDARY_SHIFT: u16 = 13;
/// Mask selecting the low `BOUNDARY_SHIFT` bits (the record index) of a
/// packed [`Properties`] word. Evaluates to `0x1FFF`.
const RECORD_MASK: u16 = (1 << BOUNDARY_SHIFT) - 1;
23
24#[derive(Copy, Clone, PartialEq, Eq, Default)]
26pub struct Properties(u16);
27
28impl Properties {
29 fn new(ch: u32) -> Self {
30 Self(get_record_index(ch as usize) as u16)
31 }
32
33 pub fn category(self) -> Category {
35 self.record().category
36 }
37
38 pub fn block(self) -> Block {
40 self.record().block
41 }
42
43 pub fn script(self) -> Script {
45 self.record().script
46 }
47
48 pub fn combining_class(self) -> u8 {
50 self.record().combining_class
51 }
52
53 pub fn bidi_class(self) -> BidiClass {
55 self.record().bidi_class
56 }
57
58 pub fn joining_type(self) -> JoiningType {
60 self.record().joining_type
61 }
62
63 pub fn cluster_break(self) -> ClusterBreak {
65 self.record().cluster_break
66 }
67
68 pub fn word_break(self) -> WordBreak {
70 self.record().word_break
71 }
72
73 pub fn line_break(self) -> LineBreak {
75 self.record().line_break
76 }
77
78 pub fn is_emoji(self) -> bool {
80 self.record().flags.is_emoji()
81 }
82
83 pub fn is_extended_pictographic(self) -> bool {
85 self.record().flags.is_extended_pictographic()
86 }
87
88 pub fn is_open_bracket(self) -> bool {
90 self.record().flags.is_open_bracket()
91 }
92
93 pub fn is_close_bracket(self) -> bool {
95 self.record().flags.is_close_bracket()
96 }
97
98 pub(crate) fn is_ignorable(self) -> bool {
99 self.record().flags.is_ignorable()
100 }
101
102 pub(crate) fn is_variation_selector(self) -> bool {
103 self.record().flags.is_variation_selector()
104 }
105
106 pub(crate) fn contributes_to_shaping(self) -> bool {
107 self.record().flags.contributes_to_shaping()
108 }
109
110 pub(crate) fn with_boundary(mut self, b: u16) -> Self {
111 self.set_boundary(b);
112 self
113 }
114
115 pub(crate) fn boundary(self) -> u16 {
116 self.0 >> BOUNDARY_SHIFT
117 }
118
119 pub(crate) fn set_boundary(&mut self, boundary: u16) {
120 self.0 = (self.0 & RECORD_MASK) | (boundary & 0b11) << BOUNDARY_SHIFT;
121 }
122
123 pub(crate) fn use_class(self) -> (UseClass, bool, bool) {
124 let r = self.record();
125 (
126 r.use_class,
127 r.flags.needs_decomp(),
128 r.flags.is_extended_pictographic(),
129 )
130 }
131
132 pub(crate) fn myanmar_class(self) -> (MyanmarClass, bool) {
133 let r = self.record();
134 (r.myanmar_class, r.flags.is_extended_pictographic())
135 }
136
137 pub(crate) fn cluster_class(self) -> (ClusterBreak, bool) {
138 let r = self.record();
139 (r.cluster_break, r.flags.is_extended_pictographic())
140 }
141
142 #[inline(always)]
143 fn record(self) -> &'static Record {
144 unsafe { RECORDS.get_unchecked((self.0 & RECORD_MASK) as usize) }
147 }
148}
149
150impl From<char> for Properties {
151 fn from(ch: char) -> Self {
152 Self::new(ch as u32)
153 }
154}
155
156impl From<&'_ char> for Properties {
157 fn from(ch: &'_ char) -> Self {
158 Self::new(*ch as u32)
159 }
160}
161
162impl From<u32> for Properties {
163 fn from(ch: u32) -> Self {
164 Self::new(ch)
165 }
166}
167
168impl From<&'_ u32> for Properties {
169 fn from(ch: &'_ u32) -> Self {
170 Self::new(*ch)
171 }
172}
173
174pub trait Codepoint: Sized + Copy {
176 fn properties(self) -> Properties;
178
179 fn category(self) -> Category {
181 self.properties().category()
182 }
183
184 fn block(self) -> Block {
186 self.properties().block()
187 }
188
189 fn script(self) -> Script {
191 self.properties().script()
192 }
193
194 fn combining_class(self) -> u8 {
196 self.properties().combining_class()
197 }
198
199 fn bidi_class(self) -> BidiClass {
201 self.properties().bidi_class()
202 }
203
204 fn joining_type(self) -> JoiningType {
206 self.properties().joining_type()
207 }
208
209 fn cluster_break(self) -> ClusterBreak {
211 self.properties().cluster_break()
212 }
213
214 fn word_break(self) -> WordBreak {
216 self.properties().word_break()
217 }
218
219 fn line_break(self) -> LineBreak {
221 self.properties().line_break()
222 }
223
224 fn is_emoji(self) -> bool {
226 self.properties().is_emoji()
227 }
228
229 fn is_extended_pictographic(self) -> bool {
231 self.properties().is_extended_pictographic()
232 }
233
234 fn bracket_type(self) -> BracketType;
236
237 fn opening_bracket(self) -> Option<char>;
240
241 fn closing_bracket(self) -> Option<char>;
244
245 fn mirror(self) -> Option<char>;
247
248 fn compose(a: char, b: char) -> Option<char>;
250
251 fn decompose(self) -> Decompose;
253
254 fn decompose_compatible(self) -> Decompose;
256}
257
258impl Codepoint for char {
259 fn properties(self) -> Properties {
260 Properties::from(self)
261 }
262
263 fn bracket_type(self) -> BracketType {
264 match self.closing_bracket() {
265 Some(other) => BracketType::Open(other),
266 _ => match self.opening_bracket() {
267 Some(other) => BracketType::Close(other),
268 _ => BracketType::None,
269 },
270 }
271 }
272
273 fn opening_bracket(self) -> Option<char> {
274 let c = self as u32;
275 if let Ok(idx) = BRACKETS.binary_search_by(|x| (x.1 as u32).cmp(&c)) {
276 return Some(unsafe { from_u32_unchecked(BRACKETS[idx].0 as u32) });
277 }
278 None
279 }
280
281 fn closing_bracket(self) -> Option<char> {
282 let c = self as u32;
283 if let Ok(idx) = BRACKETS.binary_search_by(|x| (x.0 as u32).cmp(&c)) {
284 return Some(unsafe { from_u32_unchecked(BRACKETS[idx].1 as u32) });
285 }
286 None
287 }
288
289 fn mirror(self) -> Option<char> {
290 let c = self as u32;
291 if let Ok(idx) = MIRRORS.binary_search_by(|x| (x.0 as u32).cmp(&c)) {
292 return Some(unsafe { from_u32_unchecked(MIRRORS[idx].1 as u32) });
293 }
294 None
295 }
296
297 fn compose(a: char, b: char) -> Option<char> {
298 compose_pair(a, b)
299 }
300
301 fn decompose(self) -> Decompose {
302 decompose(self)
303 }
304
305 fn decompose_compatible(self) -> Decompose {
306 decompose_compat(self)
307 }
308}
309
/// Result of classifying a character as a bracket.
///
/// The payload of `Open` / `Close` is the character's partner bracket.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum BracketType {
    /// The character is not a bracket.
    None,
    /// The character is an opening bracket; holds its closing partner.
    Open(char),
    /// The character is a closing bracket; holds its opening partner.
    Close(char),
}
320
321impl Script {
322 pub fn from_opentype(tag: Tag) -> Option<Self> {
325 match SCRIPTS_BY_TAG.binary_search_by(|x| x.0.cmp(&tag)) {
326 Ok(index) => Some(SCRIPTS_BY_TAG[index].1),
327 _ => None,
328 }
329 }
330
331 pub fn name(self) -> &'static str {
333 SCRIPT_NAMES[self as usize]
334 }
335
336 pub fn is_complex(self) -> bool {
338 SCRIPT_COMPLEXITY[self as usize]
339 }
340
341 pub fn is_joined(self) -> bool {
343 matches!(
344 self,
345 Script::Arabic
346 | Script::Mongolian
347 | Script::Syriac
348 | Script::Nko
349 | Script::PhagsPa
350 | Script::Mandaic
351 | Script::Manichaean
352 | Script::PsalterPahlavi
353 | Script::Adlam
354 )
355 }
356
357 pub fn to_opentype(self) -> Tag {
359 SCRIPT_TAGS[self as usize]
360 }
361}
362
363impl WordBreak {
364 pub(crate) const fn mask(self) -> u32 {
365 1 << (self as u32)
366 }
367}
368
369impl BidiClass {
370 pub const fn mask(self) -> u32 {
372 1 << (self as u32)
373 }
374
375 pub fn needs_resolution(self) -> bool {
378 use BidiClass::*;
379 const OVERRIDE_MASK: u32 = RLE.mask() | LRE.mask() | RLO.mask() | LRO.mask();
380 const ISOLATE_MASK: u32 = RLI.mask() | LRI.mask() | FSI.mask();
381 const EXPLICIT_MASK: u32 = OVERRIDE_MASK | ISOLATE_MASK;
382 const BIDI_MASK: u32 = EXPLICIT_MASK | R.mask() | AL.mask() | AN.mask();
383 self.mask() & BIDI_MASK != 0
384 }
385}