swash/text/
compose.rs
1use super::unicode_data::{
2 compose_index, decompose_compat_index, decompose_index, COMPOSE0, COMPOSE1, COMPOSE1_COUNT,
3 DECOMPOSE, DECOMPOSE_COMPAT,
4};
5use core::char::from_u32_unchecked;
6
/// Iterator over the characters of a decomposition.
///
/// The full sequence is also available at any time through
/// [`Decompose::chars`], independent of how far iteration has advanced.
#[derive(Copy, Clone)]
pub struct Decompose {
    /// Backing storage for the decomposed characters.
    inner: DecomposeInner,
    /// Total number of characters, cached from `inner` at construction.
    len: u8,
    /// Index of the next character `next` will yield.
    cur: u8,
}

impl Decompose {
    /// Returns the complete decomposition as a slice.
    ///
    /// The slice always covers every character; it is not shortened as the
    /// iterator is consumed.
    pub fn chars(&self) -> &[char] {
        match self.inner {
            DecomposeInner::Slice(chars) => chars,
            // Only the first `len` slots of the inline array are meaningful.
            DecomposeInner::Array(ref chars, len) => &chars[..len as usize],
        }
    }
}

impl Iterator for Decompose {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        if self.cur >= self.len {
            return None;
        }
        let item = self.chars()[usize::from(self.cur)];
        self.cur += 1;
        Some(item)
    }

    /// Reports the exact number of characters still to be yielded, so that
    /// consumers such as `collect` can size their buffers up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = usize::from(self.len.saturating_sub(self.cur));
        (remaining, Some(remaining))
    }
}

/// Storage for a decomposition: either a borrowed static slice or a small
/// inline array.
#[derive(Copy, Clone)]
enum DecomposeInner {
    /// Characters borrowed from static data.
    Slice(&'static [char]),
    /// Up to three inline characters plus the count of slots in use.
    Array([char; 3], u32),
}

impl DecomposeInner {
    /// Number of characters held, narrowed to `u8`.
    fn len(&self) -> u8 {
        // NOTE(review): both casts truncate. The inline count is at most 3;
        // slices are assumed to be far shorter than 256 entries -- confirm
        // against the generated tables.
        match self {
            Self::Slice(s) => s.len() as u8,
            Self::Array(_, len) => *len as u8,
        }
    }
}

impl From<DecomposeInner> for Decompose {
    /// Wraps the storage in an iterator positioned at the first character.
    fn from(inner: DecomposeInner) -> Self {
        Self {
            inner,
            len: inner.len(),
            cur: 0,
        }
    }
}
63
64pub fn compose_pair(a: char, b: char) -> Option<char> {
65 if let Some(c) = compose_hangul(a, b) {
66 return Some(c);
67 }
68 let l = pair_index(a as u32, &COMPOSE0[..])?;
69 let r = pair_index(b as u32, &COMPOSE1[..])?;
70 let c = compose_index(l * COMPOSE1_COUNT + r) as u32;
71 if c != 0 {
72 return Some(unsafe { core::char::from_u32_unchecked(c) });
73 }
74 None
75}
76
/// Maps code point `c` to an index using a table of ranges.
///
/// Each entry is `(start, extent, base)`: it covers the code points
/// `start..=start + extent` and maps `start` to `base`, with following code
/// points mapped to consecutive indices. Scanning stops with `None` at the
/// first entry whose `start` is zero or greater than `c`, so the table is
/// expected to be ordered by ascending `start`.
fn pair_index(c: u32, table: &[(u32, u16, u16)]) -> Option<usize> {
    let code = c as usize;
    table
        .iter()
        .map(|&(first, extent, base)| (first as usize, extent as usize, base as usize))
        // A zero start or a start beyond `code` ends the search.
        .take_while(|&(first, _, _)| first != 0 && code >= first)
        // The range end is inclusive.
        .find(|&(first, extent, _)| code <= first + extent)
        .map(|(first, _, base)| base + (code - first))
}
91
// Parameters of the Hangul syllable composition arithmetic.

/// First leading-consonant jamo.
const LBASE: u32 = 0x1100;
/// First vowel jamo.
const VBASE: u32 = 0x1161;
/// One below the first trailing-consonant jamo, so that a trailing index of
/// zero means "no trailing consonant".
const TBASE: u32 = 0x11A7;
/// Number of leading consonants.
const LCOUNT: u32 = 19;
/// Number of vowels.
const VCOUNT: u32 = 21;
/// Number of trailing-consonant indices, including the "none" index 0.
const TCOUNT: u32 = 28;
/// First precomposed Hangul syllable.
const SBASE: u32 = 0xAC00;
/// Number of syllables sharing one leading consonant.
const NCOUNT: u32 = VCOUNT * TCOUNT;
/// Total number of precomposed syllables.
const SCOUNT: u32 = LCOUNT * NCOUNT;

/// Returns `true` if `c` lies in the precomposed Hangul syllable range
/// `SBASE..SBASE + SCOUNT`.
fn is_hangul(c: char) -> bool {
    let c = c as u32;
    (SBASE..(SBASE + SCOUNT)).contains(&c)
}

/// Composes a Hangul pair arithmetically.
///
/// Two combinations are recognised:
///
/// * leading consonant + vowel -> LV syllable
/// * LV syllable (no trailing consonant) + trailing consonant -> LVT syllable
///
/// Any other pair yields `None`. The second character is validated against
/// the range that matches the first: a vowel after a leading consonant, a
/// real trailing consonant (`TBASE + 1..TBASE + TCOUNT`) after an LV
/// syllable. Without that, `b - TBASE` could underflow and out-of-range
/// offsets could produce a wrong syllable or a surrogate code point.
fn compose_hangul(a: char, b: char) -> Option<char> {
    let a = a as u32;
    let b = b as u32;
    let composed = if (LBASE..(LBASE + LCOUNT)).contains(&a) {
        // L + V -> LV
        if !(VBASE..(VBASE + VCOUNT)).contains(&b) {
            return None;
        }
        SBASE + (a - LBASE) * NCOUNT + (b - VBASE) * TCOUNT
    } else if (SBASE..(SBASE + SCOUNT)).contains(&a) {
        // LV + T -> LVT; a syllable that already has a trailing consonant
        // cannot take another one.
        let is_lv = (a - SBASE) % TCOUNT == 0;
        if !is_lv || !((TBASE + 1)..(TBASE + TCOUNT)).contains(&b) {
            return None;
        }
        a + (b - TBASE)
    } else {
        return None;
    };
    // Both branches stay inside the syllable block, so the checked
    // conversion always succeeds and no `unsafe` is required.
    core::char::from_u32(composed)
}
128
129fn decompose_hangul(c: char) -> DecomposeInner {
130 let si = c as u32 - SBASE;
131 let li = si / NCOUNT;
132 let mut chars = [' '; 3];
133 let mut len = 2;
134 unsafe {
135 chars[0] = from_u32_unchecked(LBASE + li);
136 let vi = (si % NCOUNT) / TCOUNT;
137 chars[1] = from_u32_unchecked(VBASE + vi);
138 let ti = si % TCOUNT;
139 if ti > 0 {
140 chars[2] = from_u32_unchecked(TBASE + ti);
141 len += 1;
142 }
143 }
144 DecomposeInner::Array(chars, len)
145}
146
147pub fn decompose(c: char) -> Decompose {
148 if c <= '\x7F' {
149 DecomposeInner::Array([c, ' ', ' '], 1).into()
150 } else if is_hangul(c) {
151 decompose_hangul(c).into()
152 } else {
153 let index = decompose_index(c as usize);
154 if index == 0 {
155 DecomposeInner::Array([c, ' ', ' '], 1).into()
156 } else {
157 let buf = &DECOMPOSE[index..];
158 let end = 1 + buf[0] as usize;
159 DecomposeInner::Slice(unsafe { &*(&buf[1..end] as *const [u32] as *const [char]) })
160 .into()
161 }
162 }
163}
164
165pub fn decompose_compat(c: char) -> Decompose {
166 if c <= '\x7F' {
167 DecomposeInner::Array([c, ' ', ' '], 1).into()
168 } else if is_hangul(c) {
169 decompose_hangul(c).into()
170 } else {
171 let index = decompose_compat_index(c as usize);
172 if index == 0 {
173 DecomposeInner::Array([c, ' ', ' '], 1).into()
174 } else if index == 1 {
175 let index = decompose_index(c as usize);
176 let buf = &DECOMPOSE[index..];
177 let end = 1 + buf[0] as usize;
178 DecomposeInner::Slice(unsafe { &*(&buf[1..end] as *const [u32] as *const [char]) })
179 .into()
180 } else {
181 let buf = &DECOMPOSE_COMPAT[index..];
182 let end = 1 + buf[0] as usize;
183 DecomposeInner::Slice(unsafe { &*(&buf[1..end] as *const [u32] as *const [char]) })
184 .into()
185 }
186 }
187}