swash/text/
compose.rs

1use super::unicode_data::{
2    compose_index, decompose_compat_index, decompose_index, COMPOSE0, COMPOSE1, COMPOSE1_COUNT,
3    DECOMPOSE, DECOMPOSE_COMPAT,
4};
5use core::char::from_u32_unchecked;
6
7/// Decomposition of a character.
8#[derive(Copy, Clone)]
9pub struct Decompose {
10    inner: DecomposeInner,
11    len: u8,
12    cur: u8,
13}
14
15impl Decompose {
16    /// Returns the sequence of characters that represent the
17    /// decomposition.
18    pub fn chars(&self) -> &[char] {
19        match self.inner {
20            DecomposeInner::Slice(chars) => chars,
21            DecomposeInner::Array(ref chars, len) => &chars[..len as usize],
22        }
23    }
24}
25
26impl Iterator for Decompose {
27    type Item = char;
28
29    fn next(&mut self) -> Option<Self::Item> {
30        if self.cur >= self.len {
31            return None;
32        }
33        let item = self.chars()[self.cur as usize];
34        self.cur += 1;
35        Some(item)
36    }
37}
38
/// Backing storage for a [`Decompose`].
#[derive(Clone, Copy)]
enum DecomposeInner {
    /// Characters borrowed from static data.
    Slice(&'static [char]),
    /// Inline buffer of up to three characters, paired with the number of
    /// leading entries that are actually in use.
    Array([char; 3], u32),
}
44
45impl DecomposeInner {
46    fn len(&self) -> u8 {
47        match self {
48            Self::Slice(s) => s.len() as u8,
49            Self::Array(_, len) => *len as u8,
50        }
51    }
52}
53
54impl From<DecomposeInner> for Decompose {
55    fn from(inner: DecomposeInner) -> Self {
56        Self {
57            inner,
58            len: inner.len(),
59            cur: 0,
60        }
61    }
62}
63
64pub fn compose_pair(a: char, b: char) -> Option<char> {
65    if let Some(c) = compose_hangul(a, b) {
66        return Some(c);
67    }
68    let l = pair_index(a as u32, &COMPOSE0[..])?;
69    let r = pair_index(b as u32, &COMPOSE1[..])?;
70    let c = compose_index(l * COMPOSE1_COUNT + r) as u32;
71    if c != 0 {
72        return Some(unsafe { core::char::from_u32_unchecked(c) });
73    }
74    None
75}
76
/// Maps a code point to an index using a table of `(start, extent, base)`
/// ranges.
///
/// Entries are examined in order. The scan gives up, returning `None`, at the
/// first entry whose `start` is zero or greater than `c`, so the table is
/// expected to be sorted by ascending `start` (zeroed entries act as an
/// end marker). A hit on the inclusive range `start..=start + extent` yields
/// `base + (c - start)`.
fn pair_index(c: u32, table: &[(u32, u16, u16)]) -> Option<usize> {
    let code = c as usize;
    for &(start, extent, base) in table {
        let start = start as usize;
        if start == 0 || code < start {
            break;
        }
        let offset = code - start;
        if offset <= extent as usize {
            return Some(base as usize + offset);
        }
    }
    None
}
91
// Parameters of the arithmetic Hangul syllable composition/decomposition
// used by the functions below.

/// Code point of the first precomposed syllable.
const SBASE: u32 = 0xAC00;
/// Code point of the first leading consonant.
const LBASE: u32 = 0x1100;
/// Code point of the first vowel.
const VBASE: u32 = 0x1161;
/// Code point just below the first trailing consonant; a trailing index of
/// zero stands for "no trailing consonant".
const TBASE: u32 = 0x11A7;

/// Number of leading consonants.
const LCOUNT: u32 = 19;
/// Number of vowels.
const VCOUNT: u32 = 21;
/// Number of trailing slots, including the empty one.
const TCOUNT: u32 = 28;
/// Number of syllables that share one leading consonant.
const NCOUNT: u32 = VCOUNT * TCOUNT;
/// Total number of precomposed syllables.
const SCOUNT: u32 = LCOUNT * NCOUNT;
101
102fn is_hangul(c: char) -> bool {
103    let c = c as u32;
104    (SBASE..(SBASE + SCOUNT)).contains(&c)
105}
106
107fn compose_hangul(a: char, b: char) -> Option<char> {
108    let a = a as u32;
109    let b = b as u32;
110    if !(VBASE..(TBASE + TCOUNT)).contains(&b) {
111        return None;
112    }
113    if !(LBASE..(LBASE + LCOUNT)).contains(&a) && !(SBASE..(SBASE + SCOUNT)).contains(&a) {
114        return None;
115    }
116    if a >= SBASE {
117        if (a - SBASE) % TCOUNT == 0 {
118            Some(unsafe { from_u32_unchecked(a + (b - TBASE)) })
119        } else {
120            None
121        }
122    } else {
123        let li = a - LBASE;
124        let vi = b - VBASE;
125        Some(unsafe { from_u32_unchecked(SBASE + li * NCOUNT + vi * TCOUNT) })
126    }
127}
128
129fn decompose_hangul(c: char) -> DecomposeInner {
130    let si = c as u32 - SBASE;
131    let li = si / NCOUNT;
132    let mut chars = [' '; 3];
133    let mut len = 2;
134    unsafe {
135        chars[0] = from_u32_unchecked(LBASE + li);
136        let vi = (si % NCOUNT) / TCOUNT;
137        chars[1] = from_u32_unchecked(VBASE + vi);
138        let ti = si % TCOUNT;
139        if ti > 0 {
140            chars[2] = from_u32_unchecked(TBASE + ti);
141            len += 1;
142        }
143    }
144    DecomposeInner::Array(chars, len)
145}
146
147pub fn decompose(c: char) -> Decompose {
148    if c <= '\x7F' {
149        DecomposeInner::Array([c, ' ', ' '], 1).into()
150    } else if is_hangul(c) {
151        decompose_hangul(c).into()
152    } else {
153        let index = decompose_index(c as usize);
154        if index == 0 {
155            DecomposeInner::Array([c, ' ', ' '], 1).into()
156        } else {
157            let buf = &DECOMPOSE[index..];
158            let end = 1 + buf[0] as usize;
159            DecomposeInner::Slice(unsafe { &*(&buf[1..end] as *const [u32] as *const [char]) })
160                .into()
161        }
162    }
163}
164
165pub fn decompose_compat(c: char) -> Decompose {
166    if c <= '\x7F' {
167        DecomposeInner::Array([c, ' ', ' '], 1).into()
168    } else if is_hangul(c) {
169        decompose_hangul(c).into()
170    } else {
171        let index = decompose_compat_index(c as usize);
172        if index == 0 {
173            DecomposeInner::Array([c, ' ', ' '], 1).into()
174        } else if index == 1 {
175            let index = decompose_index(c as usize);
176            let buf = &DECOMPOSE[index..];
177            let end = 1 + buf[0] as usize;
178            DecomposeInner::Slice(unsafe { &*(&buf[1..end] as *const [u32] as *const [char]) })
179                .into()
180        } else {
181            let buf = &DECOMPOSE_COMPAT[index..];
182            let end = 1 + buf[0] as usize;
183            DecomposeInner::Slice(unsafe { &*(&buf[1..end] as *const [u32] as *const [char]) })
184                .into()
185        }
186    }
187}