// rustybuzz/hb/ot_shape_complex_hangul.rs

1use alloc::boxed::Box;
2
3use super::buffer::*;
4use super::ot_map::*;
5use super::ot_shape::*;
6use super::ot_shape_complex::*;
7use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_NONE;
8use super::ot_shape_plan::hb_ot_shape_plan_t;
9use super::*;
10use crate::BufferFlags;
11
// Per-glyph jamo classes recorded by `preprocess_text_hangul`. Each value is
// also the index into `hangul_shape_plan_t::mask_array`; 0 (the default)
// means "no jamo feature".

/// Leading consonant: selects the `ljmo` feature mask.
const LJMO: u8 = 1;
/// Vowel: selects the `vjmo` feature mask.
const VJMO: u8 = 2;
/// Trailing consonant: selects the `tjmo` feature mask.
const TJMO: u8 = 3;
15
16impl hb_glyph_info_t {
17    fn hangul_shaping_feature(&self) -> u8 {
18        self.complex_var_u8_auxiliary()
19    }
20
21    fn set_hangul_shaping_feature(&mut self, feature: u8) {
22        self.set_complex_var_u8_auxiliary(feature)
23    }
24}
25
26fn collect_features_hangul(planner: &mut hb_ot_shape_planner_t) {
27    planner
28        .ot_map
29        .add_feature(hb_tag_t::from_bytes(b"ljmo"), F_NONE, 1);
30    planner
31        .ot_map
32        .add_feature(hb_tag_t::from_bytes(b"vjmo"), F_NONE, 1);
33    planner
34        .ot_map
35        .add_feature(hb_tag_t::from_bytes(b"tjmo"), F_NONE, 1);
36}
37
38fn override_features_hangul(planner: &mut hb_ot_shape_planner_t) {
39    // Uniscribe does not apply 'calt' for Hangul, and certain fonts
40    // (Noto Sans CJK, Source Sans Han, etc) apply all of jamo lookups
41    // in calt, which is not desirable.
42    planner
43        .ot_map
44        .disable_feature(hb_tag_t::from_bytes(b"calt"));
45}
46
47struct hangul_shape_plan_t {
48    mask_array: [hb_mask_t; 4],
49}
50
51fn data_create_hangul(map: &hb_ot_map_t) -> hangul_shape_plan_t {
52    hangul_shape_plan_t {
53        mask_array: [
54            0,
55            map.get_1_mask(hb_tag_t::from_bytes(b"ljmo")),
56            map.get_1_mask(hb_tag_t::from_bytes(b"vjmo")),
57            map.get_1_mask(hb_tag_t::from_bytes(b"tjmo")),
58        ],
59    }
60}
61
// Constants for the arithmetic mapping between conjoining jamo and
// precomposed Hangul syllables:
//
//   S = S_BASE + (L - L_BASE) * N_COUNT + (V - V_BASE) * T_COUNT + (T - T_BASE)

/// First leading consonant that takes part in composition.
const L_BASE: u32 = 0x1100;
/// First vowel that takes part in composition.
const V_BASE: u32 = 0x1161;
/// One *before* the first composable trailing consonant, so that a trailing
/// index of 0 means "no trailing consonant".
const T_BASE: u32 = 0x11A7;
/// Number of composable leading consonants.
const L_COUNT: u32 = 19;
/// Number of composable vowels.
const V_COUNT: u32 = 21;
/// Number of trailing-consonant slots, including the "none" slot at index 0.
const T_COUNT: u32 = 28;
/// Number of precomposed syllables per leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
/// First precomposed syllable.
const S_BASE: u32 = 0xAC00;
71
72fn is_combining_l(u: u32) -> bool {
73    (L_BASE..=L_BASE + L_COUNT - 1).contains(&u)
74}
75
76fn is_combining_v(u: u32) -> bool {
77    (V_BASE..=V_BASE + V_COUNT - 1).contains(&u)
78}
79
80fn is_combining_t(u: u32) -> bool {
81    (T_BASE + 1..=T_BASE + T_COUNT - 1).contains(&u)
82}
83
84fn is_combined_s(u: u32) -> bool {
85    (S_BASE..=S_BASE + S_COUNT - 1).contains(&u)
86}
87
/// Returns `true` if `u` is any leading-consonant jamo, composable or not:
/// U+1100..=U+115F or U+A960..=U+A97C.
fn is_l(u: u32) -> bool {
    matches!(u, 0x1100..=0x115F | 0xA960..=0xA97C)
}
91
/// Returns `true` if `u` is any vowel jamo, composable or not:
/// U+1160..=U+11A7 or U+D7B0..=U+D7C6.
fn is_v(u: u32) -> bool {
    matches!(u, 0x1160..=0x11A7 | 0xD7B0..=0xD7C6)
}
95
/// Returns `true` if `u` is any trailing-consonant jamo, composable or not:
/// U+11A8..=U+11FF or U+D7CB..=U+D7FB.
fn is_t(u: u32) -> bool {
    matches!(u, 0x11A8..=0x11FF | 0xD7CB..=0xD7FB)
}
99
/// Returns `true` if `u` is one of the two Hangul tone marks, U+302E or U+302F.
fn is_hangul_tone(u: u32) -> bool {
    matches!(u, 0x302E | 0x302F)
}
103
104fn is_zero_width_char(face: &hb_font_t, c: char) -> bool {
105    if let Some(glyph) = face.get_nominal_glyph(c as u32) {
106        face.glyph_h_advance(glyph) == 0
107    } else {
108        false
109    }
110}
111
112fn preprocess_text_hangul(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
113    // Hangul syllables come in two shapes: LV, and LVT.  Of those:
114    //
115    //   - LV can be precomposed, or decomposed.  Lets call those
116    //     <LV> and <L,V>,
117    //   - LVT can be fully precomposed, partially precomposed, or
118    //     fully decomposed.  Ie. <LVT>, <LV,T>, or <L,V,T>.
119    //
120    // The composition / decomposition is mechanical.  However, not
121    // all <L,V> sequences compose, and not all <LV,T> sequences
122    // compose.
123    //
124    // Here are the specifics:
125    //
126    //   - <L>: U+1100..115F, U+A960..A97F
127    //   - <V>: U+1160..11A7, U+D7B0..D7C7
128    //   - <T>: U+11A8..11FF, U+D7CB..D7FB
129    //
130    //   - Only the <L,V> sequences for some of the U+11xx ranges combine.
131    //   - Only <LV,T> sequences for some of the Ts in U+11xx range combine.
132    //
133    // Here is what we want to accomplish in this shaper:
134    //
135    //   - If the whole syllable can be precomposed, do that,
136    //   - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features.
137    //   - If a valid syllable is followed by a Hangul tone mark, reorder the tone
138    //     mark to precede the whole syllable - unless it is a zero-width glyph, in
139    //     which case we leave it untouched, assuming it's designed to overstrike.
140    //
141    // That is, of the different possible syllables:
142    //
143    //   <L>
144    //   <L,V>
145    //   <L,V,T>
146    //   <LV>
147    //   <LVT>
148    //   <LV, T>
149    //
150    // - <L> needs no work.
151    //
152    // - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we
153    //   should fully decompose them if font supports.
154    //
155    // - <L,V> and <L,V,T> we should compose if the whole thing can be composed.
156    //
157    // - <LV,T> we should compose if the whole thing can be composed, otherwise we should
158    //   decompose.
159
160    buffer.clear_output();
161    // Extent of most recently seen syllable; valid only if start < end
162    let mut start = 0;
163    let mut end = 0;
164    buffer.idx = 0;
165    while buffer.idx < buffer.len {
166        let u = buffer.cur(0).glyph_id;
167        let c = buffer.cur(0).as_char();
168
169        if is_hangul_tone(u) {
170            // We could cache the width of the tone marks and the existence of dotted-circle,
171            // but the use of the Hangul tone mark characters seems to be rare enough that
172            // I didn't bother for now.
173            if start < end && end == buffer.out_len {
174                // Tone mark follows a valid syllable; move it in front, unless it's zero width.
175                buffer.unsafe_to_break_from_outbuffer(Some(start), Some(buffer.idx));
176                buffer.next_glyph();
177                if !is_zero_width_char(face, c) {
178                    buffer.merge_out_clusters(start, end + 1);
179                    let out_info = buffer.out_info_mut();
180                    let tone = out_info[end];
181                    for i in (0..end - start).rev() {
182                        out_info[i + start + 1] = out_info[i + start];
183                    }
184                    out_info[start] = tone;
185                }
186            } else {
187                // No valid syllable as base for tone mark; try to insert dotted circle.
188                if !buffer
189                    .flags
190                    .contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE)
191                    && face.has_glyph(0x25CC)
192                {
193                    let mut chars = [0; 2];
194                    if !is_zero_width_char(face, c) {
195                        chars[0] = u;
196                        chars[1] = 0x25CC;
197                    } else {
198                        chars[0] = 0x25CC;
199                        chars[1] = u;
200                    }
201
202                    buffer.replace_glyphs(1, 2, &chars);
203                } else {
204                    // No dotted circle available in the font; just leave tone mark untouched.
205                    buffer.next_glyph();
206                }
207            }
208
209            start = buffer.out_len;
210            end = buffer.out_len;
211            continue;
212        }
213
214        // Remember current position as a potential syllable start;
215        // will only be used if we set end to a later position.
216        start = buffer.out_len;
217
218        if is_l(u) && buffer.idx + 1 < buffer.len {
219            let l = u;
220            let v = buffer.cur(1).glyph_id;
221            if is_v(v) {
222                // Have <L,V> or <L,V,T>.
223                let mut t = 0;
224                let mut tindex = 0;
225                if buffer.idx + 2 < buffer.len {
226                    t = buffer.cur(2).glyph_id;
227                    if is_t(t) {
228                        // Only used if isCombiningT (t); otherwise invalid.
229                        tindex = t - T_BASE;
230                    } else {
231                        // The next character was not a trailing jamo.
232                        t = 0;
233                    }
234                }
235
236                let offset = if t != 0 { 3 } else { 2 };
237                buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + offset));
238
239                // We've got a syllable <L,V,T?>; see if it can potentially be composed.
240                if is_combining_l(l) && is_combining_v(v) && (t == 0 || is_combining_t(t)) {
241                    // Try to compose; if this succeeds, end is set to start+1.
242                    let s = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT + tindex;
243                    if face.has_glyph(s) {
244                        let n = if t != 0 { 3 } else { 2 };
245                        buffer.replace_glyphs(n, 1, &[s]);
246                        end = start + 1;
247                        continue;
248                    }
249                }
250
251                // We didn't compose, either because it's an Old Hangul syllable without a
252                // precomposed character in Unicode, or because the font didn't support the
253                // necessary precomposed glyph.
254                // Set jamo features on the individual glyphs, and advance past them.
255                buffer.cur_mut(0).set_hangul_shaping_feature(LJMO);
256                buffer.next_glyph();
257                buffer.cur_mut(0).set_hangul_shaping_feature(VJMO);
258                buffer.next_glyph();
259                if t != 0 {
260                    buffer.cur_mut(0).set_hangul_shaping_feature(TJMO);
261                    buffer.next_glyph();
262                    end = start + 3;
263                } else {
264                    end = start + 2;
265                }
266
267                if buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES {
268                    buffer.merge_out_clusters(start, end);
269                }
270
271                continue;
272            }
273        } else if is_combined_s(u) {
274            // Have <LV>, <LVT>, or <LV,T>
275            let s = u;
276            let has_glyph = face.has_glyph(s);
277
278            let lindex = (s - S_BASE) / N_COUNT;
279            let nindex = (s - S_BASE) % N_COUNT;
280            let vindex = nindex / T_COUNT;
281            let tindex = nindex % T_COUNT;
282
283            if tindex == 0 && buffer.idx + 1 < buffer.len && is_combining_t(buffer.cur(1).glyph_id)
284            {
285                // <LV,T>, try to combine.
286                let new_tindex = buffer.cur(1).glyph_id - T_BASE;
287                let new_s = s + new_tindex;
288
289                if face.has_glyph(new_s) {
290                    buffer.replace_glyphs(2, 1, &[new_s]);
291                    end = start + 1;
292                    continue;
293                } else {
294                    // Mark unsafe between LV and T.
295                    buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
296                }
297            }
298
299            // Otherwise, decompose if font doesn't support <LV> or <LVT>,
300            // or if having non-combining <LV,T>.  Note that we already handled
301            // combining <LV,T> above.
302            if !has_glyph
303                || (tindex == 0 && buffer.idx + 1 < buffer.len && is_t(buffer.cur(1).glyph_id))
304            {
305                let decomposed = [L_BASE + lindex, V_BASE + vindex, T_BASE + tindex];
306                if face.has_glyph(decomposed[0])
307                    && face.has_glyph(decomposed[1])
308                    && (tindex == 0 || face.has_glyph(decomposed[2]))
309                {
310                    let mut s_len = if tindex != 0 { 3 } else { 2 };
311                    buffer.replace_glyphs(1, s_len, &decomposed);
312
313                    // If we decomposed an LV because of a non-combining T following,
314                    // we want to include this T in the syllable.
315                    if has_glyph && tindex == 0 {
316                        buffer.next_glyph();
317                        s_len += 1;
318                    }
319
320                    // We decomposed S: apply jamo features to the individual glyphs
321                    // that are now in `buffer.out_info`.
322                    end = start + s_len;
323
324                    buffer.out_info_mut()[start + 0].set_hangul_shaping_feature(LJMO);
325                    buffer.out_info_mut()[start + 1].set_hangul_shaping_feature(VJMO);
326                    if start + 2 < end {
327                        buffer.out_info_mut()[start + 2].set_hangul_shaping_feature(TJMO);
328                    }
329
330                    if buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES {
331                        buffer.merge_out_clusters(start, end);
332                    }
333
334                    continue;
335                } else if tindex == 0 && buffer.idx + 1 > buffer.len && is_t(buffer.cur(1).glyph_id)
336                {
337                    // Mark unsafe between LV and T.
338                    buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
339                }
340            }
341
342            if has_glyph {
343                // We didn't decompose the S, so just advance past it.
344                end = start + 1;
345                buffer.next_glyph();
346                continue;
347            }
348        }
349
350        // Didn't find a recognizable syllable, so we leave end <= start;
351        // this will prevent tone-mark reordering happening.
352        buffer.next_glyph();
353    }
354
355    buffer.sync();
356}
357
358fn setup_masks_hangul(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
359    let hangul_plan = plan.data::<hangul_shape_plan_t>();
360    for info in buffer.info_slice_mut() {
361        info.mask |= hangul_plan.mask_array[info.hangul_shaping_feature() as usize];
362    }
363}
364
365pub const HANGUL_SHAPER: hb_ot_complex_shaper_t = hb_ot_complex_shaper_t {
366    collect_features: Some(collect_features_hangul),
367    override_features: Some(override_features_hangul),
368    create_data: Some(|plan| Box::new(data_create_hangul(&plan.ot_map))),
369    preprocess_text: Some(preprocess_text_hangul),
370    postprocess_glyphs: None,
371    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
372    decompose: None,
373    compose: None,
374    setup_masks: Some(setup_masks_hangul),
375    gpos_tag: None,
376    reorder_marks: None,
377    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
378    fallback_position: false,
379};