rustybuzz/hb/
ot_shape_complex_use.rs

1use alloc::boxed::Box;
2
3use super::algs::*;
4use super::buffer::hb_buffer_t;
5use super::ot_layout::*;
6use super::ot_map::*;
7use super::ot_shape::*;
8use super::ot_shape_complex::*;
9use super::ot_shape_complex_arabic::arabic_shape_plan_t;
10use super::ot_shape_normalize::*;
11use super::ot_shape_plan::hb_ot_shape_plan_t;
12use super::unicode::{CharExt, GeneralCategoryExt};
13use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
14
15pub const UNIVERSAL_SHAPER: hb_ot_complex_shaper_t = hb_ot_complex_shaper_t {
16    collect_features: Some(collect_features),
17    override_features: None,
18    create_data: Some(|plan| Box::new(UniversalShapePlan::new(plan))),
19    preprocess_text: Some(preprocess_text),
20    postprocess_glyphs: None,
21    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
22    decompose: None,
23    compose: Some(compose),
24    setup_masks: Some(setup_masks),
25    gpos_tag: None,
26    reorder_marks: None,
27    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
28    fallback_position: false,
29};
30
/// Integer type used to store a glyph's USE category.
pub type Category = u8;

/// USE category values, listed in ascending numeric order.
///
/// Numbers that appear only in comments have no constant defined in this module.
// NOTE(review): `dead_code` is presumably allowed because not every constant is
// referenced by the crate — confirm before removing the attribute.
#[allow(dead_code)]
pub mod category {
    pub const O: u8 = 0; // OTHER
    pub const B: u8 = 1; // BASE

    // 3: IND (BASE_IND) -- not defined here.

    pub const N: u8 = 4; // BASE_NUM
    pub const GB: u8 = 5; // BASE_OTHER
    pub const CGJ: u8 = 6; // CGJ

    // 7: F (CONS_FINAL), 8: FM (CONS_FINAL_MOD), 9: M (CONS_MED),
    // 10: CM (CONS_MOD) -- not defined here.

    pub const SUB: u8 = 11; // CONS_SUB
    pub const H: u8 = 12; // HALANT
    pub const HN: u8 = 13; // HALANT_NUM
    pub const ZWNJ: u8 = 14; // Zero width non-joiner

    // 15: ZWJ (Zero width joiner) -- not defined here.

    pub const WJ: u8 = 16; // Word joiner
    pub const RSV: u8 = 17; // Reserved characters
    pub const R: u8 = 18; // REPHA
    pub const S: u8 = 19; // SYM

    // 20: SM (SYM_MOD), 21: VS (VARIATION_SELECTOR) -- not defined here.

    pub const VPRE: u8 = 22; // VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
    pub const VMPRE: u8 = 23; // VOWEL_MOD_PRE
    pub const FABV: u8 = 24; // CONS_FINAL_ABOVE
    pub const FBLW: u8 = 25; // CONS_FINAL_BELOW
    pub const FPST: u8 = 26; // CONS_FINAL_POST
    pub const MABV: u8 = 27; // CONS_MED_ABOVE
    pub const MBLW: u8 = 28; // CONS_MED_BELOW
    pub const MPST: u8 = 29; // CONS_MED_POST
    pub const MPRE: u8 = 30; // CONS_MED_PRE
    pub const CMABV: u8 = 31; // CONS_MOD_ABOVE
    pub const CMBLW: u8 = 32; // CONS_MOD_BELOW
    pub const VABV: u8 = 33; // VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
    pub const VBLW: u8 = 34; // VOWEL_BELOW / VOWEL_BELOW_POST
    pub const VPST: u8 = 35; // VOWEL_POST UIPC = Right

    // 36: V (VOWEL) -- not defined here.

    pub const VMABV: u8 = 37; // VOWEL_MOD_ABOVE
    pub const VMBLW: u8 = 38; // VOWEL_MOD_BELOW
    pub const VMPST: u8 = 39; // VOWEL_MOD_POST

    // 40: VM (VOWEL_MOD) -- not defined here.

    pub const SMABV: u8 = 41; // SYM_MOD_ABOVE
    pub const SMBLW: u8 = 42; // SYM_MOD_BELOW
    pub const CS: u8 = 43; // CONS_WITH_STACKER

    // https://github.com/harfbuzz/harfbuzz/issues/1102
    pub const IS: u8 = 44; // HALANT_OR_VOWEL_MODIFIER

    pub const FMABV: u8 = 45; // CONS_FINAL_MOD UIPC = Top
    pub const FMBLW: u8 = 46; // CONS_FINAL_MOD UIPC = Bottom
    pub const FMPST: u8 = 47; // CONS_FINAL_MOD UIPC = Not_Applicable
    pub const SK: u8 = 48; // SAKOT
    pub const G: u8 = 49; // HIEROGLYPH
    pub const J: u8 = 50; // HIEROGLYPH_JOINER
    pub const SB: u8 = 51; // HIEROGLYPH_SEGMENT_BEGIN
    pub const SE: u8 = 52; // HIEROGLYPH_SEGMENT_END
}
102
103// These features are applied all at once, before reordering,
104// constrained to the syllable.
105const BASIC_FEATURES: &[hb_tag_t] = &[
106    hb_tag_t::from_bytes(b"rkrf"),
107    hb_tag_t::from_bytes(b"abvf"),
108    hb_tag_t::from_bytes(b"blwf"),
109    hb_tag_t::from_bytes(b"half"),
110    hb_tag_t::from_bytes(b"pstf"),
111    hb_tag_t::from_bytes(b"vatu"),
112    hb_tag_t::from_bytes(b"cjct"),
113];
114
115const TOPOGRAPHICAL_FEATURES: &[hb_tag_t] = &[
116    hb_tag_t::from_bytes(b"isol"),
117    hb_tag_t::from_bytes(b"init"),
118    hb_tag_t::from_bytes(b"medi"),
119    hb_tag_t::from_bytes(b"fina"),
120];
121
/// Positional form assigned to a syllable.
///
/// The discriminants are used as indices, so the variants must stay in the same
/// order as `TOPOGRAPHICAL_FEATURES`.
#[derive(Clone, Copy, PartialEq)]
enum JoiningForm {
    Isolated = 0,
    Initial = 1,
    Medial = 2,
    Terminal = 3,
}
130
131// These features are applied all at once, after reordering and clearing syllables.
132const OTHER_FEATURES: &[hb_tag_t] = &[
133    hb_tag_t::from_bytes(b"abvs"),
134    hb_tag_t::from_bytes(b"blws"),
135    hb_tag_t::from_bytes(b"haln"),
136    hb_tag_t::from_bytes(b"pres"),
137    hb_tag_t::from_bytes(b"psts"),
138];
139
140impl hb_glyph_info_t {
141    pub(crate) fn use_category(&self) -> Category {
142        self.complex_var_u8_category()
143    }
144
145    fn set_use_category(&mut self, c: Category) {
146        self.set_complex_var_u8_category(c)
147    }
148
149    fn is_halant_use(&self) -> bool {
150        matches!(self.use_category(), category::H | category::IS) && !_hb_glyph_info_ligated(self)
151    }
152}
153
154struct UniversalShapePlan {
155    rphf_mask: hb_mask_t,
156    arabic_plan: Option<arabic_shape_plan_t>,
157}
158
159impl UniversalShapePlan {
160    fn new(plan: &hb_ot_shape_plan_t) -> UniversalShapePlan {
161        let mut arabic_plan = None;
162
163        if plan.script.map_or(false, has_arabic_joining) {
164            arabic_plan = Some(crate::hb::ot_shape_complex_arabic::data_create_arabic(plan));
165        }
166
167        UniversalShapePlan {
168            rphf_mask: plan.ot_map.get_1_mask(hb_tag_t::from_bytes(b"rphf")),
169            arabic_plan,
170        }
171    }
172}
173
174fn collect_features(planner: &mut hb_ot_shape_planner_t) {
175    // Do this before any lookups have been applied.
176    planner.ot_map.add_gsub_pause(Some(setup_syllables));
177
178    // Default glyph pre-processing group
179    planner
180        .ot_map
181        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, 1);
182    planner
183        .ot_map
184        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, 1);
185    planner
186        .ot_map
187        .enable_feature(hb_tag_t::from_bytes(b"nukt"), F_PER_SYLLABLE, 1);
188    planner.ot_map.enable_feature(
189        hb_tag_t::from_bytes(b"akhn"),
190        F_MANUAL_ZWJ | F_PER_SYLLABLE,
191        1,
192    );
193
194    // Reordering group
195    planner
196        .ot_map
197        .add_gsub_pause(Some(crate::hb::ot_layout::_hb_clear_substitution_flags));
198    planner.ot_map.add_feature(
199        hb_tag_t::from_bytes(b"rphf"),
200        F_MANUAL_ZWJ | F_PER_SYLLABLE,
201        1,
202    );
203    planner.ot_map.add_gsub_pause(Some(record_rphf));
204    planner
205        .ot_map
206        .add_gsub_pause(Some(crate::hb::ot_layout::_hb_clear_substitution_flags));
207    planner.ot_map.enable_feature(
208        hb_tag_t::from_bytes(b"pref"),
209        F_MANUAL_ZWJ | F_PER_SYLLABLE,
210        1,
211    );
212    planner.ot_map.add_gsub_pause(Some(record_pref));
213
214    // Orthographic unit shaping group
215    for feature in BASIC_FEATURES {
216        planner
217            .ot_map
218            .enable_feature(*feature, F_MANUAL_ZWJ | F_PER_SYLLABLE, 1);
219    }
220
221    planner.ot_map.add_gsub_pause(Some(reorder));
222
223    // Topographical features
224    for feature in TOPOGRAPHICAL_FEATURES {
225        planner.ot_map.add_feature(*feature, F_NONE, 1);
226    }
227    planner.ot_map.add_gsub_pause(None);
228
229    // Standard typographic presentation
230    for feature in OTHER_FEATURES {
231        planner.ot_map.enable_feature(*feature, F_NONE, 1);
232    }
233}
234
235fn setup_syllables(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
236    super::ot_shape_complex_use_machine::find_syllables(buffer);
237
238    foreach_syllable!(buffer, start, end, {
239        buffer.unsafe_to_break(Some(start), Some(end));
240    });
241
242    setup_rphf_mask(plan, buffer);
243    setup_topographical_masks(plan, buffer);
244}
245
246fn setup_rphf_mask(plan: &hb_ot_shape_plan_t, buffer: &mut hb_buffer_t) {
247    let universal_plan = plan.data::<UniversalShapePlan>();
248
249    let mask = universal_plan.rphf_mask;
250    if mask == 0 {
251        return;
252    }
253
254    let mut start = 0;
255    let mut end = buffer.next_syllable(0);
256    while start < buffer.len {
257        let limit = if buffer.info[start].use_category() == category::R {
258            1
259        } else {
260            core::cmp::min(3, end - start)
261        };
262
263        for i in start..start + limit {
264            buffer.info[i].mask |= mask;
265        }
266
267        start = end;
268        end = buffer.next_syllable(start);
269    }
270}
271
272fn setup_topographical_masks(plan: &hb_ot_shape_plan_t, buffer: &mut hb_buffer_t) {
273    use super::ot_shape_complex_use_machine::SyllableType;
274
275    if plan.data::<UniversalShapePlan>().arabic_plan.is_some() {
276        return;
277    }
278
279    let mut masks = [0; 4];
280    let mut all_masks = 0;
281    for i in 0..4 {
282        masks[i] = plan.ot_map.get_1_mask(TOPOGRAPHICAL_FEATURES[i]);
283        if masks[i] == plan.ot_map.get_global_mask() {
284            masks[i] = 0;
285        }
286
287        all_masks |= masks[i];
288    }
289
290    if all_masks == 0 {
291        return;
292    }
293
294    let other_masks = !all_masks;
295
296    let mut last_start = 0;
297    let mut last_form = None;
298    let mut start = 0;
299    let mut end = buffer.next_syllable(0);
300    while start < buffer.len {
301        let syllable = buffer.info[start].syllable() & 0x0F;
302        if syllable == SyllableType::HieroglyphCluster as u8
303            || syllable == SyllableType::NonCluster as u8
304        {
305            last_form = None;
306        } else {
307            let join = last_form == Some(JoiningForm::Terminal)
308                || last_form == Some(JoiningForm::Isolated);
309
310            if join {
311                // Fixup previous syllable's form.
312                let form = if last_form == Some(JoiningForm::Terminal) {
313                    JoiningForm::Medial
314                } else {
315                    JoiningForm::Initial
316                };
317
318                for i in last_start..start {
319                    buffer.info[i].mask =
320                        (buffer.info[i].mask & other_masks) | masks[form as usize];
321                }
322            }
323
324            // Form for this syllable.
325            let form = if join {
326                JoiningForm::Terminal
327            } else {
328                JoiningForm::Isolated
329            };
330            last_form = Some(form);
331            for i in start..end {
332                buffer.info[i].mask = (buffer.info[i].mask & other_masks) | masks[form as usize];
333            }
334        }
335
336        last_start = start;
337        start = end;
338        end = buffer.next_syllable(start);
339    }
340}
341
342fn record_rphf(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
343    let universal_plan = plan.data::<UniversalShapePlan>();
344
345    let mask = universal_plan.rphf_mask;
346    if mask == 0 {
347        return;
348    }
349
350    let mut start = 0;
351    let mut end = buffer.next_syllable(0);
352    while start < buffer.len {
353        // Mark a substituted repha as USE_R.
354        for i in start..end {
355            if buffer.info[i].mask & mask == 0 {
356                break;
357            }
358
359            if _hb_glyph_info_substituted(&buffer.info[i]) {
360                buffer.info[i].set_use_category(category::R);
361                break;
362            }
363        }
364
365        start = end;
366        end = buffer.next_syllable(start);
367    }
368}
369
370fn reorder(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
371    use super::ot_shape_complex_use_machine::SyllableType;
372
373    crate::hb::ot_shape_complex_syllabic::insert_dotted_circles(
374        face,
375        buffer,
376        SyllableType::BrokenCluster as u8,
377        category::B,
378        Some(category::R),
379        None,
380    );
381
382    let mut start = 0;
383    let mut end = buffer.next_syllable(0);
384    while start < buffer.len {
385        reorder_syllable(start, end, buffer);
386        start = end;
387        end = buffer.next_syllable(start);
388    }
389}
390
391const fn category_flag(c: Category) -> u32 {
392    rb_flag(c as u32)
393}
394
395const fn category_flag64(c: Category) -> u64 {
396    rb_flag64(c as u32)
397}
398
399const BASE_FLAGS: u64 = category_flag64(category::FABV)
400    | category_flag64(category::FBLW)
401    | category_flag64(category::FPST)
402    | category_flag64(category::MABV)
403    | category_flag64(category::MBLW)
404    | category_flag64(category::MPST)
405    | category_flag64(category::MPRE)
406    | category_flag64(category::VABV)
407    | category_flag64(category::VBLW)
408    | category_flag64(category::VPST)
409    | category_flag64(category::VPRE)
410    | category_flag64(category::VMABV)
411    | category_flag64(category::VMBLW)
412    | category_flag64(category::VMPST)
413    | category_flag64(category::VMPRE);
414
415fn reorder_syllable(start: usize, end: usize, buffer: &mut hb_buffer_t) {
416    use super::ot_shape_complex_use_machine::SyllableType;
417
418    let syllable_type = (buffer.info[start].syllable() & 0x0F) as u32;
419    // Only a few syllable types need reordering.
420    if (rb_flag_unsafe(syllable_type)
421        & (rb_flag(SyllableType::ViramaTerminatedCluster as u32)
422            | rb_flag(SyllableType::SakotTerminatedCluster as u32)
423            | rb_flag(SyllableType::StandardCluster as u32)
424            | rb_flag(SyllableType::BrokenCluster as u32)
425            | 0))
426        == 0
427    {
428        return;
429    }
430
431    // Move things forward.
432    if buffer.info[start].use_category() == category::R && end - start > 1 {
433        // Got a repha.  Reorder it towards the end, but before the first post-base glyph.
434        for i in start + 1..end {
435            let is_post_base_glyph =
436                (rb_flag64_unsafe(buffer.info[i].use_category() as u32) & BASE_FLAGS) != 0
437                    || buffer.info[i].is_halant_use();
438
439            if is_post_base_glyph || i == end - 1 {
440                // If we hit a post-base glyph, move before it; otherwise move to the
441                // end. Shift things in between backward.
442
443                let mut i = i;
444                if is_post_base_glyph {
445                    i -= 1;
446                }
447
448                buffer.merge_clusters(start, i + 1);
449                let t = buffer.info[start];
450                for k in 0..i - start {
451                    buffer.info[k + start] = buffer.info[k + start + 1];
452                }
453                buffer.info[i] = t;
454
455                break;
456            }
457        }
458    }
459
460    // Move things back.
461    let mut j = start;
462    for i in start..end {
463        let flag = rb_flag_unsafe(buffer.info[i].use_category() as u32);
464        if buffer.info[i].is_halant_use() {
465            // If we hit a halant, move after it; otherwise move to the beginning, and
466            // shift things in between forward.
467            j = i + 1;
468        } else if (flag & (category_flag(category::VPRE) | category_flag(category::VMPRE))) != 0
469            && _hb_glyph_info_get_lig_comp(&buffer.info[i]) == 0
470            && j < i
471        {
472            // Only move the first component of a MultipleSubst.
473            buffer.merge_clusters(j, i + 1);
474            let t = buffer.info[i];
475            for k in (0..i - j).rev() {
476                buffer.info[k + j + 1] = buffer.info[k + j];
477            }
478            buffer.info[j] = t;
479        }
480    }
481}
482
483fn record_pref(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
484    let mut start = 0;
485    let mut end = buffer.next_syllable(0);
486    while start < buffer.len {
487        // Mark a substituted pref as VPre, as they behave the same way.
488        for i in start..end {
489            if _hb_glyph_info_substituted(&buffer.info[i]) {
490                buffer.info[i].set_use_category(category::VPRE);
491                break;
492            }
493        }
494
495        start = end;
496        end = buffer.next_syllable(start);
497    }
498}
499
500fn has_arabic_joining(script: Script) -> bool {
501    // List of scripts that have data in arabic-table.
502    matches!(
503        script,
504        script::ADLAM
505            | script::ARABIC
506            | script::CHORASMIAN
507            | script::HANIFI_ROHINGYA
508            | script::MANDAIC
509            | script::MANICHAEAN
510            | script::MONGOLIAN
511            | script::NKO
512            | script::OLD_UYGHUR
513            | script::PHAGS_PA
514            | script::PSALTER_PAHLAVI
515            | script::SOGDIAN
516            | script::SYRIAC
517    )
518}
519
520fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
521    super::ot_shape_complex_vowel_constraints::preprocess_text_vowel_constraints(buffer);
522}
523
524fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
525    // Avoid recomposing split matras.
526    if a.general_category().is_mark() {
527        return None;
528    }
529
530    crate::hb::unicode::compose(a, b)
531}
532
533fn setup_masks(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
534    let universal_plan = plan.data::<UniversalShapePlan>();
535
536    // Do this before allocating use_category().
537    if let Some(ref arabic_plan) = universal_plan.arabic_plan {
538        crate::hb::ot_shape_complex_arabic::setup_masks_inner(arabic_plan, plan.script, buffer);
539    }
540
541    // We cannot setup masks here. We save information about characters
542    // and setup masks later on in a pause-callback.
543    for info in buffer.info_slice_mut() {
544        info.set_use_category(super::ot_shape_complex_use_table::get_category(info));
545    }
546}