rustybuzz/hb/
ot_shape_complex_khmer.rs

1use alloc::boxed::Box;
2
3use super::buffer::hb_buffer_t;
4use super::ot_map::*;
5use super::ot_shape::*;
6use super::ot_shape_complex::*;
7use super::ot_shape_complex_indic::{category, position};
8use super::ot_shape_normalize::*;
9use super::ot_shape_plan::hb_ot_shape_plan_t;
10use super::unicode::{CharExt, GeneralCategoryExt};
11use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t};
12
13pub const KHMER_SHAPER: hb_ot_complex_shaper_t = hb_ot_complex_shaper_t {
14    collect_features: Some(collect_features),
15    override_features: Some(override_features),
16    create_data: Some(|plan| Box::new(KhmerShapePlan::new(plan))),
17    preprocess_text: None,
18    postprocess_glyphs: None,
19    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
20    decompose: Some(decompose),
21    compose: Some(compose),
22    setup_masks: Some(setup_masks),
23    gpos_tag: None,
24    reorder_marks: None,
25    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
26    fallback_position: false,
27};
28
29const KHMER_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[
30    // Basic features.
31    // These features are applied all at once, before reordering, constrained
32    // to the syllable.
33    (
34        hb_tag_t::from_bytes(b"pref"),
35        F_MANUAL_JOINERS | F_PER_SYLLABLE,
36    ),
37    (
38        hb_tag_t::from_bytes(b"blwf"),
39        F_MANUAL_JOINERS | F_PER_SYLLABLE,
40    ),
41    (
42        hb_tag_t::from_bytes(b"abvf"),
43        F_MANUAL_JOINERS | F_PER_SYLLABLE,
44    ),
45    (
46        hb_tag_t::from_bytes(b"pstf"),
47        F_MANUAL_JOINERS | F_PER_SYLLABLE,
48    ),
49    (
50        hb_tag_t::from_bytes(b"cfar"),
51        F_MANUAL_JOINERS | F_PER_SYLLABLE,
52    ),
53    // Other features.
54    // These features are applied all at once after clearing syllables.
55    (hb_tag_t::from_bytes(b"pres"), F_GLOBAL_MANUAL_JOINERS),
56    (hb_tag_t::from_bytes(b"abvs"), F_GLOBAL_MANUAL_JOINERS),
57    (hb_tag_t::from_bytes(b"blws"), F_GLOBAL_MANUAL_JOINERS),
58    (hb_tag_t::from_bytes(b"psts"), F_GLOBAL_MANUAL_JOINERS),
59];
60
/// Positions of the basic features inside `KHMER_FEATURES`.
///
/// These indices must stay in the same order as the `KHMER_FEATURES` array;
/// they are used to index the per-plan mask array.
mod khmer_feature {
    /// Index of the `pref` entry.
    pub const PREF: usize = 0;
    /// Index of the `blwf` entry.
    pub const BLWF: usize = 1;
    /// Index of the `abvf` entry.
    pub const ABVF: usize = 2;
    /// Index of the `pstf` entry.
    pub const PSTF: usize = 3;
    /// Index of the `cfar` entry.
    pub const CFAR: usize = 4;
}
69
70impl hb_glyph_info_t {
71    fn set_khmer_properties(&mut self) {
72        let u = self.glyph_id;
73        let (mut cat, pos) = crate::hb::ot_shape_complex_indic::get_category_and_position(u);
74
75        // Re-assign category
76
77        // These categories are experimentally extracted from what Uniscribe allows.
78
79        match u {
80            0x179A => cat = category::RA,
81            0x17CC | 0x17C9 | 0x17CA => cat = category::ROBATIC,
82            0x17C6 | 0x17CB | 0x17CD | 0x17CE | 0x17CF | 0x17D0 | 0x17D1 => cat = category::X_GROUP,
83            // Just guessing. Uniscribe doesn't categorize it.
84            0x17C7 | 0x17C8 | 0x17DD | 0x17D3 => cat = category::Y_GROUP,
85            _ => {}
86        }
87
88        // Re-assign position.
89
90        if cat == category::M {
91            match pos {
92                position::PRE_C => cat = category::V_PRE,
93                position::BELOW_C => cat = category::V_BLW,
94                position::ABOVE_C => cat = category::V_AVB,
95                position::POST_C => cat = category::V_PST,
96                _ => {}
97            }
98        }
99
100        self.set_indic_category(cat);
101    }
102}
103
104struct KhmerShapePlan {
105    mask_array: [hb_mask_t; KHMER_FEATURES.len()],
106}
107
108impl KhmerShapePlan {
109    fn new(plan: &hb_ot_shape_plan_t) -> Self {
110        let mut mask_array = [0; KHMER_FEATURES.len()];
111        for (i, feature) in KHMER_FEATURES.iter().enumerate() {
112            mask_array[i] = if feature.1 & F_GLOBAL != 0 {
113                0
114            } else {
115                plan.ot_map.get_1_mask(feature.0)
116            }
117        }
118
119        KhmerShapePlan { mask_array }
120    }
121}
122
123fn collect_features(planner: &mut hb_ot_shape_planner_t) {
124    // Do this before any lookups have been applied.
125    planner.ot_map.add_gsub_pause(Some(setup_syllables));
126    planner.ot_map.add_gsub_pause(Some(reorder));
127
128    // Testing suggests that Uniscribe does NOT pause between basic
129    // features.  Test with KhmerUI.ttf and the following three
130    // sequences:
131    //
132    //   U+1789,U+17BC
133    //   U+1789,U+17D2,U+1789
134    //   U+1789,U+17D2,U+1789,U+17BC
135    //
136    // https://github.com/harfbuzz/harfbuzz/issues/974
137    planner
138        .ot_map
139        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, 1);
140    planner
141        .ot_map
142        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, 1);
143
144    for feature in KHMER_FEATURES.iter().take(5) {
145        planner.ot_map.add_feature(feature.0, feature.1, 1);
146    }
147
148    for feature in KHMER_FEATURES.iter().skip(5) {
149        planner.ot_map.add_feature(feature.0, feature.1, 1);
150    }
151}
152
153fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
154    super::ot_shape_complex_khmer_machine::find_syllables_khmer(buffer);
155
156    let mut start = 0;
157    let mut end = buffer.next_syllable(0);
158    while start < buffer.len {
159        buffer.unsafe_to_break(Some(start), Some(end));
160        start = end;
161        end = buffer.next_syllable(start);
162    }
163}
164
165fn reorder(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
166    use super::ot_shape_complex_khmer_machine::SyllableType;
167
168    super::ot_shape_complex_syllabic::insert_dotted_circles(
169        face,
170        buffer,
171        SyllableType::BrokenCluster as u8,
172        category::DOTTED_CIRCLE,
173        Some(category::REPHA),
174        None,
175    );
176
177    let khmer_plan = plan.data::<KhmerShapePlan>();
178
179    let mut start = 0;
180    let mut end = buffer.next_syllable(0);
181    while start < buffer.len {
182        reorder_syllable(khmer_plan, start, end, buffer);
183        start = end;
184        end = buffer.next_syllable(start);
185    }
186}
187
188fn reorder_syllable(
189    khmer_plan: &KhmerShapePlan,
190    start: usize,
191    end: usize,
192    buffer: &mut hb_buffer_t,
193) {
194    use super::ot_shape_complex_khmer_machine::SyllableType;
195
196    let syllable_type = match buffer.info[start].syllable() & 0x0F {
197        0 => SyllableType::ConsonantSyllable,
198        1 => SyllableType::BrokenCluster,
199        2 => SyllableType::NonKhmerCluster,
200        _ => unreachable!(),
201    };
202
203    match syllable_type {
204        SyllableType::ConsonantSyllable | SyllableType::BrokenCluster => {
205            reorder_consonant_syllable(khmer_plan, start, end, buffer);
206        }
207        SyllableType::NonKhmerCluster => {}
208    }
209}
210
211// Rules from:
212// https://docs.microsoft.com/en-us/typography/script-development/devanagari
213fn reorder_consonant_syllable(
214    plan: &KhmerShapePlan,
215    start: usize,
216    end: usize,
217    buffer: &mut hb_buffer_t,
218) {
219    // Setup masks.
220    {
221        // Post-base
222        let mask = plan.mask_array[khmer_feature::BLWF]
223            | plan.mask_array[khmer_feature::ABVF]
224            | plan.mask_array[khmer_feature::PSTF];
225        for info in &mut buffer.info[start + 1..end] {
226            info.mask |= mask;
227        }
228    }
229
230    let mut num_coengs = 0;
231    for i in start + 1..end {
232        // When a COENG + (Cons | IndV) combination are found (and subscript count
233        // is less than two) the character combination is handled according to the
234        // subscript type of the character following the COENG.
235        //
236        // ...
237        //
238        // Subscript Type 2 - The COENG + RO characters are reordered to immediately
239        // before the base glyph. Then the COENG + RO characters are assigned to have
240        // the 'pref' OpenType feature applied to them.
241        if buffer.info[i].indic_category() == category::COENG && num_coengs <= 2 && i + 1 < end {
242            num_coengs += 1;
243
244            if buffer.info[i + 1].indic_category() == category::RA {
245                for j in 0..2 {
246                    buffer.info[i + j].mask |= plan.mask_array[khmer_feature::PREF];
247                }
248
249                // Move the Coeng,Ro sequence to the start.
250                buffer.merge_clusters(start, i + 2);
251                let t0 = buffer.info[i];
252                let t1 = buffer.info[i + 1];
253                for k in (0..i - start).rev() {
254                    buffer.info[k + start + 2] = buffer.info[k + start];
255                }
256
257                buffer.info[start] = t0;
258                buffer.info[start + 1] = t1;
259
260                // Mark the subsequent stuff with 'cfar'.  Used in Khmer.
261                // Read the feature spec.
262                // This allows distinguishing the following cases with MS Khmer fonts:
263                // U+1784,U+17D2,U+179A,U+17D2,U+1782
264                // U+1784,U+17D2,U+1782,U+17D2,U+179A
265                if plan.mask_array[khmer_feature::CFAR] != 0 {
266                    for j in i + 2..end {
267                        buffer.info[j].mask |= plan.mask_array[khmer_feature::CFAR];
268                    }
269                }
270
271                num_coengs = 2; // Done.
272            }
273        } else if buffer.info[i].indic_category() == category::V_PRE {
274            // Reorder left matra piece.
275
276            // Move to the start.
277            buffer.merge_clusters(start, i + 1);
278            let t = buffer.info[i];
279            for k in (0..i - start).rev() {
280                buffer.info[k + start + 1] = buffer.info[k + start];
281            }
282            buffer.info[start] = t;
283        }
284    }
285}
286
287fn override_features(planner: &mut hb_ot_shape_planner_t) {
288    // Khmer spec has 'clig' as part of required shaping features:
289    // "Apply feature 'clig' to form ligatures that are desired for
290    // typographical correctness.", hence in overrides...
291    planner
292        .ot_map
293        .enable_feature(hb_tag_t::from_bytes(b"clig"), F_NONE, 1);
294
295    planner
296        .ot_map
297        .disable_feature(hb_tag_t::from_bytes(b"liga"));
298}
299
300fn decompose(_: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> {
301    // Decompose split matras that don't have Unicode decompositions.
302    match ab {
303        '\u{17BE}' | '\u{17BF}' | '\u{17C0}' | '\u{17C4}' | '\u{17C5}' => Some(('\u{17C1}', ab)),
304        _ => crate::hb::unicode::decompose(ab),
305    }
306}
307
308fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
309    // Avoid recomposing split matras.
310    if a.general_category().is_mark() {
311        return None;
312    }
313
314    crate::hb::unicode::compose(a, b)
315}
316
317fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
318    // We cannot setup masks here.  We save information about characters
319    // and setup masks later on in a pause-callback.
320    for info in buffer.info_slice_mut() {
321        info.set_khmer_properties();
322    }
323}