rustybuzz/hb/
ot_shape_complex_arabic.rs

1use alloc::boxed::Box;
2
3use super::algs::*;
4use super::buffer::*;
5use super::ot_layout::*;
6use super::ot_map::*;
7use super::ot_shape::*;
8use super::ot_shape_complex::*;
9use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_AUTO;
10use super::ot_shape_plan::hb_ot_shape_plan_t;
11use super::unicode::*;
12use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
13
/// Buffer scratch flag meaning "at least one glyph was tagged for `stch`
/// stretching". It is raised by `record_stch` and checked by `apply_stch`,
/// and reuses the `HB_BUFFER_SCRATCH_FLAG_COMPLEX0` bit.
const HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH: hb_buffer_scratch_flags_t =
    HB_BUFFER_SCRATCH_FLAG_COMPLEX0;
16
17// See:
18// https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516
19fn is_word_category(gc: hb_unicode_general_category_t) -> bool {
20    (rb_flag_unsafe(gc.to_rb())
21        & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED)
22            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE)
23            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER)
24            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER)
25            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK)
26            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK)
27            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
28            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER)
29            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER)
30            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER)
31            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL)
32            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL)
33            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL)
34            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL)))
35        != 0
36}
37
/// Joining behaviour of a character, as used by the Arabic joining state
/// machine in this file.
///
/// The discriminants of `U` through `GroupDalathRish` (0..=5) are used
/// directly as the column index into `STATE_TABLE` (`this_type as usize`),
/// so their numeric values must not change. `T` is skipped by the state
/// machine and `X` is resolved away by `get_joining_type`, so neither is ever
/// used as an index.
#[derive(Clone, Copy, PartialEq, PartialOrd, Debug)]
pub enum hb_arabic_joining_type_t {
    // Column `jt_U` of `STATE_TABLE`.
    U = 0,
    // Column `jt_L` of `STATE_TABLE`.
    L = 1,
    // Column `jt_R` of `STATE_TABLE`.
    R = 2,
    // Column `jt_D` of `STATE_TABLE`.
    D = 3,
    // We don't have C, like harfbuzz, because Rust doesn't allow duplicated enum variants.
    // Column `jg_ALAPH` of `STATE_TABLE`.
    GroupAlaph = 4,
    // Column `jg_DALATH_RISH` of `STATE_TABLE`.
    GroupDalathRish = 5,
    // Transparent to the state machine: `arabic_joining` skips these.
    T = 7,
    X = 8, // means: use general-category to choose between U or T.
}
50
51fn get_joining_type(u: char, gc: hb_unicode_general_category_t) -> hb_arabic_joining_type_t {
52    let j_type = super::ot_shape_complex_arabic_table::joining_type(u);
53    if j_type != hb_arabic_joining_type_t::X {
54        return j_type;
55    }
56
57    let ok = rb_flag_unsafe(gc.to_rb())
58        & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
59            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK)
60            | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT));
61
62    if ok != 0 {
63        hb_arabic_joining_type_t::T
64    } else {
65        hb_arabic_joining_type_t::U
66    }
67}
68
69fn feature_is_syriac(tag: hb_tag_t) -> bool {
70    matches!(tag.to_bytes()[3], b'2' | b'3')
71}
72
/// Positional-form features, one per joining action.
///
/// The order is significant: entry `i` must be the feature for the
/// `arabic_action_t` constant with value `i` (`ISOL` = 0 ... `INIT` = 6).
/// `data_create_arabic` stores the mask of entry `i` at `mask_array[i]`, and
/// `setup_masks_inner` indexes that array by a glyph's shaping action.
const ARABIC_FEATURES: &[hb_tag_t] = &[
    hb_tag_t::from_bytes(b"isol"),
    hb_tag_t::from_bytes(b"fina"),
    hb_tag_t::from_bytes(b"fin2"),
    hb_tag_t::from_bytes(b"fin3"),
    hb_tag_t::from_bytes(b"medi"),
    hb_tag_t::from_bytes(b"med2"),
    hb_tag_t::from_bytes(b"init"),
];
82
/// Values stored in a glyph's Arabic shaping-action byte.
mod arabic_action_t {
    // Positional forms. Values 0..=6 line up with the entries of
    // `ARABIC_FEATURES`; `NONE` selects the extra, zero-mask slot.
    pub const ISOL: u8 = 0;
    pub const FINA: u8 = 1;
    pub const FIN2: u8 = 2;
    pub const FIN3: u8 = 3;
    pub const MEDI: u8 = 4;
    pub const MED2: u8 = 5;
    pub const INIT: u8 = 6;
    pub const NONE: u8 = 7;

    // We abuse the same byte for other things: after the `stch` feature has
    // run, these mark the pieces of a stretched glyph.
    pub const STRETCHING_FIXED: u8 = 8;
    pub const STRETCHING_REPEATING: u8 = 9;

    /// Returns `true` if `n` is one of the two stretching markers.
    #[inline]
    pub fn is_stch(n: u8) -> bool {
        n == STRETCHING_FIXED || n == STRETCHING_REPEATING
    }
}
102
103const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[
104    // jt_U,          jt_L,          jt_R,
105    // jt_D,          jg_ALAPH,      jg_DALATH_RISH
106
107    // State 0: prev was U, not willing to join.
108    [
109        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
110        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
111        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
112        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
113        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
114        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
115    ],
116    // State 1: prev was R or action::ISOL/ALAPH, not willing to join.
117    [
118        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
119        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
120        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
121        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
122        (arabic_action_t::NONE, arabic_action_t::FIN2, 5),
123        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
124    ],
125    // State 2: prev was D/L in action::ISOL form, willing to join.
126    [
127        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
128        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
129        (arabic_action_t::INIT, arabic_action_t::FINA, 1),
130        (arabic_action_t::INIT, arabic_action_t::FINA, 3),
131        (arabic_action_t::INIT, arabic_action_t::FINA, 4),
132        (arabic_action_t::INIT, arabic_action_t::FINA, 6),
133    ],
134    // State 3: prev was D in action::FINA form, willing to join.
135    [
136        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
137        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
138        (arabic_action_t::MEDI, arabic_action_t::FINA, 1),
139        (arabic_action_t::MEDI, arabic_action_t::FINA, 3),
140        (arabic_action_t::MEDI, arabic_action_t::FINA, 4),
141        (arabic_action_t::MEDI, arabic_action_t::FINA, 6),
142    ],
143    // State 4: prev was action::FINA ALAPH, not willing to join.
144    [
145        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
146        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
147        (arabic_action_t::MED2, arabic_action_t::ISOL, 1),
148        (arabic_action_t::MED2, arabic_action_t::ISOL, 2),
149        (arabic_action_t::MED2, arabic_action_t::FIN2, 5),
150        (arabic_action_t::MED2, arabic_action_t::ISOL, 6),
151    ],
152    // State 5: prev was FIN2/FIN3 ALAPH, not willing to join.
153    [
154        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
155        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
156        (arabic_action_t::ISOL, arabic_action_t::ISOL, 1),
157        (arabic_action_t::ISOL, arabic_action_t::ISOL, 2),
158        (arabic_action_t::ISOL, arabic_action_t::FIN2, 5),
159        (arabic_action_t::ISOL, arabic_action_t::ISOL, 6),
160    ],
161    // State 6: prev was DALATH/RISH, not willing to join.
162    [
163        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
164        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
165        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
166        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
167        (arabic_action_t::NONE, arabic_action_t::FIN3, 5),
168        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
169    ],
170];
171
172impl hb_glyph_info_t {
173    fn arabic_shaping_action(&self) -> u8 {
174        self.complex_var_u8_auxiliary()
175    }
176
177    fn set_arabic_shaping_action(&mut self, action: u8) {
178        self.set_complex_var_u8_auxiliary(action)
179    }
180}
181
182fn collect_features(planner: &mut hb_ot_shape_planner_t) {
183    // We apply features according to the Arabic spec, with pauses
184    // in between most.
185    //
186    // The pause between init/medi/... and rlig is required.  See eg:
187    // https://bugzilla.mozilla.org/show_bug.cgi?id=644184
188    //
189    // The pauses between init/medi/... themselves are not necessarily
190    // needed as only one of those features is applied to any character.
191    // The only difference it makes is when fonts have contextual
192    // substitutions.  We now follow the order of the spec, which makes
193    // for better experience if that's what Uniscribe is doing.
194    //
195    // At least for Arabic, looks like Uniscribe has a pause between
196    // rlig and calt.  Otherwise the IranNastaliq's ALLAH ligature won't
197    // work.  However, testing shows that rlig and calt are applied
198    // together for Mongolian in Uniscribe.  As such, we only add a
199    // pause for Arabic, not other scripts.
200    //
201    // A pause after calt is required to make KFGQPC Uthmanic Script HAFS
202    // work correctly.  See https://github.com/harfbuzz/harfbuzz/issues/505
203
204    planner
205        .ot_map
206        .enable_feature(hb_tag_t::from_bytes(b"stch"), F_NONE, 1);
207    planner.ot_map.add_gsub_pause(Some(record_stch));
208
209    planner
210        .ot_map
211        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_NONE, 1);
212    planner
213        .ot_map
214        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_NONE, 1);
215
216    planner.ot_map.add_gsub_pause(None);
217
218    for feature in ARABIC_FEATURES {
219        let has_fallback = planner.script == Some(script::ARABIC) && !feature_is_syriac(*feature);
220        let flags = if has_fallback { F_HAS_FALLBACK } else { F_NONE };
221        planner.ot_map.add_feature(*feature, flags, 1);
222        planner.ot_map.add_gsub_pause(None);
223    }
224
225    // Normally, Unicode says a ZWNJ means "don't ligate".  In Arabic script
226    // however, it says a ZWJ should also mean "don't ligate".  So we run
227    // the main ligating features as MANUAL_ZWJ.
228
229    planner.ot_map.enable_feature(
230        hb_tag_t::from_bytes(b"rlig"),
231        F_MANUAL_ZWJ | F_HAS_FALLBACK,
232        1,
233    );
234
235    if planner.script == Some(script::ARABIC) {
236        planner.ot_map.add_gsub_pause(Some(arabic_fallback_shape));
237    }
238
239    // No pause after rclt.
240    // See 98460779bae19e4d64d29461ff154b3527bf8420
241    planner
242        .ot_map
243        .enable_feature(hb_tag_t::from_bytes(b"rclt"), F_MANUAL_ZWJ, 1);
244    planner
245        .ot_map
246        .enable_feature(hb_tag_t::from_bytes(b"calt"), F_MANUAL_ZWJ, 1);
247    planner.ot_map.add_gsub_pause(None);
248
249    // The spec includes 'cswh'.  Earlier versions of Windows
250    // used to enable this by default, but testing suggests
251    // that Windows 8 and later do not enable it by default,
252    // and spec now says 'Off by default'.
253    // We disabled this in ae23c24c32.
254    // Note that IranNastaliq uses this feature extensively
255    // to fixup broken glyph sequences.  Oh well...
256    // Test case: U+0643,U+0640,U+0631.
257
258    // planner.ot_map.enable_feature(feature::CONTEXTUAL_SWASH, F_NONE, 1);
259    planner
260        .ot_map
261        .enable_feature(hb_tag_t::from_bytes(b"mset"), F_NONE, 1);
262}
263
/// Per-plan data of the Arabic shaper, built by `data_create_arabic`.
pub struct arabic_shape_plan_t {
    // Feature mask for each shaping action, indexed by `arabic_action_t`
    // value (same order as `ARABIC_FEATURES`).
    //
    // The "+ 1" in the next array is to accommodate for the "NONE" command,
    // which is not an OpenType feature, but this simplifies the code by not
    // having to do a "if (... < NONE) ..." and just rely on the fact that
    // mask_array[NONE] == 0.
    mask_array: [hb_mask_t; ARABIC_FEATURES.len() + 1],
    // True when the plan's map reports a non-zero mask for the 'stch' feature.
    has_stch: bool,
}
272
273pub fn data_create_arabic(plan: &hb_ot_shape_plan_t) -> arabic_shape_plan_t {
274    let has_stch = plan.ot_map.get_1_mask(hb_tag_t::from_bytes(b"stch")) != 0;
275
276    let mut mask_array = [0; ARABIC_FEATURES.len() + 1];
277    for i in 0..ARABIC_FEATURES.len() {
278        mask_array[i] = plan.ot_map.get_1_mask(ARABIC_FEATURES[i]);
279    }
280
281    arabic_shape_plan_t {
282        mask_array,
283        has_stch,
284    }
285}
286
287fn arabic_joining(buffer: &mut hb_buffer_t) {
288    let mut prev: Option<usize> = None;
289    let mut state = 0;
290
291    // Check pre-context.
292    for i in 0..buffer.context_len[0] {
293        let c = buffer.context[0][i];
294        let this_type = get_joining_type(c, c.general_category());
295        if this_type == hb_arabic_joining_type_t::T {
296            continue;
297        }
298
299        state = STATE_TABLE[state][this_type as usize].2 as usize;
300        break;
301    }
302
303    for i in 0..buffer.len {
304        let this_type = get_joining_type(
305            buffer.info[i].as_char(),
306            _hb_glyph_info_get_general_category(&buffer.info[i]),
307        );
308        if this_type == hb_arabic_joining_type_t::T {
309            buffer.info[i].set_arabic_shaping_action(arabic_action_t::NONE);
310            continue;
311        }
312
313        let entry = &STATE_TABLE[state][this_type as usize];
314        if entry.0 != arabic_action_t::NONE && prev.is_some() {
315            if let Some(prev) = prev {
316                buffer.info[prev].set_arabic_shaping_action(entry.0);
317                buffer.unsafe_to_break(Some(prev), Some(i + 1));
318            }
319        }
320        // States that have a possible prev_action.
321        else {
322            if let Some(prev) = prev {
323                if this_type >= hb_arabic_joining_type_t::R || (2 <= state && state <= 5) {
324                    buffer.unsafe_to_concat(Some(prev), Some(i + 1));
325                }
326            } else {
327                if this_type >= hb_arabic_joining_type_t::R {
328                    buffer.unsafe_to_concat_from_outbuffer(Some(0), Some(i + 1));
329                }
330            }
331        }
332
333        buffer.info[i].set_arabic_shaping_action(entry.1);
334
335        prev = Some(i);
336        state = entry.2 as usize;
337    }
338
339    for i in 0..buffer.context_len[1] {
340        let c = buffer.context[1][i];
341        let this_type = get_joining_type(c, c.general_category());
342        if this_type == hb_arabic_joining_type_t::T {
343            continue;
344        }
345
346        let entry = &STATE_TABLE[state][this_type as usize];
347        if entry.0 != arabic_action_t::NONE && prev.is_some() {
348            if let Some(prev) = prev {
349                buffer.info[prev].set_arabic_shaping_action(entry.0);
350                buffer.unsafe_to_break(Some(prev), Some(buffer.len));
351            }
352        }
353        // States that have a possible prev_action.
354        else if 2 <= state && state <= 5 {
355            if let Some(prev) = prev {
356                buffer.unsafe_to_concat(Some(prev), Some(buffer.len));
357            }
358        }
359
360        break;
361    }
362}
363
364fn mongolian_variation_selectors(buffer: &mut hb_buffer_t) {
365    // Copy arabic_shaping_action() from base to Mongolian variation selectors.
366    let len = buffer.len;
367    let info = &mut buffer.info;
368    for i in 1..len {
369        if (0x180B..=0x180D).contains(&info[i].glyph_id) || info[i].glyph_id == 0x180F {
370            let a = info[i - 1].arabic_shaping_action();
371            info[i].set_arabic_shaping_action(a);
372        }
373    }
374}
375
376fn setup_masks_arabic_plan(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
377    let arabic_plan = plan.data::<arabic_shape_plan_t>();
378    setup_masks_inner(arabic_plan, plan.script, buffer)
379}
380
381pub fn setup_masks_inner(
382    arabic_plan: &arabic_shape_plan_t,
383    script: Option<Script>,
384    buffer: &mut hb_buffer_t,
385) {
386    arabic_joining(buffer);
387    if script == Some(script::MONGOLIAN) {
388        mongolian_variation_selectors(buffer);
389    }
390
391    for info in buffer.info_slice_mut() {
392        info.mask |= arabic_plan.mask_array[info.arabic_shaping_action() as usize];
393    }
394}
395
396fn arabic_fallback_shape(_: &hb_ot_shape_plan_t, _: &hb_font_t, _: &mut hb_buffer_t) {}
397
398// Stretch feature: "stch".
399// See example here:
400// https://docs.microsoft.com/en-us/typography/script-development/syriac
401// We implement this in a generic way, such that the Arabic subtending
402// marks can use it as well.
403fn record_stch(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
404    let arabic_plan = plan.data::<arabic_shape_plan_t>();
405    if !arabic_plan.has_stch {
406        return;
407    }
408
409    // 'stch' feature was just applied.  Look for anything that multiplied,
410    // and record it for stch treatment later.  Note that rtlm, frac, etc
411    // are applied before stch, but we assume that they didn't result in
412    // anything multiplying into 5 pieces, so it's safe-ish...
413
414    let len = buffer.len;
415    let info = &mut buffer.info;
416    let mut has_stch = false;
417    for glyph_info in &mut info[..len] {
418        if _hb_glyph_info_multiplied(glyph_info) {
419            let comp = if _hb_glyph_info_get_lig_comp(glyph_info) % 2 != 0 {
420                arabic_action_t::STRETCHING_REPEATING
421            } else {
422                arabic_action_t::STRETCHING_FIXED
423            };
424
425            glyph_info.set_arabic_shaping_action(comp);
426            has_stch = true;
427        }
428    }
429
430    if has_stch {
431        buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH;
432    }
433}
434
435fn apply_stch(face: &hb_font_t, buffer: &mut hb_buffer_t) {
436    if buffer.scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH == 0 {
437        return;
438    }
439
440    // The Arabic shaper currently always processes in RTL mode, so we should
441    // stretch / position the stretched pieces to the left / preceding glyphs.
442
443    // We do a two pass implementation:
444    // First pass calculates the exact number of extra glyphs we need,
445    // We then enlarge buffer to have that much room,
446    // Second pass applies the stretch, copying things to the end of buffer.
447
448    let mut extra_glyphs_needed: usize = 0; // Set during MEASURE, used during CUT
449    const MEASURE: usize = 0;
450    const CUT: usize = 1;
451
452    for step in 0..2 {
453        let new_len = buffer.len + extra_glyphs_needed; // write head during CUT
454        let mut i = buffer.len;
455        let mut j = new_len;
456        while i != 0 {
457            if !arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
458                if step == CUT {
459                    j -= 1;
460                    buffer.info[j] = buffer.info[i - 1];
461                    buffer.pos[j] = buffer.pos[i - 1];
462                }
463
464                i -= 1;
465                continue;
466            }
467
468            // Yay, justification!
469
470            let mut w_total = 0; // Total to be filled
471            let mut w_fixed = 0; // Sum of fixed tiles
472            let mut w_repeating = 0; // Sum of repeating tiles
473            let mut n_repeating: i32 = 0;
474
475            let end = i;
476            while i != 0 && arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
477                i -= 1;
478                let width = face.glyph_h_advance(buffer.info[i].as_glyph()) as i32;
479
480                if buffer.info[i].arabic_shaping_action() == arabic_action_t::STRETCHING_FIXED {
481                    w_fixed += width;
482                } else {
483                    w_repeating += width;
484                    n_repeating += 1;
485                }
486            }
487
488            let start = i;
489            let mut context = i;
490            while context != 0
491                && !arabic_action_t::is_stch(buffer.info[context - 1].arabic_shaping_action())
492                && (_hb_glyph_info_is_default_ignorable(&buffer.info[context - 1])
493                    || is_word_category(_hb_glyph_info_get_general_category(
494                        &buffer.info[context - 1],
495                    )))
496            {
497                context -= 1;
498                w_total += buffer.pos[context].x_advance;
499            }
500
501            i += 1; // Don't touch i again.
502
503            // Number of additional times to repeat each repeating tile.
504            let mut n_copies: i32 = 0;
505
506            let w_remaining = w_total - w_fixed;
507            if w_remaining > w_repeating && w_repeating > 0 {
508                n_copies = w_remaining / (w_repeating) - 1;
509            }
510
511            // See if we can improve the fit by adding an extra repeat and squeezing them together a bit.
512            let mut extra_repeat_overlap = 0;
513            let shortfall = w_remaining - w_repeating * (n_copies + 1);
514            if shortfall > 0 && n_repeating > 0 {
515                n_copies += 1;
516                let excess = (n_copies + 1) * w_repeating - w_remaining;
517                if excess > 0 {
518                    extra_repeat_overlap = excess / (n_copies * n_repeating);
519                }
520            }
521
522            if step == MEASURE {
523                extra_glyphs_needed += (n_copies * n_repeating) as usize;
524            } else {
525                buffer.unsafe_to_break(Some(context), Some(end));
526                let mut x_offset = 0;
527                for k in (start + 1..=end).rev() {
528                    let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph()) as i32;
529
530                    let mut repeat = 1;
531                    if buffer.info[k - 1].arabic_shaping_action()
532                        == arabic_action_t::STRETCHING_REPEATING
533                    {
534                        repeat += n_copies;
535                    }
536
537                    for n in 0..repeat {
538                        x_offset -= width;
539                        if n > 0 {
540                            x_offset += extra_repeat_overlap;
541                        }
542
543                        buffer.pos[k - 1].x_offset = x_offset;
544
545                        // Append copy.
546                        j -= 1;
547                        buffer.info[j] = buffer.info[k - 1];
548                        buffer.pos[j] = buffer.pos[k - 1];
549                    }
550                }
551            }
552
553            i -= 1;
554        }
555
556        if step == MEASURE {
557            buffer.ensure(buffer.len + extra_glyphs_needed);
558        } else {
559            debug_assert_eq!(j, 0);
560            buffer.set_len(new_len);
561        }
562    }
563}
564
565fn postprocess_glyphs_arabic(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
566    apply_stch(face, buffer)
567}
568
// http://www.unicode.org/reports/tr53/
/// Code points that `reorder_marks_arabic` treats as modifier combining
/// marks: a leading run of these (within combining class 220 or 230) is
/// moved to the front of its mark sequence.
const MODIFIER_COMBINING_MARKS: &[u32] = &[
    0x0654, // ARABIC HAMZA ABOVE
    0x0655, // ARABIC HAMZA BELOW
    0x0658, // ARABIC MARK NOON GHUNNA
    0x06DC, // ARABIC SMALL HIGH SEEN
    0x06E3, // ARABIC SMALL LOW SEEN
    0x06E7, // ARABIC SMALL HIGH YEH
    0x06E8, // ARABIC SMALL HIGH NOON
    0x08CA, // ARABIC SMALL HIGH FARSI YEH
    0x08CB, // ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
    0x08CD, // ARABIC SMALL HIGH ZAH
    0x08CE, // ARABIC LARGE ROUND DOT ABOVE
    0x08CF, // ARABIC LARGE ROUND DOT BELOW
    0x08D3, // ARABIC SMALL LOW WAW
    0x08F3, // ARABIC SMALL HIGH WAW
];
586
587fn reorder_marks_arabic(
588    _: &hb_ot_shape_plan_t,
589    buffer: &mut hb_buffer_t,
590    mut start: usize,
591    end: usize,
592) {
593    let mut i = start;
594    for cc in [220u8, 230].iter().cloned() {
595        while i < end && _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) < cc {
596            i += 1;
597        }
598
599        if i == end {
600            break;
601        }
602
603        if _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) > cc {
604            continue;
605        }
606
607        let mut j = i;
608        while j < end
609            && _hb_glyph_info_get_modified_combining_class(&buffer.info[j]) == cc
610            && MODIFIER_COMBINING_MARKS.contains(&buffer.info[j].glyph_id)
611        {
612            j += 1;
613        }
614
615        if i == j {
616            continue;
617        }
618
619        // Shift it!
620        let mut temp = [hb_glyph_info_t::default(); MAX_COMBINING_MARKS];
621        debug_assert!(j - i <= MAX_COMBINING_MARKS);
622        buffer.merge_clusters(start, j);
623
624        temp[..j - i].copy_from_slice(&buffer.info[i..j]);
625
626        for k in (0..i - start).rev() {
627            buffer.info[k + start + j - i] = buffer.info[k + start];
628        }
629
630        buffer.info[start..][..j - i].copy_from_slice(&temp[..j - i]);
631
632        // Renumber CC such that the reordered sequence is still sorted.
633        // 22 and 26 are chosen because they are smaller than all Arabic categories,
634        // and are folded back to 220/230 respectively during fallback mark positioning.
635        //
636        // We do this because the CGJ-handling logic in the normalizer relies on
637        // mark sequences having an increasing order even after this reordering.
638        // https://github.com/harfbuzz/harfbuzz/issues/554
639        // This, however, does break some obscure sequences, where the normalizer
640        // might compose a sequence that it should not.  For example, in the seequence
641        // ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this
642        // renumbering, we will.
643        let new_start = start + j - i;
644        let new_cc = if cc == 220 {
645            modified_combining_class::CCC22
646        } else {
647            modified_combining_class::CCC26
648        };
649
650        while start < new_start {
651            _hb_glyph_info_set_modified_combining_class(&mut buffer.info[start], new_cc);
652            start += 1;
653        }
654
655        i = j;
656    }
657}
658
/// Complex-shaper descriptor that wires up the hooks defined in this file.
///
/// Hooks left as `None` are not provided by this shaper.
pub const ARABIC_SHAPER: hb_ot_complex_shaper_t = hb_ot_complex_shaper_t {
    collect_features: Some(collect_features),
    override_features: None,
    // Boxes the plan data that the other hooks read back via `plan.data()`.
    create_data: Some(|plan| Box::new(data_create_arabic(plan))),
    preprocess_text: None,
    // Applies the 'stch' stretching.
    postprocess_glyphs: Some(postprocess_glyphs_arabic),
    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO,
    decompose: None,
    compose: None,
    // Runs the joining state machine and sets per-glyph feature masks.
    setup_masks: Some(setup_masks_arabic_plan),
    gpos_tag: None,
    reorder_marks: Some(reorder_marks_arabic),
    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
    fallback_position: true,
};
673};