rustybuzz/hb/
ot_shape_complex_indic.rs

Help
1use alloc::boxed::Box;
2use core::cmp;
3use core::convert::TryFrom;
4use core::ops::Range;
5
6use ttf_parser::GlyphId;
7
8use super::algs::*;
9use super::buffer::hb_buffer_t;
10use super::ot_layout::*;
11use super::ot_layout_gsubgpos::{WouldApply, WouldApplyContext};
12use super::ot_map::*;
13use super::ot_shape::*;
14use super::ot_shape_complex::*;
15use super::ot_shape_normalize::*;
16use super::ot_shape_plan::hb_ot_shape_plan_t;
17use super::unicode::{hb_gc, CharExt, GeneralCategoryExt};
18use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
19
20pub const INDIC_SHAPER: hb_ot_complex_shaper_t = hb_ot_complex_shaper_t {
21    collect_features: Some(collect_features),
22    override_features: Some(override_features),
23    create_data: Some(|plan| Box::new(IndicShapePlan::new(plan))),
24    preprocess_text: Some(preprocess_text),
25    postprocess_glyphs: None,
26    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
27    decompose: Some(decompose),
28    compose: Some(compose),
29    setup_masks: Some(setup_masks),
30    gpos_tag: None,
31    reorder_marks: None,
32    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
33    fallback_position: false,
34};
35
/// Numeric Indic category of a glyph (one of the constants in [`category`]).
pub type Category = u8;

/// Indic category values.
///
/// The values double as bit indices (see `category_flag`), so they must
/// not be renumbered.
pub mod category {
    pub const X: u8 = 0;
    pub const C: u8 = 1;
    pub const V: u8 = 2;
    pub const N: u8 = 3;
    pub const H: u8 = 4;
    pub const ZWNJ: u8 = 5;
    pub const ZWJ: u8 = 6;
    pub const M: u8 = 7;
    pub const SM: u8 = 8;
    // OT_VD = 9, UNUSED; we use OT_A instead.
    pub const A: u8 = 10;
    pub const PLACEHOLDER: u8 = 11;
    pub const DOTTED_CIRCLE: u8 = 12;
    /// Register Shifter, used in Khmer OT spec.
    pub const RS: u8 = 13;
    /// Khmer-style Virama.
    pub const COENG: u8 = 14;
    /// Atomically-encoded logical or visual repha.
    pub const REPHA: u8 = 15;
    pub const RA: u8 = 16;
    /// Consonant-Medial.
    pub const CM: u8 = 17;
    /// Avagraha, etc that take marks (SM,A,VD).
    pub const SYMBOL: u8 = 18;
    pub const CS: u8 = 19;
    pub const ROBATIC: u8 = 20;
    pub const X_GROUP: u8 = 21;
    pub const Y_GROUP: u8 = 22;
    pub const MW: u8 = 23;
    pub const MY: u8 = 24;
    pub const PT: u8 = 25;

    // The following are used by Khmer & Myanmar shapers.  Defined here for them to share.
    pub const V_AVB: u8 = 26;
    pub const V_BLW: u8 = 27;
    pub const V_PRE: u8 = 28;
    pub const V_PST: u8 = 29;
    /// Variation selectors.
    pub const VS: u8 = 30;
    /// Punctuation.
    pub const P: u8 = 31;
    /// Digits except zero.
    pub const D: u8 = 32;
    /// Medial la.
    pub const ML: u8 = 33;
}
74
/// Numeric Indic position of a glyph (one of the constants in [`position`]).
pub type Position = u8;

/// Indic position values, numbered consecutively from `START` to `END`.
pub mod position {
    pub const START: u8 = 0;

    pub const RA_TO_BECOME_REPH: u8 = 1;
    pub const PRE_M: u8 = 2;
    pub const PRE_C: u8 = 3;

    pub const BASE_C: u8 = 4;
    pub const AFTER_MAIN: u8 = 5;

    pub const ABOVE_C: u8 = 6;

    pub const BEFORE_SUB: u8 = 7;
    pub const BELOW_C: u8 = 8;
    pub const AFTER_SUB: u8 = 9;

    pub const BEFORE_POST: u8 = 10;
    pub const POST_C: u8 = 11;
    pub const AFTER_POST: u8 = 12;

    pub const FINAL_C: u8 = 13;
    pub const SMVD: u8 = 14;

    pub const END: u8 = 15;
}
94
/// Syllabic category of a character.
///
/// Not every variant is referenced in this file, hence the `dead_code` allowance.
// NOTE(review): the variant names look like the Unicode Indic_Syllabic_Category
// property values — confirm against the table that consumes this enum.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
pub enum SyllabicCategory {
    Other,
    Avagraha,
    Bindu,
    BrahmiJoiningNumber,
    CantillationMark,
    Consonant,
    ConsonantDead,
    ConsonantFinal,
    ConsonantHeadLetter,
    ConsonantInitialPostfixed,
    ConsonantKiller,
    ConsonantMedial,
    ConsonantPlaceholder,
    ConsonantPrecedingRepha,
    ConsonantPrefixed,
    ConsonantSubjoined,
    ConsonantSucceedingRepha,
    ConsonantWithStacker,
    GeminationMark,
    InvisibleStacker,
    Joiner,
    ModifyingLetter,
    NonJoiner,
    Nukta,
    Number,
    NumberJoiner,
    PureKiller,
    RegisterShifter,
    SyllableModifier,
    ToneLetter,
    ToneMark,
    Virama,
    Visarga,
    Vowel,
    VowelDependent,
    VowelIndependent,
}
135
/// Placement category of a matra relative to its base.
///
/// Not every variant is referenced in this file, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum MatraCategory {
    NotApplicable,

    // Single-sided placements.
    Left,
    Top,
    Bottom,
    Right,

    // Combined placements.
    BottomAndLeft,
    BottomAndRight,
    LeftAndRight,
    TopAndBottom,
    TopAndBottomAndRight,
    TopAndBottomAndLeft,
    TopAndLeft,
    TopAndLeftAndRight,
    TopAndRight,

    Overstruck,
    VisualOrderLeft,
}
156
157const INDIC_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[
158    // Basic features.
159    // These features are applied in order, one at a time, after initial_reordering,
160    // constrained to the syllable.
161    (
162        hb_tag_t::from_bytes(b"nukt"),
163        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
164    ),
165    (
166        hb_tag_t::from_bytes(b"akhn"),
167        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
168    ),
169    (
170        hb_tag_t::from_bytes(b"rphf"),
171        F_MANUAL_JOINERS | F_PER_SYLLABLE,
172    ),
173    (
174        hb_tag_t::from_bytes(b"rkrf"),
175        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
176    ),
177    (
178        hb_tag_t::from_bytes(b"pref"),
179        F_MANUAL_JOINERS | F_PER_SYLLABLE,
180    ),
181    (
182        hb_tag_t::from_bytes(b"blwf"),
183        F_MANUAL_JOINERS | F_PER_SYLLABLE,
184    ),
185    (
186        hb_tag_t::from_bytes(b"abvf"),
187        F_MANUAL_JOINERS | F_PER_SYLLABLE,
188    ),
189    (
190        hb_tag_t::from_bytes(b"half"),
191        F_MANUAL_JOINERS | F_PER_SYLLABLE,
192    ),
193    (
194        hb_tag_t::from_bytes(b"pstf"),
195        F_MANUAL_JOINERS | F_PER_SYLLABLE,
196    ),
197    (
198        hb_tag_t::from_bytes(b"vatu"),
199        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
200    ),
201    (
202        hb_tag_t::from_bytes(b"cjct"),
203        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
204    ),
205    // Other features.
206    // These features are applied all at once, after final_reordering, constrained
207    // to the syllable.
208    // Default Bengali font in Windows for example has intermixed
209    // lookups for init,pres,abvs,blws features.
210    (
211        hb_tag_t::from_bytes(b"init"),
212        F_MANUAL_JOINERS | F_PER_SYLLABLE,
213    ),
214    (
215        hb_tag_t::from_bytes(b"pres"),
216        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
217    ),
218    (
219        hb_tag_t::from_bytes(b"abvs"),
220        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
221    ),
222    (
223        hb_tag_t::from_bytes(b"blws"),
224        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
225    ),
226    (
227        hb_tag_t::from_bytes(b"psts"),
228        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
229    ),
230    (
231        hb_tag_t::from_bytes(b"haln"),
232        F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
233    ),
234];
235
/// Indices into the `INDIC_FEATURES` array, one per feature tag.
///
/// Must be in the same order as the `INDIC_FEATURES` array.
// Not every index is referenced, so unused ones are tolerated.
#[allow(dead_code)]
mod indic_feature {
    // Basic features.
    pub const NUKT: usize = 0;
    pub const AKHN: usize = 1;
    pub const RPHF: usize = 2;
    pub const RKRF: usize = 3;
    pub const PREF: usize = 4;
    pub const BLWF: usize = 5;
    pub const ABVF: usize = 6;
    pub const HALF: usize = 7;
    pub const PSTF: usize = 8;
    pub const VATU: usize = 9;
    pub const CJCT: usize = 10;

    // Other features.
    pub const INIT: usize = 11;
    pub const PRES: usize = 12;
    pub const ABVS: usize = 13;
    pub const BLWS: usize = 14;
    pub const PSTS: usize = 15;
    pub const HALN: usize = 16;
}
257
258const fn category_flag(c: Category) -> u32 {
259    rb_flag(c as u32)
260}
261
262const MEDIAL_FLAGS: u32 = category_flag(category::CM);
263// Note:
264//
265// We treat Vowels and placeholders as if they were consonants.  This is safe because Vowels
266// cannot happen in a consonant syllable.  The plus side however is, we can call the
267// consonant syllable logic from the vowel syllable function and get it all right!
268const CONSONANT_FLAGS: u32 = category_flag(category::C)
269    | category_flag(category::CS)
270    | category_flag(category::RA)
271    | MEDIAL_FLAGS
272    | category_flag(category::V)
273    | category_flag(category::PLACEHOLDER)
274    | category_flag(category::DOTTED_CIRCLE);
275const JOINER_FLAGS: u32 = category_flag(category::ZWJ) | category_flag(category::ZWNJ);
276
/// Code points of the "Ra" letter in each supported script.
///
/// This is a hack for now.  We should move this data into the main Indic table.
/// Or completely remove it and just check in the tables.
#[rustfmt::skip]
const RA_CHARS: &[u32] = &[
    0x0930, // Devanagari
    0x09B0, // Bengali
    0x09F0, // Bengali
    0x0A30, // Gurmukhi.  No Reph
    0x0AB0, // Gujarati
    0x0B30, // Oriya
    0x0BB0, // Tamil.  No Reph
    0x0C30, // Telugu.  Reph formed only with ZWJ
    0x0CB0, // Kannada
    0x0D30, // Malayalam.  No Reph, Logical Repha
    0x0DBB, // Sinhala.  Reph formed only with ZWJ
];
292
/// Base-consonant search mode selected by a script's `IndicConfig`.
#[derive(Clone, Copy, PartialEq)]
enum BasePosition {
    /// Mode used by the Sinhala configuration.
    LastSinhala,
    /// Mode used by every other configuration.
    Last,
}
298
299#[derive(Clone, Copy, PartialEq)]
300enum RephPosition {
301    AfterMain = position::AFTER_MAIN as isize,
302    BeforeSub = position::BEFORE_SUB as isize,
303    AfterSub = position::AFTER_SUB as isize,
304    BeforePost = position::BEFORE_POST as isize,
305    AfterPost = position::AFTER_POST as isize,
306}
307
/// How a script forms its reph.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,

    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,

    /// Encoded Repha character, needs reordering.
    LogRepha,
}
317
/// Which consonants the below-forms (`blwf`) feature is applied to.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,

    /// Below-forms feature applied to post-base only.
    PostOnly,
}
325
326#[derive(Clone, Copy)]
327struct IndicConfig {
328    script: Option<Script>,
329    has_old_spec: bool,
330    virama: u32,
331    base_pos: BasePosition,
332    reph_pos: RephPosition,
333    reph_mode: RephMode,
334    blwf_mode: BlwfMode,
335}
336
337impl IndicConfig {
338    const fn new(
339        script: Option<Script>,
340        has_old_spec: bool,
341        virama: u32,
342        base_pos: BasePosition,
343        reph_pos: RephPosition,
344        reph_mode: RephMode,
345        blwf_mode: BlwfMode,
346    ) -> Self {
347        IndicConfig {
348            script,
349            has_old_spec,
350            virama,
351            base_pos,
352            reph_pos,
353            reph_mode,
354            blwf_mode,
355        }
356    }
357}
358
359const INDIC_CONFIGS: &[IndicConfig] = &[
360    IndicConfig::new(
361        None,
362        false,
363        0,
364        BasePosition::Last,
365        RephPosition::BeforePost,
366        RephMode::Implicit,
367        BlwfMode::PreAndPost,
368    ),
369    IndicConfig::new(
370        Some(script::DEVANAGARI),
371        true,
372        0x094D,
373        BasePosition::Last,
374        RephPosition::BeforePost,
375        RephMode::Implicit,
376        BlwfMode::PreAndPost,
377    ),
378    IndicConfig::new(
379        Some(script::BENGALI),
380        true,
381        0x09CD,
382        BasePosition::Last,
383        RephPosition::AfterSub,
384        RephMode::Implicit,
385        BlwfMode::PreAndPost,
386    ),
387    IndicConfig::new(
388        Some(script::GURMUKHI),
389        true,
390        0x0A4D,
391        BasePosition::Last,
392        RephPosition::BeforeSub,
393        RephMode::Implicit,
394        BlwfMode::PreAndPost,
395    ),
396    IndicConfig::new(
397        Some(script::GUJARATI),
398        true,
399        0x0ACD,
400        BasePosition::Last,
401        RephPosition::BeforePost,
402        RephMode::Implicit,
403        BlwfMode::PreAndPost,
404    ),
405    IndicConfig::new(
406        Some(script::ORIYA),
407        true,
408        0x0B4D,
409        BasePosition::Last,
410        RephPosition::AfterMain,
411        RephMode::Implicit,
412        BlwfMode::PreAndPost,
413    ),
414    IndicConfig::new(
415        Some(script::TAMIL),
416        true,
417        0x0BCD,
418        BasePosition::Last,
419        RephPosition::AfterPost,
420        RephMode::Implicit,
421        BlwfMode::PreAndPost,
422    ),
423    IndicConfig::new(
424        Some(script::TELUGU),
425        true,
426        0x0C4D,
427        BasePosition::Last,
428        RephPosition::AfterPost,
429        RephMode::Explicit,
430        BlwfMode::PostOnly,
431    ),
432    IndicConfig::new(
433        Some(script::KANNADA),
434        true,
435        0x0CCD,
436        BasePosition::Last,
437        RephPosition::AfterPost,
438        RephMode::Implicit,
439        BlwfMode::PostOnly,
440    ),
441    IndicConfig::new(
442        Some(script::MALAYALAM),
443        true,
444        0x0D4D,
445        BasePosition::Last,
446        RephPosition::AfterMain,
447        RephMode::LogRepha,
448        BlwfMode::PreAndPost,
449    ),
450    IndicConfig::new(
451        Some(script::SINHALA),
452        false,
453        0x0DCA,
454        BasePosition::LastSinhala,
455        RephPosition::AfterPost,
456        RephMode::Explicit,
457        BlwfMode::PreAndPost,
458    ),
459];
460
/// The GSUB lookups of one feature, kept so that "would this glyph sequence
/// be substituted?" can be asked without shaping.
struct IndicWouldSubstituteFeature {
    /// Range of lookup indices in the map; empty when the feature has no stage.
    lookups: Range<usize>,
    /// Passed through to `WouldApplyContext::zero_context`.
    zero_context: bool,
}
465
466impl IndicWouldSubstituteFeature {
467    pub fn new(map: &hb_ot_map_t, feature_tag: hb_tag_t, zero_context: bool) -> Self {
468        IndicWouldSubstituteFeature {
469            lookups: match map.get_feature_stage(TableIndex::GSUB, feature_tag) {
470                Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage),
471                None => 0..0,
472            },
473            zero_context,
474        }
475    }
476
477    pub fn would_substitute(
478        &self,
479        map: &hb_ot_map_t,
480        face: &hb_font_t,
481        glyphs: &[GlyphId],
482    ) -> bool {
483        for index in self.lookups.clone() {
484            let lookup = map.lookup(TableIndex::GSUB, index);
485            let ctx = WouldApplyContext {
486                glyphs,
487                zero_context: self.zero_context,
488            };
489            if face
490                .gsub
491                .as_ref()
492                .and_then(|table| table.get_lookup(lookup.index))
493                .map_or(false, |lookup| lookup.would_apply(&ctx))
494            {
495                return true;
496            }
497        }
498
499        false
500    }
501}
502
503struct IndicShapePlan {
504    config: IndicConfig,
505    is_old_spec: bool,
506    // virama_glyph: Option<u32>,
507    rphf: IndicWouldSubstituteFeature,
508    pref: IndicWouldSubstituteFeature,
509    blwf: IndicWouldSubstituteFeature,
510    pstf: IndicWouldSubstituteFeature,
511    vatu: IndicWouldSubstituteFeature,
512    mask_array: [hb_mask_t; INDIC_FEATURES.len()],
513}
514
515impl IndicShapePlan {
516    fn new(plan: &hb_ot_shape_plan_t) -> Self {
517        let script = plan.script;
518        let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) {
519            *c
520        } else {
521            INDIC_CONFIGS[0]
522        };
523
524        let is_old_spec = config.has_old_spec
525            && plan
526                .ot_map
527                .chosen_script(TableIndex::GSUB)
528                .map_or(true, |tag| tag.to_bytes()[3] != b'2');
529
530        // Use zero-context would_substitute() matching for new-spec of the main
531        // Indic scripts, and scripts with one spec only, but not for old-specs.
532        // The new-spec for all dual-spec scripts says zero-context matching happens.
533        //
534        // However, testing with Malayalam shows that old and new spec both allow
535        // context.  Testing with Bengali new-spec however shows that it doesn't.
536        // So, the heuristic here is the way it is.  It should *only* be changed,
537        // as we discover more cases of what Windows does.  DON'T TOUCH OTHERWISE.
538        let zero_context = is_old_spec && script != Some(script::MALAYALAM);
539
540        let mut mask_array = [0; INDIC_FEATURES.len()];
541        for (i, feature) in INDIC_FEATURES.iter().enumerate() {
542            mask_array[i] = if feature.1 & F_GLOBAL != 0 {
543                0
544            } else {
545                plan.ot_map.get_1_mask(feature.0)
546            }
547        }
548
549        // TODO: what is this?
550        // let mut virama_glyph = None;
551        // if config.virama != 0 {
552        //     if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) {
553        //         virama_glyph = Some(g.0 as u32);
554        //     }
555        // }
556
557        IndicShapePlan {
558            config,
559            is_old_spec,
560            // virama_glyph,
561            rphf: IndicWouldSubstituteFeature::new(
562                &plan.ot_map,
563                hb_tag_t::from_bytes(b"rphf"),
564                zero_context,
565            ),
566            pref: IndicWouldSubstituteFeature::new(
567                &plan.ot_map,
568                hb_tag_t::from_bytes(b"pref"),
569                zero_context,
570            ),
571            blwf: IndicWouldSubstituteFeature::new(
572                &plan.ot_map,
573                hb_tag_t::from_bytes(b"blwf"),
574                zero_context,
575            ),
576            pstf: IndicWouldSubstituteFeature::new(
577                &plan.ot_map,
578                hb_tag_t::from_bytes(b"pstf"),
579                zero_context,
580            ),
581            vatu: IndicWouldSubstituteFeature::new(
582                &plan.ot_map,
583                hb_tag_t::from_bytes(b"vatu"),
584                zero_context,
585            ),
586            mask_array,
587        }
588    }
589}
590
591impl hb_glyph_info_t {
592    pub(crate) fn indic_category(&self) -> Category {
593        self.complex_var_u8_category()
594    }
595
596    pub(crate) fn set_indic_category(&mut self, c: Category) {
597        self.set_complex_var_u8_category(c)
598    }
599
600    pub(crate) fn indic_position(&self) -> Position {
601        self.complex_var_u8_auxiliary()
602    }
603
604    pub(crate) fn set_indic_position(&mut self, c: Position) {
605        self.set_complex_var_u8_auxiliary(c)
606    }
607
608    fn is_one_of(&self, flags: u32) -> bool {
609        // If it ligated, all bets are off.
610        if _hb_glyph_info_ligated(self) {
611            return false;
612        }
613
614        rb_flag_unsafe(self.indic_category() as u32) & flags != 0
615    }
616
617    fn is_joiner(&self) -> bool {
618        self.is_one_of(JOINER_FLAGS)
619    }
620
621    pub(crate) fn is_consonant(&self) -> bool {
622        self.is_one_of(CONSONANT_FLAGS)
623    }
624
625    fn is_halant(&self) -> bool {
626        self.is_one_of(rb_flag(category::H as u32))
627    }
628
629    fn set_indic_properties(&mut self) {
630        let u = self.glyph_id;
631        let (mut cat, mut pos) = get_category_and_position(u);
632
633        // Re-assign category
634
635        // The following act more like the Bindus.
636        match u {
637            0x0953..=0x0954 => cat = category::SM,
638            // The following act like consonants.
639            0x0A72..=0x0A73 | 0x1CF5..=0x1CF6 => cat = category::C,
640            // TODO: The following should only be allowed after a Visarga.
641            // For now, just treat them like regular tone marks.
642            0x1CE2..=0x1CE8 => cat = category::A,
643            // TODO: The following should only be allowed after some of
644            // the nasalization marks, maybe only for U+1CE9..U+1CF1.
645            // For now, just treat them like tone marks.
646            0x1CED => cat = category::A,
647            // The following take marks in standalone clusters, similar to Avagraha.
648            0xA8F2..=0xA8F7 | 0x1CE9..=0x1CEC | 0x1CEE..=0x1CF1 => cat = category::SYMBOL,
649            // https://github.com/harfbuzz/harfbuzz/issues/524
650            0x0A51 => {
651                cat = category::M;
652                pos = position::BELOW_C;
653            }
654            // According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
655            // so the Indic shaper needs to know their categories.
656            0x11301 | 0x11303 => cat = category::SM,
657            0x1133B | 0x1133C => cat = category::N,
658            // https://github.com/harfbuzz/harfbuzz/issues/552
659            0x0AFB => cat = category::N,
660            // https://github.com/harfbuzz/harfbuzz/issues/2849
661            0x0B55 => cat = category::N,
662            // https://github.com/harfbuzz/harfbuzz/issues/538
663            0x0980 => cat = category::PLACEHOLDER,
664            // https://github.com/harfbuzz/harfbuzz/issues/1613
665            0x09FC => cat = category::PLACEHOLDER,
666            // https://github.com/harfbuzz/harfbuzz/issues/623
667            0x0C80 => cat = category::PLACEHOLDER,
668            0x0D04 => cat = category::PLACEHOLDER,
669            0x2010 | 0x2011 => cat = category::PLACEHOLDER,
670            0x25CC => cat = category::DOTTED_CIRCLE,
671            _ => {}
672        }
673
674        // Re-assign position.
675
676        if (rb_flag_unsafe(cat as u32) & CONSONANT_FLAGS) != 0 {
677            pos = position::BASE_C;
678            if RA_CHARS.contains(&u) {
679                cat = category::RA;
680            }
681        } else if cat == category::M {
682            pos = matra_position_indic(u, pos);
683        } else if (rb_flag_unsafe(cat as u32)
684            & (category_flag(category::SM)
685                | category_flag(category::A)
686                | category_flag(category::SYMBOL)))
687            != 0
688        {
689            pos = position::SMVD;
690        }
691
692        // Oriya Bindu is BeforeSub in the spec.
693        if u == 0x0B01 {
694            pos = position::BEFORE_SUB;
695        }
696
697        self.set_indic_category(cat);
698        self.set_indic_position(pos);
699    }
700}
701
702fn collect_features(planner: &mut hb_ot_shape_planner_t) {
703    // Do this before any lookups have been applied.
704    planner.ot_map.add_gsub_pause(Some(setup_syllables));
705
706    planner
707        .ot_map
708        .enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, 1);
709    // The Indic specs do not require ccmp, but we apply it here since if
710    // there is a use of it, it's typically at the beginning.
711    planner
712        .ot_map
713        .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, 1);
714
715    planner.ot_map.add_gsub_pause(Some(initial_reordering));
716
717    for feature in INDIC_FEATURES.iter().take(10) {
718        planner.ot_map.add_feature(feature.0, feature.1, 1);
719        planner.ot_map.add_gsub_pause(None);
720    }
721
722    planner.ot_map.add_gsub_pause(Some(final_reordering));
723
724    for feature in INDIC_FEATURES.iter().skip(10) {
725        planner.ot_map.add_feature(feature.0, feature.1, 1);
726    }
727}
728
729fn override_features(planner: &mut hb_ot_shape_planner_t) {
730    planner
731        .ot_map
732        .disable_feature(hb_tag_t::from_bytes(b"liga"));
733}
734
735fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
736    super::ot_shape_complex_vowel_constraints::preprocess_text_vowel_constraints(buffer);
737}
738
739fn decompose(ctx: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> {
740    // Don't decompose these.
741    match ab {
742        '\u{0931}' |               // DEVANAGARI LETTER RRA
743        // https://github.com/harfbuzz/harfbuzz/issues/779
744        '\u{09DC}' |               // BENGALI LETTER RRA
745        '\u{09DD}' |               // BENGALI LETTER RHA
746        '\u{0B94}' => return None, // TAMIL LETTER AU
747        _ => {}
748    }
749
750    if ab == '\u{0DDA}' || ('\u{0DDC}'..='\u{0DDE}').contains(&ab) {
751        // Sinhala split matras...  Let the fun begin.
752        //
753        // These four characters have Unicode decompositions.  However, Uniscribe
754        // decomposes them "Khmer-style", that is, it uses the character itself to
755        // get the second half.  The first half of all four decompositions is always
756        // U+0DD9.
757        //
758        // Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
759        // broken with Uniscribe.  But we need to support them.  As such, we only
760        // do the Uniscribe-style decomposition if the character is transformed into
761        // its "sec.half" form by the 'pstf' feature.  Otherwise, we fall back to
762        // Unicode decomposition.
763        //
764        // Note that we can't unconditionally use Unicode decomposition.  That would
765        // break some other fonts, that are designed to work with Uniscribe, and
766        // don't have positioning features for the Unicode-style decomposition.
767        //
768        // Argh...
769        //
770        // The Uniscribe behavior is now documented in the newly published Sinhala
771        // spec in 2012:
772        //
773        //   https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
774
775        let mut ok = false;
776        if let Some(g) = ctx.face.get_nominal_glyph(u32::from(ab)) {
777            let indic_plan = ctx.plan.data::<IndicShapePlan>();
778            ok = indic_plan
779                .pstf
780                .would_substitute(&ctx.plan.ot_map, ctx.face, &[g]);
781        }
782
783        if ok {
784            // Ok, safe to use Uniscribe-style decomposition.
785            return Some(('\u{0DD9}', ab));
786        }
787    }
788
789    crate::hb::unicode::decompose(ab)
790}
791
792fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
793    // Avoid recomposing split matras.
794    if a.general_category().is_mark() {
795        return None;
796    }
797
798    // Composition-exclusion exceptions that we want to recompose.
799    if a == '\u{09AF}' && b == '\u{09BC}' {
800        return Some('\u{09DF}');
801    }
802
803    crate::hb::unicode::compose(a, b)
804}
805
806fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
807    // We cannot setup masks here.  We save information about characters
808    // and setup masks later on in a pause-callback.
809    for info in buffer.info_slice_mut() {
810        info.set_indic_properties();
811    }
812}
813
814fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
815    super::ot_shape_complex_indic_machine::find_syllables_indic(buffer);
816
817    let mut start = 0;
818    let mut end = buffer.next_syllable(0);
819    while start < buffer.len {
820        buffer.unsafe_to_break(Some(start), Some(end));
821        start = end;
822        end = buffer.next_syllable(start);
823    }
824}
825
826fn initial_reordering(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
827    use super::ot_shape_complex_indic_machine::SyllableType;
828
829    let indic_plan = plan.data::<IndicShapePlan>();
830
831    update_consonant_positions(plan, indic_plan, face, buffer);
832    super::ot_shape_complex_syllabic::insert_dotted_circles(
833        face,
834        buffer,
835        SyllableType::BrokenCluster as u8,
836        category::DOTTED_CIRCLE,
837        Some(category::REPHA),
838        Some(position::END),
839    );
840
841    let mut start = 0;
842    let mut end = buffer.next_syllable(0);
843    while start < buffer.len {
844        initial_reordering_syllable(plan, indic_plan, face, start, end, buffer);
845        start = end;
846        end = buffer.next_syllable(start);
847    }
848}
849
850fn update_consonant_positions(
851    plan: &hb_ot_shape_plan_t,
852    indic_plan: &IndicShapePlan,
853    face: &hb_font_t,
854    buffer: &mut hb_buffer_t,
855) {
856    if indic_plan.config.base_pos != BasePosition::Last {
857        return;
858    }
859
860    let mut virama_glyph = None;
861    if indic_plan.config.virama != 0 {
862        virama_glyph = face.get_nominal_glyph(indic_plan.config.virama);
863    }
864
865    if let Some(virama) = virama_glyph {
866        for info in buffer.info_slice_mut() {
867            if info.indic_position() == position::BASE_C {
868                let consonant = info.as_glyph();
869                info.set_indic_position(consonant_position_from_face(
870                    plan, indic_plan, face, consonant, virama,
871                ));
872            }
873        }
874    }
875}
876
877fn consonant_position_from_face(
878    plan: &hb_ot_shape_plan_t,
879    indic_plan: &IndicShapePlan,
880    face: &hb_font_t,
881    consonant: GlyphId,
882    virama: GlyphId,
883) -> u8 {
884    // For old-spec, the order of glyphs is Consonant,Virama,
885    // whereas for new-spec, it's Virama,Consonant.  However,
886    // some broken fonts (like Free Sans) simply copied lookups
887    // from old-spec to new-spec without modification.
888    // And oddly enough, Uniscribe seems to respect those lookups.
889    // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
890    // base at 0.  The font however, only has lookups matching
891    // 930,94D in 'blwf', not the expected 94D,930 (with new-spec
892    // table).  As such, we simply match both sequences.  Seems
893    // to work.
894    //
895    // Vatu is done as well, for:
896    // https://github.com/harfbuzz/harfbuzz/issues/1587
897
898    if indic_plan
899        .blwf
900        .would_substitute(&plan.ot_map, face, &[virama, consonant])
901        || indic_plan
902            .blwf
903            .would_substitute(&plan.ot_map, face, &[consonant, virama])
904        || indic_plan
905            .vatu
906            .would_substitute(&plan.ot_map, face, &[virama, consonant])
907        || indic_plan
908            .vatu
909            .would_substitute(&plan.ot_map, face, &[consonant, virama])
910    {
911        return position::BELOW_C;
912    }
913
914    if indic_plan
915        .pstf
916        .would_substitute(&plan.ot_map, face, &[virama, consonant])
917        || indic_plan
918            .pstf
919            .would_substitute(&plan.ot_map, face, &[consonant, virama])
920    {
921        return position::POST_C;
922    }
923
924    if indic_plan
925        .pref
926        .would_substitute(&plan.ot_map, face, &[virama, consonant])
927        || indic_plan
928            .pref
929            .would_substitute(&plan.ot_map, face, &[consonant, virama])
930    {
931        return position::POST_C;
932    }
933
934    position::BASE_C
935}
936
937fn initial_reordering_syllable(
938    plan: &hb_ot_shape_plan_t,
939    indic_plan: &IndicShapePlan,
940    face: &hb_font_t,
941    start: usize,
942    end: usize,
943    buffer: &mut hb_buffer_t,
944) {
945    use super::ot_shape_complex_indic_machine::SyllableType;
946
947    let syllable_type = match buffer.info[start].syllable() & 0x0F {
948        0 => SyllableType::ConsonantSyllable,
949        1 => SyllableType::VowelSyllable,
950        2 => SyllableType::StandaloneCluster,
951        3 => SyllableType::SymbolCluster,
952        4 => SyllableType::BrokenCluster,
953        5 => SyllableType::NonIndicCluster,
954        _ => unreachable!(),
955    };
956
957    match syllable_type {
958        // We made the vowels look like consonants.  So let's call the consonant logic!
959        SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => {
960            initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
961        }
962        // We already inserted dotted-circles, so just call the standalone_cluster.
963        SyllableType::BrokenCluster | SyllableType::StandaloneCluster => {
964            initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer);
965        }
966        SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {}
967    }
968}
969
970// Rules from:
971// https://docs.microsqoft.com/en-us/typography/script-development/devanagari */
972fn initial_reordering_consonant_syllable(
973    plan: &hb_ot_shape_plan_t,
974    indic_plan: &IndicShapePlan,
975    face: &hb_font_t,
976    start: usize,
977    end: usize,
978    buffer: &mut hb_buffer_t,
979) {
980    // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
981    // For compatibility with legacy usage in Kannada,
982    // Ra+h+ZWJ must behave like Ra+ZWJ+h...
983    if buffer.script == Some(script::KANNADA)
984        && start + 3 <= end
985        && buffer.info[start].is_one_of(category_flag(category::RA))
986        && buffer.info[start + 1].is_one_of(category_flag(category::H))
987        && buffer.info[start + 2].is_one_of(category_flag(category::ZWJ))
988    {
989        buffer.merge_clusters(start + 1, start + 3);
990        buffer.info.swap(start + 1, start + 2);
991    }
992
993    // 1. Find base consonant:
994    //
995    // The shaping engine finds the base consonant of the syllable, using the
996    // following algorithm: starting from the end of the syllable, move backwards
997    // until a consonant is found that does not have a below-base or post-base
998    // form (post-base forms have to follow below-base forms), or that is not a
999    // pre-base-reordering Ra, or arrive at the first consonant. The consonant
1000    // stopped at will be the base.
1001    //
1002    //   - If the syllable starts with Ra + Halant (in a script that has Reph)
1003    //     and has more than one consonant, Ra is excluded from candidates for
1004    //     base consonants.
1005
1006    let mut base = end;
1007    let mut has_reph = false;
1008
1009    {
1010        // -> If the syllable starts with Ra + Halant (in a script that has Reph)
1011        //    and has more than one consonant, Ra is excluded from candidates for
1012        //    base consonants.
1013        let mut limit = start;
1014        if indic_plan.mask_array[indic_feature::RPHF] != 0
1015            && start + 3 <= end
1016            && ((indic_plan.config.reph_mode == RephMode::Implicit
1017                && !buffer.info[start + 2].is_joiner())
1018                || (indic_plan.config.reph_mode == RephMode::Explicit
1019                    && buffer.info[start + 2].indic_category() == category::ZWJ))
1020        {
1021            // See if it matches the 'rphf' feature.
1022            let glyphs = &[
1023                buffer.info[start].as_glyph(),
1024                buffer.info[start + 1].as_glyph(),
1025                if indic_plan.config.reph_mode == RephMode::Explicit {
1026                    buffer.info[start + 2].as_glyph()
1027                } else {
1028                    GlyphId(0)
1029                },
1030            ];
1031            if indic_plan
1032                .rphf
1033                .would_substitute(&plan.ot_map, face, &glyphs[0..2])
1034                || (indic_plan.config.reph_mode == RephMode::Explicit
1035                    && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs))
1036            {
1037                limit += 2;
1038                while limit < end && buffer.info[limit].is_joiner() {
1039                    limit += 1;
1040                }
1041                base = start;
1042                has_reph = true;
1043            }
1044        } else if indic_plan.config.reph_mode == RephMode::LogRepha
1045            && buffer.info[start].indic_category() == category::REPHA
1046        {
1047            limit += 1;
1048            while limit < end && buffer.info[limit].is_joiner() {
1049                limit += 1;
1050            }
1051            base = start;
1052            has_reph = true;
1053        }
1054
1055        match indic_plan.config.base_pos {
1056            BasePosition::Last => {
1057                // -> starting from the end of the syllable, move backwards
1058                let mut i = end;
1059                let mut seen_below = false;
1060                loop {
1061                    i -= 1;
1062                    // -> until a consonant is found
1063                    if buffer.info[i].is_consonant() {
1064                        // -> that does not have a below-base or post-base form
1065                        // (post-base forms have to follow below-base forms),
1066                        if buffer.info[i].indic_position() != position::BELOW_C
1067                            && (buffer.info[i].indic_position() != position::POST_C || seen_below)
1068                        {
1069                            base = i;
1070                            break;
1071                        }
1072                        if buffer.info[i].indic_position() == position::BELOW_C {
1073                            seen_below = true;
1074                        }
1075
1076                        // -> or that is not a pre-base-reordering Ra,
1077                        //
1078                        // IMPLEMENTATION NOTES:
1079                        //
1080                        // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped
1081                        // by the logic above already.
1082
1083                        // -> or arrive at the first consonant. The consonant stopped at will
1084                        // be the base.
1085                        base = i;
1086                    } else {
1087                        // A ZWJ after a Halant stops the base search, and requests an explicit
1088                        // half form.
1089                        // A ZWJ before a Halant, requests a subjoined form instead, and hence
1090                        // search continues.  This is particularly important for Bengali
1091                        // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya.
1092                        if start < i
1093                            && buffer.info[i].indic_category() == category::ZWJ
1094                            && buffer.info[i - 1].indic_category() == category::H
1095                        {
1096                            break;
1097                        }
1098                    }
1099
1100                    if i <= limit {
1101                        break;
1102                    }
1103                }
1104            }
1105            BasePosition::LastSinhala => {
1106                // Sinhala base positioning is slightly different from main Indic, in that:
1107                // 1. Its ZWJ behavior is different,
1108                // 2. We don't need to look into the font for consonant positions.
1109
1110                if !has_reph {
1111                    base = limit;
1112                }
1113
1114                // Find the last base consonant that is not blocked by ZWJ.  If there is
1115                // a ZWJ right before a base consonant, that would request a subjoined form.
1116                for i in limit..end {
1117                    if buffer.info[i].is_consonant() {
1118                        if limit < i && buffer.info[i - 1].indic_category() == category::ZWJ {
1119                            break;
1120                        } else {
1121                            base = i;
1122                        }
1123                    }
1124                }
1125
1126                // Mark all subsequent consonants as below.
1127                for i in base + 1..end {
1128                    if buffer.info[i].is_consonant() {
1129                        buffer.info[i].set_indic_position(position::BELOW_C);
1130                    }
1131                }
1132            }
1133        }
1134
1135        // -> If the syllable starts with Ra + Halant (in a script that has Reph)
1136        //    and has more than one consonant, Ra is excluded from candidates for
1137        //    base consonants.
1138        //
1139        //  Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.
1140        if has_reph && base == start && limit - base <= 2 {
1141            // Have no other consonant, so Reph is not formed and Ra becomes base.
1142            has_reph = false;
1143        }
1144    }
1145
1146    // 2. Decompose and reorder Matras:
1147    //
1148    // Each matra and any syllable modifier sign in the syllable are moved to the
1149    // appropriate position relative to the consonant(s) in the syllable. The
1150    // shaping engine decomposes two- or three-part matras into their constituent
1151    // parts before any repositioning. Matra characters are classified by which
1152    // consonant in a conjunct they have affinity for and are reordered to the
1153    // following positions:
1154    //
1155    //   - Before first half form in the syllable
1156    //   - After subjoined consonants
1157    //   - After post-form consonant
1158    //   - After main consonant (for above marks)
1159    //
1160    // IMPLEMENTATION NOTES:
1161    //
1162    // The normalize() routine has already decomposed matras for us, so we don't
1163    // need to worry about that.
1164
1165    // 3.  Reorder marks to canonical order:
1166    //
1167    // Adjacent nukta and halant or nukta and vedic sign are always repositioned
1168    // if necessary, so that the nukta is first.
1169    //
1170    // IMPLEMENTATION NOTES:
1171    //
1172    // We don't need to do this: the normalize() routine already did this for us.
1173
1174    // Reorder characters
1175
1176    for i in start..base {
1177        let pos = buffer.info[i].indic_position();
1178        buffer.info[i].set_indic_position(cmp::min(position::PRE_C, pos));
1179    }
1180
1181    if base < end {
1182        buffer.info[base].set_indic_position(position::BASE_C);
1183    }
1184
1185    // Mark final consonants.  A final consonant is one appearing after a matra.
1186    // Happens in Sinhala.
1187    for i in base + 1..end {
1188        if buffer.info[i].indic_category() == category::M {
1189            for j in i + 1..end {
1190                if buffer.info[j].is_consonant() {
1191                    buffer.info[j].set_indic_position(position::FINAL_C);
1192                    break;
1193                }
1194            }
1195
1196            break;
1197        }
1198    }
1199
1200    // Handle beginning Ra
1201    if has_reph {
1202        buffer.info[start].set_indic_position(position::RA_TO_BECOME_REPH);
1203    }
1204
1205    // For old-style Indic script tags, move the first post-base Halant after
1206    // last consonant.
1207    //
1208    // Reports suggest that in some scripts Uniscribe does this only if there
1209    // is *not* a Halant after last consonant already.  We know that is the
1210    // case for Kannada, while it reorders unconditionally in other scripts,
1211    // eg. Malayalam, Bengali, and Devanagari.  We don't currently know about
1212    // other scripts, so we block Kannada.
1213    //
1214    // Kannada test case:
1215    // U+0C9A,U+0CCD,U+0C9A,U+0CCD
1216    // With some versions of Lohit Kannada.
1217    // https://bugs.freedesktop.org/show_bug.cgi?id=59118
1218    //
1219    // Malayalam test case:
1220    // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
1221    // With lohit-ttf-20121122/Lohit-Malayalam.ttf
1222    //
1223    // Bengali test case:
1224    // U+0998,U+09CD,U+09AF,U+09CD
1225    // With Windows XP vrinda.ttf
1226    // https://github.com/harfbuzz/harfbuzz/issues/1073
1227    //
1228    // Devanagari test case:
1229    // U+091F,U+094D,U+0930,U+094D
1230    // With chandas.ttf
1231    // https://github.com/harfbuzz/harfbuzz/issues/1071
1232    if indic_plan.is_old_spec {
1233        let disallow_double_halants = buffer.script == Some(script::KANNADA);
1234        for i in base + 1..end {
1235            if buffer.info[i].indic_category() == category::H {
1236                let mut j = end - 1;
1237                while j > i {
1238                    if buffer.info[j].is_consonant()
1239                        || (disallow_double_halants
1240                            && buffer.info[j].indic_category() == category::H)
1241                    {
1242                        break;
1243                    }
1244
1245                    j -= 1;
1246                }
1247
1248                if buffer.info[j].indic_category() != category::H && j > i {
1249                    // Move Halant to after last consonant.
1250                    let t = buffer.info[i];
1251                    for k in 0..j - i {
1252                        buffer.info[k + i] = buffer.info[k + i + 1];
1253                    }
1254                    buffer.info[j] = t;
1255                }
1256
1257                break;
1258            }
1259        }
1260    }
1261
1262    // Attach misc marks to previous char to move with them.
1263    {
1264        let mut last_pos = position::START;
1265        for i in start..end {
1266            let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1267                & (category_flag(category::ZWJ)
1268                    | category_flag(category::ZWNJ)
1269                    | category_flag(category::N)
1270                    | category_flag(category::RS)
1271                    | category_flag(category::CM)
1272                    | category_flag(category::H))
1273                != 0;
1274            if ok {
1275                buffer.info[i].set_indic_position(last_pos);
1276
1277                if buffer.info[i].indic_category() == category::H
1278                    && buffer.info[i].indic_position() == position::PRE_M
1279                {
1280                    // Uniscribe doesn't move the Halant with Left Matra.
1281                    // TEST: U+092B,U+093F,U+094DE
1282                    // We follow.  This is important for the Sinhala
1283                    // U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
1284                    // where U+0DD9 is a left matra and U+0DCA is the virama.
1285                    // We don't want to move the virama with the left matra.
1286                    // TEST: U+0D9A,U+0DDA
1287                    for j in (start + 1..=i).rev() {
1288                        if buffer.info[j - 1].indic_position() != position::PRE_M {
1289                            let pos = buffer.info[j - 1].indic_position();
1290                            buffer.info[i].set_indic_position(pos);
1291                            break;
1292                        }
1293                    }
1294                }
1295            } else if buffer.info[i].indic_position() != position::SMVD {
1296                last_pos = buffer.info[i].indic_position();
1297            }
1298        }
1299    }
1300    // For post-base consonants let them own anything before them
1301    // since the last consonant or matra.
1302    {
1303        let mut last = base;
1304        for i in base + 1..end {
1305            if buffer.info[i].is_consonant() {
1306                for j in last + 1..i {
1307                    if (buffer.info[j].indic_position() as u8) < (position::SMVD as u8) {
1308                        let pos = buffer.info[i].indic_position();
1309                        buffer.info[j].set_indic_position(pos);
1310                    }
1311                }
1312
1313                last = i;
1314            } else if buffer.info[i].indic_category() == category::M {
1315                last = i;
1316            }
1317        }
1318    }
1319
1320    {
1321        // Use syllable() for sort accounting temporarily.
1322        let syllable = buffer.info[start].syllable();
1323        for i in start..end {
1324            buffer.info[i].set_syllable(u8::try_from(i - start).unwrap());
1325        }
1326
1327        buffer.info[start..end].sort_by(|a, b| a.indic_position().cmp(&b.indic_position()));
1328
1329        // Find base again.
1330        base = end;
1331        for i in start..end {
1332            if buffer.info[i].indic_position() == position::BASE_C {
1333                base = i;
1334                break;
1335            }
1336        }
1337        // Things are out-of-control for post base positions, they may shuffle
1338        // around like crazy.  In old-spec mode, we move halants around, so in
1339        // that case merge all clusters after base.  Otherwise, check the sort
1340        // order and merge as needed.
1341        // For pre-base stuff, we handle cluster issues in final reordering.
1342        //
1343        // We could use buffer->sort() for this, if there was no special
1344        // reordering of pre-base stuff happening later...
1345        // We don't want to merge_clusters all of that, which buffer->sort()
1346        // would.  Here's a concrete example:
1347        //
1348        // Assume there's a pre-base consonant and explicit Halant before base,
1349        // followed by a prebase-reordering (left) Matra:
1350        //
1351        //   C,H,ZWNJ,B,M
1352        //
1353        // At this point in reordering we would have:
1354        //
1355        //   M,C,H,ZWNJ,B
1356        //
1357        // whereas in final reordering we will bring the Matra closer to Base:
1358        //
1359        //   C,H,ZWNJ,M,B
1360        //
1361        // That's why we don't want to merge-clusters anything before the Base
1362        // at this point.  But if something moved from after Base to before it,
1363        // we should merge clusters from base to them.  In final-reordering, we
1364        // only move things around before base, and merge-clusters up to base.
1365        // These two merge-clusters from the two sides of base will interlock
1366        // to merge things correctly.  See:
1367        // https://github.com/harfbuzz/harfbuzz/issues/2272
1368        if indic_plan.is_old_spec || end - start > 127 {
1369            buffer.merge_clusters(base, end);
1370        } else {
1371            // Note! syllable() is a one-byte field.
1372            for i in base..end {
1373                if buffer.info[i].syllable() != 255 {
1374                    let mut min = i;
1375                    let mut max = i;
1376                    let mut j = start + buffer.info[i].syllable() as usize;
1377                    while j != i {
1378                        min = cmp::min(min, j);
1379                        max = cmp::max(max, j);
1380                        let next = start + buffer.info[j].syllable() as usize;
1381                        buffer.info[j].set_syllable(255); // So we don't process j later again.
1382                        j = next;
1383                    }
1384
1385                    buffer.merge_clusters(cmp::max(base, min), max + 1);
1386                }
1387            }
1388        }
1389
1390        // Put syllable back in.
1391        for info in &mut buffer.info[start..end] {
1392            info.set_syllable(syllable);
1393        }
1394    }
1395
1396    // Setup masks now
1397
1398    {
1399        // Reph
1400        for info in &mut buffer.info[start..end] {
1401            if info.indic_position() != position::RA_TO_BECOME_REPH {
1402                break;
1403            }
1404
1405            info.mask |= indic_plan.mask_array[indic_feature::RPHF];
1406        }
1407
1408        // Pre-base
1409        let mut mask = indic_plan.mask_array[indic_feature::HALF];
1410        if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost {
1411            mask |= indic_plan.mask_array[indic_feature::BLWF];
1412        }
1413
1414        for info in &mut buffer.info[start..base] {
1415            info.mask |= mask;
1416        }
1417
1418        // Base
1419        mask = 0;
1420        if base < end {
1421            buffer.info[base].mask |= mask;
1422        }
1423
1424        // Post-base
1425        mask = indic_plan.mask_array[indic_feature::BLWF]
1426            | indic_plan.mask_array[indic_feature::ABVF]
1427            | indic_plan.mask_array[indic_feature::PSTF];
1428        for i in base + 1..end {
1429            buffer.info[i].mask |= mask;
1430        }
1431    }
1432
1433    if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) {
1434        // Old-spec eye-lash Ra needs special handling.  From the
1435        // spec:
1436        //
1437        // "The feature 'below-base form' is applied to consonants
1438        // having below-base forms and following the base consonant.
1439        // The exception is vattu, which may appear below half forms
1440        // as well as below the base glyph. The feature 'below-base
1441        // form' will be applied to all such occurrences of Ra as well."
1442        //
1443        // Test case: U+0924,U+094D,U+0930,U+094d,U+0915
1444        // with Sanskrit 2003 font.
1445        //
1446        // However, note that Ra,Halant,ZWJ is the correct way to
1447        // request eyelash form of Ra, so we wouldbn't inhibit it
1448        // in that sequence.
1449        //
1450        // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
1451        for i in start..base.saturating_sub(1) {
1452            if buffer.info[i].indic_category() == category::RA
1453                && buffer.info[i + 1].indic_category() == category::H
1454                && (i + 2 == base || buffer.info[i + 2].indic_category() != category::ZWJ)
1455            {
1456                buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF];
1457                buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF];
1458            }
1459        }
1460    }
1461
1462    let pref_len = 2;
1463    if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end {
1464        // Find a Halant,Ra sequence and mark it for pre-base-reordering processing.
1465        for i in base + 1..end - pref_len + 1 {
1466            let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()];
1467            if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) {
1468                buffer.info[i + 0].mask = indic_plan.mask_array[indic_feature::PREF];
1469                buffer.info[i + 1].mask = indic_plan.mask_array[indic_feature::PREF];
1470                break;
1471            }
1472        }
1473    }
1474
1475    // Apply ZWJ/ZWNJ effects
1476    for i in start + 1..end {
1477        if buffer.info[i].is_joiner() {
1478            let non_joiner = buffer.info[i].indic_category() == category::ZWNJ;
1479            let mut j = i;
1480
1481            loop {
1482                j -= 1;
1483
1484                // ZWJ/ZWNJ should disable CJCT.  They do that by simply
1485                // being there, since we don't skip them for the CJCT
1486                // feature (ie. F_MANUAL_ZWJ)
1487
1488                // A ZWNJ disables HALF.
1489                if non_joiner {
1490                    buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF];
1491                }
1492
1493                if j <= start || buffer.info[j].is_consonant() {
1494                    break;
1495                }
1496            }
1497        }
1498    }
1499}
1500
1501fn initial_reordering_standalone_cluster(
1502    plan: &hb_ot_shape_plan_t,
1503    indic_plan: &IndicShapePlan,
1504    face: &hb_font_t,
1505    start: usize,
1506    end: usize,
1507    buffer: &mut hb_buffer_t,
1508) {
1509    // We treat placeholder/dotted-circle as if they are consonants, so we
1510    // should just chain.  Only if not in compatibility mode that is...
1511    initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
1512}
1513
1514fn final_reordering(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
1515    if buffer.is_empty() {
1516        return;
1517    }
1518
1519    let indic_plan = plan.data::<IndicShapePlan>();
1520
1521    let mut virama_glyph = None;
1522    if indic_plan.config.virama != 0 {
1523        if let Some(g) = face.get_nominal_glyph(indic_plan.config.virama) {
1524            virama_glyph = Some(g.0 as u32);
1525        }
1526    }
1527
1528    let mut start = 0;
1529    let mut end = buffer.next_syllable(0);
1530    while start < buffer.len {
1531        final_reordering_impl(indic_plan, virama_glyph, start, end, buffer);
1532        start = end;
1533        end = buffer.next_syllable(start);
1534    }
1535}
1536
1537fn final_reordering_impl(
1538    plan: &IndicShapePlan,
1539    virama_glyph: Option<u32>,
1540    start: usize,
1541    end: usize,
1542    buffer: &mut hb_buffer_t,
1543) {
1544    // This function relies heavily on halant glyphs.  Lots of ligation
1545    // and possibly multiple substitutions happened prior to this
1546    // phase, and that might have messed up our properties.  Recover
1547    // from a particular case of that where we're fairly sure that a
1548    // class of OT_H is desired but has been lost.
1549    //
1550    // We don't call load_virama_glyph(), since we know it's already loaded.
1551    if let Some(virama_glyph) = virama_glyph {
1552        for info in &mut buffer.info[start..end] {
1553            if info.glyph_id == virama_glyph
1554                && _hb_glyph_info_ligated(info)
1555                && _hb_glyph_info_multiplied(info)
1556            {
1557                // This will make sure that this glyph passes is_halant() test.
1558                info.set_indic_category(category::H);
1559                _hb_glyph_info_clear_ligated_and_multiplied(info);
1560            }
1561        }
1562    }
1563
1564    // 4. Final reordering:
1565    //
1566    // After the localized forms and basic shaping forms GSUB features have been
1567    // applied (see below), the shaping engine performs some final glyph
1568    // reordering before applying all the remaining font features to the entire
1569    // syllable.
1570
1571    let mut try_pref = plan.mask_array[indic_feature::PREF] != 0;
1572
1573    let mut base = start;
1574    while base < end {
1575        if buffer.info[base].indic_position() as u32 >= position::BASE_C as u32 {
1576            if try_pref && base + 1 < end {
1577                for i in base + 1..end {
1578                    if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 {
1579                        if !(_hb_glyph_info_substituted(&buffer.info[i])
1580                            && _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]))
1581                        {
1582                            // Ok, this was a 'pref' candidate but didn't form any.
1583                            // Base is around here...
1584                            base = i;
1585                            while base < end && buffer.info[base].is_halant() {
1586                                base += 1;
1587                            }
1588
1589                            buffer.info[base].set_indic_position(position::BASE_C);
1590                            try_pref = false;
1591                        }
1592
1593                        break;
1594                    }
1595                }
1596            }
1597
1598            // For Malayalam, skip over unformed below- (but NOT post-) forms.
1599            if buffer.script == Some(script::MALAYALAM) {
1600                let mut i = base + 1;
1601                while i < end {
1602                    while i < end && buffer.info[i].is_joiner() {
1603                        i += 1;
1604                    }
1605
1606                    if i == end || !buffer.info[i].is_halant() {
1607                        break;
1608                    }
1609
1610                    i += 1; // Skip halant.
1611
1612                    while i < end && buffer.info[i].is_joiner() {
1613                        i += 1;
1614                    }
1615
1616                    if i < end
1617                        && buffer.info[i].is_consonant()
1618                        && buffer.info[i].indic_position() == position::BELOW_C
1619                    {
1620                        base = i;
1621                        buffer.info[base].set_indic_position(position::BASE_C);
1622                    }
1623
1624                    i += 1;
1625                }
1626            }
1627
1628            if start < base && buffer.info[base].indic_position() as u32 > position::BASE_C as u32 {
1629                base -= 1;
1630            }
1631
1632            break;
1633        }
1634
1635        base += 1;
1636    }
1637
1638    if base == end && start < base && buffer.info[base - 1].is_one_of(rb_flag(category::ZWJ as u32))
1639    {
1640        base -= 1;
1641    }
1642
1643    if base < end {
1644        while start < base
1645            && buffer.info[base]
1646                .is_one_of(rb_flag(category::N as u32) | rb_flag(category::H as u32))
1647        {
1648            base -= 1;
1649        }
1650    }
1651
1652    // - Reorder matras:
1653    //
1654    //   If a pre-base matra character had been reordered before applying basic
1655    //   features, the glyph can be moved closer to the main consonant based on
1656    //   whether half-forms had been formed. Actual position for the matra is
1657    //   defined as “after last standalone halant glyph, after initial matra
1658    //   position and before the main consonant”. If ZWJ or ZWNJ follow this
1659    //   halant, position is moved after it.
1660    //
1661    // IMPLEMENTATION NOTES:
1662    //
1663    // It looks like the last sentence is wrong.  Testing, with Windows 7 Uniscribe
1664    // and Devanagari shows that the behavior is best described as:
1665    //
1666    // "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1667    //  If ZWNJ follows this halant, position is moved after it."
1668    //
1669    // Test case, with Adobe Devanagari or Nirmala UI:
1670    //
1671    //   U+091F,U+094D,U+200C,U+092F,U+093F
1672    //   (Matra moves to the middle, after ZWNJ.)
1673    //
1674    //   U+091F,U+094D,U+200D,U+092F,U+093F
1675    //   (Matra does NOT move, stays to the left.)
1676    //
1677    // https://github.com/harfbuzz/harfbuzz/issues/1070
1678
1679    // Otherwise there can't be any pre-base matra characters.
1680    if start + 1 < end && start < base {
1681        // If we lost track of base, alas, position before last thingy.
1682        let mut new_pos = if base == end { base - 2 } else { base - 1 };
1683
1684        // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1685        // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1686        // We want to position matra after them.
1687        if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) {
1688            loop {
1689                while new_pos > start
1690                    && !buffer.info[new_pos]
1691                        .is_one_of(rb_flag(category::M as u32) | rb_flag(category::H as u32))
1692                {
1693                    new_pos -= 1;
1694                }
1695
1696                // If we found no Halant we are done.
1697                // Otherwise only proceed if the Halant does
1698                // not belong to the Matra itself!
1699                if buffer.info[new_pos].is_halant()
1700                    && buffer.info[new_pos].indic_position() != position::PRE_M
1701                {
1702                    if new_pos + 1 < end {
1703                        // -> If ZWJ follows this halant, matra is NOT repositioned after this halant.
1704                        if buffer.info[new_pos + 1].indic_category() == category::ZWJ {
1705                            // Keep searching.
1706                            if new_pos > start {
1707                                new_pos -= 1;
1708                                continue;
1709                            }
1710                        }
1711
1712                        // -> If ZWNJ follows this halant, position is moved after it.
1713                        //
1714                        // IMPLEMENTATION NOTES:
1715                        //
1716                        // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
1717                        // sequence for a consonant syllable; any pre-base matras occurring after it
1718                        // will belong to the subsequent syllable.
1719                    }
1720                } else {
1721                    new_pos = start; // No move.
1722                }
1723
1724                break;
1725            }
1726        }
1727
1728        if start < new_pos && buffer.info[new_pos].indic_position() != position::PRE_M {
1729            // Now go see if there's actually any matras...
1730            for i in (start + 1..=new_pos).rev() {
1731                if buffer.info[i - 1].indic_position() == position::PRE_M {
1732                    let old_pos = i - 1;
1733                    // Shouldn't actually happen.
1734                    if old_pos < base && base <= new_pos {
1735                        base -= 1;
1736                    }
1737
1738                    let tmp = buffer.info[old_pos];
1739                    for i in 0..new_pos - old_pos {
1740                        buffer.info[i + old_pos] = buffer.info[i + old_pos + 1];
1741                    }
1742                    buffer.info[new_pos] = tmp;
1743
1744                    // Note: this merge_clusters() is intentionally *after* the reordering.
1745                    // Indic matra reordering is special and tricky...
1746                    buffer.merge_clusters(new_pos, cmp::min(end, base + 1));
1747
1748                    new_pos -= 1;
1749                }
1750            }
1751        } else {
1752            for i in start..base {
1753                if buffer.info[i].indic_position() == position::PRE_M {
1754                    buffer.merge_clusters(i, cmp::min(end, base + 1));
1755                    break;
1756                }
1757            }
1758        }
1759    }
1760
1761    // - Reorder reph:
1762    //
1763    //   Reph’s original position is always at the beginning of the syllable,
1764    //   (i.e. it is not reordered at the character reordering stage). However,
1765    //   it will be reordered according to the basic-forms shaping results.
1766    //   Possible positions for reph, depending on the script, are; after main,
1767    //   before post-base consonant forms, and after post-base consonant forms.
1768
1769    // Two cases:
1770    //
1771    // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1772    //   we should only move it if the sequence ligated to the repha form.
1773    //
1774    // - If repha is encoded separately and in the logical position, we should only
1775    //   move it if it did NOT ligate.  If it ligated, it's probably the font trying
1776    //   to make it work without the reordering.
1777
1778    if start + 1 < end
1779        && buffer.info[start].indic_position() == position::RA_TO_BECOME_REPH
1780        && (buffer.info[start].indic_category() == category::REPHA)
1781            ^ _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[start])
1782    {
1783        let mut new_reph_pos;
1784        loop {
1785            let reph_pos = plan.config.reph_pos;
1786
1787            // 1. If reph should be positioned after post-base consonant forms,
1788            //    proceed to step 5.
1789            if reph_pos != RephPosition::AfterPost {
1790                // 2. If the reph repositioning class is not after post-base: target
1791                //    position is after the first explicit halant glyph between the
1792                //    first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1793                //    are following this halant, position is moved after it. If such
1794                //    position is found, this is the target position. Otherwise,
1795                //    proceed to the next step.
1796                //
1797                //    Note: in old-implementation fonts, where classifications were
1798                //    fixed in shaping engine, there was no case where reph position
1799                //    will be found on this step.
1800                {
1801                    new_reph_pos = start + 1;
1802                    while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1803                        new_reph_pos += 1;
1804                    }
1805
1806                    if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1807                        // ->If ZWJ or ZWNJ are following this halant, position is moved after it.
1808                        if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1809                            new_reph_pos += 1;
1810                        }
1811
1812                        break;
1813                    }
1814                }
1815
1816                // 3. If reph should be repositioned after the main consonant: find the
1817                //    first consonant not ligated with main, or find the first
1818                //    consonant that is not a potential pre-base-reordering Ra.
1819                if reph_pos == RephPosition::AfterMain {
1820                    new_reph_pos = base;
1821                    while new_reph_pos + 1 < end
1822                        && buffer.info[new_reph_pos + 1].indic_position() as u8
1823                            <= position::AFTER_MAIN as u8
1824                    {
1825                        new_reph_pos += 1;
1826                    }
1827
1828                    if new_reph_pos < end {
1829                        break;
1830                    }
1831                }
1832
1833                // 4. If reph should be positioned before post-base consonant, find
1834                //    first post-base classified consonant not ligated with main. If no
1835                //    consonant is found, the target position should be before the
1836                //    first matra, syllable modifier sign or vedic sign.
1837                //
1838                // This is our take on what step 4 is trying to say (and failing, BADLY).
1839                if reph_pos == RephPosition::AfterSub {
1840                    new_reph_pos = base;
1841                    while new_reph_pos + 1 < end
1842                        && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32)
1843                            & (rb_flag(position::POST_C as u32)
1844                                | rb_flag(position::AFTER_POST as u32)
1845                                | rb_flag(position::SMVD as u32)))
1846                            == 0
1847                    {
1848                        new_reph_pos += 1;
1849                    }
1850
1851                    if new_reph_pos < end {
1852                        break;
1853                    }
1854                }
1855            }
1856
1857            // 5. If no consonant is found in steps 3 or 4, move reph to a position
1858            //    immediately before the first post-base matra, syllable modifier
1859            //    sign or vedic sign that has a reordering class after the intended
1860            //    reph position. For example, if the reordering position for reph
1861            //    is post-main, it will skip above-base matras that also have a
1862            //    post-main position.
1863            //
1864            // Copied from step 2.
1865            new_reph_pos = start + 1;
1866            while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1867                new_reph_pos += 1;
1868            }
1869
1870            if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1871                /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1872                if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1873                    new_reph_pos += 1;
1874                }
1875
1876                break;
1877            }
1878            // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654
1879
1880            // 6. Otherwise, reorder reph to the end of the syllable.
1881            {
1882                new_reph_pos = end - 1;
1883                while new_reph_pos > start
1884                    && buffer.info[new_reph_pos].indic_position() == position::SMVD
1885                {
1886                    new_reph_pos -= 1;
1887                }
1888
1889                // If the Reph is to be ending up after a Matra,Halant sequence,
1890                // position it before that Halant so it can interact with the Matra.
1891                // However, if it's a plain Consonant,Halant we shouldn't do that.
1892                // Uniscribe doesn't do this.
1893                // TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1894                if buffer.info[new_reph_pos].is_halant() {
1895                    for info in &buffer.info[base + 1..new_reph_pos] {
1896                        if info.indic_category() == category::M {
1897                            // Ok, got it.
1898                            new_reph_pos -= 1;
1899                        }
1900                    }
1901                }
1902            }
1903
1904            break;
1905        }
1906
1907        // Move
1908        buffer.merge_clusters(start, new_reph_pos + 1);
1909
1910        let reph = buffer.info[start];
1911        for i in 0..new_reph_pos - start {
1912            buffer.info[i + start] = buffer.info[i + start + 1];
1913        }
1914        buffer.info[new_reph_pos] = reph;
1915
1916        if start < base && base <= new_reph_pos {
1917            base -= 1;
1918        }
1919    }
1920
1921    // - Reorder pre-base-reordering consonants:
1922    //
1923    //   If a pre-base-reordering consonant is found, reorder it according to
1924    //   the following rules:
1925
1926    // Otherwise there can't be any pre-base-reordering Ra.
1927    if try_pref && base + 1 < end {
1928        for i in base + 1..end {
1929            if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 {
1930                // 1. Only reorder a glyph produced by substitution during application
1931                //    of the <pref> feature. (Note that a font may shape a Ra consonant with
1932                //    the feature generally but block it in certain contexts.)
1933                //
1934                // Note: We just check that something got substituted.  We don't check that
1935                // the <pref> feature actually did it...
1936                //
1937                // Reorder pref only if it ligated.
1938                if _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]) {
1939                    // 2. Try to find a target position the same way as for pre-base matra.
1940                    //    If it is found, reorder pre-base consonant glyph.
1941                    //
1942                    // 3. If position is not found, reorder immediately before main consonant.
1943
1944                    let mut new_pos = base;
1945                    // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1946                    // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1947                    // We want to position matra after them.
1948                    if buffer.script != Some(script::MALAYALAM)
1949                        && buffer.script != Some(script::TAMIL)
1950                    {
1951                        while new_pos > start
1952                            && !buffer.info[new_pos - 1].is_one_of(
1953                                rb_flag(category::M as u32) | rb_flag(category::H as u32),
1954                            )
1955                        {
1956                            new_pos -= 1;
1957                        }
1958                    }
1959
1960                    if new_pos > start && buffer.info[new_pos - 1].is_halant() {
1961                        // -> If ZWJ or ZWNJ follow this halant, position is moved after it.
1962                        if new_pos < end && buffer.info[new_pos].is_joiner() {
1963                            new_pos += 1;
1964                        }
1965                    }
1966
1967                    {
1968                        let old_pos = i;
1969
1970                        buffer.merge_clusters(new_pos, old_pos + 1);
1971                        let tmp = buffer.info[old_pos];
1972                        for i in (0..=old_pos - new_pos).rev() {
1973                            buffer.info[i + new_pos + 1] = buffer.info[i + new_pos];
1974                        }
1975                        buffer.info[new_pos] = tmp;
1976
1977                        if new_pos <= base && base < old_pos {
1978                            // TODO: investigate
1979                            #[allow(unused_assignments)]
1980                            {
1981                                base += 1;
1982                            }
1983                        }
1984                    }
1985                }
1986
1987                break;
1988            }
1989        }
1990    }
1991
1992    // Apply 'init' to the Left Matra if it's a word start.
1993    if buffer.info[start].indic_position() == position::PRE_M {
1994        if start == 0
1995            || (rb_flag_unsafe(
1996                _hb_glyph_info_get_general_category(&buffer.info[start - 1]).to_rb(),
1997            ) & rb_flag_range(
1998                hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
1999                hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
2000            )) == 0
2001        {
2002            buffer.info[start].mask |= plan.mask_array[indic_feature::INIT];
2003        } else {
2004            buffer.unsafe_to_break(Some(start - 1), Some(start + 1));
2005        }
2006    }
2007}
2008
2009pub fn get_category_and_position(u: u32) -> (Category, Position) {
2010    let (c1, c2) = super::ot_shape_complex_indic_table::get_categories(u);
2011    let c2 = if c1 == SyllabicCategory::ConsonantMedial
2012        || c1 == SyllabicCategory::GeminationMark
2013        || c1 == SyllabicCategory::RegisterShifter
2014        || c1 == SyllabicCategory::ConsonantSucceedingRepha
2015        || c1 == SyllabicCategory::Virama
2016        || c1 == SyllabicCategory::VowelDependent
2017        || false
2018    {
2019        c2
2020    } else {
2021        MatraCategory::NotApplicable
2022    };
2023
2024    let c1 = match c1 {
2025        SyllabicCategory::Other => category::X,
2026        SyllabicCategory::Avagraha => category::SYMBOL,
2027        SyllabicCategory::Bindu => category::SM,
2028        SyllabicCategory::BrahmiJoiningNumber => category::PLACEHOLDER, // Don't care.
2029        SyllabicCategory::CantillationMark => category::A,
2030        SyllabicCategory::Consonant => category::C,
2031        SyllabicCategory::ConsonantDead => category::C,
2032        SyllabicCategory::ConsonantFinal => category::CM,
2033        SyllabicCategory::ConsonantHeadLetter => category::C,
2034        SyllabicCategory::ConsonantInitialPostfixed => category::PLACEHOLDER,
2035        SyllabicCategory::ConsonantKiller => category::M, // U+17CD only.
2036        SyllabicCategory::ConsonantMedial => category::CM,
2037        SyllabicCategory::ConsonantPlaceholder => category::PLACEHOLDER,
2038        SyllabicCategory::ConsonantPrecedingRepha => category::REPHA,
2039        SyllabicCategory::ConsonantPrefixed => category::X,
2040        SyllabicCategory::ConsonantSubjoined => category::CM,
2041        SyllabicCategory::ConsonantSucceedingRepha => category::CM,
2042        SyllabicCategory::ConsonantWithStacker => category::CS,
2043        SyllabicCategory::GeminationMark => category::SM, // https://github.com/harfbuzz/harfbuzz/issues/552
2044        SyllabicCategory::InvisibleStacker => category::COENG,
2045        SyllabicCategory::Joiner => category::ZWJ,
2046        SyllabicCategory::ModifyingLetter => category::X,
2047        SyllabicCategory::NonJoiner => category::ZWNJ,
2048        SyllabicCategory::Nukta => category::N,
2049        SyllabicCategory::Number => category::PLACEHOLDER,
2050        SyllabicCategory::NumberJoiner => category::PLACEHOLDER, // Don't care.
2051        SyllabicCategory::PureKiller => category::M,
2052        SyllabicCategory::RegisterShifter => category::RS,
2053        SyllabicCategory::SyllableModifier => category::SM,
2054        SyllabicCategory::ToneLetter => category::X,
2055        SyllabicCategory::ToneMark => category::N,
2056        SyllabicCategory::Virama => category::H,
2057        SyllabicCategory::Visarga => category::SM,
2058        SyllabicCategory::Vowel => category::V,
2059        SyllabicCategory::VowelDependent => category::M,
2060        SyllabicCategory::VowelIndependent => category::V,
2061    };
2062
2063    let c2 = match c2 {
2064        MatraCategory::NotApplicable => position::END,
2065        MatraCategory::Left => position::PRE_C,
2066        MatraCategory::Top => position::ABOVE_C,
2067        MatraCategory::Bottom => position::BELOW_C,
2068        MatraCategory::Right => position::POST_C,
2069        MatraCategory::BottomAndLeft => position::POST_C,
2070        MatraCategory::BottomAndRight => position::POST_C,
2071        MatraCategory::LeftAndRight => position::POST_C,
2072        MatraCategory::TopAndBottom => position::BELOW_C,
2073        MatraCategory::TopAndBottomAndRight => position::POST_C,
2074        MatraCategory::TopAndBottomAndLeft => position::BELOW_C,
2075        MatraCategory::TopAndLeft => position::ABOVE_C,
2076        MatraCategory::TopAndLeftAndRight => position::POST_C,
2077        MatraCategory::TopAndRight => position::POST_C,
2078        MatraCategory::Overstruck => position::AFTER_MAIN,
2079        MatraCategory::VisualOrderLeft => position::PRE_M,
2080    };
2081
2082    (c1, c2)
2083}
2084
2085#[rustfmt::skip]
2086fn matra_position_indic(u: u32, side: u8) -> u8 {
2087    #[inline] fn in_half_block(u: u32, base: u32) -> bool { u & !0x7F == base }
2088    #[inline] fn is_deva(u: u32) -> bool { in_half_block(u, 0x0900) }
2089    #[inline] fn is_beng(u: u32) -> bool { in_half_block(u, 0x0980) }
2090    #[inline] fn is_guru(u: u32) -> bool { in_half_block(u, 0x0A00) }
2091    #[inline] fn is_gujr(u: u32) -> bool { in_half_block(u, 0x0A80) }
2092    #[inline] fn is_orya(u: u32) -> bool { in_half_block(u, 0x0B00) }
2093    #[inline] fn is_taml(u: u32) -> bool { in_half_block(u, 0x0B80) }
2094    #[inline] fn is_telu(u: u32) -> bool { in_half_block(u, 0x0C00) }
2095    #[inline] fn is_knda(u: u32) -> bool { in_half_block(u, 0x0C80) }
2096    #[inline] fn is_mlym(u: u32) -> bool { in_half_block(u, 0x0D00) }
2097    #[inline] fn is_sinh(u: u32) -> bool { in_half_block(u, 0x0D80) }
2098
2099    #[inline]
2100    fn matra_pos_right(u: u32) -> Position {
2101        if is_deva(u) {
2102            position::AFTER_SUB
2103        } else if is_beng(u) {
2104            position::AFTER_POST
2105        } else if is_guru(u) {
2106            position::AFTER_POST
2107        } else if is_gujr(u) {
2108            position::AFTER_POST
2109        } else if is_orya(u) {
2110            position::AFTER_POST
2111        } else if is_taml(u) {
2112            position::AFTER_POST
2113        } else if is_telu(u) {
2114            if u <= 0x0C42 {
2115                position::BEFORE_SUB
2116            } else {
2117                position::AFTER_SUB
2118            }
2119        } else if is_knda(u) {
2120            if u < 0x0CC3 || u > 0xCD6 {
2121                position::BEFORE_SUB
2122            } else {
2123                position::AFTER_SUB
2124            }
2125        } else if is_mlym(u) {
2126            position::AFTER_POST
2127        } else if is_sinh(u) {
2128            position::AFTER_SUB
2129        } else {
2130            position::AFTER_SUB
2131        }
2132    }
2133
2134    // BENG and MLYM don't have top matras.
2135    #[inline]
2136    fn matra_pos_top(u: u32) -> Position {
2137        if is_deva(u) {
2138            position::AFTER_SUB
2139        } else if is_guru(u) {
2140            // Deviate from spec
2141            position::AFTER_POST
2142        } else if is_gujr(u) {
2143            position::AFTER_SUB
2144        } else if is_orya(u) {
2145            position::AFTER_MAIN
2146        } else if is_taml(u) {
2147            position::AFTER_SUB
2148        } else if is_telu(u) {
2149            position::BEFORE_SUB
2150        } else if is_knda(u) {
2151            position::BEFORE_SUB
2152        } else if is_sinh(u) {
2153            position::AFTER_SUB
2154        } else {
2155            position::AFTER_SUB
2156        }
2157    }
2158
2159    #[inline]
2160    fn matra_pos_bottom(u: u32) -> Position {
2161        if is_deva(u) {
2162            position::AFTER_SUB
2163        } else if is_beng(u) {
2164            position::AFTER_SUB
2165        } else if is_guru(u) {
2166            position::AFTER_POST
2167        } else if is_gujr(u) {
2168            position::AFTER_POST
2169        } else if is_orya(u) {
2170            position::AFTER_SUB
2171        } else if is_taml(u) {
2172            position::AFTER_POST
2173        } else if is_telu(u) {
2174            position::BEFORE_SUB
2175        } else if is_knda(u) {
2176            position::BEFORE_SUB
2177        } else if is_mlym(u) {
2178            position::AFTER_POST
2179        } else if is_sinh(u) {
2180            position::AFTER_SUB
2181        } else {
2182            position::AFTER_SUB
2183        }
2184    }
2185
2186    match side {
2187        position::PRE_C => position::PRE_M,
2188        position::POST_C => matra_pos_right(u),
2189        position::ABOVE_C => matra_pos_top(u),
2190        position::BELOW_C => matra_pos_bottom(u),
2191        _ => side,
2192    }
2193}
rustybuzz/hb/ot_shape_complex_indic.rs

rustybuzz/hb/
ot_shape_complex_indic.rs