swash/text/cluster/
myanmar.rs

//! Parser for Myanmar clusters.
2
3use super::unicode_data::{Category, ClusterBreak, MyanmarClass};
4use super::{CharCluster, Emoji, ShapeClass, Token, Whitespace, MAX_CLUSTER_SIZE};
5
6type Kind = MyanmarClass;
7
8pub struct MyanmarState<I> {
9    chars: I,
10    cur: Token,
11    cur_kind: Kind,
12    cur_emoji: bool,
13    done: bool,
14}
15
16impl<I> MyanmarState<I>
17where
18    I: Iterator<Item = Token> + Clone,
19{
20    pub fn new(mut chars: I) -> Self {
21        if let Some(first) = chars.by_ref().next() {
22            let (kind, emoji) = first.info.myanmar_class();
23            Self {
24                chars,
25                cur: first,
26                cur_kind: kind,
27                cur_emoji: emoji,
28                done: false,
29            }
30        } else {
31            Self {
32                chars,
33                cur: Token::default(),
34                cur_kind: MyanmarClass::O,
35                cur_emoji: false,
36                done: true,
37            }
38        }
39    }
40
41    pub fn next(&mut self, cluster: &mut CharCluster) -> bool {
42        if self.done {
43            return false;
44        }
45        Parser::new(self, cluster).parse();
46        true
47    }
48}
49
50struct Parser<'a, I> {
51    s: &'a mut MyanmarState<I>,
52    cluster: &'a mut CharCluster,
53    vt: bool,
54}
55
56impl<'a, I> Parser<'a, I>
57where
58    I: Iterator<Item = Token> + Clone,
59{
60    fn new(s: &'a mut MyanmarState<I>, cluster: &'a mut CharCluster) -> Self {
61        Self {
62            s,
63            cluster,
64            vt: false,
65        }
66    }
67
68    fn parse(&mut self) -> Option<()> {
69        use MyanmarClass::*;
70        if self.s.done {
71            return Some(());
72        }
73        if self.emoji() {
74            self.cluster.info_mut().set_emoji(Emoji::Default);
75            while self.emoji() {
76                self.accept_any_as(ShapeClass::Base)?;
77                if !self.parse_emoji_extension()? {
78                    break;
79                }
80            }
81            return Some(());
82        }
83        match self.kind() {
84            O => {
85                // This is not in the Myanmar spec, but added to support uniform
86                // clustering of CRLF across the parsers.
87                match self.s.cur.ch {
88                    '\r' => {
89                        self.cluster.info_mut().set_space(Whitespace::Newline);
90                        self.accept_any_as(ShapeClass::Control)?;
91                        if self.s.cur.ch == '\n' {
92                            self.accept_any_as(ShapeClass::Control)?;
93                        }
94                    }
95                    '\n' => {
96                        self.cluster.info_mut().set_space(Whitespace::Newline);
97                        self.accept_any_as(ShapeClass::Control)?;
98                    }
99                    _ => {
100                        self.cluster.info_mut().set_space_from_char(self.s.cur.ch);
101                        let class = match self.s.cur.info.category() {
102                            Category::Format => match self.s.cur.ch as u32 {
103                                0x200C => ShapeClass::Zwnj,
104                                0x200D => ShapeClass::Zwj,
105                                _ => ShapeClass::Control,
106                            },
107                            Category::Control => ShapeClass::Control,
108                            _ => ShapeClass::Base,
109                        };
110                        self.accept_any_as(class)?;
111                    }
112                }
113            }
114            P | S | R | WJ | D0 => {
115                self.accept_any()?;
116            }
117            _ => {
118                match self.s.cur.ch as u32 {
119                    0x1004 | 0x101B | 0x105A => {
120                        let mut iter = self.s.chars.clone();
121                        if let Some(b) = iter.next() {
122                            if b.ch == '\u{103A}' {
123                                if let Some(c) = iter.next() {
124                                    if c.ch == '\u{1039}' {
125                                        self.cluster.push(&self.s.cur, ShapeClass::Kinzi);
126                                        self.cluster.push(&b, ShapeClass::Kinzi);
127                                        self.cluster.push(&c, ShapeClass::Kinzi);
128                                        self.advance();
129                                        self.advance();
130                                        self.advance();
131                                    }
132                                }
133                            }
134                        }
135                    }
136                    _ => {}
137                }
138                match self.kind() {
139                    C | IV | D | DB => {
140                        self.accept_any_as(ShapeClass::Base)?;
141                        self.accept_as(VS, ShapeClass::Vs)?;
142                        while self.parse_stacked_consonant_or_vowel()? {}
143                        if self.vt {
144                            return Some(());
145                        }
146                        self.accept_zero_or_many(As)?;
147                        if self.accept(MY)? {
148                            self.accept(As)?;
149                        }
150                        self.accept_as(MR, ShapeClass::MedialRa)?;
151                        if self.accept(MW)? {
152                            self.accept(MH)?;
153                            self.accept(As)?;
154                        } else if self.accept(MH)? {
155                            self.accept(As)?;
156                        }
157                        self.accept_zero_or_many_as(VPre, ShapeClass::VPre)?;
158                        self.accept_zero_or_many(VAbv)?;
159                        self.accept_zero_or_many_as(VBlw, ShapeClass::VBlw)?;
160                        self.accept_zero_or_many_as(A, ShapeClass::Anusvara)?;
161                        if self.accept(DB)? {
162                            self.accept(As)?;
163                        }
164                        while self.parse_post_base_vowel()? {}
165                        while self.parse_pwo_tone_mark()? {}
166                        self.accept_zero_or_many(V)?;
167                        self.accept(J)?;
168                        return Some(());
169                    }
170                    _ => {
171                        self.cluster.info_mut().set_broken();
172                        self.accept_any()?;
173                        return Some(());
174                    }
175                }
176            }
177        }
178        None
179    }
180
181    fn parse_stacked_consonant_or_vowel(&mut self) -> Option<bool> {
182        use MyanmarClass::*;
183        match self.kind() {
184            H => {
185                self.vt = true;
186                self.accept_any_as(ShapeClass::Halant)?;
187                match self.kind() {
188                    C | IV => {
189                        self.vt = false;
190                        self.accept_any_as(ShapeClass::Base)?;
191                        self.accept_as(VS, ShapeClass::Vs)?;
192                        Some(true)
193                    }
194                    _ => Some(false),
195                }
196            }
197            _ => Some(false),
198        }
199    }
200
201    fn parse_post_base_vowel(&mut self) -> Option<bool> {
202        use MyanmarClass::*;
203        match self.kind() {
204            VPst => {
205                self.accept_any()?;
206                self.accept(MH)?;
207                self.accept_zero_or_many(As)?;
208                self.accept_zero_or_many(VAbv)?;
209                self.accept_zero_or_many_as(A, ShapeClass::Anusvara)?;
210                if self.accept(DB)? {
211                    self.accept(As)?;
212                }
213                Some(true)
214            }
215            _ => Some(false),
216        }
217    }
218
219    fn parse_pwo_tone_mark(&mut self) -> Option<bool> {
220        use MyanmarClass::*;
221        match self.kind() {
222            PT => {
223                self.accept_any()?;
224                if self.accept(As)? {
225                    self.accept_as(A, ShapeClass::Anusvara)?;
226                } else {
227                    // This goes against the spec, but seems to be necessary to handle the actual
228                    // example of a complex cluster here:
229                    // https://docs.microsoft.com/en-us/typography/script-development/myanmar#well-formed-clusters
230                    self.accept_zero_or_many_as(A, ShapeClass::Anusvara)?; // self.accept(A)?;
231                    self.accept(DB)?;
232                    self.accept(As)?;
233                }
234                Some(true)
235            }
236            _ => Some(false),
237        }
238    }
239
240    fn parse_emoji_extension(&mut self) -> Option<bool> {
241        use ClusterBreak::*;
242        loop {
243            match self.s.cur.info.cluster_break() {
244                EX => match self.s.cur.ch as u32 {
245                    0x200C => self.accept_any_as(ShapeClass::Zwnj)?,
246                    0xFE0F => {
247                        self.cluster.info_mut().set_emoji(Emoji::Color);
248                        self.cluster.note_char(&self.s.cur);
249                        self.advance()?;
250                    }
251                    0xFE0E => {
252                        self.cluster.info_mut().set_emoji(Emoji::Text);
253                        self.cluster.note_char(&self.s.cur);
254                        self.advance()?;
255                    }
256                    _ => self.accept_any_as(ShapeClass::Mark)?,
257                },
258                ZWJ => {
259                    self.accept_any_as(ShapeClass::Zwj)?;
260                    return Some(true);
261                }
262                _ => break,
263            }
264        }
265        Some(false)
266    }
267
268    #[inline(always)]
269    fn emoji(&self) -> bool {
270        self.s.cur_emoji
271    }
272
273    #[inline(always)]
274    fn kind(&self) -> Kind {
275        self.s.cur_kind
276    }
277
278    fn accept(&mut self, kind: Kind) -> Option<bool> {
279        self.accept_as(kind, ShapeClass::Other)
280    }
281
282    fn accept_as(&mut self, kind: Kind, as_class: ShapeClass) -> Option<bool> {
283        if self.s.cur_kind == kind {
284            self.accept_any_as(as_class)?;
285            Some(true)
286        } else {
287            Some(false)
288        }
289    }
290
291    fn accept_zero_or_many(&mut self, kind: Kind) -> Option<bool> {
292        let mut some = false;
293        while self.accept(kind)? {
294            some = true;
295        }
296        Some(some)
297    }
298
299    fn accept_zero_or_many_as(&mut self, kind: Kind, as_class: ShapeClass) -> Option<bool> {
300        let mut some = false;
301        while self.accept_as(kind, as_class)? {
302            some = true;
303        }
304        Some(some)
305    }
306
307    fn accept_any(&mut self) -> Option<()> {
308        self.cluster.push(&self.s.cur, ShapeClass::Other);
309        self.advance()?;
310        Some(())
311    }
312
313    fn accept_any_as(&mut self, as_class: ShapeClass) -> Option<()> {
314        self.cluster.push(&self.s.cur, as_class);
315        self.advance()?;
316        Some(())
317    }
318
319    fn advance(&mut self) -> Option<()> {
320        if self.cluster.len() as usize == MAX_CLUSTER_SIZE {
321            return None;
322        }
323        if let Some(input) = self.s.chars.next() {
324            let (kind, emoji) = input.info.myanmar_class();
325            self.s.cur = input;
326            self.s.cur_emoji = emoji;
327            self.s.cur_kind = kind;
328            if input.ch == '\u{34f}' {
329                self.accept_any()?;
330            }
331            Some(())
332        } else {
333            self.s.done = true;
334            None
335        }
336    }
337}