swash/text/cluster/
simple.rs

//! Simple cluster formation (Unicode grapheme cluster algorithm).
2
3use super::super::ClusterBreak;
4use super::{CharCluster, Emoji, ShapeClass, Token, Whitespace, MAX_CLUSTER_SIZE};
5
6pub struct SimpleState<I> {
7    chars: I,
8    cur: Token,
9    cur_kind: ClusterBreak,
10    cur_emoji: bool,
11    done: bool,
12}
13
14impl<I> SimpleState<I>
15where
16    I: Iterator<Item = Token>,
17{
18    pub fn new(mut chars: I) -> Self {
19        if let Some(first) = chars.by_ref().next() {
20            let (kind, emoji) = first.info.cluster_class();
21            Self {
22                chars,
23                cur: first,
24                cur_kind: kind,
25                cur_emoji: emoji,
26                done: false,
27            }
28        } else {
29            Self {
30                chars,
31                cur: Token::default(),
32                cur_kind: ClusterBreak::XX,
33                cur_emoji: false,
34                done: true,
35            }
36        }
37    }
38
39    pub fn next(&mut self, cluster: &mut CharCluster) -> bool {
40        if self.done {
41            return false;
42        }
43        Parser { s: self, cluster }.parse();
44        true
45    }
46}
47
48pub struct Parser<'a, I> {
49    s: &'a mut SimpleState<I>,
50    cluster: &'a mut CharCluster,
51}
52
53impl<'a, I> Parser<'a, I>
54where
55    I: Iterator<Item = Token>,
56{
57    fn parse(&mut self) -> Option<()> {
58        use ClusterBreak::*;
59        while self.accept(PP)? {}
60        if self.emoji() {
61            self.cluster.info_mut().set_emoji(Emoji::Default);
62            while self.emoji() {
63                self.accept_any()?;
64                if !self.parse_emoji_extension()? {
65                    break;
66                }
67            }
68        } else {
69            match self.kind() {
70                CN => {
71                    self.accept_any_as(ShapeClass::Control)?;
72                }
73                LF => {
74                    self.cluster.info_mut().set_space(Whitespace::Newline);
75                    self.accept_any_as(ShapeClass::Control)?;
76                }
77                CR => {
78                    self.cluster.info_mut().set_space(Whitespace::Newline);
79                    self.accept_any_as(ShapeClass::Control)?;
80                    self.accept_as(LF, ShapeClass::Control)?;
81                }
82                L => {
83                    self.accept_any()?;
84                    match self.kind() {
85                        L | V | LV | LVT => {
86                            self.accept_any()?;
87                        }
88                        _ => {}
89                    }
90                }
91                LV | V => {
92                    self.accept_any()?;
93                    match self.kind() {
94                        V | T => {
95                            self.accept_any()?;
96                        }
97                        _ => {}
98                    }
99                }
100                LVT | T => {
101                    self.accept_any()?;
102                    self.accept(T)?;
103                }
104                RI => {
105                    self.accept(RI)?;
106                }
107                EX | SM | ZWJ => {
108                    self.cluster.info_mut().set_broken();
109                }
110                _ => {
111                    self.cluster.info_mut().set_space_from_char(self.s.cur.ch);
112                    self.accept_any()?;
113                }
114            }
115        }
116        while self.parse_extension()? {}
117        Some(())
118    }
119
120    fn parse_emoji_extension(&mut self) -> Option<bool> {
121        use ClusterBreak::*;
122        loop {
123            match self.kind() {
124                EX => match self.s.cur.ch as u32 {
125                    0x200C => self.accept_any_as(ShapeClass::Zwnj)?,
126                    0xFE0F => {
127                        self.cluster.info_mut().set_emoji(Emoji::Color);
128                        self.cluster.note_char(&self.s.cur);
129                        self.advance()?;
130                    }
131                    0xFE0E => {
132                        self.cluster.info_mut().set_emoji(Emoji::Text);
133                        self.cluster.note_char(&self.s.cur);
134                        self.advance()?;
135                    }
136                    _ => self.accept_any_as(ShapeClass::Mark)?,
137                },
138                ZWJ => {
139                    self.accept_any_as(ShapeClass::Zwj)?;
140                    return Some(true);
141                }
142                _ => break,
143            }
144        }
145        Some(false)
146    }
147
148    fn parse_extension(&mut self) -> Option<bool> {
149        use ClusterBreak::*;
150        Some(match self.kind() {
151            EX => {
152                if self.s.cur.ch as u32 == 0x200C {
153                    self.accept_any_as(ShapeClass::Zwnj)?;
154                } else if self.s.cur.info.is_variation_selector() {
155                    self.accept_any_as(ShapeClass::Vs)?;
156                } else {
157                    self.cluster.force_normalize();
158                    self.accept_any_as(ShapeClass::Mark)?;
159                }
160                true
161            }
162            SM => {
163                self.cluster.force_normalize();
164                self.accept_any_as(ShapeClass::Mark)?;
165                true
166            }
167            ZWJ => {
168                self.accept_any_as(ShapeClass::Zwj)?;
169                true
170            }
171            _ => false,
172        })
173    }
174
175    #[inline(always)]
176    fn emoji(&self) -> bool {
177        self.s.cur_emoji
178    }
179
180    #[inline(always)]
181    fn kind(&self) -> ClusterBreak {
182        self.s.cur_kind
183    }
184
185    fn accept(&mut self, kind: ClusterBreak) -> Option<bool> {
186        if self.s.cur_kind == kind {
187            self.accept_any()?;
188            Some(true)
189        } else {
190            Some(false)
191        }
192    }
193
194    fn accept_as(&mut self, kind: ClusterBreak, as_kind: ShapeClass) -> Option<bool> {
195        if self.s.cur_kind == kind {
196            self.accept_any_as(as_kind)?;
197            Some(true)
198        } else {
199            Some(false)
200        }
201    }
202
203    fn accept_any(&mut self) -> Option<()> {
204        self.push_cur();
205        self.advance()?;
206        Some(())
207    }
208
209    fn accept_any_as(&mut self, as_kind: ShapeClass) -> Option<()> {
210        self.cluster.push(&self.s.cur, as_kind);
211        self.advance()?;
212        Some(())
213    }
214
215    fn advance(&mut self) -> Option<()> {
216        if self.cluster.len() as usize == MAX_CLUSTER_SIZE {
217            return None;
218        }
219        if let Some(input) = self.s.chars.next() {
220            let (kind, emoji) = input.info.cluster_class();
221            self.s.cur = input;
222            self.s.cur_emoji = emoji;
223            self.s.cur_kind = kind;
224            Some(())
225        } else {
226            self.s.done = true;
227            None
228        }
229    }
230
231    #[inline]
232    fn push_cur(&mut self) {
233        self.cluster.push(&self.s.cur, ShapeClass::Base);
234    }
235}