1use super::{cluster::Boundary, Codepoint, LineBreak, Properties, WordBreak};
2use core::borrow::Borrow;
3
4pub fn analyze<I>(chars: I) -> Analyze<I::IntoIter>
7where
8 I: IntoIterator,
9 I::IntoIter: Clone,
10 I::Item: Borrow<char>,
11{
12 Analyze {
13 chars: chars.into_iter(),
14 state: BoundaryState::new(),
15 }
16}
17
/// Iterator adapter returned by [`analyze`].
///
/// Its `Iterator` implementation yields one `(Properties, Boundary)` pair for
/// each item taken from the wrapped character iterator.
#[derive(Clone)]
pub struct Analyze<I> {
    /// Underlying character iterator; one item is consumed per `next` call.
    chars: I,
    /// Word/line boundary state carried from one character to the next.
    state: BoundaryState,
}
25
26impl<I> Iterator for Analyze<I>
27where
28 I: Iterator + Clone,
29 I::Item: Borrow<char>,
30{
31 type Item = (Properties, Boundary);
32
33 fn next(&mut self) -> Option<Self::Item> {
34 self.state.next(&mut self.chars)
35 }
36}
37
38impl<I> Analyze<I> {
39 pub fn needs_bidi_resolution(&self) -> bool {
42 self.state.needs_bidi
43 }
44
45 pub fn set_break_strength(&mut self, strength: WordBreakStrength) {
47 self.state.strength = strength;
48 }
49}
50
/// Selects how line-break opportunities are adjusted during analysis.
///
/// Applied through `Analyze::set_break_strength`; `Normal` is the default.
// NOTE(review): the variant names look like the CSS `word-break` keywords
// (`normal`, `break-all`, `keep-all`) — confirm that is the intended mapping.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
#[repr(u8)]
pub enum WordBreakStrength {
    /// No adjustment: break opportunities come straight from the pair table.
    #[default]
    Normal,
    /// The `AL`, `NU` and `SA` line-break classes are looked up as `ID` when
    /// deciding whether a break is allowed.
    BreakAll,
    /// Breaks are suppressed between two adjacent characters whose line-break
    /// classes are both among `AI`, `AL`, `ID`, `NU`, `HY`, `H2`, `H3`, `JL`,
    /// `JV`, `JT` and `CJ`.
    KeepAll,
}
64
/// Mutable state shared by the word and line boundary checks, carried from
/// one analyzed character to the next.
#[derive(Clone)]
struct BoundaryState {
    /// Word-break strength consulted by `check_line`.
    strength: WordBreakStrength,
    /// Word-break class that `check_word` treats as the previous character.
    prev: WordBreak,
    /// When set, the next `check_word` call reports no boundary and clears
    /// the flag; set after a successful one-character look-ahead.
    prevent_next: bool,
    /// Running count of `RI` characters used to decide where runs of them
    /// are split.
    ri_count: u8,
    /// Whether an extended-pictographic character has been seen since the
    /// last `reset_state`.
    emoji: bool,
    /// Whether the most recently processed character is extended
    /// pictographic; folded into `emoji` on the following call.
    next_emoji: bool,
    /// Line-break state: the current `PAIR_TABLE` row index and the
    /// line-break class of the previous character (`None` before the first).
    line_state: (u8, Option<LineBreak>),
    /// `true` until `check_word` has processed its first character.
    first: bool,
    /// Sticky flag: set once any character's bidi class needs resolution.
    needs_bidi: bool,
}
77
impl BoundaryState {
    /// Creates the state used before any character has been analyzed.
    fn new() -> Self {
        Self {
            strength: WordBreakStrength::default(),
            // Placeholder only: `check_word` overwrites `prev` on its first call.
            prev: WordBreak::EX,
            prevent_next: false,
            ri_count: 0,
            emoji: false,
            next_emoji: false,
            // Start in the `sot` row of `PAIR_TABLE`, with no previous class.
            line_state: (sot, None),
            first: true,
            needs_bidi: false,
        }
    }

    /// Clears the regional-indicator count and the accumulated emoji flag.
    ///
    /// `prev`, `next_emoji` and `prevent_next` are deliberately left untouched.
    fn reset_state(&mut self) {
        self.ri_count = 0;
        self.emoji = false;
    }

    /// Decides whether a word boundary falls between the previously processed
    /// character and the one described by `props`.
    ///
    /// `iter` must be positioned just after the current character; it is only
    /// cloned to peek at the following character and is never advanced here.
    /// Returns `true` when a boundary is reported (always for the very first
    /// character) and `false` otherwise.
    ///
    /// The checks are order-dependent: each one returns as soon as it matches.
    /// Rule labels in the comments below (WB3, WB4, ...) refer to the UAX #29
    /// word boundary rules and are inferred from the constant names used here
    /// (`WB3_A`, `WB_4`, `WB13_A`, `WB13_B`) — verify against the specification.
    fn check_word<I>(&mut self, props: Properties, iter: &mut I) -> bool
    where
        I: Iterator + Clone,
        I::Item: Borrow<char>,
    {
        use WordBreak::*;
        let b = props.word_break();
        let emoji = props.is_extended_pictographic();
        // First character: nothing to compare against, so just seed the state
        // and report a boundary.
        if self.first {
            self.first = false;
            self.prev = b;
            self.next_emoji = emoji;
            if b == RI {
                self.ri_count = 1;
            }
            return true;
        }
        // Shift the emoji tracking forward by one character, remembering the
        // value of `emoji` from before the shift.
        let prev_emoji = self.emoji;
        self.emoji = self.emoji || self.next_emoji;
        self.next_emoji = emoji;
        // `a` is the previous class, `b` the current one.
        let a = self.prev;
        self.prev = b;
        // An earlier look-ahead already decided that this position is not a
        // boundary.
        if self.prevent_next {
            self.prevent_next = false;
            return false;
        }
        // WB3: never break inside CR LF.
        if a == CR && b == LF {
            self.reset_state();
            return false;
        }
        // The masks are used below to test membership in a set of classes.
        let a_mask = a.mask();
        let b_mask = b.mask();
        const AH_LETTER: u32 = LE.mask() | HL.mask();
        const MID_NUM_LET_Q: u32 = MB.mask() | SQ.mask();
        const WB3_A: u32 = NL.mask() | CR.mask() | LF.mask();
        // WB3a / WB3b: always break after and before NL, CR or LF.
        if a_mask & WB3_A != 0 || b_mask & WB3_A != 0 {
            self.reset_state();
            return true;
        }
        // WB3c: no break between ZWJ and an extended-pictographic character.
        if a == ZWJ && emoji {
            self.reset_state();
            return false;
        }
        const WB_4: u32 = Extend.mask() | FO.mask() | ZWJ.mask();
        // WB4: Extend, FO and ZWJ attach to what precedes them. `prev` is
        // restored so the next character is compared against `a` instead.
        // NOTE(review): because `prev` is restored here, a ZWJ handled by this
        // branch is not what the next call sees as `a`; the `a == ZWJ` tests
        // only match a ZWJ recorded by one of the earlier return paths —
        // confirm this is intended.
        if b_mask & WB_4 != 0 {
            self.reset_state();
            self.prev = a;
            return false;
        }
        // WB3d: keep runs of WSegSpace together.
        if a == WSegSpace && b == WSegSpace {
            self.reset_state();
            return false;
        }
        if a_mask & AH_LETTER != 0 {
            // WB5 / WB9: letter followed by a letter or a number.
            if b_mask & (AH_LETTER | NU.mask()) != 0 {
                self.reset_state();
                return false;
            }
            // WB6 / WB7: letter, mid-letter punctuation, letter. Peek one
            // character ahead; on a match, suppress the boundary both here and
            // at the next position.
            if b_mask & (ML.mask() | MID_NUM_LET_Q) != 0 {
                if let Some(c) = iter
                    .clone()
                    .next()
                    .map(|p| p.borrow().properties().word_break())
                {
                    if c.mask() & AH_LETTER != 0 {
                        self.prevent_next = true;
                        self.reset_state();
                        return false;
                    }
                }
            }
        }
        if a == HL {
            // WB7a: HL followed by SQ.
            if b == SQ {
                self.reset_state();
                return false;
            }
            // WB7b / WB7c: HL, DQ, HL — same look-ahead scheme as above.
            if b == DQ {
                if let Some(c) = iter
                    .clone()
                    .next()
                    .map(|p| p.borrow().properties().word_break())
                {
                    if c == HL {
                        self.prevent_next = true;
                        self.reset_state();
                        return false;
                    }
                }
            }
        }
        if a_mask & NU.mask() != 0 {
            // WB8 / WB10: number followed by a number or a letter.
            if b_mask & (NU.mask() | AH_LETTER) != 0 {
                self.reset_state();
                return false;
            }
            // WB11 / WB12: number, mid-number punctuation, number — same
            // look-ahead scheme as above.
            if b_mask & (MN.mask() | MID_NUM_LET_Q) != 0 {
                if let Some(c) = iter
                    .clone()
                    .next()
                    .map(|p| p.borrow().properties().word_break())
                {
                    if c == NU {
                        self.prevent_next = true;
                        self.reset_state();
                        return false;
                    }
                }
            }
        }
        // WB13: keep KA runs together.
        if a == KA && b == KA {
            self.reset_state();
            return false;
        }
        const WB13_A: u32 = AH_LETTER | NU.mask() | KA.mask() | EX.mask();
        // WB13a: letter, number, KA or EX followed by EX.
        if a_mask & WB13_A != 0 && b == EX {
            self.reset_state();
            return false;
        }
        const WB13_B: u32 = AH_LETTER | NU.mask() | KA.mask();
        // WB13b: EX followed by a letter, number or KA.
        if a == EX && b_mask & WB13_B != 0 {
            self.reset_state();
            return false;
        }
        // NOTE(review): this branch looks unreachable as written — every
        // `a == ZWJ && emoji` case has already returned in the WB3c check
        // above. Confirm before relying on it.
        if prev_emoji && a == ZWJ && emoji {
            self.ri_count = 0;
            return false;
        }
        // WB15 / WB16: two RI characters have already been grouped, so a
        // boundary is reported here; a new RI starts the next count.
        if self.ri_count == 2 {
            self.reset_state();
            if b == RI {
                self.ri_count = 1;
            }
            return true;
        }
        if b == RI {
            self.ri_count += 1;
            // An RI following a non-RI starts a new run with a boundary.
            if a != RI {
                self.reset_state();
                self.ri_count = 1;
                return true;
            }
            // Second RI of a pair: stays with the first one.
            self.emoji = false;
            return false;
        }
        // WB999: otherwise, break.
        self.reset_state();
        true
    }

    /// Computes the line-break boundary for the character described by
    /// `props` and advances the line-break state.
    ///
    /// The decision comes from `PAIR_TABLE`, indexed by the current state and
    /// the character's line-break class, and is then adjusted according to
    /// `self.strength`.
    fn check_line(&mut self, props: Properties) -> Boundary {
        let state = self.line_state;
        let lb = props.line_break();

        use LineBreak::*;

        // Entry for the unmodified (state, class) pair; it always drives the
        // state transition stored at the end of this function.
        let val = PAIR_TABLE[state.0 as usize][lb as usize];

        // Under `BreakAll`, the break decision (but not the state transition)
        // is taken from the table with AL / NU / SA replaced by ID on both
        // the previous and the current side.
        let mode_val = if self.strength == WordBreakStrength::BreakAll {
            let left = if matches!(state.1, Some(AL | NU | SA)) {
                ID as usize
            } else {
                state.0 as usize
            };
            let right = if matches!(lb, AL | NU | SA) {
                ID as usize
            } else {
                lb as usize
            };
            PAIR_TABLE[left][right]
        } else {
            val
        };

        // Decode the flag bits. The mandatory bit wins; an allowed break is
        // dropped when the previous class was ZWJ.
        let mut mode = if mode_val & MANDATORY_BREAK_BIT != 0 {
            Boundary::Mandatory
        } else if mode_val & ALLOWED_BREAK_BIT != 0 && state.1 != Some(ZWJ) {
            Boundary::Line
        } else {
            Boundary::None
        };

        // Under `KeepAll`, no boundary is reported between two characters
        // whose classes are both in the listed set, whatever the table said.
        if let (
            WordBreakStrength::KeepAll,
            Some(AI | AL | ID | NU | HY | H2 | H3 | JL | JV | JT | CJ),
            AI | AL | ID | NU | HY | H2 | H3 | JL | JV | JT | CJ,
        ) = (self.strength, state.1, lb)
        {
            mode = Boundary::None;
        }

        // The next state is the table entry with both flag bits cleared.
        self.line_state = (val & !(ALLOWED_BREAK_BIT | MANDATORY_BREAK_BIT), Some(lb));
        mode
    }

    /// Consumes one character from `iter` and returns its properties together
    /// with the boundary reported for it, or `None` when `iter` is exhausted.
    ///
    /// The line-break result takes precedence; `Boundary::Word` is reported
    /// only when the line check yields the zero-valued boundary and the word
    /// check reports a boundary.
    fn next<I>(&mut self, iter: &mut I) -> Option<(Properties, Boundary)>
    where
        I: Iterator + Clone,
        I::Item: Borrow<char>,
    {
        let props = iter.next()?.borrow().properties();
        let mut boundary = self.check_line(props);
        // Always run the word check so its state stays in step, even when the
        // line boundary is going to win.
        let word = self.check_word(props, iter);
        // NOTE(review): discriminant 0 is presumably `Boundary::None` —
        // confirm against the `cluster` module.
        if boundary as u16 == 0 && word {
            boundary = Boundary::Word;
        }
        self.needs_bidi = self.needs_bidi || props.bidi_class().needs_resolution();
        Some((props, boundary))
    }
}
333
/// Flag bit in a `PAIR_TABLE` entry: a line break is allowed at this position.
const ALLOWED_BREAK_BIT: u8 = 0x80;
/// Flag bit in a `PAIR_TABLE` entry: a line break is mandatory at this position.
const MANDATORY_BREAK_BIT: u8 = 0x40;

/// Row of `PAIR_TABLE` used as the initial line-break state by
/// `BoundaryState::new`.
// NOTE(review): the name presumably stands for "start of text" — confirm.
#[allow(non_upper_case_globals)]
const sot: u8 = 44;
339
/// Line-break state transition table, indexed as
/// `PAIR_TABLE[state][line_break_class]`.
///
/// There are 53 rows (states) and 44 columns (line-break classes, indexed by
/// `LineBreak as usize`). In each entry the two high bits are
/// `ALLOWED_BREAK_BIT` and `MANDATORY_BREAK_BIT`; the value with those bits
/// cleared is the row to use for the next character.
// NOTE(review): the rows beyond the class-valued ones appear to be extra
// context states (row 44 is `sot`) — confirm against the table generator.
#[rustfmt::skip]
const PAIR_TABLE: [[u8; 44]; 53] = [
    [1,1,130,3,132,5,134,28,8,1,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,1,235,],
    [1,1,130,3,132,5,134,28,8,1,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,1,235,],
    [129,129,2,3,132,5,134,28,8,2,10,11,140,141,14,15,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,50,38,39,129,41,2,235,],
    [129,129,130,3,132,5,134,28,8,3,10,11,140,141,14,143,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,37,38,39,129,41,3,235,],
    [1,1,2,3,4,5,134,28,8,4,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,37,38,39,1,41,4,235,],
    [193,193,194,195,196,197,198,220,200,193,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,193,193,229,230,231,193,233,193,235,],
    [129,129,130,131,132,5,134,156,8,6,10,11,140,141,14,15,144,145,146,147,148,149,22,151,152,153,26,27,156,157,158,159,160,33,162,129,129,37,38,39,129,41,6,235,],
    [129,129,130,3,132,5,134,28,8,28,10,11,140,141,14,15,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,37,38,39,129,41,28,235,],
    [129,129,130,3,132,5,134,28,8,8,10,11,140,141,14,15,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,31,32,33,162,129,129,48,38,39,129,41,8,235,],
    [1,1,130,3,132,5,134,28,8,9,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,9,235,],
    [1,1,130,3,132,5,134,28,8,10,10,11,140,141,14,15,144,145,18,19,148,149,22,151,152,153,26,27,28,29,158,31,32,33,162,1,1,49,38,39,1,41,10,235,],
    [193,193,194,195,196,197,198,220,200,193,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,26,219,220,221,222,223,224,225,226,193,193,229,230,231,193,233,193,235,],
    [129,129,130,3,132,5,134,28,8,12,10,11,140,13,14,15,144,145,146,19,148,21,22,151,152,153,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,12,235,],
    [129,129,130,3,132,5,134,28,8,13,10,11,140,141,14,15,144,145,146,19,148,21,22,151,152,153,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,13,235,],
    [129,129,130,3,132,5,134,28,8,14,10,11,140,141,14,15,144,145,146,19,148,21,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,37,38,39,129,41,14,235,],
    [1,1,2,3,4,5,6,28,8,15,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,37,38,39,1,41,15,235,],
    [129,129,130,3,132,5,134,28,8,16,10,11,140,141,14,15,144,145,146,19,148,21,22,151,24,25,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,16,235,],
    [129,129,130,3,132,5,134,28,8,17,10,11,140,141,14,15,144,145,146,19,148,21,22,151,24,153,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,17,235,],
    [1,1,130,51,132,5,134,28,8,18,10,11,140,141,14,15,144,145,18,51,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,18,235,],
    [129,129,130,3,132,5,134,28,8,19,10,11,140,141,14,143,144,145,146,19,148,149,22,151,152,153,26,27,28,29,158,159,160,33,162,129,129,37,38,39,129,41,19,235,],
    [129,129,130,3,132,5,134,28,8,20,10,11,140,141,14,15,144,145,146,19,148,21,22,151,152,153,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,20,235,],
    [129,129,130,3,132,5,134,28,8,21,10,11,140,141,14,15,144,145,146,19,148,21,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,37,38,39,129,41,21,235,],
    [1,1,130,3,132,5,134,28,8,22,10,11,140,141,14,15,144,145,18,19,148,149,22,151,152,153,26,27,28,29,158,159,160,33,162,1,1,37,38,39,1,41,22,235,],
    [129,129,130,3,132,5,134,28,8,23,10,11,140,141,14,15,16,17,146,19,148,21,22,23,152,25,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,23,235,],
    [129,129,130,3,132,5,134,28,8,24,10,11,140,141,14,15,144,145,146,19,148,21,22,151,24,153,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,24,235,],
    [129,129,130,3,132,5,134,28,8,25,10,11,140,141,14,15,144,145,146,19,148,21,22,151,24,25,26,27,28,157,158,31,160,33,162,129,129,37,38,39,129,41,25,235,],
    [193,193,194,195,196,197,198,220,200,193,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,193,193,229,230,231,193,233,193,235,],
    [193,193,194,195,196,197,198,220,200,193,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,193,193,229,230,231,193,233,193,235,],
    [129,129,130,3,132,5,134,28,8,28,10,11,140,141,14,15,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,37,38,39,129,41,28,235,],
    [1,1,130,3,132,5,134,28,8,29,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,29,235,],
    [1,1,2,3,4,5,6,28,8,30,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,46,38,39,1,41,30,235,],
    [1,1,130,3,132,5,134,28,8,31,10,11,140,141,14,15,144,145,18,19,148,149,22,151,152,153,26,27,28,29,30,159,160,33,162,1,1,37,38,39,1,41,31,235,],
    [1,1,130,3,132,5,134,28,8,32,10,11,12,13,14,15,16,17,18,19,20,149,22,23,24,25,26,27,28,29,30,159,160,33,162,1,1,37,38,39,1,41,32,235,],
    [1,1,2,3,4,5,6,28,8,33,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,47,38,39,1,41,33,235,],
    [129,129,130,3,132,5,134,28,8,34,10,11,140,141,14,15,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,159,160,33,52,129,129,37,38,39,129,41,34,235,],
    [1,1,130,3,132,5,134,28,8,1,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,1,235,],
    [1,1,130,3,132,5,134,28,8,1,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,1,235,],
    [129,129,130,131,132,5,134,156,8,129,10,11,140,141,14,143,144,145,146,147,148,149,22,151,152,153,26,27,156,157,158,159,160,161,162,129,129,37,38,39,129,41,129,235,],
    [129,129,130,3,132,5,134,28,8,38,10,11,140,141,14,15,144,145,18,19,148,149,22,151,152,153,26,27,28,29,158,159,160,33,162,129,129,37,38,39,129,41,38,235,],
    [1,1,2,3,4,5,6,28,8,39,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,37,38,39,1,41,39,235,],
    [1,1,130,3,132,5,134,28,8,1,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,1,235,],
    [129,129,130,131,132,5,134,156,136,129,138,11,140,141,142,143,144,145,146,147,148,149,150,151,152,153,26,27,156,157,158,159,160,161,162,129,129,45,166,167,129,41,129,235,],
    [1,1,130,3,132,5,134,28,8,42,10,11,140,141,14,15,144,145,18,19,148,21,22,151,152,153,26,27,28,29,30,31,32,33,162,1,1,37,38,39,1,41,42,235,],
    [129,129,130,3,132,5,134,28,8,129,10,11,140,141,14,143,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,37,38,39,129,41,129,235,],
    [1,1,2,3,4,5,6,28,8,1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,37,38,39,1,41,1,43,],
    [129,129,130,131,132,5,134,156,136,129,138,11,140,141,142,143,144,145,146,147,148,149,150,151,152,153,26,27,156,157,158,159,160,161,162,129,129,45,166,167,129,41,129,235,],
    [1,1,2,3,4,5,6,28,8,1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,46,38,39,1,41,1,235,],
    [129,129,130,131,132,5,134,156,8,129,10,11,140,141,14,143,144,145,146,147,148,149,22,151,152,153,26,27,156,157,30,159,160,161,162,129,129,47,38,39,129,41,129,235,],
    [129,129,130,131,132,5,134,28,8,129,10,11,140,141,14,143,144,145,146,147,148,149,22,151,152,153,26,27,28,157,158,159,160,161,162,129,129,48,38,39,129,41,129,235,],
    [129,129,130,131,132,5,134,28,8,129,10,11,140,141,14,143,144,145,146,147,148,149,22,151,152,153,26,27,28,157,158,159,160,161,162,129,129,49,38,39,129,41,129,235,],
    [129,129,2,131,132,5,134,156,8,129,10,11,140,141,14,143,144,145,146,147,148,149,22,151,152,153,26,27,156,157,158,159,160,161,162,129,129,50,38,39,129,41,129,235,],
    [1,1,2,3,4,5,134,28,8,51,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,1,1,37,38,39,1,41,51,235,],
    [129,129,130,3,132,5,134,28,8,52,10,11,140,141,14,15,144,145,146,19,148,149,22,151,152,153,26,27,28,157,158,159,160,33,162,129,129,37,38,39,129,41,52,235,],
];