swash/text/cluster/
cluster.rs

1use super::super::{Codepoint as _, JoiningType};
2use super::char::{Char, ShapeClass};
3use super::token::Token;
4use super::{ClusterInfo, UserData};
5use crate::GlyphId;
6
7use core::fmt;
8use core::ops::Range;
9
/// Upper bound on the number of characters held by a single cluster.
///
/// This is the capacity of the fixed-size character arrays that back a
/// cluster and its normalized forms.
pub const MAX_CLUSTER_SIZE: usize = 32;
12
13/// Character cluster; output from the parser and input to the shaper.
14#[derive(Copy, Clone)]
15pub struct CharCluster {
16    info: ClusterInfo,
17    chars: [Char; MAX_CLUSTER_SIZE],
18    len: u8,
19    map_len: u8,
20    start: u32,
21    end: u32,
22    force_normalize: bool,
23    comp: Form,
24    decomp: Form,
25    form: FormKind,
26    best_ratio: f32,
27}
28
29impl CharCluster {
30    /// Creates a new empty cluster.
31    pub fn new() -> Self {
32        Self {
33            info: ClusterInfo(0),
34            chars: [DEFAULT_CHAR; MAX_CLUSTER_SIZE],
35            len: 0,
36            map_len: 0,
37            start: 0,
38            end: 0,
39            force_normalize: false,
40            comp: Form::new(),
41            decomp: Form::new(),
42            form: FormKind::Original,
43            best_ratio: 0.,
44        }
45    }
46
47    /// Returns the cluster information.
48    pub fn info(&self) -> ClusterInfo {
49        self.info
50    }
51
52    /// Returns the primary user data for the cluster.
53    pub fn user_data(&self) -> UserData {
54        self.chars[0].data
55    }
56
57    /// Returns the source range for the cluster in code units.
58    pub fn range(&self) -> SourceRange {
59        SourceRange {
60            start: self.start,
61            end: self.end,
62        }
63    }
64
65    /// Returns true if the cluster is empty.
66    pub fn is_empty(&self) -> bool {
67        self.len == 0
68    }
69
70    /// Returns the sequence of characters in the cluster.
71    pub fn chars(&self) -> &[Char] {
72        &self.chars[..self.len as usize]
73    }
74
75    /// Returns the currently mapped sequence of characters in the cluster.
76    pub fn mapped_chars(&self) -> &[Char] {
77        match self.form {
78            FormKind::Original => &self.chars[..self.len as usize],
79            FormKind::NFD => self.decomp.chars(),
80            FormKind::NFC => self.comp.chars(),
81        }
82    }
83
84    /// Applies a nominal glyph identifier mapping to the cluster, returning
85    /// a result indicating the status of the mapping.
86    pub fn map(&mut self, f: impl Fn(char) -> GlyphId) -> Status {
87        let len = self.len;
88        if len == 0 {
89            return Status::Complete;
90        }
91        let mut glyph_ids = [0u16; MAX_CLUSTER_SIZE];
92        let prev_ratio = self.best_ratio;
93        let mut ratio;
94        if self.force_normalize && self.composed().is_some() {
95            ratio = self.comp.map(&f, &mut glyph_ids, self.best_ratio);
96            if ratio > self.best_ratio {
97                self.best_ratio = ratio;
98                self.form = FormKind::NFC;
99                if ratio >= 1. {
100                    return Status::Complete;
101                }
102            }
103        }
104        ratio = Mapper {
105            chars: &mut self.chars[..self.len as usize],
106            map_len: self.map_len.max(1),
107        }
108        .map(&f, &mut glyph_ids, self.best_ratio);
109        if ratio > self.best_ratio {
110            self.best_ratio = ratio;
111            self.form = FormKind::Original;
112            if ratio >= 1. {
113                return Status::Complete;
114            }
115        }
116        if len > 1 && self.decomposed().is_some() {
117            ratio = self.decomp.map(&f, &mut glyph_ids, self.best_ratio);
118            if ratio > self.best_ratio {
119                self.best_ratio = ratio;
120                self.form = FormKind::NFD;
121                if ratio >= 1. {
122                    return Status::Complete;
123                }
124            }
125            if !self.force_normalize && self.composed().is_some() {
126                ratio = self.comp.map(&f, &mut glyph_ids, self.best_ratio);
127                if ratio > self.best_ratio {
128                    self.best_ratio = ratio;
129                    self.form = FormKind::NFC;
130                    if ratio >= 1. {
131                        return Status::Complete;
132                    }
133                }
134            }
135        }
136        if self.best_ratio > prev_ratio {
137            Status::Keep
138        } else {
139            Status::Discard
140        }
141    }
142
143    /// Resets the cluster to the initial empty state.
144    pub fn clear(&mut self) {
145        self.info = ClusterInfo(0);
146        self.len = 0;
147        self.map_len = 0;
148        self.start = 0;
149        self.end = 0;
150        self.force_normalize = false;
151        self.comp.clear();
152        self.decomp.clear();
153        self.form = FormKind::Original;
154        self.best_ratio = 0.;
155    }
156
157    /// Returns the sequence of decomposed characters for the cluster.
158    fn decomposed(&mut self) -> Option<&[Char]> {
159        match self.decomp.state {
160            FormState::Invalid => None,
161            FormState::None => {
162                self.decomp.state = FormState::Invalid;
163                let mut i = 0;
164                for ch in &self.chars[..self.len as usize] {
165                    let mut end = i;
166                    let mut copy = *ch;
167                    for c in ch.ch.decompose() {
168                        if end == MAX_CLUSTER_SIZE {
169                            return None;
170                        }
171                        copy.ch = c;
172                        self.decomp.chars[end] = copy;
173                        end += 1;
174                    }
175                    i = end;
176                }
177                if i == 0 {
178                    return None;
179                }
180                self.decomp.len = i as u8;
181                self.decomp.state = FormState::Valid;
182                self.decomp.setup();
183                Some(self.decomp.chars())
184            }
185            FormState::Valid => Some(self.decomp.chars()),
186        }
187    }
188
189    /// Returns the sequence of composed characters for the cluster.
190    fn composed(&mut self) -> Option<&[Char]> {
191        match self.comp.state {
192            FormState::Invalid => None,
193            FormState::None => {
194                if self.decomposed().map(|chars| chars.len()).unwrap_or(0) == 0 {
195                    self.comp.state = FormState::Invalid;
196                    return None;
197                }
198                self.comp.state = FormState::Invalid;
199                let mut last = self.decomp.chars[0];
200                let mut i = 0;
201                for ch in &self.decomp.chars()[1..] {
202                    if let Some(comp) = char::compose(last.ch, ch.ch) {
203                        last.ch = comp;
204                    } else {
205                        self.comp.chars[i] = last;
206                        i += 1;
207                        last = *ch;
208                    }
209                }
210                self.comp.chars[i] = last;
211                self.comp.len = i as u8 + 1;
212                self.comp.state = FormState::Valid;
213                self.comp.setup();
214                Some(self.comp.chars())
215            }
216            FormState::Valid => Some(self.comp.chars()),
217        }
218    }
219}
220
221impl Default for CharCluster {
222    fn default() -> Self {
223        Self::new()
224    }
225}
226
227/// Functions for cluster building.
228impl CharCluster {
229    pub(super) fn info_mut(&mut self) -> &mut ClusterInfo {
230        &mut self.info
231    }
232
233    pub(super) fn len(&self) -> u8 {
234        self.len
235    }
236
237    pub(super) fn force_normalize(&mut self) {
238        self.force_normalize = true;
239    }
240
241    pub(super) fn push(&mut self, input: &Token, class: ShapeClass) {
242        let contributes_to_shaping = input.info.contributes_to_shaping();
243        self.chars[self.len as usize] = Char {
244            ch: input.ch,
245            shape_class: class,
246            joining_type: input.info.joining_type(),
247            ignorable: input.info.is_ignorable(),
248            contributes_to_shaping,
249            glyph_id: 0,
250            offset: input.offset,
251            data: input.data,
252        };
253        if self.len == 0 {
254            self.start = input.offset;
255        }
256        self.info.merge_boundary(input.info.boundary() as u16);
257        self.end = input.offset + input.len as u32;
258        self.len += 1;
259        self.map_len += contributes_to_shaping as u8;
260    }
261
262    /// This function records the attributes and range information for
263    /// a character but does not add it to the cluster. It is used when
264    /// characters such as emoji variation selectors are dropped from
265    /// shaping but should still be included in the cluster range.
266    pub(super) fn note_char(&mut self, input: &Token) {
267        if self.len == 0 {
268            self.start = input.offset;
269        }
270        self.info.merge_boundary(input.info.boundary() as u16);
271        self.end = input.offset + input.len as u32;
272    }
273}
274
/// Result of one attempt at mapping a character cluster to nominal glyph
/// identifiers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Status {
    /// The attempt should be skipped.
    Discard,
    /// The attempt is the best mapping seen so far.
    Keep,
    /// The mapping is complete.
    Complete,
}
285
/// Source range of a cluster in code units.
///
/// `start` is the offset of the first code unit and `end` is the offset just
/// past the last one. The default value is the empty range `0..0`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default)]
pub struct SourceRange {
    /// Offset of the first code unit in the range.
    pub start: u32,
    /// Offset one past the last code unit in the range.
    pub end: u32,
}
292
293impl SourceRange {
294    /// Converts the source range into a `usize` range.
295    pub fn to_range(self) -> Range<usize> {
296        self.start as usize..self.end as usize
297    }
298}
299
300impl From<SourceRange> for Range<usize> {
301    fn from(s: SourceRange) -> Self {
302        s.to_range()
303    }
304}
305
306impl fmt::Debug for SourceRange {
307    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
308        write!(f, "{}..{}", self.start, self.end)
309    }
310}
311
312impl fmt::Display for SourceRange {
313    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
314        write!(f, "{}..{}", self.start, self.end)
315    }
316}
317
/// Selects which character sequence of a cluster a glyph mapping refers to.
// The variant names follow the normalization form acronyms, so the
// acronym-casing lint is silenced instead of renaming them.
#[allow(clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Eq, PartialEq)]
enum FormKind {
    /// The characters exactly as they were pushed into the cluster.
    Original,
    /// The decomposed character sequence.
    NFD,
    /// The composed character sequence.
    NFC,
}
325
/// Tracks whether a lazily computed form has been built yet.
#[derive(Clone, Copy, Eq, PartialEq)]
enum FormState {
    /// The form has not been computed yet.
    None,
    /// The form was computed and its characters can be used.
    Valid,
    /// Computing the form was attempted and failed; it will not be retried
    /// until the form is cleared.
    Invalid,
}
332
333#[derive(Copy, Clone)]
334struct Form {
335    pub chars: [Char; MAX_CLUSTER_SIZE],
336    pub len: u8,
337    pub map_len: u8,
338    pub state: FormState,
339}
340
341impl Form {
342    fn new() -> Self {
343        Self {
344            chars: [DEFAULT_CHAR; MAX_CLUSTER_SIZE],
345            len: 0,
346            map_len: 0,
347            state: FormState::None,
348        }
349    }
350
351    fn clear(&mut self) {
352        self.len = 0;
353        self.map_len = 0;
354        self.state = FormState::None;
355    }
356
357    fn chars(&self) -> &[Char] {
358        &self.chars[..self.len as usize]
359    }
360
361    fn setup(&mut self) {
362        self.map_len = (self
363            .chars()
364            .iter()
365            .filter(|c| c.shape_class != ShapeClass::Control)
366            .count() as u8)
367            .max(1);
368    }
369
370    fn map(
371        &mut self,
372        f: &impl Fn(char) -> u16,
373        glyphs: &mut [u16; MAX_CLUSTER_SIZE],
374        best_ratio: f32,
375    ) -> f32 {
376        Mapper {
377            chars: &mut self.chars[..self.len as usize],
378            map_len: self.map_len,
379        }
380        .map(f, glyphs, best_ratio)
381    }
382}
383
384struct Mapper<'a> {
385    chars: &'a mut [Char],
386    map_len: u8,
387}
388
389impl<'a> Mapper<'a> {
390    fn map(
391        &mut self,
392        f: &impl Fn(char) -> u16,
393        glyphs: &mut [u16; MAX_CLUSTER_SIZE],
394        best_ratio: f32,
395    ) -> f32 {
396        if self.map_len == 0 {
397            return 1.;
398        }
399        let mut mapped = 0;
400        for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) {
401            if !c.contributes_to_shaping {
402                *g = f(c.ch);
403                if self.map_len == 1 {
404                    mapped += 1;
405                }
406            } else {
407                let gid = f(c.ch);
408                *g = gid;
409                if gid != 0 {
410                    mapped += 1;
411                }
412            }
413        }
414        let ratio = mapped as f32 / self.map_len as f32;
415        if ratio > best_ratio {
416            for (ch, glyph) in self.chars.iter_mut().zip(glyphs) {
417                ch.glyph_id = *glyph;
418            }
419        }
420        ratio
421    }
422}
423
424const DEFAULT_CHAR: Char = Char {
425    ch: ' ',
426    shape_class: ShapeClass::Base,
427    joining_type: JoiningType::U,
428    ignorable: false,
429    contributes_to_shaping: true,
430    glyph_id: 0,
431    data: 0,
432    offset: 0,
433};