1use super::buffer::*;
2use super::common::hb_codepoint_t;
3use super::hb_font_t;
4use super::ot_layout::*;
5use super::ot_shape_complex::MAX_COMBINING_MARKS;
6use super::ot_shape_plan::hb_ot_shape_plan_t;
7use super::unicode::{hb_unicode_funcs_t, CharExt};
89pub struct hb_ot_shape_normalize_context_t<'a> {
10pub plan: &'a hb_ot_shape_plan_t,
11pub buffer: &'a mut hb_buffer_t,
12pub face: &'a hb_font_t<'a>,
13pub decompose: fn(&hb_ot_shape_normalize_context_t, char) -> Option<(char, char)>,
14pub compose: fn(&hb_ot_shape_normalize_context_t, char, char) -> Option<char>,
15}
/// Normalization strategy requested by a shaper; holds one of the
/// `HB_OT_SHAPE_NORMALIZATION_MODE_*` values.
pub type hb_ot_shape_normalization_mode_t = i32;
/// Always short-circuit: characters the font maps directly are left untouched
/// and no recomposition round is run.
pub const HB_OT_SHAPE_NORMALIZATION_MODE_NONE: hb_ot_shape_normalization_mode_t = 0;
/// Decompose without short-circuiting and do not recompose afterwards.
pub const HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED: hb_ot_shape_normalization_mode_t = 1;
/// Recompose marks onto their starter. Never composes base-to-base.
pub const HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS: hb_ot_shape_normalization_mode_t = 2;
/// Always fully decomposes and then recomposes back.
pub const HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT:
    hb_ot_shape_normalization_mode_t = 3;
/// Let the normalizer choose. See hb-ot-shape-normalize.cc for logic.
pub const HB_OT_SHAPE_NORMALIZATION_MODE_AUTO: hb_ot_shape_normalization_mode_t = 4;
// Not referenced anywhere in this file; presumably kept for parity with
// HarfBuzz, hence the lint exemption. NOTE(review): confirm it is still wanted.
#[allow(dead_code)]
pub const HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT: hb_ot_shape_normalization_mode_t =
    HB_OT_SHAPE_NORMALIZATION_MODE_AUTO;
// HIGH-LEVEL DESIGN:
//
// This file exports one main function: _hb_ot_shape_normalize().
//
// This function closely follows the Unicode Normalization Algorithm,
// yet it differs from it in the ways described below.
//
// Each shaper specifies whether it prefers decomposed (NFD) or composed (NFC)
// text. The logic, however, tries to use whatever the font can support.
//
// In general, each grapheme is decomposed through a chain of 1:2
// decompositions, its marks are reordered, and it is then recomposed if
// desired. So far this is like Unicode Normalization. However, decomposition
// and recomposition only happen if the font supports the resulting characters.
//
// The goals are:
//
// - Try to render all canonically equivalent strings similarly. To really
//   achieve this we would have to always do the full decomposition and then
//   selectively recompose from there. That is too expensive, though, so
//   we skip some cases. For example, if composed is desired, we simply
//   don't touch 1-character clusters that are supported by the font, even
//   though their NFC may be different.
//
// - When a font has a precomposed character for a sequence but the 'ccmp'
//   feature in the font is not adequate, use the precomposed character,
//   which typically has better mark positioning.
//
// - When a font does not support a combining mark, but supports it precomposed
//   with the previous base, use that. This needs the itemizer to have this
//   knowledge too. We need to provide assistance to the itemizer.
//
// - When a font does not support a character but supports its canonical
//   decomposition, use the decomposition.
//
// - The complex shapers can customize the compose and decompose functions to
//   offload some of their requirements to the normalizer. For example, the
//   Indic shaper may want to disallow recomposing of two matras.
6465fn decompose_unicode(
66_: &hb_ot_shape_normalize_context_t,
67 ab: hb_codepoint_t,
68) -> Option<(hb_codepoint_t, hb_codepoint_t)> {
69super::unicode::decompose(ab)
70}
7172fn compose_unicode(
73_: &hb_ot_shape_normalize_context_t,
74 a: hb_codepoint_t,
75 b: hb_codepoint_t,
76) -> Option<hb_codepoint_t> {
77super::unicode::compose(a, b)
78}
7980fn set_glyph(info: &mut hb_glyph_info_t, font: &hb_font_t) {
81if let Some(glyph_id) = font.get_nominal_glyph(info.glyph_id) {
82 info.set_glyph_index(u32::from(glyph_id.0));
83 }
84}
8586fn output_char(buffer: &mut hb_buffer_t, unichar: u32, glyph: u32) {
87// This is very confusing indeed.
88buffer.cur_mut(0).set_glyph_index(glyph);
89 buffer.output_glyph(unichar);
90// TODO: should be _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer);
91let mut flags = buffer.scratch_flags;
92 buffer.prev_mut().init_unicode_props(&mut flags);
93 buffer.scratch_flags = flags;
94}
9596fn next_char(buffer: &mut hb_buffer_t, glyph: u32) {
97 buffer.cur_mut(0).set_glyph_index(glyph);
98 buffer.next_glyph();
99}
100101fn skip_char(buffer: &mut hb_buffer_t) {
102 buffer.skip_glyph();
103}
104105/// Returns 0 if didn't decompose, number of resulting characters otherwise.
106fn decompose(ctx: &mut hb_ot_shape_normalize_context_t, shortest: bool, ab: hb_codepoint_t) -> u32 {
107let (a, b) = match (ctx.decompose)(ctx, ab) {
108Some(decomposed) => decomposed,
109_ => return 0,
110 };
111112let a_glyph = ctx.face.get_nominal_glyph(u32::from(a));
113let b_glyph = if b != '\0' {
114match ctx.face.get_nominal_glyph(u32::from(b)) {
115Some(glyph_id) => Some(glyph_id),
116None => return 0,
117 }
118 } else {
119None
120};
121122if !shortest || a_glyph.is_none() {
123let ret = decompose(ctx, shortest, a);
124if ret != 0 {
125if let Some(b_glyph) = b_glyph {
126 output_char(ctx.buffer, u32::from(b), u32::from(b_glyph.0));
127return ret + 1;
128 }
129return ret;
130 }
131 }
132133if let Some(a_glyph) = a_glyph {
134// Output a and b.
135output_char(ctx.buffer, u32::from(a), u32::from(a_glyph.0));
136if let Some(b_glyph) = b_glyph {
137 output_char(ctx.buffer, u32::from(b), u32::from(b_glyph.0));
138return 2;
139 }
140return 1;
141 }
1421430
144}
145146fn decompose_current_character(ctx: &mut hb_ot_shape_normalize_context_t, shortest: bool) {
147let u = ctx.buffer.cur(0).as_char();
148let glyph = ctx.face.get_nominal_glyph(u32::from(u));
149150// TODO: different to harfbuzz, sync
151if !shortest || glyph.is_none() {
152if decompose(ctx, shortest, u) > 0 {
153 skip_char(ctx.buffer);
154return;
155 }
156 }
157158// TODO: different to harfbuzz, sync
159if let Some(glyph) = glyph {
160 next_char(ctx.buffer, u32::from(glyph.0));
161return;
162 }
163164if _hb_glyph_info_is_unicode_space(ctx.buffer.cur(0)) {
165let space_type = u.space_fallback();
166if space_type != hb_unicode_funcs_t::NOT_SPACE {
167let space_glyph = ctx.face.get_nominal_glyph(0x0020).or(ctx.buffer.invisible);
168169if let Some(space_glyph) = space_glyph {
170 _hb_glyph_info_set_unicode_space_fallback_type(ctx.buffer.cur_mut(0), space_type);
171 next_char(ctx.buffer, u32::from(space_glyph.0));
172 ctx.buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK;
173return;
174 }
175 }
176 }
177178// U+2011 is the only sensible character that is a no-break version of another character
179 // and not a space. The space ones are handled already. Handle this lone one.
180if u == '\u{2011}' {
181if let Some(other_glyph) = ctx.face.get_nominal_glyph(0x2010) {
182 next_char(ctx.buffer, u32::from(other_glyph.0));
183return;
184 }
185 }
186187// Insert a .notdef glyph if decomposition failed.
188next_char(ctx.buffer, 0);
189}
190191fn handle_variation_selector_cluster(
192 ctx: &mut hb_ot_shape_normalize_context_t,
193 end: usize,
194_: bool,
195) {
196let face = ctx.face;
197198// TODO: Currently if there's a variation-selector we give-up, it's just too hard.
199let buffer = &mut ctx.buffer;
200while buffer.idx < end - 1 && buffer.successful {
201if buffer.cur(1).as_char().is_variation_selector() {
202if let Some(glyph_id) =
203 face.glyph_variation_index(buffer.cur(0).as_char(), buffer.cur(1).as_char())
204 {
205 buffer.cur_mut(0).set_glyph_index(u32::from(glyph_id.0));
206let unicode = buffer.cur(0).glyph_id;
207 buffer.replace_glyphs(2, 1, &[unicode]);
208 } else {
209// Just pass on the two characters separately, let GSUB do its magic.
210set_glyph(buffer.cur_mut(0), face);
211 buffer.next_glyph();
212 set_glyph(buffer.cur_mut(0), face);
213 buffer.next_glyph();
214 }
215216// Skip any further variation selectors.
217while buffer.idx < end && buffer.cur(0).as_char().is_variation_selector() {
218 set_glyph(buffer.cur_mut(0), face);
219 buffer.next_glyph();
220 }
221 } else {
222 set_glyph(buffer.cur_mut(0), face);
223 buffer.next_glyph();
224 }
225 }
226227if ctx.buffer.idx < end {
228 set_glyph(ctx.buffer.cur_mut(0), face);
229 ctx.buffer.next_glyph();
230 }
231}
232233fn decompose_multi_char_cluster(
234 ctx: &mut hb_ot_shape_normalize_context_t,
235 end: usize,
236 short_circuit: bool,
237) {
238let mut i = ctx.buffer.idx;
239while i < end && ctx.buffer.successful {
240if ctx.buffer.info[i].as_char().is_variation_selector() {
241 handle_variation_selector_cluster(ctx, end, short_circuit);
242return;
243 }
244 i += 1;
245 }
246247while ctx.buffer.idx < end && ctx.buffer.successful {
248 decompose_current_character(ctx, short_circuit);
249 }
250}
251252fn compare_combining_class(pa: &hb_glyph_info_t, pb: &hb_glyph_info_t) -> bool {
253let a = _hb_glyph_info_get_modified_combining_class(pa);
254let b = _hb_glyph_info_get_modified_combining_class(pb);
255 a > b
256}
257258pub fn _hb_ot_shape_normalize(
259 plan: &hb_ot_shape_plan_t,
260 buffer: &mut hb_buffer_t,
261 face: &hb_font_t,
262) {
263if buffer.is_empty() {
264return;
265 }
266267let mut mode = plan.shaper.normalization_preference;
268if mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO {
269if plan.has_gpos_mark {
270// https://github.com/harfbuzz/harfbuzz/issues/653#issuecomment-423905920
271 // mode = Some(HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED);
272mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
273 } else {
274 mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
275 }
276 }
277278let mut ctx = hb_ot_shape_normalize_context_t {
279 plan,
280 buffer,
281 face,
282 decompose: plan.shaper.decompose.unwrap_or(decompose_unicode),
283 compose: plan.shaper.compose.unwrap_or(compose_unicode),
284 };
285let mut buffer = &mut ctx.buffer;
286287let always_short_circuit = mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE;
288let might_short_circuit = always_short_circuit
289 || (mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED
290 && mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT);
291292// We do a fairly straightforward yet custom normalization process in three
293 // separate rounds: decompose, reorder, recompose (if desired). Currently
294 // this makes two buffer swaps. We can make it faster by moving the last
295 // two rounds into the inner loop for the first round, but it's more readable
296 // this way.
297298 // First round, decompose
299let mut all_simple = true;
300 {
301 buffer.clear_output();
302let count = buffer.len;
303 buffer.idx = 0;
304loop {
305let mut end = buffer.idx + 1;
306while end < count && !_hb_glyph_info_is_unicode_mark(&buffer.info[end]) {
307 end += 1;
308 }
309310if end < count {
311// Leave one base for the marks to cluster with.
312end -= 1;
313 }
314315// From idx to end are simple clusters.
316if might_short_circuit {
317let len = end - buffer.idx;
318let mut done = 0;
319while done < len {
320let cur = buffer.cur_mut(done);
321 cur.set_glyph_index(match face.get_nominal_glyph(cur.glyph_id) {
322Some(glyph_id) => u32::from(glyph_id.0),
323None => break,
324 });
325 done += 1;
326 }
327 buffer.next_glyphs(done);
328 }
329330while buffer.idx < end && buffer.successful {
331 decompose_current_character(&mut ctx, might_short_circuit);
332 buffer = &mut ctx.buffer;
333 }
334335if buffer.idx == count || !buffer.successful {
336break;
337 }
338339 all_simple = false;
340341// Find all the marks now.
342end = buffer.idx + 1;
343while end < count && _hb_glyph_info_is_unicode_mark(&buffer.info[end]) {
344 end += 1;
345 }
346347// idx to end is one non-simple cluster.
348decompose_multi_char_cluster(&mut ctx, end, always_short_circuit);
349 buffer = &mut ctx.buffer;
350351if buffer.idx >= count || !buffer.successful {
352break;
353 }
354 }
355356 buffer.sync();
357 }
358359// Second round, reorder (inplace)
360if !all_simple {
361let count = buffer.len;
362let mut i = 0;
363while i < count {
364if _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) == 0 {
365 i += 1;
366continue;
367 }
368369let mut end = i + 1;
370while end < count && _hb_glyph_info_get_modified_combining_class(&buffer.info[end]) != 0
371{
372 end += 1;
373 }
374375// We are going to do a O(n^2). Only do this if the sequence is short.
376if end - i <= MAX_COMBINING_MARKS {
377 buffer.sort(i, end, compare_combining_class);
378379if let Some(reorder_marks) = ctx.plan.shaper.reorder_marks {
380 reorder_marks(ctx.plan, buffer, i, end);
381 }
382 }
383384 i = end + 1;
385 }
386 }
387if buffer.scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ != 0 {
388// For all CGJ, check if it prevented any reordering at all.
389 // If it did NOT, then make it skippable.
390 // https://github.com/harfbuzz/harfbuzz/issues/554
391for i in 1..buffer.len.saturating_sub(1) {
392if buffer.info[i].glyph_id == 0x034F
393/* CGJ */
394{
395let last = _hb_glyph_info_get_modified_combining_class(&buffer.info[i - 1]);
396let next = _hb_glyph_info_get_modified_combining_class(&buffer.info[i + 1]);
397if next == 0 || last <= next {
398 buffer.info[i].unhide();
399 }
400 }
401 }
402 }
403404// Third round, recompose
405if !all_simple
406 && buffer.successful
407 && (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
408 || mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT)
409 {
410// As noted in the comment earlier, we don't try to combine
411 // ccc=0 chars with their previous Starter.
412413let count = buffer.len;
414let mut starter = 0;
415 buffer.clear_output();
416 buffer.next_glyph();
417while buffer.idx < count && buffer.successful {
418// We don't try to compose a non-mark character with it's preceding starter.
419 // This is both an optimization to avoid trying to compose every two neighboring
420 // glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
421 // fonts are not designed to mix-and-match pre-composed syllables and Jamo.
422let cur = buffer.cur(0);
423if _hb_glyph_info_is_unicode_mark(cur) &&
424// If there's anything between the starter and this char, they should have CCC
425 // smaller than this character's.
426(starter == buffer.out_len - 1
427|| _hb_glyph_info_get_modified_combining_class(buffer.prev()) < _hb_glyph_info_get_modified_combining_class(cur))
428 {
429let a = buffer.out_info()[starter].as_char();
430let b = cur.as_char();
431if let Some(composed) = (ctx.compose)(&ctx, a, b) {
432if let Some(glyph_id) = face.get_nominal_glyph(u32::from(composed)) {
433// Copy to out-buffer.
434buffer = &mut ctx.buffer;
435 buffer.next_glyph();
436if !buffer.successful {
437return;
438 }
439440// Merge and remove the second composable.
441buffer.merge_out_clusters(starter, buffer.out_len);
442 buffer.out_len -= 1;
443444// Modify starter and carry on.
445let mut flags = buffer.scratch_flags;
446let info = &mut buffer.out_info_mut()[starter];
447 info.glyph_id = u32::from(composed);
448 info.set_glyph_index(u32::from(glyph_id.0));
449 info.init_unicode_props(&mut flags);
450 buffer.scratch_flags = flags;
451452continue;
453 }
454 }
455 }
456457// Blocked, or doesn't compose.
458buffer = &mut ctx.buffer;
459 buffer.next_glyph();
460461if _hb_glyph_info_get_modified_combining_class(buffer.prev()) == 0 {
462 starter = buffer.out_len - 1;
463 }
464 }
465466 buffer.sync();
467 }
468}