#![cfg_attr(not(any(test, doc)), no_std)]
#![cfg_attr(
    not(test),
    deny(
        clippy::indexing_slicing,
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::panic,
        clippy::exhaustive_structs,
        clippy::exhaustive_enums,
        clippy::trivially_copy_pass_by_ref,
        missing_debug_implementations,
    )
)]
#![warn(missing_docs)]

extern crate alloc;

// Compile-time sanity check plus constructor: when the `icu_properties`
// feature is enabled, verify in a `const` block that the hard-coded ccc
// value matches icu_properties, then produce the `CanonicalCombiningClass`.
macro_rules! ccc {
    ($name:ident, $num:expr) => {
        const {
            #[cfg(feature = "icu_properties")]
            if icu_properties::props::CanonicalCombiningClass::$name.to_icu4c_value() != $num {
                panic!("icu_normalizer has incorrect ccc values")
            }
            CanonicalCombiningClass::from_icu4c_value($num)
        }
    };
}
76
77pub mod properties;
78pub mod provider;
79pub mod uts46;
80
81use crate::provider::CanonicalCompositions;
82use crate::provider::DecompositionData;
83use crate::provider::NormalizerNfdDataV1;
84use crate::provider::NormalizerNfkdDataV1;
85use crate::provider::NormalizerUts46DataV1;
86use alloc::borrow::Cow;
87use alloc::string::String;
88use core::char::REPLACEMENT_CHARACTER;
89use icu_collections::char16trie::Char16Trie;
90use icu_collections::char16trie::Char16TrieIterator;
91use icu_collections::char16trie::TrieResult;
92use icu_collections::codepointtrie::CodePointTrie;
93#[cfg(feature = "icu_properties")]
94use icu_properties::props::CanonicalCombiningClass;
95use icu_provider::prelude::*;
96use provider::DecompositionTables;
97use provider::NormalizerNfcV1;
98use provider::NormalizerNfdTablesV1;
99use provider::NormalizerNfkdTablesV1;
100use smallvec::SmallVec;
101#[cfg(feature = "utf16_iter")]
102use utf16_iter::Utf16CharsEx;
103#[cfg(feature = "utf8_iter")]
104use utf8_iter::Utf8CharsEx;
105use zerovec::{zeroslice, ZeroSlice};
106
107#[cfg(not(feature = "icu_properties"))]
110#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
111struct CanonicalCombiningClass(pub(crate) u8);
112
113#[cfg(not(feature = "icu_properties"))]
114impl CanonicalCombiningClass {
115 const fn from_icu4c_value(v: u8) -> Self {
116 Self(v)
117 }
118 const fn to_icu4c_value(self) -> u8 {
119 self.0
120 }
121}
122
123const CCC_NOT_REORDERED: CanonicalCombiningClass = ccc!(NotReordered, 0);
124const CCC_ABOVE: CanonicalCombiningClass = ccc!(Above, 230);
125
126#[derive(Debug, PartialEq, Eq)]
128enum IgnorableBehavior {
129 Unsupported,
131 Ignored,
133 ReplacementCharacter,
136}
137
138const IGNORABLE_MARKER: u32 = 0xFFFFFFFF;
142
143const NON_ROUND_TRIP_MARKER: u32 = 1 << 30;
147
148const BACKWARD_COMBINING_MARKER: u32 = 1 << 31;
153
154const HIGH_ZEROS_MASK: u32 = 0x3FFF0000;
160
161const LOW_ZEROS_MASK: u32 = 0xFFE0;
166
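// Layout of the decomposition trie value, as consumed by `decomposing_next`
// below: bits 31 and 30 are the markers defined above; with those masked off,
// zero means the character decomposes to itself, and otherwise the remaining
// bits encode either two packed BMP characters, a single BMP character, or an
// offset-and-length reference into the scalar tables. The encodings are told
// apart with `HIGH_ZEROS_MASK` and `LOW_ZEROS_MASK`.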
167fn trie_value_has_ccc(trie_value: u32) -> bool {
172 (trie_value & 0x3FFFFE00) == 0xD800
173}
174
175fn trie_value_indicates_special_non_starter_decomposition(trie_value: u32) -> bool {
179 (trie_value & 0x3FFFFF00) == 0xD900
180}
181
182fn decomposition_starts_with_non_starter(trie_value: u32) -> bool {
187 trie_value_has_ccc(trie_value)
188}
189
190fn ccc_from_trie_value(trie_value: u32) -> CanonicalCombiningClass {
194 if trie_value_has_ccc(trie_value) {
195 CanonicalCombiningClass::from_icu4c_value(trie_value as u8)
196 } else {
197 CCC_NOT_REORDERED
198 }
199}
200
201static FDFA_NFKD: [u16; 17] = [
204 0x644, 0x649, 0x20, 0x627, 0x644, 0x644, 0x647, 0x20, 0x639, 0x644, 0x64A, 0x647, 0x20, 0x648,
205 0x633, 0x644, 0x645,
206];
207
208const FDFA_MARKER: u16 = 1;
213
214const HANGUL_S_BASE: u32 = 0xAC00;
217const HANGUL_L_BASE: u32 = 0x1100;
219const HANGUL_V_BASE: u32 = 0x1161;
221const HANGUL_T_BASE: u32 = 0x11A7;
223const HANGUL_L_COUNT: u32 = 19;
225const HANGUL_V_COUNT: u32 = 21;
227const HANGUL_T_COUNT: u32 = 28;
229const HANGUL_N_COUNT: u32 = 588;
231const HANGUL_S_COUNT: u32 = 11172;
233
234const HANGUL_JAMO_LIMIT: u32 = 0x1200;
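// Worked example of the algorithmic Hangul decomposition driven by these
// constants (see `decomposing_next`): for U+AC01 HANGUL SYLLABLE GAG,
// offset = 0xAC01 - HANGUL_S_BASE = 1, so
//   L = 1 / HANGUL_N_COUNT = 0                    -> U+1100 (HANGUL_L_BASE + 0)
//   V = (1 % HANGUL_N_COUNT) / HANGUL_T_COUNT = 0 -> U+1161 (HANGUL_V_BASE + 0)
//   T = 1 % HANGUL_T_COUNT = 1                    -> U+11A8 (HANGUL_T_BASE + 1)
// i.e. U+AC01 decomposes to <U+1100, U+1161, U+11A8>.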
236
237#[inline(always)]
243fn unwrap_or_gigo<T>(opt: Option<T>, default: T) -> T {
244 if let Some(val) = opt {
245 val
246 } else {
247 debug_assert!(false);
249 default
250 }
251}
252
253#[inline(always)]
255fn char_from_u32(u: u32) -> char {
256 unwrap_or_gigo(core::char::from_u32(u), REPLACEMENT_CHARACTER)
257}
258
259#[inline(always)]
261fn char_from_u16(u: u16) -> char {
262 char_from_u32(u32::from(u))
263}
264
265const EMPTY_U16: &ZeroSlice<u16> = zeroslice![];
266
267const EMPTY_CHAR: &ZeroSlice<char> = zeroslice![];
268
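// The `wrapping_sub` below turns the two-sided range check into a single
// comparison: e.g. for `in_inclusive_range('A', 'a', 'z')`, 0x41 - 0x61 wraps
// around to a huge value, so the check correctly returns `false` without a
// second branch.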
269#[inline(always)]
270fn in_inclusive_range(c: char, start: char, end: char) -> bool {
271 u32::from(c).wrapping_sub(u32::from(start)) <= (u32::from(end) - u32::from(start))
272}
273
274#[inline(always)]
275#[cfg(feature = "utf16_iter")]
276fn in_inclusive_range16(u: u16, start: u16, end: u16) -> bool {
277 u.wrapping_sub(start) <= (end - start)
278}
279
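// Worked example of the algorithmic Hangul branch in `compose` below:
//   compose(_, 'ᄀ' U+1100, 'ᅡ' U+1161): l = 0, v = 0, lv = 0 -> U+AC00 '가'
//   compose(_, '가' U+AC00, 'ᆨ' U+11A8): lv = 0 (an LV syllable),
//     trailing index = 0x11A8 - HANGUL_T_BASE = 1             -> U+AC01 '각'
// A `second` outside U+1161..=U+11FF falls through to the trie-based
// `compose_non_hangul`.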
280#[inline]
284fn compose(iter: Char16TrieIterator, starter: char, second: char) -> Option<char> {
285 let v = u32::from(second).wrapping_sub(HANGUL_V_BASE);
286 if v >= HANGUL_JAMO_LIMIT - HANGUL_V_BASE {
287 return compose_non_hangul(iter, starter, second);
288 }
289 if v < HANGUL_V_COUNT {
290 let l = u32::from(starter).wrapping_sub(HANGUL_L_BASE);
291 if l < HANGUL_L_COUNT {
292 let lv = l * HANGUL_N_COUNT + v * HANGUL_T_COUNT;
293 return Some(unsafe { char::from_u32_unchecked(HANGUL_S_BASE + lv) });
295 }
296 return None;
297 }
298 if in_inclusive_range(second, '\u{11A8}', '\u{11C2}') {
299 let lv = u32::from(starter).wrapping_sub(HANGUL_S_BASE);
300 if lv < HANGUL_S_COUNT && lv % HANGUL_T_COUNT == 0 {
301 let lvt = lv + (u32::from(second) - HANGUL_T_BASE);
302 return Some(unsafe { char::from_u32_unchecked(HANGUL_S_BASE + lvt) });
304 }
305 }
306 None
307}
308
309fn compose_non_hangul(mut iter: Char16TrieIterator, starter: char, second: char) -> Option<char> {
313 match iter.next(second) {
318 TrieResult::NoMatch => None,
319 TrieResult::NoValue => match iter.next(starter) {
320 TrieResult::NoMatch => None,
321 TrieResult::FinalValue(i) => {
322 if let Some(c) = char::from_u32(i as u32) {
323 Some(c)
324 } else {
325 debug_assert!(false);
327 None
328 }
329 }
330 TrieResult::NoValue | TrieResult::Intermediate(_) => {
331 debug_assert!(false);
333 None
334 }
335 },
336 TrieResult::FinalValue(_) | TrieResult::Intermediate(_) => {
337 debug_assert!(false);
339 None
340 }
341 }
342}
343
344#[inline(always)]
346fn starter_and_decomposes_to_self_impl(trie_val: u32) -> bool {
347 (trie_val & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0
350}
351
352#[inline(always)]
354fn potential_passthrough_and_cannot_combine_backwards_impl(trie_val: u32) -> bool {
355 (trie_val & (NON_ROUND_TRIP_MARKER | BACKWARD_COMBINING_MARKER)) == 0
356}
357
358#[derive(Debug, PartialEq, Eq)]
363struct CharacterAndTrieValue {
364 character: char,
365 trie_val: u32,
367}
368
369impl CharacterAndTrieValue {
370 #[inline(always)]
371 pub fn new(c: char, trie_value: u32) -> Self {
372 CharacterAndTrieValue {
373 character: c,
374 trie_val: trie_value,
375 }
376 }
377
378 #[inline(always)]
379 pub fn starter_and_decomposes_to_self(&self) -> bool {
380 starter_and_decomposes_to_self_impl(self.trie_val)
381 }
382
383 #[inline(always)]
385 #[cfg(feature = "utf8_iter")]
386 pub fn starter_and_decomposes_to_self_except_replacement(&self) -> bool {
387 (self.trie_val & !BACKWARD_COMBINING_MARKER) == 0
392 }
393
394 #[inline(always)]
396 pub fn can_combine_backwards(&self) -> bool {
397 (self.trie_val & BACKWARD_COMBINING_MARKER) != 0
398 }
399 #[inline(always)]
401 pub fn potential_passthrough(&self) -> bool {
402 (self.trie_val & NON_ROUND_TRIP_MARKER) == 0
403 }
404 #[inline(always)]
406 pub fn potential_passthrough_and_cannot_combine_backwards(&self) -> bool {
407 potential_passthrough_and_cannot_combine_backwards_impl(self.trie_val)
408 }
409}
410
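// `CharacterAndClass` packs a scalar value (low 24 bits) and its canonical
// combining class (high 8 bits) into a single `u32`; 0xFF in the high byte is
// a placeholder meaning "ccc not yet looked up" (resolved later by
// `set_ccc_from_trie_if_not_already_set`). For example, U+0301 with ccc 230
// is stored as 0xE600_0301.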
411#[derive(Debug)]
434struct CharacterAndClass(u32);
435
436impl CharacterAndClass {
437 pub fn new(c: char, ccc: CanonicalCombiningClass) -> Self {
438 CharacterAndClass(u32::from(c) | (u32::from(ccc.to_icu4c_value()) << 24))
439 }
440 pub fn new_with_placeholder(c: char) -> Self {
441 CharacterAndClass(u32::from(c) | ((0xFF) << 24))
442 }
443 pub fn new_with_trie_value(c_tv: CharacterAndTrieValue) -> Self {
444 Self::new(c_tv.character, ccc_from_trie_value(c_tv.trie_val))
445 }
446 pub fn new_starter(c: char) -> Self {
447 CharacterAndClass(u32::from(c))
448 }
449 pub fn character(&self) -> char {
452 unsafe { char::from_u32_unchecked(self.0 & 0xFFFFFF) }
455 }
456 pub fn ccc(&self) -> CanonicalCombiningClass {
458 CanonicalCombiningClass::from_icu4c_value((self.0 >> 24) as u8)
459 }
460
461 pub fn character_and_ccc(&self) -> (char, CanonicalCombiningClass) {
462 (self.character(), self.ccc())
463 }
464 pub fn set_ccc_from_trie_if_not_already_set(&mut self, trie: &CodePointTrie<u32>) {
465 if self.0 >> 24 != 0xFF {
466 return;
467 }
468 let scalar = self.0 & 0xFFFFFF;
469 self.0 =
470 ((ccc_from_trie_value(trie.get32_u32(scalar)).to_icu4c_value() as u32) << 24) | scalar;
471 }
472}
473
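// Note: `slice::sort_by_key` is a stable sort, which is what canonical
// ordering requires; characters with equal ccc must keep their relative order.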
474#[inline(always)]
476fn sort_slice_by_ccc(slice: &mut [CharacterAndClass], trie: &CodePointTrie<u32>) {
477 if slice.len() < 2 {
483 return;
484 }
485 slice
486 .iter_mut()
487 .for_each(|cc| cc.set_ccc_from_trie_if_not_already_set(trie));
488 slice.sort_by_key(|cc| cc.ccc());
489}
490
/// An iterator adaptor that turns an iterator over `char` into a lazily-decomposed
/// sequence of `char`.
#[derive(Debug)]
pub struct Decomposition<'data, I>
where
    I: Iterator<Item = char>,
{
    delegate: I,
    /// Decomposed characters not yet read out, with their combining classes.
    buffer: SmallVec<[CharacterAndClass; 17]>,
    /// Read position within `buffer`.
    buffer_pos: usize,
    /// The most recently read but not yet processed character from `delegate`.
    pending: Option<CharacterAndTrieValue>,
    trie: &'data CodePointTrie<'data, u32>,
    scalars16: &'data ZeroSlice<u16>,
    scalars24: &'data ZeroSlice<char>,
    supplementary_scalars16: &'data ZeroSlice<u16>,
    supplementary_scalars24: &'data ZeroSlice<char>,
    /// Characters below this bound are passed through without a trie lookup.
    decomposition_passthrough_bound: u32,
    /// How characters whose trie value is `IGNORABLE_MARKER` are handled.
    ignorable_behavior: IgnorableBehavior,
}
522
523impl<'data, I> Decomposition<'data, I>
524where
525 I: Iterator<Item = char>,
526{
    #[doc(hidden)]
    pub fn new(
538 delegate: I,
539 decompositions: &'data DecompositionData,
540 tables: &'data DecompositionTables,
541 ) -> Self {
542 Self::new_with_supplements(
543 delegate,
544 decompositions,
545 tables,
546 None,
547 0xC0,
548 IgnorableBehavior::Unsupported,
549 )
550 }
551
552 fn new_with_supplements(
559 delegate: I,
560 decompositions: &'data DecompositionData,
561 tables: &'data DecompositionTables,
562 supplementary_tables: Option<&'data DecompositionTables>,
563 decomposition_passthrough_bound: u8,
564 ignorable_behavior: IgnorableBehavior,
565 ) -> Self {
566 let mut ret = Decomposition::<I> {
567 delegate,
            buffer: SmallVec::new(),
            buffer_pos: 0,
570 pending: Some(CharacterAndTrieValue::new('\u{FFFF}', 0)),
573 trie: &decompositions.trie,
574 scalars16: &tables.scalars16,
575 scalars24: &tables.scalars24,
576 supplementary_scalars16: if let Some(supplementary) = supplementary_tables {
577 &supplementary.scalars16
578 } else {
579 EMPTY_U16
580 },
581 supplementary_scalars24: if let Some(supplementary) = supplementary_tables {
582 &supplementary.scalars24
583 } else {
584 EMPTY_CHAR
585 },
586 decomposition_passthrough_bound: u32::from(decomposition_passthrough_bound),
587 ignorable_behavior,
588 };
        // Discard the U+FFFF sentinel set as `pending` above; this primes the
        // iterator state from the delegate.
        let _ = ret.next();
        ret
591 }
592
593 fn push_decomposition16(
594 &mut self,
595 offset: usize,
596 len: usize,
597 only_non_starters_in_trail: bool,
598 slice16: &ZeroSlice<u16>,
599 ) -> (char, usize) {
600 let (starter, tail) = slice16
601 .get_subslice(offset..offset + len)
602 .and_then(|slice| slice.split_first())
603 .map_or_else(
604 || {
605 debug_assert!(false);
607 (REPLACEMENT_CHARACTER, EMPTY_U16)
608 },
609 |(first, trail)| (char_from_u16(first), trail),
610 );
611 if only_non_starters_in_trail {
612 self.buffer.extend(
614 tail.iter()
615 .map(|u| CharacterAndClass::new_with_placeholder(char_from_u16(u))),
616 );
617 (starter, 0)
618 } else {
619 let mut i = 0;
620 let mut combining_start = 0;
621 for u in tail.iter() {
622 let ch = char_from_u16(u);
623 let trie_value = self.trie.get(ch);
624 self.buffer.push(CharacterAndClass::new_with_trie_value(
625 CharacterAndTrieValue::new(ch, trie_value),
626 ));
627 i += 1;
628 if !decomposition_starts_with_non_starter(trie_value) {
631 combining_start = i;
632 }
633 }
634 (starter, combining_start)
635 }
636 }
637
638 fn push_decomposition32(
639 &mut self,
640 offset: usize,
641 len: usize,
642 only_non_starters_in_trail: bool,
643 slice32: &ZeroSlice<char>,
644 ) -> (char, usize) {
645 let (starter, tail) = slice32
646 .get_subslice(offset..offset + len)
647 .and_then(|slice| slice.split_first())
648 .unwrap_or_else(|| {
649 debug_assert!(false);
651 (REPLACEMENT_CHARACTER, EMPTY_CHAR)
652 });
653 if only_non_starters_in_trail {
654 self.buffer
656 .extend(tail.iter().map(CharacterAndClass::new_with_placeholder));
657 (starter, 0)
658 } else {
659 let mut i = 0;
660 let mut combining_start = 0;
661 for ch in tail.iter() {
662 let trie_value = self.trie.get(ch);
663 self.buffer.push(CharacterAndClass::new_with_trie_value(
664 CharacterAndTrieValue::new(ch, trie_value),
665 ));
666 i += 1;
667 if !decomposition_starts_with_non_starter(trie_value) {
670 combining_start = i;
671 }
672 }
673 (starter, combining_start)
674 }
675 }
676
677 #[inline(always)]
678 fn attach_trie_value(&self, c: char) -> CharacterAndTrieValue {
679 CharacterAndTrieValue::new(c, self.trie.get(c))
680 }
681
682 fn delegate_next_no_pending(&mut self) -> Option<CharacterAndTrieValue> {
683 debug_assert!(self.pending.is_none());
684 loop {
685 let c = self.delegate.next()?;
686
687 if u32::from(c) < self.decomposition_passthrough_bound {
689 return Some(CharacterAndTrieValue::new(c, 0));
690 }
691
692 let trie_val = self.trie.get(c);
693 if trie_val == IGNORABLE_MARKER {
696 match self.ignorable_behavior {
697 IgnorableBehavior::Unsupported => {
698 debug_assert!(false);
699 }
700 IgnorableBehavior::ReplacementCharacter => {
701 return Some(CharacterAndTrieValue::new(
702 c,
703 u32::from(REPLACEMENT_CHARACTER) | NON_ROUND_TRIP_MARKER,
704 ));
705 }
706 IgnorableBehavior::Ignored => {
707 continue;
709 }
710 }
711 }
712 return Some(CharacterAndTrieValue::new(c, trie_val));
713 }
714 }
715
716 fn delegate_next(&mut self) -> Option<CharacterAndTrieValue> {
717 if let Some(pending) = self.pending.take() {
718 Some(pending)
723 } else {
724 self.delegate_next_no_pending()
725 }
726 }
727
728 fn decomposing_next(&mut self, c_and_trie_val: CharacterAndTrieValue) -> char {
729 let (starter, combining_start) = {
730 let c = c_and_trie_val.character;
731 let decomposition = c_and_trie_val.trie_val;
733 if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 {
736 (c, 0)
738 } else {
739 let high_zeros = (decomposition & HIGH_ZEROS_MASK) == 0;
740 let low_zeros = (decomposition & LOW_ZEROS_MASK) == 0;
741 if !high_zeros && !low_zeros {
742 let starter = char_from_u32(decomposition & 0x7FFF);
744 let combining = char_from_u32((decomposition >> 15) & 0x7FFF);
745 self.buffer
746 .push(CharacterAndClass::new_with_placeholder(combining));
747 (starter, 0)
748 } else if high_zeros {
                // Hangul syllables decompose algorithmically.
                let hangul_offset = u32::from(c).wrapping_sub(HANGUL_S_BASE);
                if hangul_offset < HANGUL_S_COUNT {
769 debug_assert_eq!(decomposition, 1);
770 let l = hangul_offset / HANGUL_N_COUNT;
773 let v = (hangul_offset % HANGUL_N_COUNT) / HANGUL_T_COUNT;
774 let t = hangul_offset % HANGUL_T_COUNT;
775
776 self.buffer.push(CharacterAndClass::new_starter(unsafe {
780 core::char::from_u32_unchecked(HANGUL_V_BASE + v)
781 }));
782 let first = unsafe { core::char::from_u32_unchecked(HANGUL_L_BASE + l) };
783 if t != 0 {
784 self.buffer.push(CharacterAndClass::new_starter(unsafe {
785 core::char::from_u32_unchecked(HANGUL_T_BASE + t)
786 }));
787 (first, 2)
788 } else {
789 (first, 1)
790 }
791 } else {
792 let singleton = decomposition as u16;
793 if singleton != FDFA_MARKER {
794 let starter = char_from_u16(singleton);
796 (starter, 0)
797 } else {
798 self.buffer.extend(FDFA_NFKD.map(|u| {
800 CharacterAndClass::new_starter(unsafe {
803 core::char::from_u32_unchecked(u32::from(u))
804 })
805 }));
806 ('\u{0635}', 17)
807 }
808 }
809 } else {
810 debug_assert!(low_zeros);
811 let offset = (((decomposition & !(0b11 << 30)) >> 16) as usize) - 1;
813 let len_bits = decomposition & 0b1111;
815 let only_non_starters_in_trail = (decomposition & 0b10000) != 0;
816 if offset < self.scalars16.len() {
817 self.push_decomposition16(
818 offset,
819 (len_bits + 2) as usize,
820 only_non_starters_in_trail,
821 self.scalars16,
822 )
823 } else if offset < self.scalars16.len() + self.scalars24.len() {
824 self.push_decomposition32(
825 offset - self.scalars16.len(),
826 (len_bits + 1) as usize,
827 only_non_starters_in_trail,
828 self.scalars24,
829 )
830 } else if offset
831 < self.scalars16.len()
832 + self.scalars24.len()
833 + self.supplementary_scalars16.len()
834 {
835 self.push_decomposition16(
836 offset - (self.scalars16.len() + self.scalars24.len()),
837 (len_bits + 2) as usize,
838 only_non_starters_in_trail,
839 self.supplementary_scalars16,
840 )
841 } else {
842 self.push_decomposition32(
843 offset
844 - (self.scalars16.len()
845 + self.scalars24.len()
846 + self.supplementary_scalars16.len()),
847 (len_bits + 1) as usize,
848 only_non_starters_in_trail,
849 self.supplementary_scalars24,
850 )
851 }
852 }
853 }
854 };
855 self.gather_and_sort_combining(combining_start);
858 starter
859 }
860
861 fn gather_and_sort_combining(&mut self, combining_start: usize) {
862 while let Some(ch_and_trie_val) = self.delegate_next() {
865 if !trie_value_has_ccc(ch_and_trie_val.trie_val) {
866 self.pending = Some(ch_and_trie_val);
867 break;
868 } else if !trie_value_indicates_special_non_starter_decomposition(
869 ch_and_trie_val.trie_val,
870 ) {
871 self.buffer
872 .push(CharacterAndClass::new_with_trie_value(ch_and_trie_val));
873 } else {
874 let mapped = match ch_and_trie_val.character {
876 '\u{0340}' => {
877 CharacterAndClass::new('\u{0300}', CCC_ABOVE)
879 }
880 '\u{0341}' => {
881 CharacterAndClass::new('\u{0301}', CCC_ABOVE)
883 }
884 '\u{0343}' => {
885 CharacterAndClass::new('\u{0313}', CCC_ABOVE)
887 }
888 '\u{0344}' => {
889 self.buffer
891 .push(CharacterAndClass::new('\u{0308}', CCC_ABOVE));
892 CharacterAndClass::new('\u{0301}', CCC_ABOVE)
893 }
894 '\u{0F73}' => {
895 self.buffer
897 .push(CharacterAndClass::new('\u{0F71}', ccc!(CCC129, 129)));
898 CharacterAndClass::new('\u{0F72}', ccc!(CCC130, 130))
899 }
900 '\u{0F75}' => {
901 self.buffer
903 .push(CharacterAndClass::new('\u{0F71}', ccc!(CCC129, 129)));
904 CharacterAndClass::new('\u{0F74}', ccc!(CCC132, 132))
905 }
906 '\u{0F81}' => {
907 self.buffer
909 .push(CharacterAndClass::new('\u{0F71}', ccc!(CCC129, 129)));
910 CharacterAndClass::new('\u{0F80}', ccc!(CCC130, 130))
911 }
912 '\u{FF9E}' => {
913 CharacterAndClass::new('\u{3099}', ccc!(KanaVoicing, 8))
915 }
916 '\u{FF9F}' => {
917 CharacterAndClass::new('\u{309A}', ccc!(KanaVoicing, 8))
919 }
920 _ => {
921 debug_assert!(false);
923 CharacterAndClass::new_with_placeholder(REPLACEMENT_CHARACTER)
924 }
925 };
926 self.buffer.push(mapped);
927 }
928 }
929 #[allow(clippy::indexing_slicing)]
932 sort_slice_by_ccc(&mut self.buffer[combining_start..], self.trie);
933 }
934}
935
936impl<I> Iterator for Decomposition<'_, I>
937where
938 I: Iterator<Item = char>,
939{
940 type Item = char;
941
942 fn next(&mut self) -> Option<char> {
943 if let Some(ret) = self.buffer.get(self.buffer_pos).map(|c| c.character()) {
944 self.buffer_pos += 1;
945 if self.buffer_pos == self.buffer.len() {
946 self.buffer.clear();
947 self.buffer_pos = 0;
948 }
949 return Some(ret);
950 }
951 debug_assert_eq!(self.buffer_pos, 0);
952 let c_and_trie_val = self.pending.take()?;
953 Some(self.decomposing_next(c_and_trie_val))
954 }
955}
956
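/// An iterator adaptor that turns an iterator over `char` into a lazily
/// canonically composed sequence of `char`.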
957#[derive(Debug)]
960pub struct Composition<'data, I>
961where
962 I: Iterator<Item = char>,
963{
964 decomposition: Decomposition<'data, I>,
967 canonical_compositions: Char16Trie<'data>,
969 unprocessed_starter: Option<char>,
974 composition_passthrough_bound: u32,
980}
981
982impl<'data, I> Composition<'data, I>
983where
984 I: Iterator<Item = char>,
985{
986 fn new(
987 decomposition: Decomposition<'data, I>,
988 canonical_compositions: Char16Trie<'data>,
989 composition_passthrough_bound: u16,
990 ) -> Self {
991 Self {
992 decomposition,
993 canonical_compositions,
994 unprocessed_starter: None,
995 composition_passthrough_bound: u32::from(composition_passthrough_bound),
996 }
997 }
998
999 #[inline(always)]
1003 pub fn compose(&self, starter: char, second: char) -> Option<char> {
1004 compose(self.canonical_compositions.iter(), starter, second)
1005 }
1006
1007 #[inline(always)]
1011 fn compose_non_hangul(&self, starter: char, second: char) -> Option<char> {
1012 compose_non_hangul(self.canonical_compositions.iter(), starter, second)
1013 }
1014}
1015
1016impl<I> Iterator for Composition<'_, I>
1017where
1018 I: Iterator<Item = char>,
1019{
1020 type Item = char;
1021
1022 #[inline]
1023 fn next(&mut self) -> Option<char> {
        let mut undecomposed_starter = CharacterAndTrieValue::new('\u{0}', 0);
        if self.unprocessed_starter.is_none() {
1026 #[allow(clippy::never_loop)]
1028 loop {
1029 if let Some((character, ccc)) = self
1030 .decomposition
1031 .buffer
1032 .get(self.decomposition.buffer_pos)
1033 .map(|c| c.character_and_ccc())
1034 {
1035 self.decomposition.buffer_pos += 1;
1036 if self.decomposition.buffer_pos == self.decomposition.buffer.len() {
1037 self.decomposition.buffer.clear();
1038 self.decomposition.buffer_pos = 0;
1039 }
1040 if ccc == CCC_NOT_REORDERED {
1041 self.unprocessed_starter = Some(character);
                        break;
                    }
1051 return Some(character);
1052 }
1053 debug_assert_eq!(self.decomposition.buffer_pos, 0);
1054 undecomposed_starter = self.decomposition.pending.take()?;
1055 if u32::from(undecomposed_starter.character) < self.composition_passthrough_bound
1056 || undecomposed_starter.potential_passthrough()
1057 {
1058 if let Some(upcoming) = self.decomposition.delegate_next_no_pending() {
1063 let cannot_combine_backwards = u32::from(upcoming.character)
1064 < self.composition_passthrough_bound
1065 || !upcoming.can_combine_backwards();
1066 self.decomposition.pending = Some(upcoming);
1067 if cannot_combine_backwards {
1068 return Some(undecomposed_starter.character);
1070 }
1071 } else {
1072 return Some(undecomposed_starter.character);
1074 }
1075 }
                break;
            }
1078 }
        let mut starter = '\u{0}';
        let mut attempt_composition = false;
1085 loop {
1086 if let Some(unprocessed) = self.unprocessed_starter.take() {
1087 debug_assert_eq!(undecomposed_starter, CharacterAndTrieValue::new('\u{0}', 0));
1088 debug_assert_eq!(starter, '\u{0}');
1089 starter = unprocessed;
1090 } else {
1091 debug_assert_eq!(self.decomposition.buffer_pos, 0);
1092 let next_starter = self.decomposition.decomposing_next(undecomposed_starter);
1093 if !attempt_composition {
1094 starter = next_starter;
1095 } else if let Some(composed) = self.compose(starter, next_starter) {
1096 starter = composed;
1097 } else {
1098 self.unprocessed_starter = Some(next_starter);
1101 return Some(starter);
1102 }
1103 }
1104 loop {
1107 let (character, ccc) = if let Some((character, ccc)) = self
1108 .decomposition
1109 .buffer
1110 .get(self.decomposition.buffer_pos)
1111 .map(|c| c.character_and_ccc())
1112 {
1113 (character, ccc)
1114 } else {
1115 self.decomposition.buffer.clear();
1116 self.decomposition.buffer_pos = 0;
1117 break;
1118 };
1119 if let Some(composed) = self.compose(starter, character) {
1120 starter = composed;
1121 self.decomposition.buffer_pos += 1;
1122 continue;
1123 }
1124 let mut most_recent_skipped_ccc = ccc;
1125 {
1126 let _ = self
1127 .decomposition
1128 .buffer
1129 .drain(0..self.decomposition.buffer_pos);
1130 }
1131 self.decomposition.buffer_pos = 0;
1132 if most_recent_skipped_ccc == CCC_NOT_REORDERED {
1133 return Some(starter);
1136 }
                // Index 0 now holds the character that just failed to compose.
                let mut i = 1;
                while let Some((character, ccc)) = self
1139 .decomposition
1140 .buffer
1141 .get(i)
1142 .map(|c| c.character_and_ccc())
1143 {
1144 if ccc == CCC_NOT_REORDERED {
1145 return Some(starter);
1147 }
1148 debug_assert!(ccc >= most_recent_skipped_ccc);
1149 if ccc != most_recent_skipped_ccc {
1150 if let Some(composed) = self.compose_non_hangul(starter, character) {
1154 self.decomposition.buffer.remove(i);
1155 starter = composed;
1156 continue;
1157 }
1158 }
1159 most_recent_skipped_ccc = ccc;
1160 i += 1;
1161 }
1162 break;
1163 }
1164
1165 debug_assert_eq!(self.decomposition.buffer_pos, 0);
1166
1167 if !self.decomposition.buffer.is_empty() {
1168 return Some(starter);
1169 }
1170 #[allow(clippy::unwrap_used)]
1172 if self.decomposition.pending.is_some() {
1173 let pending = self.decomposition.pending.as_ref().unwrap();
1181 if u32::from(pending.character) < self.composition_passthrough_bound
1182 || !pending.can_combine_backwards()
1183 {
1184 return Some(starter);
1186 }
1187 undecomposed_starter = self.decomposition.pending.take().unwrap();
1190 attempt_composition = true;
1195 continue;
1196 }
1197 return Some(starter);
1199 }
1200 }
1201}
1202
1203macro_rules! composing_normalize_to {
1204 ($(#[$meta:meta])*,
1205 $normalize_to:ident,
1206 $write:path,
1207 $slice:ty,
1208 $prolog:block,
1209 $always_valid_utf:literal,
1210 $as_slice:ident,
1211 $fast:block,
1212 $text:ident,
1213 $sink:ident,
1214 $composition:ident,
1215 $composition_passthrough_bound:ident,
1216 $undecomposed_starter:ident,
1217 $pending_slice:ident,
1218 $len_utf:ident,
1219 ) => {
1220 $(#[$meta])*
1221 pub fn $normalize_to<W: $write + ?Sized>(
1222 &self,
1223 $text: $slice,
1224 $sink: &mut W,
1225 ) -> core::fmt::Result {
1226 $prolog
1227 let mut $composition = self.normalize_iter($text.chars());
1228 debug_assert_eq!($composition.decomposition.ignorable_behavior, IgnorableBehavior::Unsupported);
1229 for cc in $composition.decomposition.buffer.drain(..) {
1230 $sink.write_char(cc.character())?;
1231 }
1232
1233 let $composition_passthrough_bound = $composition.composition_passthrough_bound;
1235 'outer: loop {
1236 debug_assert_eq!($composition.decomposition.buffer_pos, 0);
1237 let mut $undecomposed_starter =
1238 if let Some(pending) = $composition.decomposition.pending.take() {
1239 pending
1240 } else {
1241 return Ok(());
1242 };
1243 #[allow(clippy::indexing_slicing)]
1246 if u32::from($undecomposed_starter.character) < $composition_passthrough_bound ||
1247 $undecomposed_starter.potential_passthrough()
1248 {
1249 if $always_valid_utf || $undecomposed_starter.character != REPLACEMENT_CHARACTER {
1253 let $pending_slice = &$text[$text.len() - $composition.decomposition.delegate.$as_slice().len() - $undecomposed_starter.character.$len_utf()..];
1254 $fast
1260 }
1261 }
1262 let mut starter = $composition
1264 .decomposition
1265 .decomposing_next($undecomposed_starter);
1266 'bufferloop: loop {
1267 loop {
1270 let (character, ccc) = if let Some((character, ccc)) = $composition
1271 .decomposition
1272 .buffer
1273 .get($composition.decomposition.buffer_pos)
1274 .map(|c| c.character_and_ccc())
1275 {
1276 (character, ccc)
1277 } else {
1278 $composition.decomposition.buffer.clear();
1279 $composition.decomposition.buffer_pos = 0;
1280 break;
1281 };
1282 if let Some(composed) = $composition.compose(starter, character) {
1283 starter = composed;
1284 $composition.decomposition.buffer_pos += 1;
1285 continue;
1286 }
1287 let mut most_recent_skipped_ccc = ccc;
1288 if most_recent_skipped_ccc == CCC_NOT_REORDERED {
1289 $sink.write_char(starter)?;
1294 starter = character;
1295 $composition.decomposition.buffer_pos += 1;
1296 continue 'bufferloop;
1297 } else {
1298 {
1299 let _ = $composition
1300 .decomposition
1301 .buffer
1302 .drain(0..$composition.decomposition.buffer_pos);
1303 }
1304 $composition.decomposition.buffer_pos = 0;
1305 }
                        let mut i = 1;
                        while let Some((character, ccc)) = $composition
1308 .decomposition
1309 .buffer
1310 .get(i)
1311 .map(|c| c.character_and_ccc())
1312 {
1313 if ccc == CCC_NOT_REORDERED {
1314 $sink.write_char(starter)?;
1316 for cc in $composition.decomposition.buffer.drain(..i) {
1317 $sink.write_char(cc.character())?;
1318 }
1319 starter = character;
1320 {
1321 let removed = $composition.decomposition.buffer.remove(0);
1322 debug_assert_eq!(starter, removed.character());
1323 }
1324 debug_assert_eq!($composition.decomposition.buffer_pos, 0);
1325 continue 'bufferloop;
1326 }
1327 debug_assert!(ccc >= most_recent_skipped_ccc);
1328 if ccc != most_recent_skipped_ccc {
1329 if let Some(composed) =
1333 $composition.compose_non_hangul(starter, character)
1334 {
1335 $composition.decomposition.buffer.remove(i);
1336 starter = composed;
1337 continue;
1338 }
1339 }
1340 most_recent_skipped_ccc = ccc;
1341 i += 1;
1342 }
1343 break;
1344 }
1345 debug_assert_eq!($composition.decomposition.buffer_pos, 0);
1346
1347 if !$composition.decomposition.buffer.is_empty() {
1348 $sink.write_char(starter)?;
1349 for cc in $composition.decomposition.buffer.drain(..) {
1350 $sink.write_char(cc.character())?;
1351 }
1352 continue 'outer;
1354 }
1355 if $composition.decomposition.pending.is_some() {
1357 let pending = $composition.decomposition.pending.as_ref().unwrap();
1365 if u32::from(pending.character) < $composition.composition_passthrough_bound
1366 || !pending.can_combine_backwards()
1367 {
1368 $sink.write_char(starter)?;
1370 continue 'outer;
1371 }
1372 let pending_starter = $composition.decomposition.pending.take().unwrap();
1373 let decomposed = $composition.decomposition.decomposing_next(pending_starter);
1374 if let Some(composed) = $composition.compose(starter, decomposed) {
1375 starter = composed;
1376 } else {
1377 $sink.write_char(starter)?;
1378 starter = decomposed;
1379 }
1380 continue 'bufferloop;
1381 }
1382 $sink.write_char(starter)?;
1384 return Ok(());
                }
            }
1387 }
1388 };
1389}
1390
1391macro_rules! decomposing_normalize_to {
1392 ($(#[$meta:meta])*,
1393 $normalize_to:ident,
1394 $write:path,
1395 $slice:ty,
1396 $prolog:block,
1397 $as_slice:ident,
1398 $fast:block,
1399 $text:ident,
1400 $sink:ident,
1401 $decomposition:ident,
1402 $decomposition_passthrough_bound:ident,
1403 $undecomposed_starter:ident,
1404 $pending_slice:ident,
    $outer:lifetime,
    ) => {
1407 $(#[$meta])*
1408 pub fn $normalize_to<W: $write + ?Sized>(
1409 &self,
1410 $text: $slice,
1411 $sink: &mut W,
1412 ) -> core::fmt::Result {
1413 $prolog
1414
1415 let mut $decomposition = self.normalize_iter($text.chars());
1416 debug_assert_eq!($decomposition.ignorable_behavior, IgnorableBehavior::Unsupported);
1417
1418 let $decomposition_passthrough_bound = $decomposition.decomposition_passthrough_bound;
1420 $outer: loop {
1421 for cc in $decomposition.buffer.drain(..) {
1422 $sink.write_char(cc.character())?;
1423 }
1424 debug_assert_eq!($decomposition.buffer_pos, 0);
1425 let mut $undecomposed_starter = if let Some(pending) = $decomposition.pending.take() {
1426 pending
1427 } else {
1428 return Ok(());
1429 };
1430 #[allow(clippy::indexing_slicing)]
1433 if $undecomposed_starter.starter_and_decomposes_to_self() {
1434 $sink.write_char($undecomposed_starter.character)?;
1437
1438 let $pending_slice = $decomposition.delegate.$as_slice();
1439 $fast
1440 }
1441 let starter = $decomposition.decomposing_next($undecomposed_starter);
1442 $sink.write_char(starter)?;
1443 }
1444 }
1445 };
1446}
1447
1448macro_rules! normalizer_methods {
1449 () => {
1450 pub fn normalize<'a>(&self, text: &'a str) -> Cow<'a, str> {
1452 let (head, tail) = self.split_normalized(text);
1453 if tail.is_empty() {
1454 return Cow::Borrowed(head);
1455 }
1456 let mut ret = String::new();
1457 ret.reserve(text.len());
1458 ret.push_str(head);
1459 let _ = self.normalize_to(tail, &mut ret);
1460 Cow::Owned(ret)
1461 }
1462
1463 pub fn split_normalized<'a>(&self, text: &'a str) -> (&'a str, &'a str) {
1467 let up_to = self.is_normalized_up_to(text);
1468 text.split_at_checked(up_to).unwrap_or_else(|| {
1469 debug_assert!(false);
1471 ("", text)
1472 })
1473 }
1474
1475 fn is_normalized_up_to(&self, text: &str) -> usize {
1477 let mut sink = IsNormalizedSinkStr::new(text);
1478 let _ = self.normalize_to(text, &mut sink);
1479 text.len() - sink.remaining_len()
1480 }
1481
1482 pub fn is_normalized(&self, text: &str) -> bool {
1484 self.is_normalized_up_to(text) == text.len()
1485 }
1486
1487 #[cfg(feature = "utf16_iter")]
1494 pub fn normalize_utf16<'a>(&self, text: &'a [u16]) -> Cow<'a, [u16]> {
1495 let (head, tail) = self.split_normalized_utf16(text);
1496 if tail.is_empty() {
1497 return Cow::Borrowed(head);
1498 }
1499 let mut ret = alloc::vec::Vec::with_capacity(text.len());
1500 ret.extend_from_slice(head);
1501 let _ = self.normalize_utf16_to(tail, &mut ret);
1502 Cow::Owned(ret)
1503 }
1504
1505 #[cfg(feature = "utf16_iter")]
1511 pub fn split_normalized_utf16<'a>(&self, text: &'a [u16]) -> (&'a [u16], &'a [u16]) {
1512 let up_to = self.is_normalized_utf16_up_to(text);
1513 text.split_at_checked(up_to).unwrap_or_else(|| {
1514 debug_assert!(false);
1516 (&[], text)
1517 })
1518 }
1519
1520 #[cfg(feature = "utf16_iter")]
1524 fn is_normalized_utf16_up_to(&self, text: &[u16]) -> usize {
1525 let mut sink = IsNormalizedSinkUtf16::new(text);
1526 let _ = self.normalize_utf16_to(text, &mut sink);
1527 text.len() - sink.remaining_len()
1528 }
1529
1530 #[cfg(feature = "utf16_iter")]
1536 pub fn is_normalized_utf16(&self, text: &[u16]) -> bool {
1537 self.is_normalized_utf16_up_to(text) == text.len()
1538 }
1539
1540 #[cfg(feature = "utf8_iter")]
1547 pub fn normalize_utf8<'a>(&self, text: &'a [u8]) -> Cow<'a, str> {
1548 let (head, tail) = self.split_normalized_utf8(text);
1549 if tail.is_empty() {
1550 return Cow::Borrowed(head);
1551 }
1552 let mut ret = String::new();
1553 ret.reserve(text.len());
1554 ret.push_str(head);
1555 let _ = self.normalize_utf8_to(tail, &mut ret);
1556 Cow::Owned(ret)
1557 }
1558
1559 #[cfg(feature = "utf8_iter")]
1565 pub fn split_normalized_utf8<'a>(&self, text: &'a [u8]) -> (&'a str, &'a [u8]) {
1566 let up_to = self.is_normalized_utf8_up_to(text);
1567 let (head, tail) = text.split_at_checked(up_to).unwrap_or_else(|| {
1568 debug_assert!(false);
1570 (&[], text)
1571 });
1572 (unsafe { core::str::from_utf8_unchecked(head) }, tail)
1575 }
1576
1577 #[cfg(feature = "utf8_iter")]
1581 fn is_normalized_utf8_up_to(&self, text: &[u8]) -> usize {
1582 let mut sink = IsNormalizedSinkUtf8::new(text);
1583 let _ = self.normalize_utf8_to(text, &mut sink);
1584 text.len() - sink.remaining_len()
1585 }
1586
1587 #[cfg(feature = "utf8_iter")]
1594 pub fn is_normalized_utf8(&self, text: &[u8]) -> bool {
1595 self.is_normalized_utf8_up_to(text) == text.len()
1596 }
1597 };
1598}
1599
/// Borrowed version of a normalizer for performing decomposing normalization.
#[derive(Debug)]
pub struct DecomposingNormalizerBorrowed<'a> {
    decompositions: &'a DecompositionData<'a>,
    tables: &'a DecompositionTables<'a>,
    supplementary_tables: Option<&'a DecompositionTables<'a>>,
    decomposition_passthrough_bound: u8,
    composition_passthrough_bound: u16,
}
1609
1610impl DecomposingNormalizerBorrowed<'static> {
1611 pub const fn static_to_owned(self) -> DecomposingNormalizer {
1616 DecomposingNormalizer {
1617 decompositions: DataPayload::from_static_ref(self.decompositions),
1618 tables: DataPayload::from_static_ref(self.tables),
1619 supplementary_tables: if let Some(s) = self.supplementary_tables {
1620 Some(DataPayload::from_static_ref(s))
1622 } else {
1623 None
1624 },
1625 decomposition_passthrough_bound: self.decomposition_passthrough_bound,
1626 composition_passthrough_bound: self.composition_passthrough_bound,
1627 }
1628 }
1629
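    /// Constructs a borrowed NFD normalizer from compiled data.
    ///
    /// A minimal usage sketch (the doc-test assumes the default
    /// `compiled_data` feature):
    ///
    /// ```
    /// use icu_normalizer::DecomposingNormalizerBorrowed;
    ///
    /// let nfd = DecomposingNormalizerBorrowed::new_nfd();
    /// assert_eq!(nfd.normalize("\u{00E9}"), "e\u{0301}"); // é -> e + COMBINING ACUTE ACCENT
    /// assert!(nfd.is_normalized("e\u{0301}"));
    /// ```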
1630 #[cfg(feature = "compiled_data")]
1636 pub const fn new_nfd() -> Self {
1637 const _: () = assert!(
1638 crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1
1639 .scalars16
1640 .const_len()
1641 + crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1
1642 .scalars24
1643 .const_len()
1644 <= 0xFFF,
1645 "future extension"
1646 );
1647
1648 DecomposingNormalizerBorrowed {
1649 decompositions: crate::provider::Baked::SINGLETON_NORMALIZER_NFD_DATA_V1,
1650 tables: crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1,
1651 supplementary_tables: None,
1652 decomposition_passthrough_bound: 0xC0,
1653 composition_passthrough_bound: 0x0300,
1654 }
1655 }
1656
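    /// Constructs a borrowed NFKD normalizer from compiled data.
    ///
    /// A minimal usage sketch (the doc-test assumes the default
    /// `compiled_data` feature):
    ///
    /// ```
    /// use icu_normalizer::DecomposingNormalizerBorrowed;
    ///
    /// let nfkd = DecomposingNormalizerBorrowed::new_nfkd();
    /// assert_eq!(nfkd.normalize("\u{FB01}"), "fi"); // LATIN SMALL LIGATURE FI
    /// ```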
1657 #[cfg(feature = "compiled_data")]
1663 pub const fn new_nfkd() -> Self {
1664 const _: () = assert!(
1665 crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1
1666 .scalars16
1667 .const_len()
1668 + crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1
1669 .scalars24
1670 .const_len()
1671 + crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_TABLES_V1
1672 .scalars16
1673 .const_len()
1674 + crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_TABLES_V1
1675 .scalars24
1676 .const_len()
1677 <= 0xFFF,
1678 "future extension"
1679 );
1680
1681 const _: () = assert!(
1682 crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_DATA_V1.passthrough_cap <= 0x0300,
1683 "invalid"
1684 );
1685
1686 let decomposition_capped =
1687 if crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_DATA_V1.passthrough_cap < 0xC0 {
1688 crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_DATA_V1.passthrough_cap
1689 } else {
1690 0xC0
1691 };
1692 let composition_capped =
1693 if crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_DATA_V1.passthrough_cap < 0x0300 {
1694 crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_DATA_V1.passthrough_cap
1695 } else {
1696 0x0300
1697 };
1698
1699 DecomposingNormalizerBorrowed {
1700 decompositions: crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_DATA_V1,
1701 tables: crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1,
1702 supplementary_tables: Some(crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_TABLES_V1),
1703 decomposition_passthrough_bound: decomposition_capped as u8,
1704 composition_passthrough_bound: composition_capped,
1705 }
1706 }
1707
1708 #[cfg(feature = "compiled_data")]
1709 pub(crate) const fn new_uts46_decomposed() -> Self {
1710 const _: () = assert!(
1711 crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1
1712 .scalars16
1713 .const_len()
1714 + crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1
1715 .scalars24
1716 .const_len()
1717 + crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_TABLES_V1
1718 .scalars16
1719 .const_len()
1720 + crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_TABLES_V1
1721 .scalars24
1722 .const_len()
1723 <= 0xFFF,
1724 "future extension"
1725 );
1726
1727 const _: () = assert!(
1728 crate::provider::Baked::SINGLETON_NORMALIZER_UTS46_DATA_V1.passthrough_cap <= 0x0300,
1729 "invalid"
1730 );
1731
1732 let decomposition_capped =
1733 if crate::provider::Baked::SINGLETON_NORMALIZER_UTS46_DATA_V1.passthrough_cap < 0xC0 {
1734 crate::provider::Baked::SINGLETON_NORMALIZER_UTS46_DATA_V1.passthrough_cap
1735 } else {
1736 0xC0
1737 };
1738 let composition_capped = if crate::provider::Baked::SINGLETON_NORMALIZER_UTS46_DATA_V1
1739 .passthrough_cap
1740 < 0x0300
1741 {
1742 crate::provider::Baked::SINGLETON_NORMALIZER_UTS46_DATA_V1.passthrough_cap
1743 } else {
1744 0x0300
1745 };
1746
1747 DecomposingNormalizerBorrowed {
1748 decompositions: crate::provider::Baked::SINGLETON_NORMALIZER_UTS46_DATA_V1,
1749 tables: crate::provider::Baked::SINGLETON_NORMALIZER_NFD_TABLES_V1,
1750 supplementary_tables: Some(crate::provider::Baked::SINGLETON_NORMALIZER_NFKD_TABLES_V1),
1751 decomposition_passthrough_bound: decomposition_capped as u8,
1752 composition_passthrough_bound: composition_capped,
1753 }
1754 }
1755}
1756
1757impl<'data> DecomposingNormalizerBorrowed<'data> {
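    /// Wraps a delegate iterator into a decomposing iterator adaptor.
    ///
    /// A minimal usage sketch (the doc-test assumes the default
    /// `compiled_data` feature):
    ///
    /// ```
    /// use icu_normalizer::DecomposingNormalizerBorrowed;
    ///
    /// let nfd = DecomposingNormalizerBorrowed::new_nfd();
    /// let decomposed: String = nfd.normalize_iter("\u{00E4}".chars()).collect();
    /// assert_eq!(decomposed, "a\u{0308}"); // ä -> a + COMBINING DIAERESIS
    /// ```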
1758 pub fn normalize_iter<I: Iterator<Item = char>>(&self, iter: I) -> Decomposition<'data, I> {
1761 Decomposition::new_with_supplements(
1762 iter,
1763 self.decompositions,
1764 self.tables,
1765 self.supplementary_tables,
1766 self.decomposition_passthrough_bound,
1767 IgnorableBehavior::Unsupported,
1768 )
1769 }
1770
1771 normalizer_methods!();
1772
1773 decomposing_normalize_to!(
1774 ,
1776 normalize_to,
1777 core::fmt::Write,
1778 &str,
1779 {
1780 },
1781 as_str,
1782 {
1783 let decomposition_passthrough_byte_bound = if decomposition_passthrough_bound == 0xC0 {
1784 0xC3u8
1785 } else {
1786 decomposition_passthrough_bound.min(0x80) as u8
1787 };
1788 #[allow(clippy::unwrap_used)]
1790 'fast: loop {
1791 let mut code_unit_iter = decomposition.delegate.as_str().as_bytes().iter();
1792 'fastest: loop {
1793 if let Some(&upcoming_byte) = code_unit_iter.next() {
1794 if upcoming_byte < decomposition_passthrough_byte_bound {
1795 continue 'fastest;
1797 }
1798 decomposition.delegate = pending_slice[pending_slice.len() - code_unit_iter.as_slice().len() - 1..].chars();
1799 break 'fastest;
1800 }
1801 sink.write_str(pending_slice)?;
1803 return Ok(());
1804 }
1805
1806 let upcoming = decomposition.delegate.next().unwrap();
1809 let upcoming_with_trie_value = decomposition.attach_trie_value(upcoming);
1810 if upcoming_with_trie_value.starter_and_decomposes_to_self() {
1811 continue 'fast;
1812 }
1813 let consumed_so_far_slice = &pending_slice[..pending_slice.len()
1814 - decomposition.delegate.as_str().len()
1815 - upcoming.len_utf8()];
1816 sink.write_str(consumed_so_far_slice)?;
1817
1818 if decomposition_starts_with_non_starter(
1820 upcoming_with_trie_value.trie_val,
1821 ) {
1822 decomposition.pending = Some(upcoming_with_trie_value);
1825 decomposition.gather_and_sort_combining(0);
1826 continue 'outer;
1827 }
1828 undecomposed_starter = upcoming_with_trie_value;
1829 debug_assert!(decomposition.pending.is_none());
1830 break 'fast;
1831 }
1832 },
1833 text,
1834 sink,
1835 decomposition,
1836 decomposition_passthrough_bound,
1837 undecomposed_starter,
1838 pending_slice,
1839 'outer,
1840 );
1841
1842 decomposing_normalize_to!(
1843 #[cfg(feature = "utf8_iter")]
1850 ,
1851 normalize_utf8_to,
1852 core::fmt::Write,
1853 &[u8],
1854 {
1855 },
1856 as_slice,
1857 {
1858 let decomposition_passthrough_byte_bound = decomposition_passthrough_bound.min(0x80) as u8;
1859 #[allow(clippy::unwrap_used)]
1861 'fast: loop {
1862 let mut code_unit_iter = decomposition.delegate.as_slice().iter();
1863 'fastest: loop {
1864 if let Some(&upcoming_byte) = code_unit_iter.next() {
1865 if upcoming_byte < decomposition_passthrough_byte_bound {
1866 continue 'fastest;
1868 }
1869 break 'fastest;
1870 }
1871 sink.write_str(unsafe { core::str::from_utf8_unchecked(pending_slice) })?;
1873 return Ok(());
1874 }
1875 decomposition.delegate = pending_slice[pending_slice.len() - code_unit_iter.as_slice().len() - 1..].chars();
1876
1877 let upcoming = decomposition.delegate.next().unwrap();
1880 let upcoming_with_trie_value = decomposition.attach_trie_value(upcoming);
1881 if upcoming_with_trie_value.starter_and_decomposes_to_self_except_replacement() {
1882 continue 'fast;
1889 }
1890
1891 if upcoming == REPLACEMENT_CHARACTER {
1893 let mut consumed_so_far = pending_slice[..pending_slice.len() - decomposition.delegate.as_slice().len()].chars();
1898 let back = consumed_so_far.next_back();
1899 debug_assert_eq!(back, Some(REPLACEMENT_CHARACTER));
1900 let consumed_so_far_slice = consumed_so_far.as_slice();
1901 sink.write_str(unsafe { core::str::from_utf8_unchecked(consumed_so_far_slice) } )?;
1902
1903 undecomposed_starter = upcoming_with_trie_value;
1907 debug_assert!(decomposition.pending.is_none());
1908 break 'fast;
1909 }
1910
1911 let consumed_so_far_slice = &pending_slice[..pending_slice.len()
1912 - decomposition.delegate.as_slice().len()
1913 - upcoming.len_utf8()];
1914 sink.write_str(unsafe { core::str::from_utf8_unchecked(consumed_so_far_slice) } )?;
1915
1916 if decomposition_starts_with_non_starter(
1918 upcoming_with_trie_value.trie_val,
1919 ) {
1920 decomposition.pending = Some(upcoming_with_trie_value);
1923 decomposition.gather_and_sort_combining(0);
1924 continue 'outer;
1925 }
1926 undecomposed_starter = upcoming_with_trie_value;
1927 debug_assert!(decomposition.pending.is_none());
1928 break 'fast;
1929 }
1930 },
1931 text,
1932 sink,
1933 decomposition,
1934 decomposition_passthrough_bound,
1935 undecomposed_starter,
1936 pending_slice,
1937 'outer,
1938 );
1939
1940 decomposing_normalize_to!(
1941 #[cfg(feature = "utf16_iter")]
1948 ,
1949 normalize_utf16_to,
1950 write16::Write16,
1951 &[u16],
1952 {
1953 sink.size_hint(text.len())?;
1954 },
1955 as_slice,
1956 {
1957 let mut code_unit_iter = decomposition.delegate.as_slice().iter();
1958 'fast: loop {
1959 if let Some(&upcoming_code_unit) = code_unit_iter.next() {
1960 let mut upcoming32 = u32::from(upcoming_code_unit);
1961 if upcoming32 < decomposition_passthrough_bound {
1962 continue 'fast;
1963 }
1964 let mut trie_value = decomposition.trie.get32(upcoming32);
1967 if starter_and_decomposes_to_self_impl(trie_value) {
1968 continue 'fast;
1969 }
1970 #[allow(clippy::never_loop)]
1973 'surrogateloop: loop {
1974 let surrogate_base = upcoming32.wrapping_sub(0xD800);
1975 if surrogate_base > (0xDFFF - 0xD800) {
1976 break 'surrogateloop;
1978 }
1979 if surrogate_base <= (0xDBFF - 0xD800) {
1980 let iter_backup = code_unit_iter.clone();
1981 if let Some(&low) = code_unit_iter.next() {
1982 if in_inclusive_range16(low, 0xDC00, 0xDFFF) {
1983 upcoming32 = (upcoming32 << 10) + u32::from(low)
1984 - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
1985 trie_value = decomposition.trie.get32(upcoming32);
1987 if starter_and_decomposes_to_self_impl(trie_value) {
1988 continue 'fast;
1989 }
1990 break 'surrogateloop;
1991 } else {
1992 code_unit_iter = iter_backup;
1993 }
1994 }
1995 }
                        // Unpaired surrogate: replace with U+FFFD.
                        upcoming32 = 0xFFFD;
                        break 'surrogateloop;
2000 }
2001
2002 let upcoming = unsafe { char::from_u32_unchecked(upcoming32) };
2003 let upcoming_with_trie_value = CharacterAndTrieValue::new(upcoming, trie_value);
2004
2005 let consumed_so_far_slice = &pending_slice[..pending_slice.len()
2006 - code_unit_iter.as_slice().len()
2007 - upcoming.len_utf16()];
2008 sink.write_slice(consumed_so_far_slice)?;
2009
2010 if decomposition_starts_with_non_starter(
2012 upcoming_with_trie_value.trie_val,
2013 ) {
2014 decomposition.delegate = code_unit_iter.as_slice().chars();
2016 decomposition.pending = Some(upcoming_with_trie_value);
2019 decomposition.gather_and_sort_combining(0);
2020 continue 'outer;
2021 }
2022 undecomposed_starter = upcoming_with_trie_value;
2023 debug_assert!(decomposition.pending.is_none());
2024 break 'fast;
2025 }
2026 sink.write_slice(pending_slice)?;
2028 return Ok(());
2029 }
2030 decomposition.delegate = code_unit_iter.as_slice().chars();
2032 },
2033 text,
2034 sink,
2035 decomposition,
2036 decomposition_passthrough_bound,
2037 undecomposed_starter,
2038 pending_slice,
2039 'outer,
2040 );
2041}
2042
/// A normalizer for performing decomposing normalization.
#[derive(Debug)]
pub struct DecomposingNormalizer {
    decompositions: DataPayload<NormalizerNfdDataV1>,
    tables: DataPayload<NormalizerNfdTablesV1>,
    supplementary_tables: Option<DataPayload<NormalizerNfkdTablesV1>>,
    decomposition_passthrough_bound: u8,
    composition_passthrough_bound: u16,
}
2052
2053impl DecomposingNormalizer {
2054 pub fn as_borrowed(&self) -> DecomposingNormalizerBorrowed {
2056 DecomposingNormalizerBorrowed {
2057 decompositions: self.decompositions.get(),
2058 tables: self.tables.get(),
2059 supplementary_tables: self.supplementary_tables.as_ref().map(|s| s.get()),
2060 decomposition_passthrough_bound: self.decomposition_passthrough_bound,
2061 composition_passthrough_bound: self.composition_passthrough_bound,
2062 }
2063 }
2064
2065 #[cfg(feature = "compiled_data")]
2071 pub const fn new_nfd() -> DecomposingNormalizerBorrowed<'static> {
2072 DecomposingNormalizerBorrowed::new_nfd()
2073 }
2074
2075 icu_provider::gen_buffer_data_constructors!(
2076 () -> error: DataError,
2077 functions: [
2078 new_nfd: skip,
2079 try_new_nfd_with_buffer_provider,
2080 try_new_nfd_unstable,
2081 Self,
2082 ]
2083 );
2084
2085 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_nfd)]
2086 pub fn try_new_nfd_unstable<D>(provider: &D) -> Result<Self, DataError>
2087 where
2088 D: DataProvider<NormalizerNfdDataV1> + DataProvider<NormalizerNfdTablesV1> + ?Sized,
2089 {
2090 let decompositions: DataPayload<NormalizerNfdDataV1> =
2091 provider.load(Default::default())?.payload;
2092 let tables: DataPayload<NormalizerNfdTablesV1> = provider.load(Default::default())?.payload;
2093
2094 if tables.get().scalars16.len() + tables.get().scalars24.len() > 0xFFF {
2095 return Err(
2102 DataError::custom("future extension").with_marker(NormalizerNfdTablesV1::INFO)
2103 );
2104 }
2105
2106 let cap = decompositions.get().passthrough_cap;
2107 if cap > 0x0300 {
2108 return Err(DataError::custom("invalid").with_marker(NormalizerNfdDataV1::INFO));
2109 }
2110 let decomposition_capped = cap.min(0xC0);
2111 let composition_capped = cap.min(0x0300);
2112
2113 Ok(DecomposingNormalizer {
2114 decompositions,
2115 tables,
2116 supplementary_tables: None,
2117 decomposition_passthrough_bound: decomposition_capped as u8,
2118 composition_passthrough_bound: composition_capped,
2119 })
2120 }
2121
2122 icu_provider::gen_buffer_data_constructors!(
2123 () -> error: DataError,
2124 functions: [
2125 new_nfkd: skip,
2126 try_new_nfkd_with_buffer_provider,
2127 try_new_nfkd_unstable,
2128 Self,
2129 ]
2130 );
2131
2132 #[cfg(feature = "compiled_data")]
2138 pub const fn new_nfkd() -> DecomposingNormalizerBorrowed<'static> {
2139 DecomposingNormalizerBorrowed::new_nfkd()
2140 }
2141
2142 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_nfkd)]
2143 pub fn try_new_nfkd_unstable<D>(provider: &D) -> Result<Self, DataError>
2144 where
2145 D: DataProvider<NormalizerNfkdDataV1>
2146 + DataProvider<NormalizerNfdTablesV1>
2147 + DataProvider<NormalizerNfkdTablesV1>
2148 + ?Sized,
2149 {
2150 let decompositions: DataPayload<NormalizerNfkdDataV1> =
2151 provider.load(Default::default())?.payload;
2152 let tables: DataPayload<NormalizerNfdTablesV1> = provider.load(Default::default())?.payload;
2153 let supplementary_tables: DataPayload<NormalizerNfkdTablesV1> =
2154 provider.load(Default::default())?.payload;
2155
2156 if tables.get().scalars16.len()
2157 + tables.get().scalars24.len()
2158 + supplementary_tables.get().scalars16.len()
2159 + supplementary_tables.get().scalars24.len()
2160 > 0xFFF
2161 {
2162 return Err(
2169 DataError::custom("future extension").with_marker(NormalizerNfdTablesV1::INFO)
2170 );
2171 }
2172
2173 let cap = decompositions.get().passthrough_cap;
2174 if cap > 0x0300 {
2175 return Err(DataError::custom("invalid").with_marker(NormalizerNfkdDataV1::INFO));
2176 }
2177 let decomposition_capped = cap.min(0xC0);
2178 let composition_capped = cap.min(0x0300);
2179
2180 Ok(DecomposingNormalizer {
2181 decompositions: decompositions.cast(),
2182 tables,
2183 supplementary_tables: Some(supplementary_tables),
2184 decomposition_passthrough_bound: decomposition_capped as u8,
2185 composition_passthrough_bound: composition_capped,
2186 })
2187 }
2188
2189 pub(crate) fn try_new_uts46_decomposed_unstable<D>(provider: &D) -> Result<Self, DataError>
2207 where
2208 D: DataProvider<NormalizerUts46DataV1>
2209 + DataProvider<NormalizerNfdTablesV1>
2210 + DataProvider<NormalizerNfkdTablesV1>
2211 + ?Sized,
2213 {
2214 let decompositions: DataPayload<NormalizerUts46DataV1> =
2215 provider.load(Default::default())?.payload;
2216 let tables: DataPayload<NormalizerNfdTablesV1> = provider.load(Default::default())?.payload;
2217 let supplementary_tables: DataPayload<NormalizerNfkdTablesV1> =
2218 provider.load(Default::default())?.payload;
2219
2220 if tables.get().scalars16.len()
2221 + tables.get().scalars24.len()
2222 + supplementary_tables.get().scalars16.len()
2223 + supplementary_tables.get().scalars24.len()
2224 > 0xFFF
2225 {
2226 return Err(
2233 DataError::custom("future extension").with_marker(NormalizerNfdTablesV1::INFO)
2234 );
2235 }
2236
2237 let cap = decompositions.get().passthrough_cap;
2238 if cap > 0x0300 {
2239 return Err(DataError::custom("invalid").with_marker(NormalizerUts46DataV1::INFO));
2240 }
2241 let decomposition_capped = cap.min(0xC0);
2242 let composition_capped = cap.min(0x0300);
2243
2244 Ok(DecomposingNormalizer {
2245 decompositions: decompositions.cast(),
2246 tables,
2247 supplementary_tables: Some(supplementary_tables),
2248 decomposition_passthrough_bound: decomposition_capped as u8,
2249 composition_passthrough_bound: composition_capped,
2250 })
2251 }
2252}
2253
/// Borrowed version of a normalizer for performing composing normalization.
#[derive(Debug)]
2256pub struct ComposingNormalizerBorrowed<'a> {
2257 decomposing_normalizer: DecomposingNormalizerBorrowed<'a>,
2258 canonical_compositions: &'a CanonicalCompositions<'a>,
2259}
2260
2261impl ComposingNormalizerBorrowed<'static> {
2262 pub const fn static_to_owned(self) -> ComposingNormalizer {
2267 ComposingNormalizer {
2268 decomposing_normalizer: self.decomposing_normalizer.static_to_owned(),
2269 canonical_compositions: DataPayload::from_static_ref(self.canonical_compositions),
2270 }
2271 }
2272
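    /// Constructs a borrowed NFC normalizer from compiled data.
    ///
    /// A minimal usage sketch (the doc-test assumes the default
    /// `compiled_data` feature):
    ///
    /// ```
    /// use icu_normalizer::ComposingNormalizerBorrowed;
    ///
    /// let nfc = ComposingNormalizerBorrowed::new_nfc();
    /// assert_eq!(nfc.normalize("a\u{0308}"), "\u{00E4}"); // a + COMBINING DIAERESIS -> ä
    /// ```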
2273 #[cfg(feature = "compiled_data")]
2279 pub const fn new_nfc() -> Self {
2280 ComposingNormalizerBorrowed {
2281 decomposing_normalizer: DecomposingNormalizerBorrowed::new_nfd(),
2282 canonical_compositions: crate::provider::Baked::SINGLETON_NORMALIZER_NFC_V1,
2283 }
2284 }
2285
2286 #[cfg(feature = "compiled_data")]
2292 pub const fn new_nfkc() -> Self {
2293 ComposingNormalizerBorrowed {
2294 decomposing_normalizer: DecomposingNormalizerBorrowed::new_nfkd(),
2295 canonical_compositions: crate::provider::Baked::SINGLETON_NORMALIZER_NFC_V1,
2296 }
2297 }
2298
2299 #[cfg(feature = "compiled_data")]
2309 pub(crate) const fn new_uts46() -> Self {
2310 ComposingNormalizerBorrowed {
2311 decomposing_normalizer: DecomposingNormalizerBorrowed::new_uts46_decomposed(),
2312 canonical_compositions: crate::provider::Baked::SINGLETON_NORMALIZER_NFC_V1,
2313 }
2314 }
2315}
2316
2317impl<'data> ComposingNormalizerBorrowed<'data> {
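    /// Wraps a delegate iterator into a composing iterator adaptor.
    ///
    /// A minimal usage sketch (the doc-test assumes the default
    /// `compiled_data` feature):
    ///
    /// ```
    /// use icu_normalizer::ComposingNormalizerBorrowed;
    ///
    /// let nfc = ComposingNormalizerBorrowed::new_nfc();
    /// let composed: String = nfc.normalize_iter("e\u{0301}".chars()).collect();
    /// assert_eq!(composed, "\u{00E9}"); // e + COMBINING ACUTE ACCENT -> é
    /// ```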
2318 pub fn normalize_iter<I: Iterator<Item = char>>(&self, iter: I) -> Composition<'data, I> {
2321 self.normalize_iter_private(iter, IgnorableBehavior::Unsupported)
2322 }
2323
2324 fn normalize_iter_private<I: Iterator<Item = char>>(
2325 &self,
2326 iter: I,
2327 ignorable_behavior: IgnorableBehavior,
2328 ) -> Composition<'data, I> {
2329 Composition::new(
2330 Decomposition::new_with_supplements(
2331 iter,
2332 self.decomposing_normalizer.decompositions,
2333 self.decomposing_normalizer.tables,
2334 self.decomposing_normalizer.supplementary_tables,
2335 self.decomposing_normalizer.decomposition_passthrough_bound,
2336 ignorable_behavior,
2337 ),
2338 self.canonical_compositions.canonical_compositions.clone(),
2339 self.decomposing_normalizer.composition_passthrough_bound,
2340 )
2341 }
2342
2343 normalizer_methods!();
2344
2345 composing_normalize_to!(
2346 ,
2348 normalize_to,
2349 core::fmt::Write,
2350 &str,
2351 {},
2352 true,
2353 as_str,
2354 {
2355 let composition_passthrough_byte_bound = if composition_passthrough_bound == 0x300 {
2357 0xCCu8
2358 } else {
2359 composition_passthrough_bound.min(0x80) as u8
2362 };
2363 #[allow(clippy::unwrap_used)]
2365 'fast: loop {
2366 let mut code_unit_iter = composition.decomposition.delegate.as_str().as_bytes().iter();
2367 'fastest: loop {
2368 if let Some(&upcoming_byte) = code_unit_iter.next() {
2369 if upcoming_byte < composition_passthrough_byte_bound {
2370 continue 'fastest;
2372 }
2373 composition.decomposition.delegate = pending_slice[pending_slice.len() - code_unit_iter.as_slice().len() - 1..].chars();
2374 break 'fastest;
2375 }
2376 sink.write_str(pending_slice)?;
2378 return Ok(());
2379 }
2380 let upcoming = composition.decomposition.delegate.next().unwrap();
2383 let upcoming_with_trie_value = composition.decomposition.attach_trie_value(upcoming);
2384 if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() {
2385 continue 'fast;
2390 }
2391 composition.decomposition.pending = Some(upcoming_with_trie_value);
2393
2394 let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_str().len() - upcoming.len_utf8()].chars();
2396 undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap());
2398 let consumed_so_far_slice = consumed_so_far.as_str();
2399 sink.write_str(consumed_so_far_slice)?;
2400 break 'fast;
2401 }
2402 },
2403 text,
2404 sink,
2405 composition,
2406 composition_passthrough_bound,
2407 undecomposed_starter,
2408 pending_slice,
2409 len_utf8,
2410 );

    composing_normalize_to!(
        #[cfg(feature = "utf8_iter")]
        ,
        normalize_utf8_to,
        core::fmt::Write,
        &[u8],
        {},
        false,
        as_slice,
        {
            'fast: loop {
                if let Some(upcoming) = composition.decomposition.delegate.next() {
                    if u32::from(upcoming) < composition_passthrough_bound {
                        continue 'fast;
                    }
                    let upcoming_with_trie_value =
                        composition.decomposition.attach_trie_value(upcoming);
                    if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() {
                        continue 'fast;
                    }
                    if upcoming == REPLACEMENT_CHARACTER {
                        // U+FFFD may stand for itself or for ill-formed bytes of
                        // unknown length, so its byte position cannot be recovered
                        // from `len_utf8()`. Flush what precedes it and treat it as
                        // the new undecomposed starter.
                        let mut consumed_so_far = pending_slice
                            [..pending_slice.len() - composition.decomposition.delegate.as_slice().len()]
                            .chars();
                        let back = consumed_so_far.next_back();
                        debug_assert_eq!(back, Some(REPLACEMENT_CHARACTER));
                        let consumed_so_far_slice = consumed_so_far.as_slice();
                        sink.write_str(unsafe {
                            core::str::from_utf8_unchecked(consumed_so_far_slice)
                        })?;
                        undecomposed_starter = CharacterAndTrieValue::new(REPLACEMENT_CHARACTER, 0);
                        composition.decomposition.pending = None;
                        break 'fast;
                    }

                    composition.decomposition.pending = Some(upcoming_with_trie_value);
                    let mut consumed_so_far = pending_slice[..pending_slice.len()
                        - composition.decomposition.delegate.as_slice().len()
                        - upcoming.len_utf8()]
                        .chars();
                    #[allow(clippy::unwrap_used)]
                    {
                        undecomposed_starter = composition
                            .decomposition
                            .attach_trie_value(consumed_so_far.next_back().unwrap());
                    }
                    let consumed_so_far_slice = consumed_so_far.as_slice();
                    sink.write_str(unsafe {
                        core::str::from_utf8_unchecked(consumed_so_far_slice)
                    })?;
                    break 'fast;
                }
                sink.write_str(unsafe { core::str::from_utf8_unchecked(pending_slice) })?;
                return Ok(());
            }
        },
        text,
        sink,
        composition,
        composition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        len_utf8,
    );
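
    // Illustrative sketch, not part of the original source: shows
    // `normalize_utf8_to` over potentially ill-formed input. Assumes the
    // `compiled_data` and `utf8_iter` features and the macro-generated
    // `(&self, &[u8], &mut impl core::fmt::Write)` signature; the function
    // name is hypothetical and it is compiled only under `cfg(test)`.
    #[cfg(all(test, feature = "compiled_data", feature = "utf8_iter"))]
    #[allow(dead_code)]
    fn normalize_utf8_to_sketch() {
        let nfc = ComposingNormalizer::new_nfc();
        let mut out = String::new();
        // The lone 0xFF byte is ill-formed UTF-8 and comes out as U+FFFD.
        let _ = nfc.normalize_utf8_to(b"a\xFFb", &mut out);
        assert_eq!(out, "a\u{FFFD}b");
    }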

    composing_normalize_to!(
        #[cfg(feature = "utf16_iter")]
        ,
        normalize_utf16_to,
        write16::Write16,
        &[u16],
        {
            sink.size_hint(text.len())?;
        },
        false,
        as_slice,
        {
            let mut code_unit_iter = composition.decomposition.delegate.as_slice().iter();
            let mut upcoming32;
            let mut trie_value;
            'fast: loop {
                if let Some(&upcoming_code_unit) = code_unit_iter.next() {
                    upcoming32 = u32::from(upcoming_code_unit);
                    if upcoming32 < composition_passthrough_bound {
                        continue 'fast;
                    }
                    trie_value = composition.decomposition.trie.get32(upcoming32);
                    if potential_passthrough_and_cannot_combine_backwards_impl(trie_value) {
                        continue 'fast;
                    }

                    #[allow(clippy::never_loop)]
                    'surrogateloop: loop {
                        let surrogate_base = upcoming32.wrapping_sub(0xD800);
                        if surrogate_base > (0xDFFF - 0xD800) {
                            break 'surrogateloop;
                        }
                        if surrogate_base <= (0xDBFF - 0xD800) {
                            let iter_backup = code_unit_iter.clone();
                            if let Some(&low) = code_unit_iter.next() {
                                if in_inclusive_range16(low, 0xDC00, 0xDFFF) {
                                    upcoming32 = (upcoming32 << 10) + u32::from(low)
                                        - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
                                    trie_value = composition.decomposition.trie.get32(upcoming32);
                                    if potential_passthrough_and_cannot_combine_backwards_impl(trie_value) {
                                        continue 'fast;
                                    }
                                    break 'surrogateloop;
                                } else {
                                    code_unit_iter = iter_backup;
                                }
                            }
                        }
                        // Unpaired surrogate: handle it as U+FFFD.
                        upcoming32 = 0xFFFD;
                        debug_assert_eq!(
                            trie_value,
                            NON_ROUND_TRIP_MARKER | BACKWARD_COMBINING_MARKER | 0xFFFD
                        );
                        break 'surrogateloop;
                    }

                    let upcoming = unsafe { char::from_u32_unchecked(upcoming32) };
                    let upcoming_with_trie_value = CharacterAndTrieValue::new(upcoming, trie_value);
                    composition.decomposition.pending = Some(upcoming_with_trie_value);
                    let mut consumed_so_far = pending_slice[..pending_slice.len()
                        - code_unit_iter.as_slice().len()
                        - upcoming.len_utf16()]
                        .chars();
                    #[allow(clippy::unwrap_used)]
                    {
                        undecomposed_starter = composition
                            .decomposition
                            .attach_trie_value(consumed_so_far.next_back().unwrap());
                    }
                    let consumed_so_far_slice = consumed_so_far.as_slice();
                    sink.write_slice(consumed_so_far_slice)?;
                    break 'fast;
                }
                sink.write_slice(pending_slice)?;
                return Ok(());
            }
            composition.decomposition.delegate = code_unit_iter.as_slice().chars();
        },
        text,
        sink,
        composition,
        composition_passthrough_bound,
        undecomposed_starter,
        pending_slice,
        len_utf16,
    );
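
    // Sketch, not part of the original source: a `cfg(test)`-only check of the
    // surrogate-pair arithmetic used in the UTF-16 fast path above, which folds
    // the high and low surrogates into a scalar value in a single expression.
    // The function name is hypothetical.
    #[cfg(test)]
    #[allow(dead_code)]
    fn surrogate_pair_math_sketch() {
        // U+1F600 is encoded as the surrogate pair <0xD83D, 0xDE00> in UTF-16.
        let (high, low) = (0xD83Du32, 0xDE00u32);
        let scalar = (high << 10) + low - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);
        assert_eq!(scalar, 0x1F600);
    }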
}

/// A normalizer for performing composing normalization, e.g. NFC or NFKC.
#[derive(Debug)]
pub struct ComposingNormalizer {
    decomposing_normalizer: DecomposingNormalizer,
    canonical_compositions: DataPayload<NormalizerNfcV1>,
}

impl ComposingNormalizer {
    /// Constructs a borrowed version of this normalizer for more efficient querying.
    pub fn as_borrowed(&self) -> ComposingNormalizerBorrowed<'_> {
        ComposingNormalizerBorrowed {
            decomposing_normalizer: self.decomposing_normalizer.as_borrowed(),
            canonical_compositions: self.canonical_compositions.get(),
        }
    }

    /// Constructs a normalizer for Unicode Normalization Form C (canonical
    /// decomposition followed by canonical composition) using compiled data.
    #[cfg(feature = "compiled_data")]
    pub const fn new_nfc() -> ComposingNormalizerBorrowed<'static> {
        ComposingNormalizerBorrowed::new_nfc()
    }
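
    // Illustrative sketch, not part of the original source: writes NFC output
    // into a `core::fmt::Write` sink via the `normalize_to` method generated
    // for the borrowed type above. Assumes the `compiled_data` feature; the
    // function name is hypothetical and it is compiled only under `cfg(test)`.
    #[cfg(all(test, feature = "compiled_data"))]
    #[allow(dead_code)]
    fn new_nfc_sketch() {
        let nfc = ComposingNormalizer::new_nfc();
        let mut out = String::new();
        // "e" followed by U+0301 COMBINING ACUTE ACCENT composes to U+00E9.
        let _ = nfc.normalize_to("e\u{0301}", &mut out);
        assert_eq!(out, "\u{00E9}");
    }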

    icu_provider::gen_buffer_data_constructors!(
        () -> error: DataError,
        functions: [
            new_nfc: skip,
            try_new_nfc_with_buffer_provider,
            try_new_nfc_unstable,
            Self,
        ]
    );

    /// A version of `Self::new_nfc` that uses custom data provided by a `DataProvider`.
    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_nfc)]
    pub fn try_new_nfc_unstable<D>(provider: &D) -> Result<Self, DataError>
    where
        D: DataProvider<NormalizerNfdDataV1>
            + DataProvider<NormalizerNfdTablesV1>
            + DataProvider<NormalizerNfcV1>
            + ?Sized,
    {
        let decomposing_normalizer = DecomposingNormalizer::try_new_nfd_unstable(provider)?;

        let canonical_compositions: DataPayload<NormalizerNfcV1> =
            provider.load(Default::default())?.payload;

        Ok(ComposingNormalizer {
            decomposing_normalizer,
            canonical_compositions,
        })
    }
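
    // Sketch, not part of the original source: a caller-side helper showing the
    // `DataProvider` bounds a custom provider needs in order to use the unstable
    // NFC constructor above; it simply forwards to that constructor. The name is
    // hypothetical and the function is compiled only under `cfg(test)`.
    #[cfg(test)]
    #[allow(dead_code)]
    fn try_new_nfc_unstable_sketch<D>(provider: &D) -> Result<ComposingNormalizer, DataError>
    where
        D: DataProvider<NormalizerNfdDataV1>
            + DataProvider<NormalizerNfdTablesV1>
            + DataProvider<NormalizerNfcV1>
            + ?Sized,
    {
        ComposingNormalizer::try_new_nfc_unstable(provider)
    }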

    /// Constructs a normalizer for Unicode Normalization Form KC (compatibility
    /// decomposition followed by canonical composition) using compiled data.
    #[cfg(feature = "compiled_data")]
    pub const fn new_nfkc() -> ComposingNormalizerBorrowed<'static> {
        ComposingNormalizerBorrowed::new_nfkc()
    }

    icu_provider::gen_buffer_data_constructors!(
        () -> error: DataError,
        functions: [
            new_nfkc: skip,
            try_new_nfkc_with_buffer_provider,
            try_new_nfkc_unstable,
            Self,
        ]
    );

    /// A version of `Self::new_nfkc` that uses custom data provided by a `DataProvider`.
    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_nfkc)]
    pub fn try_new_nfkc_unstable<D>(provider: &D) -> Result<Self, DataError>
    where
        D: DataProvider<NormalizerNfkdDataV1>
            + DataProvider<NormalizerNfdTablesV1>
            + DataProvider<NormalizerNfkdTablesV1>
            + DataProvider<NormalizerNfcV1>
            + ?Sized,
    {
        let decomposing_normalizer = DecomposingNormalizer::try_new_nfkd_unstable(provider)?;

        let canonical_compositions: DataPayload<NormalizerNfcV1> =
            provider.load(Default::default())?.payload;

        Ok(ComposingNormalizer {
            decomposing_normalizer,
            canonical_compositions,
        })
    }

    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_uts46)]
    pub(crate) fn try_new_uts46_unstable<D>(provider: &D) -> Result<Self, DataError>
    where
        D: DataProvider<NormalizerUts46DataV1>
            + DataProvider<NormalizerNfdTablesV1>
            + DataProvider<NormalizerNfkdTablesV1>
            + DataProvider<NormalizerNfcV1>
            + ?Sized,
    {
        let decomposing_normalizer =
            DecomposingNormalizer::try_new_uts46_decomposed_unstable(provider)?;

        let canonical_compositions: DataPayload<NormalizerNfcV1> =
            provider.load(Default::default())?.payload;

        Ok(ComposingNormalizer {
            decomposing_normalizer,
            canonical_compositions,
        })
    }
}

/// A sink for checking whether UTF-16 input is already normalized: it accepts
/// only output that matches the expected, already-present text, so any
/// divergence surfaces as an error instead of being written anywhere.
#[cfg(feature = "utf16_iter")]
struct IsNormalizedSinkUtf16<'a> {
    expect: &'a [u16],
}

#[cfg(feature = "utf16_iter")]
impl<'a> IsNormalizedSinkUtf16<'a> {
    pub fn new(slice: &'a [u16]) -> Self {
        IsNormalizedSinkUtf16 { expect: slice }
    }
    pub fn remaining_len(&self) -> usize {
        self.expect.len()
    }
}

#[cfg(feature = "utf16_iter")]
impl write16::Write16 for IsNormalizedSinkUtf16<'_> {
    fn write_slice(&mut self, s: &[u16]) -> core::fmt::Result {
        // Pass-through writes are subslices of the input (which is also the
        // expected output), so pointer equality against the unconsumed
        // remainder is sufficient; anything else means a mismatch.
        #[allow(clippy::indexing_slicing)]
        if core::ptr::eq(s.as_ptr(), self.expect.as_ptr()) {
            self.expect = &self.expect[s.len()..];
            Ok(())
        } else {
            Err(core::fmt::Error {})
        }
    }

    fn write_char(&mut self, c: char) -> core::fmt::Result {
        let mut iter = self.expect.chars();
        if iter.next() == Some(c) {
            self.expect = iter.as_slice();
            Ok(())
        } else {
            Err(core::fmt::Error {})
        }
    }
}

#[cfg(feature = "utf8_iter")]
struct IsNormalizedSinkUtf8<'a> {
    expect: &'a [u8],
}

#[cfg(feature = "utf8_iter")]
impl<'a> IsNormalizedSinkUtf8<'a> {
    pub fn new(slice: &'a [u8]) -> Self {
        IsNormalizedSinkUtf8 { expect: slice }
    }
    pub fn remaining_len(&self) -> usize {
        self.expect.len()
    }
}

#[cfg(feature = "utf8_iter")]
impl core::fmt::Write for IsNormalizedSinkUtf8<'_> {
    fn write_str(&mut self, s: &str) -> core::fmt::Result {
        #[allow(clippy::indexing_slicing)]
        if core::ptr::eq(s.as_ptr(), self.expect.as_ptr()) {
            self.expect = &self.expect[s.len()..];
            Ok(())
        } else {
            Err(core::fmt::Error {})
        }
    }

    fn write_char(&mut self, c: char) -> core::fmt::Result {
        let mut iter = self.expect.chars();
        if iter.next() == Some(c) {
            self.expect = iter.as_slice();
            Ok(())
        } else {
            Err(core::fmt::Error {})
        }
    }
}

struct IsNormalizedSinkStr<'a> {
    expect: &'a str,
}

impl<'a> IsNormalizedSinkStr<'a> {
    pub fn new(slice: &'a str) -> Self {
        IsNormalizedSinkStr { expect: slice }
    }
    pub fn remaining_len(&self) -> usize {
        self.expect.len()
    }
}

impl core::fmt::Write for IsNormalizedSinkStr<'_> {
    fn write_str(&mut self, s: &str) -> core::fmt::Result {
        #[allow(clippy::indexing_slicing)]
        if core::ptr::eq(s.as_ptr(), self.expect.as_ptr()) {
            self.expect = &self.expect[s.len()..];
            Ok(())
        } else {
            Err(core::fmt::Error {})
        }
    }

    fn write_char(&mut self, c: char) -> core::fmt::Result {
        let mut iter = self.expect.chars();
        if iter.next() == Some(c) {
            self.expect = iter.as_str();
            Ok(())
        } else {
            Err(core::fmt::Error {})
        }
    }
}
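
// Illustrative sketch, not part of the original source: shows how a sink like
// `IsNormalizedSinkStr` can back an `is_normalized`-style check; the real
// driver lives in the macro-generated methods. `is_nfc_like` is a hypothetical
// name, and the module assumes the `compiled_data` feature.
#[cfg(all(test, feature = "compiled_data"))]
mod is_normalized_sink_sketch {
    use super::*;

    // Returns true when `text` is already in NFC: every write then either
    // aliases the unconsumed remainder of the input or matches it
    // char-by-char, and nothing of the expected output is left over.
    fn is_nfc_like(text: &str) -> bool {
        let mut sink = IsNormalizedSinkStr::new(text);
        ComposingNormalizer::new_nfc().normalize_to(text, &mut sink).is_ok()
            && sink.remaining_len() == 0
    }

    #[test]
    fn sketch() {
        assert!(is_nfc_like("abc"));
        // "a" + U+0300 composes to U+00E0, so the input is not already NFC.
        assert!(!is_nfc_like("a\u{0300}"));
    }
}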