icu_normalizer/
provider.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
//!
//! <div class="stab unstable">
//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
//! to be stable, their Rust representation might not be. Use with caution.
//! </div>
//!
//! Read more about data providers: [`icu_provider`]
14
15// Provider structs must be stable
16#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
17
18use icu_collections::char16trie::Char16Trie;
19use icu_collections::codepointtrie::CodePointTrie;
20use icu_provider::prelude::*;
21use zerovec::ZeroVec;
22
/// Unit struct that the baked (compiled) data implementations are attached to.
///
/// Only available when the `compiled_data` feature is enabled.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
#[cfg(feature = "compiled_data")]
#[derive(Debug)]
pub struct Baked;
33
// Anonymous const scope: the glob import and the `icu` path module below stay
// local to this block while the generated implementations land on `Baked`.
#[cfg(feature = "compiled_data")]
#[allow(unused_imports)]
const _: () = {
    use icu_normalizer_data::*;

    // NOTE(review): presumably the macros from `icu_normalizer_data` expand to
    // `icu::normalizer::…` / `icu::collections::…` paths, which is why these
    // aliases exist — confirm against the generated code.
    pub mod icu {
        pub use icu_collections as collections;
        pub use crate as normalizer;
    }

    make_provider!(Baked);

    // One invocation per data marker declared in this module.
    impl_normalizer_nfc_v1!(Baked);
    impl_normalizer_nfd_data_v1!(Baked);
    impl_normalizer_nfd_supplement_v1!(Baked);
    impl_normalizer_nfd_tables_v1!(Baked);
    impl_normalizer_nfkd_data_v1!(Baked);
    impl_normalizer_nfkd_tables_v1!(Baked);
    impl_normalizer_uts46_data_v1!(Baked);
};
51
52icu_provider::data_marker!(
53    /// Marker for data for canonical decomposition.
54    NormalizerNfdDataV1,
55    "normalizer/nfd/data/v1",
56    DecompositionData<'static>,
57    is_singleton = true
58);
59icu_provider::data_marker!(
60    /// Marker for additional data for canonical decomposition.
61    NormalizerNfdTablesV1,
62    "normalizer/nfd/tables/v1",
63    DecompositionTables<'static>,
64    is_singleton = true
65);
66icu_provider::data_marker!(
67    /// Marker for data for compatibility decomposition.
68    NormalizerNfkdDataV1,
69    "normalizer/nfkd/data/v1",
70    DecompositionData<'static>,
71    is_singleton = true
72);
73icu_provider::data_marker!(
74    /// Marker for additional data for compatibility decomposition.
75    NormalizerNfkdTablesV1,
76    "normalizer/nfkd/tables/v1",
77    DecompositionTables<'static>,
78    is_singleton = true
79);
80icu_provider::data_marker!(
81    /// Marker for data for UTS-46 decomposition.
82    NormalizerUts46DataV1,
83    "normalizer/uts46/data/v1",
84    DecompositionData<'static>,
85    is_singleton = true
86);
87icu_provider::data_marker!(
88    /// Marker for data for composition.
89    NormalizerNfcV1,
90    "normalizer/nfc/v1",
91    CanonicalCompositions<'static>,
92    is_singleton = true
93);
94icu_provider::data_marker!(
95    /// Marker for additional data for non-recusrsive composition.
96    NormalizerNfdSupplementV1,
97    "normalizer/nfd/supplement/v1",
98    NonRecursiveDecompositionSupplement<'static>,
99    is_singleton = true
100);
101
/// The latest minimum set of markers required by this component.
///
/// Only available when the `datagen` feature is enabled.
#[cfg(feature = "datagen")]
pub const MARKERS: &[DataMarkerInfo] = &[
    // Composition
    NormalizerNfcV1::INFO,
    // Canonical decomposition
    NormalizerNfdDataV1::INFO,
    NormalizerNfdTablesV1::INFO,
    // Compatibility decomposition
    NormalizerNfkdDataV1::INFO,
    NormalizerNfkdTablesV1::INFO,
    // Non-recursive decomposition supplement
    NormalizerNfdSupplementV1::INFO,
    // UTS-46
    NormalizerUts46DataV1::INFO,
];
113
114/// Decomposition data
115///
116/// <div class="stab unstable">
117/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
118/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
119/// to be stable, their Rust representation might not be. Use with caution.
120/// </div>
121#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
122#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
123#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
124#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
125pub struct DecompositionData<'data> {
126    /// Trie for decomposition.
127    #[cfg_attr(feature = "serde", serde(borrow))]
128    pub trie: CodePointTrie<'data, u32>,
129    /// The passthrough bounds of NFD/NFC are lowered to this
130    /// maximum instead. (16-bit, because cannot be higher
131    /// than 0x0300, which is the bound for NFC.)
132    pub passthrough_cap: u16,
133}
134
135icu_provider::data_struct!(
136    DecompositionData<'_>,
137    #[cfg(feature = "datagen")]
138);
139
140/// The expansion tables for cases where the decomposition isn't
141/// contained in the trie value
142///
143/// <div class="stab unstable">
144/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
145/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
146/// to be stable, their Rust representation might not be. Use with caution.
147/// </div>
148#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
149#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
150#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
151#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
152pub struct DecompositionTables<'data> {
153    /// Decompositions that are fully within the BMP
154    #[cfg_attr(feature = "serde", serde(borrow))]
155    pub scalars16: ZeroVec<'data, u16>,
156    /// Decompositions with at least one character outside
157    /// the BMP
158    #[cfg_attr(feature = "serde", serde(borrow))]
159    pub scalars24: ZeroVec<'data, char>,
160}
161
162icu_provider::data_struct!(
163    DecompositionTables<'_>,
164    #[cfg(feature = "datagen")]
165);
166
167/// Non-Hangul canonical compositions
168///
169/// <div class="stab unstable">
170/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
171/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
172/// to be stable, their Rust representation might not be. Use with caution.
173/// </div>
174#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
175#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
176#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
177#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
178pub struct CanonicalCompositions<'data> {
179    /// Trie keys are two-`char` strings with the second
180    /// character coming first. The value, if any, is the
181    /// (non-Hangul) canonical composition.
182    #[cfg_attr(feature = "serde", serde(borrow))]
183    pub canonical_compositions: Char16Trie<'data>,
184}
185
186icu_provider::data_struct!(
187    CanonicalCompositions<'_>,
188    #[cfg(feature = "datagen")]
189);
190
191/// Non-recursive canonical decompositions that differ from
192/// `DecompositionData`.
193///
194/// <div class="stab unstable">
195/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
196/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
197/// to be stable, their Rust representation might not be. Use with caution.
198/// </div>
199#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
200#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
201#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
202#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
203pub struct NonRecursiveDecompositionSupplement<'data> {
204    /// Trie for the supplementary non-recursive decompositions
205    #[cfg_attr(feature = "serde", serde(borrow))]
206    pub trie: CodePointTrie<'data, u32>,
207    /// Decompositions with at least one character outside
208    /// the BMP
209    #[cfg_attr(feature = "serde", serde(borrow))]
210    pub scalars24: ZeroVec<'data, char>,
211}
212
213icu_provider::data_struct!(
214    NonRecursiveDecompositionSupplement<'_>,
215    #[cfg(feature = "datagen")]
216);