// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
//!
//! <div class="stab unstable">
//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
//! to be stable, their Rust representation might not be. Use with caution.
//! </div>
//!
//! Read more about data providers: [`icu_provider`]
1415// Provider structs must be stable
16#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
1718use icu_collections::char16trie::Char16Trie;
19use icu_collections::codepointtrie::CodePointTrie;
20use icu_provider::prelude::*;
21use zerovec::ZeroVec;

#[cfg(feature = "compiled_data")]
#[derive(Debug)]
/// Baked data
///
/// Field-less unit struct, only present when the `compiled_data` feature is
/// enabled. Within this file it is the type handed to `make_provider!` and to
/// the `impl_normalizer_*!` macros.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
pub struct Baked;

// Wires the compiled data into `Baked`. Everything lives inside an anonymous
// `const _` block so that the glob import and the helper `icu` module stay
// local to this block instead of leaking into the enclosing module.
#[cfg(feature = "compiled_data")]
#[allow(unused_imports)]
const _: () = {
    use icu_normalizer_data::*;
    // Re-exports this crate as `icu::normalizer` and `icu_collections` as
    // `icu::collections`.
    // NOTE(review): presumably the macros below resolve their paths through
    // this module — confirm against the macros in `icu_normalizer_data`.
    pub mod icu {
        pub use crate as normalizer;
        pub use icu_collections as collections;
    }
    make_provider!(Baked);
    // One macro invocation per data marker declared in this file.
    impl_normalizer_nfc_v1!(Baked);
    impl_normalizer_nfd_data_v1!(Baked);
    impl_normalizer_nfd_supplement_v1!(Baked);
    impl_normalizer_nfd_tables_v1!(Baked);
    impl_normalizer_nfkd_data_v1!(Baked);
    impl_normalizer_nfkd_tables_v1!(Baked);
    impl_normalizer_uts46_data_v1!(Baked);
};
5152icu_provider::data_marker!(
53/// Marker for data for canonical decomposition.
54NormalizerNfdDataV1,
55"normalizer/nfd/data/v1",
56 DecompositionData<'static>,
57 is_singleton = true
58);
59icu_provider::data_marker!(
60/// Marker for additional data for canonical decomposition.
61NormalizerNfdTablesV1,
62"normalizer/nfd/tables/v1",
63 DecompositionTables<'static>,
64 is_singleton = true
65);
66icu_provider::data_marker!(
67/// Marker for data for compatibility decomposition.
68NormalizerNfkdDataV1,
69"normalizer/nfkd/data/v1",
70 DecompositionData<'static>,
71 is_singleton = true
72);
73icu_provider::data_marker!(
74/// Marker for additional data for compatibility decomposition.
75NormalizerNfkdTablesV1,
76"normalizer/nfkd/tables/v1",
77 DecompositionTables<'static>,
78 is_singleton = true
79);
80icu_provider::data_marker!(
81/// Marker for data for UTS-46 decomposition.
82NormalizerUts46DataV1,
83"normalizer/uts46/data/v1",
84 DecompositionData<'static>,
85 is_singleton = true
86);
87icu_provider::data_marker!(
88/// Marker for data for composition.
89NormalizerNfcV1,
90"normalizer/nfc/v1",
91 CanonicalCompositions<'static>,
92 is_singleton = true
93);
94icu_provider::data_marker!(
95/// Marker for additional data for non-recusrsive composition.
96NormalizerNfdSupplementV1,
97"normalizer/nfd/supplement/v1",
98 NonRecursiveDecompositionSupplement<'static>,
99 is_singleton = true
100);

#[cfg(feature = "datagen")]
/// The latest minimum set of markers required by this component.
///
/// Only compiled with the `datagen` feature. Lists the `INFO` constant of each
/// of the seven markers declared in this file.
pub const MARKERS: &[DataMarkerInfo] = &[
    NormalizerNfcV1::INFO,
    NormalizerNfdDataV1::INFO,
    NormalizerNfdTablesV1::INFO,
    NormalizerNfkdDataV1::INFO,
    NormalizerNfkdTablesV1::INFO,
    NormalizerNfdSupplementV1::INFO,
    NormalizerUts46DataV1::INFO,
];
113114/// Decomposition data
115///
116/// <div class="stab unstable">
117/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
118/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
119/// to be stable, their Rust representation might not be. Use with caution.
120/// </div>
121#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
122#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
123#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
124#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
125pub struct DecompositionData<'data> {
126/// Trie for decomposition.
127#[cfg_attr(feature = "serde", serde(borrow))]
128pub trie: CodePointTrie<'data, u32>,
129/// The passthrough bounds of NFD/NFC are lowered to this
130 /// maximum instead. (16-bit, because cannot be higher
131 /// than 0x0300, which is the bound for NFC.)
132pub passthrough_cap: u16,
133}
134135icu_provider::data_struct!(
136 DecompositionData<'_>,
137#[cfg(feature = "datagen")]
138);
139140/// The expansion tables for cases where the decomposition isn't
141/// contained in the trie value
142///
143/// <div class="stab unstable">
144/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
145/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
146/// to be stable, their Rust representation might not be. Use with caution.
147/// </div>
148#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
149#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
150#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
151#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
152pub struct DecompositionTables<'data> {
153/// Decompositions that are fully within the BMP
154#[cfg_attr(feature = "serde", serde(borrow))]
155pub scalars16: ZeroVec<'data, u16>,
156/// Decompositions with at least one character outside
157 /// the BMP
158#[cfg_attr(feature = "serde", serde(borrow))]
159pub scalars24: ZeroVec<'data, char>,
160}
161162icu_provider::data_struct!(
163 DecompositionTables<'_>,
164#[cfg(feature = "datagen")]
165);
166167/// Non-Hangul canonical compositions
168///
169/// <div class="stab unstable">
170/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
171/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
172/// to be stable, their Rust representation might not be. Use with caution.
173/// </div>
174#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
175#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
176#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
177#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
178pub struct CanonicalCompositions<'data> {
179/// Trie keys are two-`char` strings with the second
180 /// character coming first. The value, if any, is the
181 /// (non-Hangul) canonical composition.
182#[cfg_attr(feature = "serde", serde(borrow))]
183pub canonical_compositions: Char16Trie<'data>,
184}
185186icu_provider::data_struct!(
187 CanonicalCompositions<'_>,
188#[cfg(feature = "datagen")]
189);
190191/// Non-recursive canonical decompositions that differ from
192/// `DecompositionData`.
193///
194/// <div class="stab unstable">
195/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
196/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
197/// to be stable, their Rust representation might not be. Use with caution.
198/// </div>
199#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
200#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
201#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))]
202#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
203pub struct NonRecursiveDecompositionSupplement<'data> {
204/// Trie for the supplementary non-recursive decompositions
205#[cfg_attr(feature = "serde", serde(borrow))]
206pub trie: CodePointTrie<'data, u32>,
207/// Decompositions with at least one character outside
208 /// the BMP
209#[cfg_attr(feature = "serde", serde(borrow))]
210pub scalars24: ZeroVec<'data, char>,
211}
212213icu_provider::data_struct!(
214 NonRecursiveDecompositionSupplement<'_>,
215#[cfg(feature = "datagen")]
216);