icu_properties/
trievalue.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::bidi::BidiMirroringGlyph;
6use crate::props::{
7    BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
8    GraphemeClusterBreak, HangulSyllableType, IndicConjunctBreak, IndicSyllabicCategory,
9    JoiningType, LineBreak, Script, SentenceBreak, VerticalOrientation, WordBreak,
10};
11use crate::script::ScriptWithExt;
12use core::convert::TryInto;
13use core::num::TryFromIntError;
14use zerovec::ule::{AsULE, RawBytesULE};
15
16use icu_collections::codepointtrie::TrieValue;
17
18use core::convert::TryFrom;
19
20impl TrieValue for CanonicalCombiningClass {
21    type TryFromU32Error = TryFromIntError;
22
23    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
24        u8::try_from(i).map(Self)
25    }
26
27    fn to_u32(self) -> u32 {
28        u32::from(self.0)
29    }
30}
31
32impl TrieValue for BidiClass {
33    type TryFromU32Error = TryFromIntError;
34
35    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
36        u8::try_from(i).map(Self)
37    }
38
39    fn to_u32(self) -> u32 {
40        u32::from(self.0)
41    }
42}
43
44impl TrieValue for GeneralCategory {
45    type TryFromU32Error = &'static str;
46
47    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
48        // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
49        GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX))
50            .ok_or("Cannot parse GeneralCategory from integer")
51    }
52
53    fn to_u32(self) -> u32 {
54        u32::from(self as u8)
55    }
56}
57
58impl TrieValue for Script {
59    type TryFromU32Error = TryFromIntError;
60
61    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
62        u16::try_from(i).map(Script)
63    }
64
65    fn to_u32(self) -> u32 {
66        u32::from(self.0)
67    }
68}
69
70impl TrieValue for HangulSyllableType {
71    type TryFromU32Error = TryFromIntError;
72
73    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
74        u8::try_from(i).map(Self)
75    }
76
77    fn to_u32(self) -> u32 {
78        u32::from(self.0)
79    }
80}
81
82impl TrieValue for ScriptWithExt {
83    type TryFromU32Error = TryFromIntError;
84
85    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
86        u16::try_from(i).map(Self)
87    }
88
89    fn to_u32(self) -> u32 {
90        u32::from(self.0)
91    }
92}
93
94impl TrieValue for EastAsianWidth {
95    type TryFromU32Error = TryFromIntError;
96
97    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
98        u8::try_from(i).map(Self)
99    }
100
101    fn to_u32(self) -> u32 {
102        u32::from(self.0)
103    }
104}
105
106impl TrieValue for LineBreak {
107    type TryFromU32Error = TryFromIntError;
108
109    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
110        u8::try_from(i).map(Self)
111    }
112
113    fn to_u32(self) -> u32 {
114        u32::from(self.0)
115    }
116}
117
118impl TrieValue for GraphemeClusterBreak {
119    type TryFromU32Error = TryFromIntError;
120
121    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
122        u8::try_from(i).map(Self)
123    }
124
125    fn to_u32(self) -> u32 {
126        u32::from(self.0)
127    }
128}
129
130impl TrieValue for WordBreak {
131    type TryFromU32Error = TryFromIntError;
132
133    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
134        u8::try_from(i).map(Self)
135    }
136
137    fn to_u32(self) -> u32 {
138        u32::from(self.0)
139    }
140}
141
142impl TrieValue for SentenceBreak {
143    type TryFromU32Error = TryFromIntError;
144
145    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
146        u8::try_from(i).map(Self)
147    }
148
149    fn to_u32(self) -> u32 {
150        u32::from(self.0)
151    }
152}
153
154impl TrieValue for IndicConjunctBreak {
155    type TryFromU32Error = TryFromIntError;
156
157    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
158        u8::try_from(i).map(Self)
159    }
160
161    fn to_u32(self) -> u32 {
162        u32::from(self.0)
163    }
164}
165
166impl TrieValue for IndicSyllabicCategory {
167    type TryFromU32Error = TryFromIntError;
168
169    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
170        u8::try_from(i).map(Self)
171    }
172
173    fn to_u32(self) -> u32 {
174        u32::from(self.0)
175    }
176}
177
178impl TrieValue for VerticalOrientation {
179    type TryFromU32Error = TryFromIntError;
180
181    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
182        u8::try_from(i).map(Self)
183    }
184
185    fn to_u32(self) -> u32 {
186        u32::from(self.0)
187    }
188}
189
190// GCG is not used inside tries, but it is used in the name lookup type, and we want
191// to squeeze it into a u16 for storage. Its named mask values are specced so we can
192// do this in code.
193//
194// This is done by:
195// - Single-value masks are translated to their corresponding GeneralCategory values
196// - we know all of the multi-value masks and we give them special values
197// - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata
198//
199// In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except
200// with malformed ICU4X generated data.
201impl AsULE for GeneralCategoryGroup {
202    type ULE = RawBytesULE<2>;
203    fn to_unaligned(self) -> Self::ULE {
204        let value = gcg_to_packed_u16(self);
205        value.to_unaligned()
206    }
207    fn from_unaligned(ule: Self::ULE) -> Self {
208        let value = ule.as_unsigned_int();
209        packed_u16_to_gcg(value)
210    }
211}
212
213fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
214    match value {
215        0xFFFF => GeneralCategoryGroup::CasedLetter,
216        0xFFFE => GeneralCategoryGroup::Letter,
217        0xFFFD => GeneralCategoryGroup::Mark,
218        0xFFFC => GeneralCategoryGroup::Number,
219        0xFFFB => GeneralCategoryGroup::Separator,
220        0xFFFA => GeneralCategoryGroup::Other,
221        0xFFF9 => GeneralCategoryGroup::Punctuation,
222        0xFFF8 => GeneralCategoryGroup::Symbol,
223        v if v < 32 => GeneralCategory::new_from_u8(v as u8)
224            .map(|gc| gc.into())
225            .unwrap_or(GeneralCategoryGroup(0)),
226        // unknown values produce an empty mask
227        _ => GeneralCategoryGroup(0),
228    }
229}
230
231fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
232    // if it's a single property, translate to that property
233    if gcg.0.is_power_of_two() {
234        // inverse operation of a bitshift
235        gcg.0.trailing_zeros() as u16
236    } else {
237        match gcg {
238            GeneralCategoryGroup::CasedLetter => 0xFFFF,
239            GeneralCategoryGroup::Letter => 0xFFFE,
240            GeneralCategoryGroup::Mark => 0xFFFD,
241            GeneralCategoryGroup::Number => 0xFFFC,
242            GeneralCategoryGroup::Separator => 0xFFFB,
243            GeneralCategoryGroup::Other => 0xFFFA,
244            GeneralCategoryGroup::Punctuation => 0xFFF9,
245            GeneralCategoryGroup::Symbol => 0xFFF8,
246            _ => 0xFF00, // random sentinel value
247        }
248    }
249}
250
251impl TrieValue for GeneralCategoryGroup {
252    type TryFromU32Error = TryFromIntError;
253    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
254        // Even though we're dealing with u32s here, TrieValue is about converting
255        // trie storage types to the actual type. This type will always be a packed u16
256        // in our case since the names map upcasts from u16
257        u16::try_from(i).map(packed_u16_to_gcg)
258    }
259
260    fn to_u32(self) -> u32 {
261        u32::from(gcg_to_packed_u16(self))
262    }
263}
264
265impl TrieValue for BidiMirroringGlyph {
266    type TryFromU32Error = u32;
267
268    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
269        let code_point = i & 0x1FFFFF;
270        let mirroring_glyph = if code_point == 0 {
271            None
272        } else {
273            Some(char::try_from_u32(code_point).map_err(|_| i)?)
274        };
275        let mirrored = ((i >> 21) & 0x1) == 1;
276        let paired_bracket_type = {
277            let value = ((i >> 22) & 0x3) as u8;
278            match value {
279                0 => crate::bidi::BidiPairedBracketType::None,
280                1 => crate::bidi::BidiPairedBracketType::Open,
281                2 => crate::bidi::BidiPairedBracketType::Close,
282                _ => return Err(i),
283            }
284        };
285        Ok(Self {
286            mirrored,
287            mirroring_glyph,
288            paired_bracket_type,
289        })
290    }
291
292    fn to_u32(self) -> u32 {
293        self.mirroring_glyph.unwrap_or_default() as u32
294            | ((self.mirrored as u32) << 21)
295            | (match self.paired_bracket_type {
296                crate::bidi::BidiPairedBracketType::None => 0,
297                crate::bidi::BidiPairedBracketType::Open => 1,
298                crate::bidi::BidiPairedBracketType::Close => 2,
299            } << 22)
300    }
301}
302
303impl TrieValue for JoiningType {
304    type TryFromU32Error = TryFromIntError;
305
306    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
307        u8::try_from(i).map(Self)
308    }
309
310    fn to_u32(self) -> u32 {
311        u32::from(self.0)
312    }
313}