icu_collections/codepointtrie/
cptrie.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::codepointtrie::error::Error;
6use crate::codepointtrie::impl_const::*;
7
8#[cfg(feature = "alloc")]
9use crate::codepointinvlist::CodePointInversionList;
10use core::char::CharTryFromError;
11use core::convert::Infallible;
12use core::convert::TryFrom;
13use core::fmt::Display;
14#[cfg(feature = "alloc")]
15use core::iter::FromIterator;
16use core::num::TryFromIntError;
17use core::ops::RangeInclusive;
18use yoke::Yokeable;
19use zerofrom::ZeroFrom;
20#[cfg(feature = "alloc")]
21use zerovec::ule::UleError;
22use zerovec::ZeroVec;
23
24/// The type of trie represents whether the trie has an optimization that
25/// would make it smaller or faster.
26///
27/// Regarding performance, a trie being a small or fast type affects the number of array lookups
28/// needed for code points in the range `[0x1000, 0x10000)`. In this range, `Small` tries use 4 array lookups,
29/// while `Fast` tries use 2 array lookups.
30/// Code points before the interval (in `[0, 0x1000)`) will always use 2 array lookups.
31/// Code points after the interval (in `[0x10000, 0x10FFFF]`) will always use 4 array lookups.
32///
33/// Regarding size, `Fast` type tries are larger than `Small` type tries because the minimum size of
34/// the index array is larger. The minimum size is the "fast max" limit, which is the limit of the range
35/// of code points with 2 array lookups.
36///
37/// See the document [Unicode Properties and Code Point Tries in ICU4X](https://github.com/unicode-org/icu4x/blob/main/documents/design/properties_code_point_trie.md).
38///
39/// Also see [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
40#[derive(Clone, Copy, PartialEq, Debug, Eq)]
41#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
42#[cfg_attr(feature = "databake", derive(databake::Bake))]
43#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
44pub enum TrieType {
45    /// Represents the "fast" type code point tries for the
46    /// [`TrieType`] trait. The "fast max" limit is set to `0xffff`.
47    Fast = 0,
48    /// Represents the "small" type code point tries for the
49    /// [`TrieType`] trait. The "fast max" limit is set to `0x0fff`.
50    Small = 1,
51}
52
53// TrieValue trait
54
55// AsULE is AsUnalignedLittleEndian, i.e. "allowed in a zerovec"
56
57/// A trait representing the values stored in the data array of a [`CodePointTrie`].
58/// This trait is used as a type parameter in constructing a `CodePointTrie`.
59///
60/// This trait can be implemented on anything that can be represented as a u32s worth of data.
61pub trait TrieValue: Copy + Eq + PartialEq + zerovec::ule::AsULE + 'static {
62    /// Last-resort fallback value to return if we cannot read data from the trie.
63    ///
64    /// In most cases, the error value is read from the last element of the `data` array,
65    /// this value is used for empty codepointtrie arrays
66    /// Error type when converting from a u32 to this `TrieValue`.
67    type TryFromU32Error: Display;
68    /// A parsing function that is primarily motivated by deserialization contexts.
69    /// When the serialization type width is smaller than 32 bits, then it is expected
70    /// that the call site will widen the value to a `u32` first.
71    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>;
72
73    /// A method for converting back to a `u32` that can roundtrip through
74    /// [`Self::try_from_u32()`]. The default implementation of this trait
75    /// method panics in debug mode and returns 0 in release mode.
76    ///
77    /// This method is allowed to have GIGO behavior when fed a value that has
78    /// no corresponding `u32` (since such values cannot be stored in the trie)
79    fn to_u32(self) -> u32;
80}
81
82macro_rules! impl_primitive_trie_value {
83    ($primitive:ty, $error:ty) => {
84        impl TrieValue for $primitive {
85            type TryFromU32Error = $error;
86            fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
87                Self::try_from(i)
88            }
89
90            fn to_u32(self) -> u32 {
91                // bitcast when the same size, zero-extend/sign-extend
92                // when not the same size
93                self as u32
94            }
95        }
96    };
97}
98
99impl_primitive_trie_value!(u8, TryFromIntError);
100impl_primitive_trie_value!(u16, TryFromIntError);
101impl_primitive_trie_value!(u32, Infallible);
102impl_primitive_trie_value!(i8, TryFromIntError);
103impl_primitive_trie_value!(i16, TryFromIntError);
104impl_primitive_trie_value!(i32, TryFromIntError);
105impl_primitive_trie_value!(char, CharTryFromError);
106
107/// Helper function used by [`get_range`]. Converts occurrences of trie's null
108/// value into the provided `null_value`.
109///
110/// Note: the ICU version of this helper function uses a `ValueFilter` function
111/// to apply a transform on a non-null value. But currently, this implementation
112/// stops short of that functionality, and instead leaves the non-null trie value
113/// untouched. This is equivalent to having a `ValueFilter` function that is the
114/// identity function.
115fn maybe_filter_value<T: TrieValue>(value: T, trie_null_value: T, null_value: T) -> T {
116    if value == trie_null_value {
117        null_value
118    } else {
119        value
120    }
121}
122
123/// This struct represents a de-serialized [`CodePointTrie`] that was exported from
124/// ICU binary data.
125///
126/// For more information:
127/// - [ICU Site design doc](http://site.icu-project.org/design/struct/utrie)
128/// - [ICU User Guide section on Properties lookup](https://unicode-org.github.io/icu/userguide/strings/properties.html#lookup)
129// serde impls in crate::serde
130#[derive(Debug, Eq, PartialEq, Yokeable, ZeroFrom)]
131pub struct CodePointTrie<'trie, T: TrieValue> {
132    pub(crate) header: CodePointTrieHeader,
133    pub(crate) index: ZeroVec<'trie, u16>,
134    pub(crate) data: ZeroVec<'trie, T>,
135    // serde impl skips this field
136    #[zerofrom(clone)] // TrieValue is Copy, this allows us to avoid
137    // a T: ZeroFrom bound
138    pub(crate) error_value: T,
139}
140
141/// This struct contains the fixed-length header fields of a [`CodePointTrie`].
142#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
143#[cfg_attr(feature = "databake", derive(databake::Bake))]
144#[cfg_attr(feature = "databake", databake(path = icu_collections::codepointtrie))]
145#[derive(Copy, Clone, Debug, Eq, PartialEq, Yokeable, ZeroFrom)]
146pub struct CodePointTrieHeader {
147    /// The code point of the start of the last range of the trie. A
148    /// range is defined as a partition of the code point space such that the
149    /// value in this trie associated with all code points of the same range is
150    /// the same.
151    ///
152    /// For the property value data for many Unicode properties,
153    /// often times, `high_start` is `U+10000` or lower. In such cases, not
154    /// reserving space in the `index` array for duplicate values is a large
155    /// savings. The "highValue" associated with the `high_start` range is
156    /// stored at the second-to-last position of the `data` array.
157    /// (See `impl_const::HIGH_VALUE_NEG_DATA_OFFSET`.)
158    pub high_start: u32,
159    /// A version of the `high_start` value that is right-shifted 12 spaces,
160    /// but is rounded up to a multiple `0x1000` for easy testing from UTF-8
161    /// lead bytes.
162    pub shifted12_high_start: u16,
163    /// Offset for the null block in the "index-3" table of the `index` array.
164    /// Set to an impossibly high value (e.g., `0xffff`) if there is no
165    /// dedicated index-3 null block.
166    pub index3_null_offset: u16,
167    /// Internal data null block offset, not shifted.
168    /// Set to an impossibly high value (e.g., `0xfffff`) if there is no
169    /// dedicated data null block.
170    pub data_null_offset: u32,
171    /// The value stored in the trie that represents a null value being
172    /// associated to a code point.
173    pub null_value: u32,
174    /// The enum value representing the type of trie, where trie type is as it
175    /// is defined in ICU (ex: Fast, Small).
176    pub trie_type: TrieType,
177}
178
179impl TryFrom<u8> for TrieType {
180    type Error = crate::codepointtrie::error::Error;
181
182    fn try_from(trie_type_int: u8) -> Result<TrieType, crate::codepointtrie::error::Error> {
183        match trie_type_int {
184            0 => Ok(TrieType::Fast),
185            1 => Ok(TrieType::Small),
186            _ => Err(crate::codepointtrie::error::Error::FromDeserialized {
187                reason: "Cannot parse value for trie_type",
188            }),
189        }
190    }
191}
192
193// Helper macro that turns arithmetic into wrapping-in-release, checked-in-debug arithmetic
194//
195// This is rustc's default behavior anyway, however some projects like Android deliberately
196// enable overflow checks. CodePointTrie::get() is intended to be used in Android bionic which
197// cares about codesize and we don't want the pile of panicking infrastructure brought in by overflow
198// checks, so we force wrapping in release.
199// See #6052
200macro_rules! w(
201    // Note: these matchers are not perfect since you cannot have an operator after an expr matcher
202    // Use variables if you need complex first operands.
203    ($a:tt + $b:expr) => {
204        {
205            #[allow(unused_parens)]
206            let a = $a;
207            let b = $b;
208            debug_assert!(a.checked_add(b).is_some());
209            $a.wrapping_add($b)
210        }
211    };
212    ($a:tt - $b:expr) => {
213
214        {
215            #[allow(unused_parens)]
216            let a = $a;
217            let b = $b;
218            debug_assert!(a.checked_sub(b).is_some());
219            $a.wrapping_sub($b)
220        }
221    };
222    ($a:tt * $b:expr) => {
223        {
224            #[allow(unused_parens)]
225            let a = $a;
226            let b = $b;
227            debug_assert!(a.checked_mul(b).is_some());
228            $a.wrapping_mul($b)
229        }
230    };
231);
232
233impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
234    #[doc(hidden)] // databake internal
235    pub const fn from_parts(
236        header: CodePointTrieHeader,
237        index: ZeroVec<'trie, u16>,
238        data: ZeroVec<'trie, T>,
239        error_value: T,
240    ) -> Self {
241        Self {
242            header,
243            index,
244            data,
245            error_value,
246        }
247    }
248
249    /// Returns a new [`CodePointTrie`] backed by borrowed data for the `index`
250    /// array and `data` array, whose data values have width `W`.
251    pub fn try_new(
252        header: CodePointTrieHeader,
253        index: ZeroVec<'trie, u16>,
254        data: ZeroVec<'trie, T>,
255    ) -> Result<CodePointTrie<'trie, T>, Error> {
256        // Validation invariants are not needed here when constructing a new
257        // `CodePointTrie` because:
258        //
259        // - Rust includes the size of a slice (or Vec or similar), which allows it
260        //   to prevent lookups at out-of-bounds indices, whereas in C++, it is the
261        //   programmer's responsibility to keep track of length info.
262        // - For lookups into collections, Rust guarantees that a fallback value will
263        //   be returned in the case of `.get()` encountering a lookup error, via
264        //   the `Option` type.
265        // - The `ZeroVec` serializer stores the length of the array along with the
266        //   ZeroVec data, meaning that a deserializer would also see that length info.
267
268        let error_value = data.last().ok_or(Error::EmptyDataVector)?;
269        let trie: CodePointTrie<'trie, T> = CodePointTrie {
270            header,
271            index,
272            data,
273            error_value,
274        };
275        Ok(trie)
276    }
277
278    /// Returns the position in the data array containing the trie's stored
279    /// error value.
280    #[inline(always)] // `always` based on normalizer benchmarking
281    fn trie_error_val_index(&self) -> u32 {
282        // We use wrapping_sub here to avoid panicky overflow checks.
283        // len should always be > 1, but if it isn't this will just cause GIGO behavior of producing
284        // None on `.get()`
285        debug_assert!(self.data.len() as u32 >= ERROR_VALUE_NEG_DATA_OFFSET);
286        w!((self.data.len() as u32) - ERROR_VALUE_NEG_DATA_OFFSET)
287    }
288
289    fn internal_small_index(&self, code_point: u32) -> u32 {
290        // We use wrapping arithmetic here to avoid overflow checks making their way into binaries
291        // with overflow checks enabled. Ultimately this code ends up as a checked index, so any
292        // bugs here will cause GIGO
293        let mut index1_pos: u32 = code_point >> SHIFT_1;
294        if self.header.trie_type == TrieType::Fast {
295            debug_assert!(
296                FAST_TYPE_FAST_INDEXING_MAX < code_point && code_point < self.header.high_start
297            );
298            index1_pos = w!(index1_pos + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH);
299        } else {
300            assert!(code_point < self.header.high_start && self.header.high_start > SMALL_LIMIT);
301            index1_pos = w!(index1_pos + SMALL_INDEX_LENGTH);
302        }
303        let index1_val = if let Some(index1_val) = self.index.get(index1_pos as usize) {
304            index1_val
305        } else {
306            return self.trie_error_val_index();
307        };
308        let index3_block_idx: u32 =
309            w!((index1_val as u32) + (code_point >> SHIFT_2) & INDEX_2_MASK);
310        let mut index3_block: u32 =
311            if let Some(index3_block) = self.index.get(index3_block_idx as usize) {
312                index3_block as u32
313            } else {
314                return self.trie_error_val_index();
315            };
316        let mut index3_pos: u32 = (code_point >> SHIFT_3) & INDEX_3_MASK;
317        let mut data_block: u32;
318        if index3_block & 0x8000 == 0 {
319            // 16-bit indexes
320            data_block =
321                if let Some(data_block) = self.index.get(w!(index3_block + index3_pos) as usize) {
322                    data_block as u32
323                } else {
324                    return self.trie_error_val_index();
325                };
326        } else {
327            // 18-bit indexes stored in groups of 9 entries per 8 indexes.
328            index3_block = w!((index3_block & 0x7fff) + w!((index3_pos & !7) + index3_pos >> 3));
329            index3_pos &= 7;
330            data_block = if let Some(data_block) = self.index.get(index3_block as usize) {
331                data_block as u32
332            } else {
333                return self.trie_error_val_index();
334            };
335            data_block = (data_block << w!(2u32 + w!(2u32 * index3_pos))) & 0x30000;
336            index3_block += 1;
337            data_block =
338                if let Some(index3_val) = self.index.get(w!(index3_block + index3_pos) as usize) {
339                    data_block | (index3_val as u32)
340                } else {
341                    return self.trie_error_val_index();
342                };
343        }
344        // Returns data_pos == data_block (offset) +
345        //     portion of code_point bit field for last (4th) lookup
346        w!(data_block + code_point & SMALL_DATA_MASK)
347    }
348
349    /// Returns the position in the `data` array for the given code point,
350    /// where this code point is at or above the fast limit associated for the
351    /// `trie_type`. We will refer to that limit as "`fastMax`" here.
352    ///
353    /// A lookup of the value in the code point trie for a code point in the
354    /// code point space range [`fastMax`, `high_start`) will be a 4-step
355    /// lookup: 3 lookups in the `index` array and one lookup in the `data`
356    /// array. Lookups for code points in the range [`high_start`,
357    /// `CODE_POINT_MAX`] are short-circuited to be a single lookup, see
358    /// [`CodePointTrieHeader::high_start`].
359    fn small_index(&self, code_point: u32) -> u32 {
360        if code_point >= self.header.high_start {
361            w!((self.data.len() as u32) - HIGH_VALUE_NEG_DATA_OFFSET)
362        } else {
363            self.internal_small_index(code_point) // helper fn
364        }
365    }
366
367    /// Returns the position in the `data` array for the given code point,
368    /// where this code point is below the fast limit associated for the
369    /// `trie type`. We will refer to that limit as "`fastMax`" here.
370    ///
371    /// A lookup of the value in the code point trie for a code point in the
372    /// code point space range [0, `fastMax`) will be a 2-step lookup: 1
373    /// lookup in the `index` array and one lookup in the `data` array. By
374    /// design, for trie type `T`, there is an element allocated in the `index`
375    /// array for each block of code points in [0, `fastMax`), which in
376    /// turn guarantees that those code points are represented and only need 1
377    /// lookup.
378    #[inline(always)] // `always` based on normalizer benchmarking
379    fn fast_index(&self, code_point: u32) -> u32 {
380        let index_array_pos: u32 = code_point >> FAST_TYPE_SHIFT;
381        let index_array_val: u16 =
382            if let Some(index_array_val) = self.index.get(index_array_pos as usize) {
383                index_array_val
384            } else {
385                return self.trie_error_val_index();
386            };
387        let masked_cp = code_point & FAST_TYPE_DATA_MASK;
388        let index_array_val = index_array_val as u32;
389        let fast_index_val: u32 = w!(index_array_val + masked_cp);
390        fast_index_val
391    }
392
393    /// Returns the value that is associated with `code_point` in this [`CodePointTrie`].
394    ///
395    /// # Examples
396    ///
397    /// ```
398    /// use icu::collections::codepointtrie::planes;
399    /// let trie = planes::get_planes_trie();
400    ///
401    /// assert_eq!(0, trie.get32(0x41)); // 'A' as u32
402    /// assert_eq!(0, trie.get32(0x13E0)); // 'Ꮰ' as u32
403    /// assert_eq!(1, trie.get32(0x10044)); // '𐁄' as u32
404    /// ```
405    #[inline(always)] // `always` based on normalizer benchmarking
406    pub fn get32(&self, code_point: u32) -> T {
407        // If we cannot read from the data array, then return the sentinel value
408        // self.error_value() for the instance type for T: TrieValue.
409        self.get32_ule(code_point)
410            .map(|t| T::from_unaligned(*t))
411            .unwrap_or(self.error_value)
412    }
413
414    /// Returns the value that is associated with `char` in this [`CodePointTrie`].
415    ///
416    /// # Examples
417    ///
418    /// ```
419    /// use icu::collections::codepointtrie::planes;
420    /// let trie = planes::get_planes_trie();
421    ///
422    /// assert_eq!(0, trie.get('A')); // 'A' as u32
423    /// assert_eq!(0, trie.get('Ꮰ')); // 'Ꮰ' as u32
424    /// assert_eq!(1, trie.get('𐁄')); // '𐁄' as u32
425    /// ```
426    #[inline(always)]
427    pub fn get(&self, c: char) -> T {
428        self.get32(u32::from(c))
429    }
430
431    /// Returns a reference to the ULE of the value that is associated with `code_point` in this [`CodePointTrie`].
432    ///
433    /// # Examples
434    ///
435    /// ```
436    /// use icu::collections::codepointtrie::planes;
437    /// let trie = planes::get_planes_trie();
438    ///
439    /// assert_eq!(Some(&0), trie.get32_ule(0x41)); // 'A' as u32
440    /// assert_eq!(Some(&0), trie.get32_ule(0x13E0)); // 'Ꮰ' as u32
441    /// assert_eq!(Some(&1), trie.get32_ule(0x10044)); // '𐁄' as u32
442    /// ```
443    #[inline(always)] // `always` based on normalizer benchmarking
444    pub fn get32_ule(&self, code_point: u32) -> Option<&T::ULE> {
445        // All code points up to the fast max limit are represented
446        // individually in the `index` array to hold their `data` array position, and
447        // thus only need 2 lookups for a [CodePointTrie::get()](`crate::codepointtrie::CodePointTrie::get`).
448        // Code points above the "fast max" limit require 4 lookups.
449        let fast_max = match self.header.trie_type {
450            TrieType::Fast => FAST_TYPE_FAST_INDEXING_MAX,
451            TrieType::Small => SMALL_TYPE_FAST_INDEXING_MAX,
452        };
453        let data_pos: u32 = if code_point <= fast_max {
454            Self::fast_index(self, code_point)
455        } else if code_point <= CODE_POINT_MAX {
456            Self::small_index(self, code_point)
457        } else {
458            self.trie_error_val_index()
459        };
460        // Returns the trie value (or trie's error value).
461        self.data.as_ule_slice().get(data_pos as usize)
462    }
463
464    /// Converts the [`CodePointTrie`] into one that returns another type of the same size.
465    ///
466    /// Borrowed data remains borrowed, and owned data remains owned.
467    ///
468    /// If the old and new types are not the same size, use
469    /// [`CodePointTrie::try_alloc_map_value`].
470    ///
471    /// # Panics
472    ///
473    /// Panics if `T` and `P` are different sizes.
474    ///
475    /// More specifically, panics if [`ZeroVec::try_into_converted()`] panics when converting
476    /// `ZeroVec<T>` into `ZeroVec<P>`, which happens if `T::ULE` and `P::ULE` differ in size.
477    ///
478    /// # Examples
479    ///
480    /// ```no_run
481    /// use icu::collections::codepointtrie::planes;
482    /// use icu::collections::codepointtrie::CodePointTrie;
483    ///
484    /// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
485    /// let planes_trie_i8: CodePointTrie<i8> =
486    ///     planes_trie_u8.try_into_converted().expect("infallible");
487    ///
488    /// assert_eq!(planes_trie_i8.get32(0x30000), 3);
489    /// ```
490    #[cfg(feature = "alloc")]
491    pub fn try_into_converted<P>(self) -> Result<CodePointTrie<'trie, P>, UleError>
492    where
493        P: TrieValue,
494    {
495        let converted_data = self.data.try_into_converted()?;
496        let error_ule = self.error_value.to_unaligned();
497        let slice = &[error_ule];
498        let error_vec = ZeroVec::<T>::new_borrowed(slice);
499        let error_converted = error_vec.try_into_converted::<P>()?;
500        #[allow(clippy::expect_used)] // we know this cannot fail
501        Ok(CodePointTrie {
502            header: self.header,
503            index: self.index,
504            data: converted_data,
505            error_value: error_converted
506                .get(0)
507                .expect("vector known to have one element"),
508        })
509    }
510
511    /// Maps the [`CodePointTrie`] into one that returns a different type.
512    ///
513    /// This function returns owned data.
514    ///
515    /// If the old and new types are the same size, use the more efficient
516    /// [`CodePointTrie::try_into_converted`].
517    ///
518    /// # Examples
519    ///
520    /// ```
521    /// use icu::collections::codepointtrie::planes;
522    /// use icu::collections::codepointtrie::CodePointTrie;
523    ///
524    /// let planes_trie_u8: CodePointTrie<u8> = planes::get_planes_trie();
525    /// let planes_trie_u16: CodePointTrie<u16> = planes_trie_u8
526    ///     .try_alloc_map_value(TryFrom::try_from)
527    ///     .expect("infallible");
528    ///
529    /// assert_eq!(planes_trie_u16.get32(0x30000), 3);
530    /// ```
531    #[cfg(feature = "alloc")]
532    pub fn try_alloc_map_value<P, E>(
533        &self,
534        mut f: impl FnMut(T) -> Result<P, E>,
535    ) -> Result<CodePointTrie<'trie, P>, E>
536    where
537        P: TrieValue,
538    {
539        let error_converted = f(self.error_value)?;
540        let converted_data = self.data.iter().map(f).collect::<Result<ZeroVec<P>, E>>()?;
541        Ok(CodePointTrie {
542            header: self.header,
543            index: self.index.clone(),
544            data: converted_data,
545            error_value: error_converted,
546        })
547    }
548
549    /// Returns a [`CodePointMapRange`] struct which represents a range of code
550    /// points associated with the same trie value. The returned range will be
551    /// the longest stretch of consecutive code points starting at `start` that
552    /// share this value.
553    ///
554    /// This method is designed to use the internal details of
555    /// the structure of [`CodePointTrie`] to be optimally efficient. This will
556    /// outperform a naive approach that just uses [`CodePointTrie::get()`].
557    ///
558    /// This method provides lower-level functionality that can be used in the
559    /// implementation of other methods that are more convenient to the user.
560    /// To obtain an optimal partition of the code point space for
561    /// this trie resulting in the fewest number of ranges, see
562    /// [`CodePointTrie::iter_ranges()`].
563    ///
564    /// # Examples
565    ///
566    /// ```
567    /// use icu::collections::codepointtrie::planes;
568    ///
569    /// let trie = planes::get_planes_trie();
570    ///
571    /// const CODE_POINT_MAX: u32 = 0x10ffff;
572    /// let start = 0x1_0000;
573    /// let exp_end = 0x1_ffff;
574    ///
575    /// let start_val = trie.get32(start);
576    /// assert_eq!(trie.get32(exp_end), start_val);
577    /// assert_ne!(trie.get32(exp_end + 1), start_val);
578    ///
579    /// use icu::collections::codepointtrie::CodePointMapRange;
580    ///
581    /// let cpm_range: CodePointMapRange<u8> = trie.get_range(start).unwrap();
582    ///
583    /// assert_eq!(cpm_range.range.start(), &start);
584    /// assert_eq!(cpm_range.range.end(), &exp_end);
585    /// assert_eq!(cpm_range.value, start_val);
586    ///
587    /// // `start` can be any code point, whether or not it lies on the boundary
588    /// // of a maximally large range that still contains `start`
589    ///
590    /// let submaximal_1_start = start + 0x1234;
591    /// let submaximal_1 = trie.get_range(submaximal_1_start).unwrap();
592    /// assert_eq!(submaximal_1.range.start(), &0x1_1234);
593    /// assert_eq!(submaximal_1.range.end(), &0x1_ffff);
594    /// assert_eq!(submaximal_1.value, start_val);
595    ///
596    /// let submaximal_2_start = start + 0xffff;
597    /// let submaximal_2 = trie.get_range(submaximal_2_start).unwrap();
598    /// assert_eq!(submaximal_2.range.start(), &0x1_ffff);
599    /// assert_eq!(submaximal_2.range.end(), &0x1_ffff);
600    /// assert_eq!(submaximal_2.value, start_val);
601    /// ```
602    pub fn get_range(&self, start: u32) -> Option<CodePointMapRange<T>> {
603        // Exit early if the start code point is out of range, or if it is
604        // in the last range of code points in high_start..=CODE_POINT_MAX
605        // (start- and end-inclusive) that all share the same trie value.
606        if CODE_POINT_MAX < start {
607            return None;
608        }
609        if start >= self.header.high_start {
610            let di: usize = self.data.len() - (HIGH_VALUE_NEG_DATA_OFFSET as usize);
611            let value: T = self.data.get(di)?;
612            return Some(CodePointMapRange {
613                range: start..=CODE_POINT_MAX,
614                value,
615            });
616        }
617
618        let null_value: T = T::try_from_u32(self.header.null_value).ok()?;
619
620        let mut prev_i3_block: u32 = u32::MAX; // using u32::MAX (instead of -1 as an i32 in ICU)
621        let mut prev_block: u32 = u32::MAX; // using u32::MAX (instead of -1 as an i32 in ICU)
622        let mut c: u32 = start;
623        let mut trie_value: T = self.error_value();
624        let mut value: T = self.error_value();
625        let mut have_value: bool = false;
626
627        loop {
628            let i3_block: u32;
629            let mut i3: u32;
630            let i3_block_length: u32;
631            let data_block_length: u32;
632
633            // Initialize values before beginning the iteration in the subsequent
634            // `loop` block. In particular, use the "i3*" local variables
635            // (representing the `index` array position's offset + increment
636            // for a 3rd-level trie lookup) to help initialize the data block
637            // variable `block` in the loop for the `data` array.
638            //
639            // When a lookup code point is <= the trie's *_FAST_INDEXING_MAX that
640            // corresponds to its `trie_type`, the lookup only takes 2 steps
641            // (once into the `index`, once into the `data` array); otherwise,
642            // takes 4 steps (3 iterative lookups into the `index`, once more
643            // into the `data` array). So for convenience's sake, when we have the
644            // 2-stage lookup, reuse the "i3*" variable names for the first lookup.
645            if c <= 0xffff
646                && (self.header.trie_type == TrieType::Fast || c <= SMALL_TYPE_FAST_INDEXING_MAX)
647            {
648                i3_block = 0;
649                i3 = c >> FAST_TYPE_SHIFT;
650                i3_block_length = if self.header.trie_type == TrieType::Fast {
651                    BMP_INDEX_LENGTH
652                } else {
653                    SMALL_INDEX_LENGTH
654                };
655                data_block_length = FAST_TYPE_DATA_BLOCK_LENGTH;
656            } else {
657                // Use the multi-stage index.
658                let mut i1: u32 = c >> SHIFT_1;
659                if self.header.trie_type == TrieType::Fast {
660                    debug_assert!(0xffff < c && c < self.header.high_start);
661                    i1 = i1 + BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
662                } else {
663                    debug_assert!(
664                        c < self.header.high_start && self.header.high_start > SMALL_LIMIT
665                    );
666                    i1 += SMALL_INDEX_LENGTH;
667                }
668                let i2: u16 = self.index.get(i1 as usize)?;
669                let i3_block_idx: u32 = (i2 as u32) + ((c >> SHIFT_2) & INDEX_2_MASK);
670                i3_block = if let Some(i3b) = self.index.get(i3_block_idx as usize) {
671                    i3b as u32
672                } else {
673                    return None;
674                };
675                if i3_block == prev_i3_block && (c - start) >= CP_PER_INDEX_2_ENTRY {
676                    // The index-3 block is the same as the previous one, and filled with value.
677                    debug_assert!((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
678                    c += CP_PER_INDEX_2_ENTRY;
679
680                    if c >= self.header.high_start {
681                        break;
682                    } else {
683                        continue;
684                    }
685                }
686                prev_i3_block = i3_block;
687                if i3_block == self.header.index3_null_offset as u32 {
688                    // This is the index-3 null block.
689                    // All of the `data` array blocks pointed to by the values
690                    // in this block of the `index` 3rd-stage subarray will
691                    // contain this trie's null_value. So if we are in the middle
692                    // of a range, end it and return early, otherwise start a new
693                    // range of null values.
694                    if have_value {
695                        if null_value != value {
696                            return Some(CodePointMapRange {
697                                range: start..=(c - 1),
698                                value,
699                            });
700                        }
701                    } else {
702                        trie_value = T::try_from_u32(self.header.null_value).ok()?;
703                        value = null_value;
704                        have_value = true;
705                    }
706                    prev_block = self.header.data_null_offset;
707                    c = (c + CP_PER_INDEX_2_ENTRY) & !(CP_PER_INDEX_2_ENTRY - 1);
708
709                    if c >= self.header.high_start {
710                        break;
711                    } else {
712                        continue;
713                    }
714                }
715                i3 = (c >> SHIFT_3) & INDEX_3_MASK;
716                i3_block_length = INDEX_3_BLOCK_LENGTH;
717                data_block_length = SMALL_DATA_BLOCK_LENGTH;
718            }
719
720            // Enumerate data blocks for one index-3 block.
721            loop {
722                let mut block: u32;
723                if (i3_block & 0x8000) == 0 {
724                    block = if let Some(b) = self.index.get((i3_block + i3) as usize) {
725                        b as u32
726                    } else {
727                        return None;
728                    };
729                } else {
730                    // 18-bit indexes stored in groups of 9 entries per 8 indexes.
731                    let mut group: u32 = (i3_block & 0x7fff) + (i3 & !7) + (i3 >> 3);
732                    let gi: u32 = i3 & 7;
733                    let gi_val: u32 = if let Some(giv) = self.index.get(group as usize) {
734                        giv.into()
735                    } else {
736                        return None;
737                    };
738                    block = (gi_val << (2 + (2 * gi))) & 0x30000;
739                    group += 1;
740                    let ggi_val: u32 = if let Some(ggiv) = self.index.get((group + gi) as usize) {
741                        ggiv as u32
742                    } else {
743                        return None;
744                    };
745                    block |= ggi_val;
746                }
747
748                // If our previous and current return values of the 3rd-stage `index`
749                // lookup yield the same `data` block offset, and if we already know that
750                // the entire `data` block / subarray starting at that offset stores
751                // `value` and nothing else, then we can extend our range by the length
752                // of a data block and continue.
753                // Otherwise, we have to iterate over the values stored in the
754                // new data block to see if they differ from `value`.
755                if block == prev_block && (c - start) >= data_block_length {
756                    // The block is the same as the previous one, and filled with value.
757                    debug_assert!((c & (data_block_length - 1)) == 0);
758                    c += data_block_length;
759                } else {
760                    let data_mask: u32 = data_block_length - 1;
761                    prev_block = block;
762                    if block == self.header.data_null_offset {
763                        // This is the data null block.
764                        // If we are in the middle of a range, end it and
765                        // return early, otherwise start a new range of null
766                        // values.
767                        if have_value {
768                            if null_value != value {
769                                return Some(CodePointMapRange {
770                                    range: start..=(c - 1),
771                                    value,
772                                });
773                            }
774                        } else {
775                            trie_value = T::try_from_u32(self.header.null_value).ok()?;
776                            value = null_value;
777                            have_value = true;
778                        }
779                        c = (c + data_block_length) & !data_mask;
780                    } else {
781                        let mut di: u32 = block + (c & data_mask);
782                        let mut trie_value_2: T = self.data.get(di as usize)?;
783                        if have_value {
784                            if trie_value_2 != trie_value {
785                                if maybe_filter_value(
786                                    trie_value_2,
787                                    T::try_from_u32(self.header.null_value).ok()?,
788                                    null_value,
789                                ) != value
790                                {
791                                    return Some(CodePointMapRange {
792                                        range: start..=(c - 1),
793                                        value,
794                                    });
795                                }
796                                // `trie_value` stores the previous value that was retrieved
797                                // from the trie.
798                                // `value` stores the value associated for the range (return
799                                // value) that we are currently building, which is computed
800                                // as a transformation by applying maybe_filter_value()
801                                // to the trie value.
802                                // The current trie value `trie_value_2` within this data block
803                                // differs here from the previous value in `trie_value`.
804                                // But both map to `value` after applying `maybe_filter_value`.
805                                // It is not clear whether the previous or the current trie value
806                                // (or neither) is more likely to match potential subsequent trie
807                                // values that would extend the range by mapping to `value`.
808                                // On the assumption of locality -- often times consecutive
809                                // characters map to the same trie values -- remembering the new
810                                // one might make it faster to extend this range further
811                                // (by increasing the chance that the next `trie_value_2 !=
812                                // trie_value` test will be false).
813                                trie_value = trie_value_2; // may or may not help
814                            }
815                        } else {
816                            trie_value = trie_value_2;
817                            value = maybe_filter_value(
818                                trie_value_2,
819                                T::try_from_u32(self.header.null_value).ok()?,
820                                null_value,
821                            );
822                            have_value = true;
823                        }
824
825                        c += 1;
826                        while (c & data_mask) != 0 {
827                            di += 1;
828                            trie_value_2 = self.data.get(di as usize)?;
829                            if trie_value_2 != trie_value {
830                                if maybe_filter_value(
831                                    trie_value_2,
832                                    T::try_from_u32(self.header.null_value).ok()?,
833                                    null_value,
834                                ) != value
835                                {
836                                    return Some(CodePointMapRange {
837                                        range: start..=(c - 1),
838                                        value,
839                                    });
840                                }
841                                // `trie_value` stores the previous value that was retrieved
842                                // from the trie.
843                                // `value` stores the value associated for the range (return
844                                // value) that we are currently building, which is computed
845                                // as a transformation by applying maybe_filter_value()
846                                // to the trie value.
847                                // The current trie value `trie_value_2` within this data block
848                                // differs here from the previous value in `trie_value`.
849                                // But both map to `value` after applying `maybe_filter_value`.
850                                // It is not clear whether the previous or the current trie value
851                                // (or neither) is more likely to match potential subsequent trie
852                                // values that would extend the range by mapping to `value`.
853                                // On the assumption of locality -- often times consecutive
854                                // characters map to the same trie values -- remembering the new
855                                // one might make it faster to extend this range further
856                                // (by increasing the chance that the next `trie_value_2 !=
857                                // trie_value` test will be false).
858                                trie_value = trie_value_2; // may or may not help
859                            }
860
861                            c += 1;
862                        }
863                    }
864                }
865
866                i3 += 1;
867                if i3 >= i3_block_length {
868                    break;
869                }
870            }
871
872            if c >= self.header.high_start {
873                break;
874            }
875        }
876
877        debug_assert!(have_value);
878
879        // Now that c >= high_start, compare `value` to `high_value` to see
880        // if we can merge our current range with the high_value range
881        // high_start..=CODE_POINT_MAX (start- and end-inclusive), otherwise
882        // stop at high_start - 1.
883        let di: u32 = self.data.len() as u32 - HIGH_VALUE_NEG_DATA_OFFSET;
884        let high_value: T = self.data.get(di as usize)?;
885        if maybe_filter_value(
886            high_value,
887            T::try_from_u32(self.header.null_value).ok()?,
888            null_value,
889        ) != value
890        {
891            c -= 1;
892        } else {
893            c = CODE_POINT_MAX;
894        }
895        Some(CodePointMapRange {
896            range: start..=c,
897            value,
898        })
899    }
900
901    /// Yields an [`Iterator`] returning ranges of consecutive code points that
902    /// share the same value in the [`CodePointTrie`], as given by
903    /// [`CodePointTrie::get_range()`].
904    ///
905    /// # Examples
906    ///
907    /// ```
908    /// use core::ops::RangeInclusive;
909    /// use icu::collections::codepointtrie::planes;
910    /// use icu::collections::codepointtrie::CodePointMapRange;
911    ///
912    /// let planes_trie = planes::get_planes_trie();
913    ///
914    /// let mut ranges = planes_trie.iter_ranges();
915    ///
916    /// for plane in 0..=16 {
917    ///     let exp_start = plane * 0x1_0000;
918    ///     let exp_end = exp_start + 0xffff;
919    ///     assert_eq!(
920    ///         ranges.next(),
921    ///         Some(CodePointMapRange {
922    ///             range: exp_start..=exp_end,
923    ///             value: plane as u8
924    ///         })
925    ///     );
926    /// }
927    ///
928    /// // Hitting the end of the iterator returns `None`, as will subsequent
929    /// // calls to .next().
930    /// assert_eq!(ranges.next(), None);
931    /// assert_eq!(ranges.next(), None);
932    /// ```
933    pub fn iter_ranges(&self) -> CodePointMapRangeIterator<T> {
934        let init_range = Some(CodePointMapRange {
935            range: u32::MAX..=u32::MAX,
936            value: self.error_value(),
937        });
938        CodePointMapRangeIterator::<T> {
939            cpt: self,
940            cpm_range: init_range,
941        }
942    }
943
944    /// Yields an [`Iterator`] returning the ranges of the code points whose values
945    /// match `value` in the [`CodePointTrie`].
946    ///
947    /// # Examples
948    ///
949    /// ```
950    /// use icu::collections::codepointtrie::planes;
951    ///
952    /// let trie = planes::get_planes_trie();
953    ///
954    /// let plane_val = 2;
955    /// let mut sip_range_iter = trie.iter_ranges_for_value(plane_val as u8);
956    ///
957    /// let start = plane_val * 0x1_0000;
958    /// let end = start + 0xffff;
959    ///
960    /// let sip_range = sip_range_iter.next()
961    ///     .expect("Plane 2 (SIP) should exist in planes data");
962    /// assert_eq!(start..=end, sip_range);
963    ///
964    /// assert!(sip_range_iter.next().is_none());
965    pub fn iter_ranges_for_value(
966        &self,
967        value: T,
968    ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
969        self.iter_ranges()
970            .filter(move |cpm_range| cpm_range.value == value)
971            .map(|cpm_range| cpm_range.range)
972    }
973
974    /// Yields an [`Iterator`] returning the ranges of the code points after passing
975    /// the value through a mapping function.
976    ///
977    /// This is preferable to calling `.get_ranges().map()` since it will coalesce
978    /// adjacent ranges into one.
979    ///
980    /// # Examples
981    ///
982    /// ```
983    /// use icu::collections::codepointtrie::planes;
984    ///
985    /// let trie = planes::get_planes_trie();
986    ///
987    /// let plane_val = 2;
988    /// let mut sip_range_iter = trie.iter_ranges_mapped(|value| value != plane_val as u8).filter(|range| range.value);
989    ///
990    /// let end = plane_val * 0x1_0000 - 1;
991    ///
992    /// let sip_range = sip_range_iter.next()
993    ///     .expect("Complemented planes data should have at least one entry");
994    /// assert_eq!(0..=end, sip_range.range);
995    pub fn iter_ranges_mapped<'a, U: Eq + 'a>(
996        &'a self,
997        mut map: impl FnMut(T) -> U + Copy + 'a,
998    ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a {
999        crate::iterator_utils::RangeListIteratorCoalescer::new(self.iter_ranges().map(
1000            move |range| CodePointMapRange {
1001                range: range.range,
1002                value: map(range.value),
1003            },
1004        ))
1005    }
1006
1007    /// Returns a [`CodePointInversionList`] for the code points that have the given
1008    /// [`TrieValue`] in the trie.
1009    ///
1010    /// # Examples
1011    ///
1012    /// ```
1013    /// use icu::collections::codepointtrie::planes;
1014    ///
1015    /// let trie = planes::get_planes_trie();
1016    ///
1017    /// let plane_val = 2;
1018    /// let sip = trie.get_set_for_value(plane_val as u8);
1019    ///
1020    /// let start = plane_val * 0x1_0000;
1021    /// let end = start + 0xffff;
1022    ///
1023    /// assert!(!sip.contains32(start - 1));
1024    /// assert!(sip.contains32(start));
1025    /// assert!(sip.contains32(end));
1026    /// assert!(!sip.contains32(end + 1));
1027    /// ```
1028    #[cfg(feature = "alloc")]
1029    pub fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> {
1030        let value_ranges = self.iter_ranges_for_value(value);
1031        CodePointInversionList::from_iter(value_ranges)
1032    }
1033
1034    /// Returns the value used as an error value for this trie
1035    #[inline]
1036    pub fn error_value(&self) -> T {
1037        self.error_value
1038    }
1039}
1040
1041#[cfg(feature = "databake")]
1042impl<T: TrieValue + databake::Bake> databake::Bake for CodePointTrie<'_, T> {
1043    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
1044        let header = self.header.bake(env);
1045        let index = self.index.bake(env);
1046        let data = self.data.bake(env);
1047        let error_value = self.error_value.bake(env);
1048        databake::quote! { icu_collections::codepointtrie::CodePointTrie::from_parts(#header, #index, #data, #error_value) }
1049    }
1050}
1051
1052#[cfg(feature = "databake")]
1053impl<T: TrieValue + databake::Bake> databake::BakeSize for CodePointTrie<'_, T> {
1054    fn borrows_size(&self) -> usize {
1055        self.header.borrows_size() + self.index.borrows_size() + self.data.borrows_size()
1056    }
1057}
1058
1059impl<T: TrieValue + Into<u32>> CodePointTrie<'_, T> {
1060    /// Returns the value that is associated with `code_point` for this [`CodePointTrie`]
1061    /// as a `u32`.
1062    ///
1063    /// # Examples
1064    ///
1065    /// ```
1066    /// use icu::collections::codepointtrie::planes;
1067    /// let trie = planes::get_planes_trie();
1068    ///
1069    /// let cp = '𑖎' as u32;
1070    /// assert_eq!(cp, 0x1158E);
1071    ///
1072    /// let plane_num: u8 = trie.get32(cp);
1073    /// assert_eq!(trie.get32_u32(cp), plane_num as u32);
1074    /// ```
1075    // Note: This API method maintains consistency with the corresponding
1076    // original ICU APIs.
1077    pub fn get32_u32(&self, code_point: u32) -> u32 {
1078        self.get32(code_point).into()
1079    }
1080}
1081
1082impl<T: TrieValue> Clone for CodePointTrie<'_, T>
1083where
1084    <T as zerovec::ule::AsULE>::ULE: Clone,
1085{
1086    fn clone(&self) -> Self {
1087        CodePointTrie {
1088            header: self.header,
1089            index: self.index.clone(),
1090            data: self.data.clone(),
1091            error_value: self.error_value,
1092        }
1093    }
1094}
1095
1096/// Represents a range of consecutive code points sharing the same value in a
1097/// code point map.
1098///
1099/// The start and end of the interval is represented as a
1100/// `RangeInclusive<u32>`, and the value is represented as `T`.
1101#[derive(PartialEq, Eq, Debug, Clone)]
1102pub struct CodePointMapRange<T> {
1103    /// Range of code points from start to end (inclusive).
1104    pub range: RangeInclusive<u32>,
1105    /// Trie value associated with this range.
1106    pub value: T,
1107}
1108
1109/// A custom [`Iterator`] type specifically for a code point trie that returns
1110/// [`CodePointMapRange`]s.
1111pub struct CodePointMapRangeIterator<'a, T: TrieValue> {
1112    cpt: &'a CodePointTrie<'a, T>,
1113    // Initialize `range` to Some(CodePointMapRange{ start: u32::MAX, end: u32::MAX, value: 0}).
1114    // When `range` is Some(...) and has a start value different from u32::MAX, then we have
1115    // returned at least one code point range due to a call to `next()`.
1116    // When `range` == `None`, it means that we have hit the end of iteration. It would occur
1117    // after a call to `next()` returns a None <=> we attempted to call `get_range()`
1118    // with a start code point that is > CODE_POINT_MAX.
1119    cpm_range: Option<CodePointMapRange<T>>,
1120}
1121
1122impl<T: TrieValue> Iterator for CodePointMapRangeIterator<'_, T> {
1123    type Item = CodePointMapRange<T>;
1124
1125    fn next(&mut self) -> Option<Self::Item> {
1126        self.cpm_range = match &self.cpm_range {
1127            Some(cpmr) => {
1128                if *cpmr.range.start() == u32::MAX {
1129                    self.cpt.get_range(0)
1130                } else {
1131                    self.cpt.get_range(cpmr.range.end() + 1)
1132                }
1133            }
1134            None => None,
1135        };
1136        // Note: Clone is cheap. We can't Copy because RangeInclusive does not impl Copy.
1137        self.cpm_range.clone()
1138    }
1139}
1140
1141#[cfg(test)]
1142mod tests {
1143    use super::*;
1144    use crate::codepointtrie::planes;
1145    use alloc::vec::Vec;
1146
1147    #[test]
1148    #[cfg(feature = "serde")]
1149    fn test_serde_with_postcard_roundtrip() -> Result<(), postcard::Error> {
1150        let trie = crate::codepointtrie::planes::get_planes_trie();
1151        let trie_serialized: Vec<u8> = postcard::to_allocvec(&trie).unwrap();
1152
1153        // Assert an expected (golden data) version of the serialized trie.
1154        const EXP_TRIE_SERIALIZED: &[u8] = &[
1155            128, 128, 64, 128, 2, 2, 0, 0, 1, 160, 18, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1156            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1157            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1158            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1159            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136,
1160            2, 144, 2, 144, 2, 144, 2, 176, 2, 176, 2, 176, 2, 176, 2, 208, 2, 208, 2, 208, 2, 208,
1161            2, 240, 2, 240, 2, 240, 2, 240, 2, 16, 3, 16, 3, 16, 3, 16, 3, 48, 3, 48, 3, 48, 3, 48,
1162            3, 80, 3, 80, 3, 80, 3, 80, 3, 112, 3, 112, 3, 112, 3, 112, 3, 144, 3, 144, 3, 144, 3,
1163            144, 3, 176, 3, 176, 3, 176, 3, 176, 3, 208, 3, 208, 3, 208, 3, 208, 3, 240, 3, 240, 3,
1164            240, 3, 240, 3, 16, 4, 16, 4, 16, 4, 16, 4, 48, 4, 48, 4, 48, 4, 48, 4, 80, 4, 80, 4,
1165            80, 4, 80, 4, 112, 4, 112, 4, 112, 4, 112, 4, 0, 0, 16, 0, 32, 0, 48, 0, 64, 0, 80, 0,
1166            96, 0, 112, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32,
1167            0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48,
1168            0, 0, 0, 16, 0, 32, 0, 48, 0, 0, 0, 16, 0, 32, 0, 48, 0, 128, 0, 128, 0, 128, 0, 128,
1169            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
1170            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128,
1171            0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 128, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1172            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1173            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 144,
1174            0, 144, 0, 144, 0, 144, 0, 144, 0, 144, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1175            0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1176            0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160, 0, 160,
1177            0, 160, 0, 160, 0, 160, 0, 160, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1178            0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1179            0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176, 0, 176,
1180            0, 176, 0, 176, 0, 176, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1181            0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1182            0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192, 0, 192,
1183            0, 192, 0, 192, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1184            0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1185            0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208,
1186            0, 208, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1187            0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1188            0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224, 0, 224,
1189            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240,
1190            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240,
1191            0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 0,
1192            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
1193            0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1194            1, 0, 1, 0, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1,
1195            16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16,
1196            1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 16, 1, 32, 1, 32, 1, 32, 1,
1197            32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32,
1198            1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1, 32, 1,
1199            32, 1, 32, 1, 32, 1, 32, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48,
1200            1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1,
1201            48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 48, 1, 64, 1, 64,
1202            1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1,
1203            64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 64,
1204            1, 64, 1, 64, 1, 64, 1, 64, 1, 64, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1,
1205            80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80,
1206            1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1, 80, 1,
1207            96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96,
1208            1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1,
1209            96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 96, 1, 128, 0, 136, 0, 136, 0, 136, 0, 136,
1210            0, 136, 0, 136, 0, 136, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
1211            2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2,
1212            0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1213            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1214            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 168, 0,
1215            168, 0, 168, 0, 168, 0, 168, 0, 168, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1216            200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1217            200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0, 200, 0,
1218            200, 0, 200, 0, 200, 0, 200, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1219            232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1220            232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0, 232, 0,
1221            232, 0, 232, 0, 232, 0, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8,
1222            1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1,
1223            8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 8, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40,
1224            1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1,
1225            40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40, 1, 40,
1226            1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1,
1227            72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72,
1228            1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 72, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1229            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1230            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1,
1231            104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 104, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1232            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1233            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 136, 1,
1234            136, 1, 136, 1, 136, 1, 136, 1, 136, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1235            168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1236            168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1, 168, 1,
1237            168, 1, 168, 1, 168, 1, 168, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1238            200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1239            200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1, 200, 1,
1240            200, 1, 200, 1, 200, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1241            232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1242            232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1, 232, 1,
1243            232, 1, 232, 1, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2,
1244            8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 8,
1245            2, 8, 2, 8, 2, 8, 2, 8, 2, 8, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40,
1246            2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2,
1247            40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 40, 2, 72,
1248            2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2,
1249            72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72,
1250            2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 72, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1251            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1252            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 104, 2,
1253            104, 2, 104, 2, 104, 2, 104, 2, 104, 2, 244, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1254            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1255            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1256            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1257            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1258            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1259            2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1260            4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
1261            6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
1262            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10,
1263            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,
1264            11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
1265            12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
1266            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1267            15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 0,
1268        ];
1269        assert_eq!(trie_serialized, EXP_TRIE_SERIALIZED);
1270
1271        let trie_deserialized = postcard::from_bytes::<CodePointTrie<u8>>(&trie_serialized)?;
1272
1273        assert_eq!(&trie.index, &trie_deserialized.index);
1274        assert_eq!(&trie.data, &trie_deserialized.data);
1275
1276        assert!(!trie_deserialized.index.is_owned());
1277        assert!(!trie_deserialized.data.is_owned());
1278
1279        Ok(())
1280    }
1281
1282    #[test]
1283    fn test_get_range() {
1284        let planes_trie = planes::get_planes_trie();
1285
1286        let first_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x0);
1287        assert_eq!(
1288            first_range,
1289            Some(CodePointMapRange {
1290                range: 0x0..=0xffff,
1291                value: 0
1292            })
1293        );
1294
1295        let second_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x1_0000);
1296        assert_eq!(
1297            second_range,
1298            Some(CodePointMapRange {
1299                range: 0x10000..=0x1ffff,
1300                value: 1
1301            })
1302        );
1303
1304        let penultimate_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0xf_0000);
1305        assert_eq!(
1306            penultimate_range,
1307            Some(CodePointMapRange {
1308                range: 0xf_0000..=0xf_ffff,
1309                value: 15
1310            })
1311        );
1312
1313        let last_range: Option<CodePointMapRange<u8>> = planes_trie.get_range(0x10_0000);
1314        assert_eq!(
1315            last_range,
1316            Some(CodePointMapRange {
1317                range: 0x10_0000..=0x10_ffff,
1318                value: 16
1319            })
1320        );
1321    }
1322
1323    #[test]
1324    fn databake() {
1325        databake::test_bake!(
1326            CodePointTrie<'static, u32>,
1327            const,
1328            crate::codepointtrie::CodePointTrie::from_parts(
1329                crate::codepointtrie::CodePointTrieHeader {
1330                    high_start: 1u32,
1331                    shifted12_high_start: 2u16,
1332                    index3_null_offset: 3u16,
1333                    data_null_offset: 4u32,
1334                    null_value: 5u32,
1335                    trie_type: crate::codepointtrie::TrieType::Small,
1336                },
1337                zerovec::ZeroVec::new(),
1338                zerovec::ZeroVec::new(),
1339                0u32,
1340            ),
1341            icu_collections,
1342            [zerovec],
1343        );
1344    }
1345}