icu_properties/
props.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module defines all available properties.
6//!
7//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8//! and implement [`EnumeratedProperty`].
9//!
10//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12//!
13//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16//! can be constructed.
17//!
18//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Re-emits an `impl` block of associated constants and derives helper items from it.
///
/// See [`test_enumerated_property_completeness`] for usage.
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
/// Produces, in addition to the constants themselves:
/// - `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`,
///   listing every declared constant in declaration order;
/// - with the `datagen` feature, a `databake::Bake` impl that bakes a value as its
///   named constant when one matches, and as a `from_icu4c_value(..)` call otherwise;
/// - `impl From<EastAsianWidth> for u16`, which casts the first field of the struct to `u16`.
macro_rules! create_const_array {
    (
        // Attributes placed on the `impl` block itself.
        $ ( #[$meta:meta] )*
        impl $enum_ty:ident {
            // Each constant, with its own attributes (including doc comments) and visibility.
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
        }
    ) => {
        // The input `impl` is reproduced unchanged, with `ALL_VALUES` appended.
        $( #[$meta] )*
        impl $enum_ty {
            $(
                $(#[$const_meta])*
                $v const $i: $t = $e;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $($enum_ty::$i),*
            ];
        }

        #[cfg(feature = "datagen")]
        impl databake::Bake for $enum_ty {
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_properties");
                // Named constants are tried first, in declaration order; the trailing
                // `Self(v)` arm covers any value that has no named constant.
                match *self {
                    $(
                        Self::$i => databake::quote!(icu_properties::props::$enum_ty::$i),
                    )*
                    Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)),
                }
            }
        }


        impl From<$enum_ty> for u16  {
            fn from(other: $enum_ty) -> Self {
                // `as` is used so the same expansion works whatever integer type field 0 has.
                other.0 as u16
            }
        }
    }
}
77
78pub use crate::code_point_map::EnumeratedProperty;
79
/// Implements the traits that make a value type usable as an enumerated property.
///
/// Inputs:
/// - `name` / `short_name`: string literals stored (as bytes) in
///   `EnumeratedProperty::NAME` and `EnumeratedProperty::SHORT_NAME`.
/// - `ident`: the value type the impls are generated for.
/// - `data_marker`: the type used as `EnumeratedProperty::DataMarker`.
/// - `singleton`: the name of the item on `crate::provider::Baked` used as
///   `EnumeratedProperty::SINGLETON` (only with the `compiled_data` feature).
/// - `ule_ty` (optional): when present, a `zerovec::ule::AsULE` impl is also generated
///   that converts through field `0` of the value type.
macro_rules! make_enumerated_property {
    (
        name: $name:literal;
        short_name: $short_name:literal;
        ident: $value_ty:path;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        $(ule_ty: $ule_ty:ty;)?
    ) => {
        impl crate::private::Sealed for $value_ty {}

        impl EnumeratedProperty for $value_ty {
            type DataMarker = $data_marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$singleton;
            const NAME: &'static [u8] = $name.as_bytes();
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
        }

        // Emitted only when `ule_ty` was supplied.
        $(
            impl zerovec::ule::AsULE for $value_ty {
                type ULE = $ule_ty;

                fn to_unaligned(self) -> Self::ULE {
                    // Delegates to the `AsULE` impl of the wrapped field.
                    self.0.to_unaligned()
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
114
/// The Bidi_Class enumerated property.
///
/// Its values are the character categories that the Unicode Bidirectional Algorithm
/// operates on.
/// The property values are listed under
/// [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values);
/// further background is in
/// [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// # Example
///
/// ```
/// use icu::properties::{props::BidiClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('y'),
///     BidiClass::LeftToRight
/// ); // U+0079
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('ع'),
///     BidiClass::ArabicLetter
/// ); // U+0639
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct BidiClass(pub(crate) u8);

impl BidiClass {
    /// Converts this value into the matching ICU4C `UBidiClass` integer.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UBidiClass` integer; the value is stored as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        BidiClass(value)
    }
}
151
// Named constants for the Bidi_Class values 0 through 22. Declaring them through
// `create_const_array!` also yields `BidiClass::ALL_VALUES` (these constants, in this
// order) and `impl From<BidiClass> for u16`.
create_const_array! {
#[allow(non_upper_case_globals)]
impl BidiClass {
    /// (`L`) any strong left-to-right character
    pub const LeftToRight: BidiClass = BidiClass(0);
    /// (`R`) any strong right-to-left (non-Arabic-type) character
    pub const RightToLeft: BidiClass = BidiClass(1);
    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
    pub const EuropeanNumber: BidiClass = BidiClass(2);
    /// (`ES`) plus and minus signs
    pub const EuropeanSeparator: BidiClass = BidiClass(3);
    /// (`ET`) a terminator in a numeric format context, includes currency signs
    pub const EuropeanTerminator: BidiClass = BidiClass(4);
    /// (`AN`) any Arabic-Indic digit
    pub const ArabicNumber: BidiClass = BidiClass(5);
    /// (`CS`) commas, colons, and slashes
    pub const CommonSeparator: BidiClass = BidiClass(6);
    /// (`B`) various newline characters
    pub const ParagraphSeparator: BidiClass = BidiClass(7);
    /// (`S`) various segment-related control codes
    pub const SegmentSeparator: BidiClass = BidiClass(8);
    /// (`WS`) spaces
    pub const WhiteSpace: BidiClass = BidiClass(9);
    /// (`ON`) most other symbols and punctuation marks
    pub const OtherNeutral: BidiClass = BidiClass(10);
    /// (`LRE`) U+202A: the LR embedding control
    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
    /// (`LRO`) U+202D: the LR override control
    pub const LeftToRightOverride: BidiClass = BidiClass(12);
    /// (`AL`) any strong right-to-left (Arabic-type) character
    pub const ArabicLetter: BidiClass = BidiClass(13);
    /// (`RLE`) U+202B: the RL embedding control
    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
    /// (`RLO`) U+202E: the RL override control
    pub const RightToLeftOverride: BidiClass = BidiClass(15);
    /// (`PDF`) U+202C: terminates an embedding or override control
    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
    /// (`NSM`) any nonspacing mark
    pub const NonspacingMark: BidiClass = BidiClass(17);
    /// (`BN`) most format characters, control codes, or noncharacters
    pub const BoundaryNeutral: BidiClass = BidiClass(18);
    /// (`FSI`) U+2068: the first strong isolate control
    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
    /// (`LRI`) U+2066: the LR isolate control
    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
    /// (`RLI`) U+2067: the RL isolate control
    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
    /// (`PDI`) U+2069: terminates an isolate control
    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
}
}
203
// Registers `BidiClass` as the enumerated property "Bidi_Class" (short name "bc").
// Passing `ule_ty: u8` additionally generates the `zerovec::ule::AsULE` impl, which
// converts through the wrapped `u8`.
make_enumerated_property! {
    name: "Bidi_Class";
    short_name: "bc";
    ident: BidiClass;
    data_marker: crate::provider::PropertyEnumBidiClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
    ule_ty: u8;
}
212
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
    /// crate::props::GeneralCategoryGroup).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
    ///
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
    ///     GeneralCategory::OtherLetter
    /// ); // U+6728
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
    ///     GeneralCategory::OtherSymbol
    /// ); // U+1F383 JACK-O-LANTERN
    /// ```
    // NOTE: variants are grouped by major category (letters, marks, numbers, ...), so the
    // explicit discriminants below are NOT in ascending order. The numbers are what
    // `GeneralCategoryGroup` shifts by to build its bit masks; do not "tidy" them.
    // NOTE(review): the numbering presumably mirrors ICU4C's `UCharCategory` — confirm
    // before changing any value.
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[repr(u8)]
    pub enum GeneralCategory {
        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        // Out of numeric order: `SpacingMark` is 8 and `EnclosingMark` is 7.
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        // Out of numeric order: `InitialPunctuation` (28) and `FinalPunctuation` (29) are the
        // two largest discriminants, while `OtherPunctuation` is 23.
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
316
317pub use gc::GeneralCategory;
318
319impl GeneralCategory {
320    /// All possible values of this enum
321    pub const ALL_VALUES: &'static [GeneralCategory] = &[
322        GeneralCategory::Unassigned,
323        GeneralCategory::UppercaseLetter,
324        GeneralCategory::LowercaseLetter,
325        GeneralCategory::TitlecaseLetter,
326        GeneralCategory::ModifierLetter,
327        GeneralCategory::OtherLetter,
328        GeneralCategory::NonspacingMark,
329        GeneralCategory::SpacingMark,
330        GeneralCategory::EnclosingMark,
331        GeneralCategory::DecimalNumber,
332        GeneralCategory::LetterNumber,
333        GeneralCategory::OtherNumber,
334        GeneralCategory::SpaceSeparator,
335        GeneralCategory::LineSeparator,
336        GeneralCategory::ParagraphSeparator,
337        GeneralCategory::Control,
338        GeneralCategory::Format,
339        GeneralCategory::PrivateUse,
340        GeneralCategory::Surrogate,
341        GeneralCategory::DashPunctuation,
342        GeneralCategory::OpenPunctuation,
343        GeneralCategory::ClosePunctuation,
344        GeneralCategory::ConnectorPunctuation,
345        GeneralCategory::InitialPunctuation,
346        GeneralCategory::FinalPunctuation,
347        GeneralCategory::OtherPunctuation,
348        GeneralCategory::MathSymbol,
349        GeneralCategory::CurrencySymbol,
350        GeneralCategory::ModifierSymbol,
351        GeneralCategory::OtherSymbol,
352    ];
353}
354
/// The error produced by `impl TryFrom<u8> for GeneralCategory` when the
/// conversion fails.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
359
360impl TryFrom<u8> for GeneralCategory {
361    type Error = GeneralCategoryOutOfBoundsError;
362    /// Construct this [`GeneralCategory`] from an integer, returning
363    /// an error if it is out of bounds
364    fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
365        GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
366    }
367}
368
// Registers `GeneralCategory` as the enumerated property "General_Category" (short name
// "gc"). No `ule_ty` is passed, so the macro does not emit an `AsULE` impl here.
make_enumerated_property! {
    name: "General_Category";
    short_name: "gc";
    ident: GeneralCategory;
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
}
376
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);

impl crate::private::Sealed for GeneralCategoryGroup {}
398
399use GeneralCategory as GC;
400use GeneralCategoryGroup as GCG;
401
// Each single-category constant is the bit `1 << discriminant` of the matching
// `GeneralCategory` variant; each grouped constant is the bitwise OR of its members.
#[allow(non_upper_case_globals)]
impl GeneralCategoryGroup {
    /// (`Lu`) An uppercase letter
    pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
    /// (`Ll`) A lowercase letter
    pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
    /// (`Lt`) A digraphic letter, with first part uppercase
    pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
    /// (`Lm`) A modifier letter
    pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
    /// (`Lo`) Other letters, including syllables and ideographs
    pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
    /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
    pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
        | (1 << (GC::LowercaseLetter as u32))
        | (1 << (GC::TitlecaseLetter as u32)));
    /// (`L`) The union of all letter categories
    pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
        | (1 << (GC::LowercaseLetter as u32))
        | (1 << (GC::TitlecaseLetter as u32))
        | (1 << (GC::ModifierLetter as u32))
        | (1 << (GC::OtherLetter as u32)));

    /// (`Mn`) A nonspacing combining mark (zero advance width)
    pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
    /// (`Me`) An enclosing combining mark
    pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
    /// (`Mc`) A spacing combining mark (positive advance width)
    pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
    /// (`M`) The union of all mark categories
    pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
        | (1 << (GC::EnclosingMark as u32))
        | (1 << (GC::SpacingMark as u32)));

    /// (`Nd`) A decimal digit
    pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
    /// (`Nl`) A letterlike numeric character
    pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
    /// (`No`) A numeric character of other type
    pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
    /// (`N`) The union of all number categories
    pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
        | (1 << (GC::LetterNumber as u32))
        | (1 << (GC::OtherNumber as u32)));

    /// (`Zs`) A space character (of various non-zero widths)
    pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
    /// (`Zl`) U+2028 LINE SEPARATOR only
    pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
    /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
    pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
    /// (`Z`) The union of all separator categories
    pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
        | (1 << (GC::LineSeparator as u32))
        | (1 << (GC::ParagraphSeparator as u32)));

    /// (`Cc`) A C0 or C1 control code
    pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
    /// (`Cf`) A format control character
    pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
    /// (`Co`) A private-use character
    pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
    /// (`Cs`) A surrogate code point
    pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
    /// (`Cn`) A reserved unassigned code point or a noncharacter
    pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
    /// (`C`) The union of all control code, reserved, and unassigned categories
    pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
        | (1 << (GC::Format as u32))
        | (1 << (GC::PrivateUse as u32))
        | (1 << (GC::Surrogate as u32))
        | (1 << (GC::Unassigned as u32)));

    /// (`Pd`) A dash or hyphen punctuation mark
    pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
    /// (`Ps`) An opening punctuation mark (of a pair)
    pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
    /// (`Pe`) A closing punctuation mark (of a pair)
    pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
    /// (`Pc`) A connecting punctuation mark, like a tie
    pub const ConnectorPunctuation: GeneralCategoryGroup =
        GCG(1 << (GC::ConnectorPunctuation as u32));
    /// (`Pi`) An initial quotation mark
    pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
    /// (`Pf`) A final quotation mark
    pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
    /// (`Po`) A punctuation mark of other type
    pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
    /// (`P`) The union of all punctuation categories
    pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
        | (1 << (GC::OpenPunctuation as u32))
        | (1 << (GC::ClosePunctuation as u32))
        | (1 << (GC::ConnectorPunctuation as u32))
        | (1 << (GC::OtherPunctuation as u32))
        | (1 << (GC::InitialPunctuation as u32))
        | (1 << (GC::FinalPunctuation as u32)));

    /// (`Sm`) A symbol of mathematical use
    pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
    /// (`Sc`) A currency sign
    pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
    /// (`Sk`) A non-letterlike modifier symbol
    pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
    /// (`So`) A symbol of other type
    pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
    /// (`S`) The union of all symbol categories
    pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
        | (1 << (GC::CurrencySymbol as u32))
        | (1 << (GC::ModifierSymbol as u32))
        | (1 << (GC::OtherSymbol as u32)));

    // Mask with every bit from 0 up to and including `FinalPunctuation`'s discriminant set.
    // `FinalPunctuation` (29) is the largest `GeneralCategory` discriminant, so this is the
    // set of all valid category bits. `as` binds tighter than `+`, so the shift amount is
    // `(GC::FinalPunctuation as u32) + 1`.
    const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;

    /// Return whether the given [`GeneralCategory`] value is a member of this group.
    ///
    /// ```
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
    /// use icu::properties::CodePointMapData;
    ///
    /// let gc = CodePointMapData::<GeneralCategory>::new();
    ///
    /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
    /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
    ///
    /// // U+0B1E ORIYA LETTER NYA
    /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
    /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
    /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
    ///
    /// // U+0301 COMBINING ACUTE ACCENT
    /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
    /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
    /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
    ///
    /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
    /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
    /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
    ///
    /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
    /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
    /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
    ///
    /// // U+2713 CHECK MARK
    /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
    /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
    /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
    ///
    /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
    /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
    /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
    ///
    /// // U+E007F CANCEL TAG
    /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
    /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
    /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
    /// ```
    pub const fn contains(self, val: GeneralCategory) -> bool {
        // Test the single bit that corresponds to `val`'s discriminant.
        0 != (1 << (val as u32)) & self.0
    }

    /// Produce a GeneralCategoryGroup that is the inverse of this one
    ///
    /// # Example
    ///
    /// ```rust
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
    ///
    /// let letter = GeneralCategoryGroup::Letter;
    /// let not_letter = letter.complement();
    ///
    /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
    /// assert!(!letter.contains(GeneralCategory::MathSymbol));
    /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
    /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
    /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
    /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
    /// ```
    pub const fn complement(self) -> Self {
        // Mask off things not in Self::ALL to guarantee the mask
        // values stay in-range
        GeneralCategoryGroup(!self.0 & Self::ALL)
    }

    /// Return the group representing all GeneralCategory values
    ///
    /// # Example
    ///
    /// ```rust
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
    ///
    /// let all = GeneralCategoryGroup::all();
    ///
    /// assert!(all.contains(GeneralCategory::MathSymbol));
    /// assert!(all.contains(GeneralCategory::OtherPunctuation));
    /// assert!(all.contains(GeneralCategory::UppercaseLetter));
    /// ```
    pub const fn all() -> Self {
        Self(Self::ALL)
    }

    /// Return the empty group
    ///
    /// # Example
    ///
    /// ```rust
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
    ///
    /// let empty = GeneralCategoryGroup::empty();
    ///
    /// assert!(!empty.contains(GeneralCategory::MathSymbol));
    /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
    /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
    /// ```
    pub const fn empty() -> Self {
        Self(0)
    }

    /// Take the union of two groups
    ///
    /// # Example
    ///
    /// ```rust
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
    ///
    /// let letter = GeneralCategoryGroup::Letter;
    /// let symbol = GeneralCategoryGroup::Symbol;
    /// let union = letter.union(symbol);
    ///
    /// assert!(union.contains(GeneralCategory::MathSymbol));
    /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
    /// assert!(union.contains(GeneralCategory::UppercaseLetter));
    /// ```
    pub const fn union(self, other: Self) -> Self {
        Self(self.0 | other.0)
    }

    /// Take the intersection of two groups
    ///
    /// # Example
    ///
    /// ```rust
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
    ///
    /// let letter = GeneralCategoryGroup::Letter;
    /// let lu = GeneralCategoryGroup::UppercaseLetter;
    /// let intersection = letter.intersection(lu);
    ///
    /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
    /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
    /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
    /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
    /// ```
    pub const fn intersection(self, other: Self) -> Self {
        Self(self.0 & other.0)
    }
}
658
659impl From<GeneralCategory> for GeneralCategoryGroup {
660    fn from(subcategory: GeneralCategory) -> Self {
661        GeneralCategoryGroup(1 << (subcategory as u32))
662    }
663}
664impl From<u32> for GeneralCategoryGroup {
665    fn from(mask: u32) -> Self {
666        // Mask off things not in Self::ALL to guarantee the mask
667        // values stay in-range
668        GeneralCategoryGroup(mask & Self::ALL)
669    }
670}
671impl From<GeneralCategoryGroup> for u32 {
672    fn from(group: GeneralCategoryGroup) -> Self {
673        group.0
674    }
675}
676
/// The Script enumerated property.
///
/// The same type serves both the Script and the Script_Extensions Unicode properties.
/// Every character has exactly one Script, while a character used in a particular
/// subset of scripts appears in more than one Script_Extensions set.
/// DEVANAGARI DIGIT NINE, for instance, has Script=Devanagari but also belongs to the
/// Script_Extensions sets of Dogra, Kaithi, and Mahajani. To find out whether a code
/// point belongs to a certain script, use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han);  // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common);  // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);

impl Script {
    /// Converts this value into the matching ICU4C `UScriptCode` integer.
    pub const fn to_icu4c_value(self) -> u16 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UScriptCode` integer; the value is stored as given.
    pub const fn from_icu4c_value(value: u16) -> Self {
        Script(value)
    }
}
715
// Named constants for `Script`, listed alphabetically by constant name (not by
// numeric value). Each wraps the numeric code stored in the struct, i.e. the value
// returned by `to_icu4c_value`.
//
// `create_const_array!` re-emits this `impl` and, per its documentation, additionally
// produces an `ALL_VALUES` list of (name, value) pairs used by
// `test_enumerated_property_completeness`.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl Script {
    pub const Adlam: Script = Script(167);
    pub const Ahom: Script = Script(161);
    pub const AnatolianHieroglyphs: Script = Script(156);
    pub const Arabic: Script = Script(2);
    pub const Armenian: Script = Script(3);
    pub const Avestan: Script = Script(117);
    pub const Balinese: Script = Script(62);
    pub const Bamum: Script = Script(130);
    pub const BassaVah: Script = Script(134);
    pub const Batak: Script = Script(63);
    pub const Bengali: Script = Script(4);
    pub const Bhaiksuki: Script = Script(168);
    pub const Bopomofo: Script = Script(5);
    pub const Brahmi: Script = Script(65);
    pub const Braille: Script = Script(46);
    pub const Buginese: Script = Script(55);
    pub const Buhid: Script = Script(44);
    pub const CanadianAboriginal: Script = Script(40);
    pub const Carian: Script = Script(104);
    pub const CaucasianAlbanian: Script = Script(159);
    pub const Chakma: Script = Script(118);
    pub const Cham: Script = Script(66);
    pub const Cherokee: Script = Script(6);
    pub const Chorasmian: Script = Script(189);
    pub const Common: Script = Script(0);
    pub const Coptic: Script = Script(7);
    pub const Cuneiform: Script = Script(101);
    pub const Cypriot: Script = Script(47);
    pub const CyproMinoan: Script = Script(193);
    pub const Cyrillic: Script = Script(8);
    pub const Deseret: Script = Script(9);
    pub const Devanagari: Script = Script(10);
    pub const DivesAkuru: Script = Script(190);
    pub const Dogra: Script = Script(178);
    pub const Duployan: Script = Script(135);
    pub const EgyptianHieroglyphs: Script = Script(71);
    pub const Elbasan: Script = Script(136);
    pub const Elymaic: Script = Script(185);
    pub const Ethiopian: Script = Script(11);
    pub const Georgian: Script = Script(12);
    pub const Glagolitic: Script = Script(56);
    pub const Gothic: Script = Script(13);
    pub const Grantha: Script = Script(137);
    pub const Greek: Script = Script(14);
    pub const Gujarati: Script = Script(15);
    pub const GunjalaGondi: Script = Script(179);
    pub const Gurmukhi: Script = Script(16);
    pub const Han: Script = Script(17);
    pub const Hangul: Script = Script(18);
    pub const HanifiRohingya: Script = Script(182);
    pub const Hanunoo: Script = Script(43);
    pub const Hatran: Script = Script(162);
    pub const Hebrew: Script = Script(19);
    pub const Hiragana: Script = Script(20);
    pub const ImperialAramaic: Script = Script(116);
    pub const Inherited: Script = Script(1);
    pub const InscriptionalPahlavi: Script = Script(122);
    pub const InscriptionalParthian: Script = Script(125);
    pub const Javanese: Script = Script(78);
    pub const Kaithi: Script = Script(120);
    pub const Kannada: Script = Script(21);
    pub const Katakana: Script = Script(22);
    pub const Kawi: Script = Script(198);
    pub const KayahLi: Script = Script(79);
    pub const Kharoshthi: Script = Script(57);
    pub const KhitanSmallScript: Script = Script(191);
    pub const Khmer: Script = Script(23);
    pub const Khojki: Script = Script(157);
    pub const Khudawadi: Script = Script(145);
    pub const Lao: Script = Script(24);
    pub const Latin: Script = Script(25);
    pub const Lepcha: Script = Script(82);
    pub const Limbu: Script = Script(48);
    pub const LinearA: Script = Script(83);
    pub const LinearB: Script = Script(49);
    pub const Lisu: Script = Script(131);
    pub const Lycian: Script = Script(107);
    pub const Lydian: Script = Script(108);
    pub const Mahajani: Script = Script(160);
    pub const Makasar: Script = Script(180);
    pub const Malayalam: Script = Script(26);
    pub const Mandaic: Script = Script(84);
    pub const Manichaean: Script = Script(121);
    pub const Marchen: Script = Script(169);
    pub const MasaramGondi: Script = Script(175);
    pub const Medefaidrin: Script = Script(181);
    pub const MeeteiMayek: Script = Script(115);
    pub const MendeKikakui: Script = Script(140);
    pub const MeroiticCursive: Script = Script(141);
    pub const MeroiticHieroglyphs: Script = Script(86);
    pub const Miao: Script = Script(92);
    pub const Modi: Script = Script(163);
    pub const Mongolian: Script = Script(27);
    pub const Mro: Script = Script(149);
    pub const Multani: Script = Script(164);
    pub const Myanmar: Script = Script(28);
    pub const Nabataean: Script = Script(143);
    pub const NagMundari: Script = Script(199);
    pub const Nandinagari: Script = Script(187);
    pub const Nastaliq: Script = Script(200);
    pub const NewTaiLue: Script = Script(59);
    pub const Newa: Script = Script(170);
    pub const Nko: Script = Script(87);
    pub const Nushu: Script = Script(150);
    pub const NyiakengPuachueHmong: Script = Script(186);
    pub const Ogham: Script = Script(29);
    pub const OlChiki: Script = Script(109);
    pub const OldHungarian: Script = Script(76);
    pub const OldItalic: Script = Script(30);
    pub const OldNorthArabian: Script = Script(142);
    pub const OldPermic: Script = Script(89);
    pub const OldPersian: Script = Script(61);
    pub const OldSogdian: Script = Script(184);
    pub const OldSouthArabian: Script = Script(133);
    pub const OldTurkic: Script = Script(88);
    pub const OldUyghur: Script = Script(194);
    pub const Oriya: Script = Script(31);
    pub const Osage: Script = Script(171);
    pub const Osmanya: Script = Script(50);
    pub const PahawhHmong: Script = Script(75);
    pub const Palmyrene: Script = Script(144);
    pub const PauCinHau: Script = Script(165);
    pub const PhagsPa: Script = Script(90);
    pub const Phoenician: Script = Script(91);
    pub const PsalterPahlavi: Script = Script(123);
    pub const Rejang: Script = Script(110);
    pub const Runic: Script = Script(32);
    pub const Samaritan: Script = Script(126);
    pub const Saurashtra: Script = Script(111);
    pub const Sharada: Script = Script(151);
    pub const Shavian: Script = Script(51);
    pub const Siddham: Script = Script(166);
    pub const SignWriting: Script = Script(112);
    pub const Sinhala: Script = Script(33);
    pub const Sogdian: Script = Script(183);
    pub const SoraSompeng: Script = Script(152);
    pub const Soyombo: Script = Script(176);
    pub const Sundanese: Script = Script(113);
    pub const SylotiNagri: Script = Script(58);
    pub const Syriac: Script = Script(34);
    pub const Tagalog: Script = Script(42);
    pub const Tagbanwa: Script = Script(45);
    pub const TaiLe: Script = Script(52);
    pub const TaiTham: Script = Script(106);
    pub const TaiViet: Script = Script(127);
    pub const Takri: Script = Script(153);
    pub const Tamil: Script = Script(35);
    pub const Tangsa: Script = Script(195);
    pub const Tangut: Script = Script(154);
    pub const Telugu: Script = Script(36);
    pub const Thaana: Script = Script(37);
    pub const Thai: Script = Script(38);
    pub const Tibetan: Script = Script(39);
    pub const Tifinagh: Script = Script(60);
    pub const Tirhuta: Script = Script(158);
    pub const Toto: Script = Script(196);
    pub const Ugaritic: Script = Script(53);
    pub const Unknown: Script = Script(103);
    pub const Vai: Script = Script(99);
    pub const Vithkuqi: Script = Script(197);
    pub const Wancho: Script = Script(188);
    pub const WarangCiti: Script = Script(146);
    pub const Yezidi: Script = Script(192);
    pub const Yi: Script = Script(41);
    pub const ZanabazarSquare: Script = Script(177);
}
}
887
// Declares `Script` as the enumerated property "Script" (short name "sc").
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; it
// presumably ties the type to the listed `data_marker` and `singleton` — confirm there.
make_enumerated_property! {
    name: "Script";
    short_name: "sc";
    ident: Script;
    data_marker: crate::provider::PropertyEnumScriptV1;
    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
    // `Script` wraps a `u16`, so the ULE type is the `AsULE::ULE` form of `u16`.
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
}
896
/// Enumerated property Hangul_Syllable_Type
///
/// Hangul text can be encoded either with precomposed syllables or with sequences of
/// conjoining Jamo. This property classifies Hangul code points along those lines.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
///     HangulSyllableType::LeadingJamo
/// ); // U+1100
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
///     HangulSyllableType::LeadingVowelSyllable
/// ); // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Converts this value into the number ICU4C uses for it (`UHangulSyllableType`).
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UHangulSyllableType` number.
    ///
    /// The number is stored as given; it is not checked against the named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        HangulSyllableType(value)
    }
}
934
// Named constants for `HangulSyllableType`, in increasing numeric order (0 through 5).
// `create_const_array!` re-emits this `impl` and, per its documentation, additionally
// produces an `ALL_VALUES` list of (name, value) pairs.
create_const_array! {
#[allow(non_upper_case_globals)]
impl HangulSyllableType {
    /// (`NA`) not applicable (e.g. not a Hangul code point).
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
    /// (`L`) a conjoining leading consonant Jamo.
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
    /// (`V`) a conjoining vowel Jamo.
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
    /// (`T`) a conjoining trailing consonant Jamo.
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
}
}
952
// Declares `HangulSyllableType` as the enumerated property "Hangul_Syllable_Type"
// (short name "hst").
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; it
// presumably ties the type to the listed `data_marker` and `singleton` — confirm there.
make_enumerated_property! {
    name: "Hangul_Syllable_Type";
    short_name: "hst";
    ident: HangulSyllableType;
    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
    ule_ty: u8;

}
962
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// ```
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
///
/// // Both letters are written as escapes: the halfwidth and regular forms look
/// // alike and a normalizing editor can silently fold one into the other.
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('\u{FF71}'),
///     EastAsianWidth::Halfwidth
/// ); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('\u{30A2}'),
///     EastAsianWidth::Wide
/// ); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns an ICU4C `UEastAsianWidth` value.
    ///
    /// This is the number wrapped by the struct, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UEastAsianWidth` value.
    ///
    /// The number is stored as given; it is not checked against the named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
998
// Named constants for `EastAsianWidth`, in increasing numeric order (0 through 5).
// The trailing `name=` comments presumably record each value's short alias in the
// Unicode data — confirm against PropertyValueAliases.txt.
// `create_const_array!` re-emits this `impl` and, per its documentation, additionally
// produces an `ALL_VALUES` list of (name, value) pairs.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl EastAsianWidth {
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); //name="N"
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); //name="A"
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); //name="H"
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); //name="F"
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); //name="Na"
    pub const Wide: EastAsianWidth = EastAsianWidth(5); //name="W"
}
}
1011
// Declares `EastAsianWidth` as the enumerated property "East_Asian_Width"
// (short name "ea").
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; it
// presumably ties the type to the listed `data_marker` and `singleton` — confirm there.
make_enumerated_property! {
    name: "East_Asian_Width";
    short_name: "ea";
    ident: EastAsianWidth;
    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
    ule_ty: u8;
}
1020
/// Enumerated property Line_Break.
///
/// Each property value is summarized under "Line Breaking Properties" in UAX #14:
/// <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get(')'),
///     LineBreak::CloseParenthesis
/// ); // U+0029: Right Parenthesis
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
///     LineBreak::ConditionalJapaneseStarter
/// ); //U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Converts this value into the number ICU4C uses for it (`ULineBreak`).
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `ULineBreak` number.
    ///
    /// The number is stored as given; it is not checked against the named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        LineBreak(value)
    }
}
1060
// Named constants for `LineBreak`, in increasing numeric order (0 through 47).
// The trailing `name=` comments presumably record each value's short alias in the
// Unicode data — confirm against PropertyValueAliases.txt.
// `create_const_array!` re-emits this `impl` and, per its documentation, additionally
// produces an `ALL_VALUES` list of (name, value) pairs.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl LineBreak {
    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
    pub const Glue: LineBreak = LineBreak(12); // name="GL"
    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
    pub const Space: LineBreak = LineBreak(26); // name="SP"
    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
    pub const H2: LineBreak = LineBreak(31); // name="H2"
    pub const H3: LineBreak = LineBreak(32); // name="H3"
    pub const JL: LineBreak = LineBreak(33); // name="JL"
    pub const JT: LineBreak = LineBreak(34); // name="JT"
    pub const JV: LineBreak = LineBreak(35); // name="JV"
    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
    pub const EBase: LineBreak = LineBreak(40); // name="EB"
    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"

    // Added in ICU 74:
    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
    pub const Virama: LineBreak = LineBreak(47); // name="VI"
}
}
1117
// Declares `LineBreak` as the enumerated property "Line_Break" (short name "lb").
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; it
// presumably ties the type to the listed `data_marker` and `singleton` — confirm there.
make_enumerated_property! {
    name: "Line_Break";
    short_name: "lb";
    ident: LineBreak;
    data_marker: crate::provider::PropertyEnumLineBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
    ule_ty: u8;
}
1126
/// Enumerated property Grapheme_Cluster_Break.
///
/// Each property value is summarized under "Default Grapheme Cluster Boundary
/// Specification" in UAX #29:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
///     GraphemeClusterBreak::RegionalIndicator
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'),
///     GraphemeClusterBreak::SpacingMark
/// ); //U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // this type is stable
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Converts this value into the number ICU4C uses for it (`UGraphemeClusterBreak`).
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Wraps an ICU4C `UGraphemeClusterBreak` number.
    ///
    /// The number is stored as given; it is not checked against the named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        GraphemeClusterBreak(value)
    }
}
1165
// Named constants for `GraphemeClusterBreak`, in increasing numeric order (0 through 17).
// The trailing `name=` comments presumably record each value's short alias in the
// Unicode data — confirm against PropertyValueAliases.txt.
// `create_const_array!` re-emits this `impl` and, per its documentation, additionally
// produces an `ALL_VALUES` list of (name, value) pairs.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl GraphemeClusterBreak {
    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
    /// This value is obsolete and unused.
    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
}
}
1194
// Declares `GraphemeClusterBreak` as the enumerated property "Grapheme_Cluster_Break"
// (short name "GCB").
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; it
// presumably ties the type to the listed `data_marker` and `singleton` — confirm there.
make_enumerated_property! {
    name: "Grapheme_Cluster_Break";
    short_name: "GCB";
    ident: GraphemeClusterBreak;
    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
    ule_ty: u8;
}
1203
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::WordBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('.'),
///     WordBreak::MidNumLet
/// ); // U+002E: Full Stop
/// // Written as an escape so the fullwidth comma cannot be mistaken for
/// // (or normalized into) the ASCII comma.
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('\u{FF0C}'),
///     WordBreak::MidNum
/// ); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Returns an ICU4C `UWordBreak` value.
    ///
    /// This is the number wrapped by the struct, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UWordBreak` value.
    ///
    /// The number is stored as given; it is not checked against the named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1242
// Named constants for `WordBreak`, in increasing numeric order (0 through 22).
// The trailing `name=` comments presumably record each value's alias in the
// Unicode data — confirm against PropertyValueAliases.txt.
// `create_const_array!` re-emits this `impl` and, per its documentation, additionally
// produces an `ALL_VALUES` list of (name, value) pairs.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl WordBreak {
    pub const Other: WordBreak = WordBreak(0); // name="XX"
    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
    pub const Format: WordBreak = WordBreak(2); // name="FO"
    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
    pub const CR: WordBreak = WordBreak(8); // name="CR"
    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
    pub const LF: WordBreak = WordBreak(10); // name="LF"
    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
    pub const Newline: WordBreak = WordBreak(12); // name="NL"
    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
    pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
    /// This value is obsolete and unused.
    pub const EBase: WordBreak = WordBreak(17); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
}
}
1276
// Declares `WordBreak` as the enumerated property "Word_Break" (short name "WB").
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; it
// presumably ties the type to the listed `data_marker` and `singleton` — confirm there.
make_enumerated_property! {
    name: "Word_Break";
    short_name: "WB";
    ident: WordBreak;
    data_marker: crate::provider::PropertyEnumWordBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
    ule_ty: u8;
}
1285
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
///
/// // Written as an escape so the fullwidth digit cannot be mistaken for
/// // (or normalized into) the ASCII digit.
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get('\u{FF19}'),
///     SentenceBreak::Numeric
/// ); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get(','),
///     SentenceBreak::SContinue
/// ); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns an ICU4C `USentenceBreak` value.
    ///
    /// This is the number wrapped by the struct, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `USentenceBreak` value.
    ///
    /// The number is stored as given; it is not checked against the named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1324
// Named constants for `SentenceBreak`, in increasing numeric order (0 through 14).
// The trailing `name=` comments presumably record each value's short alias in the
// Unicode data — confirm against PropertyValueAliases.txt.
// `create_const_array!` re-emits this `impl` and, per its documentation, additionally
// produces an `ALL_VALUES` list of (name, value) pairs.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl SentenceBreak {
    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
}
}
1346
// Declares `SentenceBreak` as the enumerated property "Sentence_Break"
// (short name "SB").
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt; it
// presumably ties the type to the listed `data_marker` and `singleton` — confirm there.
make_enumerated_property! {
    name: "Sentence_Break";
    short_name: "SB";
    ident: SentenceBreak;
    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
    ule_ty: u8;
}
1355
/// Property Canonical_Combining_Class.
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the
/// preferred API to look up the Canonical_Combining_Class property by scalar value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
///     CanonicalCombiningClass::NotReordered
/// ); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
///     CanonicalCombiningClass::Above
/// ); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);

impl CanonicalCombiningClass {
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// This is the number wrapped by the struct, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// The number is stored as given; it is not checked against the named constants.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1401
create_const_array! {
// Named values of the `CanonicalCombiningClass` open enum. The impl is wrapped in
// `create_const_array!` so that, per that macro's documentation, the (name, value) pairs
// are also collected for `test_enumerated_property_completeness`.
//
// These constant names come from PropertyValueAliases.txt
// The trailing `name="..."` comment on each line records the alias associated with that
// value (presumably the short alias from the same file — confirm against the data).
// Numeric values without a constant here (e.g. 2 through 5) simply have no named alias
// in this list.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
}
1467
// Hooks `CanonicalCombiningClass` up as the enumerated property "Canonical_Combining_Class"
// (short name "ccc"), naming its data marker and baked singleton and using `u8` as the
// ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm the
// exact items it generates there.
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1476
/// Property Indic_Conjunct_Break.
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
///
/// This is an open enum: a `u8` newtype whose named values
/// (`None`, `Consonant`, `Extend`, `Linker`) are associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicConjunctBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('a'),
///     IndicConjunctBreak::None
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'),
///     IndicConjunctBreak::Linker
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'),
///     IndicConjunctBreak::Consonant
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'),
///     IndicConjunctBreak::Extend
/// );
/// ```
#[doc(hidden)] // draft API in ICU4C
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)] // same layout as the wrapped `u8`
pub struct IndicConjunctBreak(pub(crate) u8);
1509
1510impl IndicConjunctBreak {
1511    /// Returns an ICU4C `UIndicConjunctBreak` value.
1512    pub const fn to_icu4c_value(self) -> u8 {
1513        self.0
1514    }
1515    /// Constructor from an ICU4C `UIndicConjunctBreak` value.
1516    pub const fn from_icu4c_value(value: u8) -> Self {
1517        Self(value)
1518    }
1519}
1520
create_const_array! {
// Named values of the `IndicConjunctBreak` open enum. The wrapped number is the one
// that `to_icu4c_value` returns. Wrapping the impl in `create_const_array!` also
// collects the (name, value) pairs, as described in that macro's documentation.
#[doc(hidden)] // draft API in ICU4C
#[allow(non_upper_case_globals)]
impl IndicConjunctBreak {
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
}
}
1531
// Hooks `IndicConjunctBreak` up as the enumerated property "Indic_Conjunct_Break"
// (short name "InCB"), naming its data marker and baked singleton and using `u8` as the
// ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm the
// exact items it generates there.
make_enumerated_property! {
    name: "Indic_Conjunct_Break";
    short_name: "InCB";
    ident: IndicConjunctBreak;
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
    ule_ty: u8;
}
1540
/// Property Indic_Syllabic_Category.
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// This is an open enum: a `u8` newtype whose named values, such as
/// `IndicSyllabicCategory::Bindu`, are associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('a'),
///     IndicSyllabicCategory::Other
/// );
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'),
///     IndicSyllabicCategory::Bindu
/// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)] // same layout as the wrapped `u8`
pub struct IndicSyllabicCategory(pub(crate) u8);
1564
1565impl IndicSyllabicCategory {
1566    /// Returns an ICU4C `UIndicSyllabicCategory` value.
1567    pub const fn to_icu4c_value(self) -> u8 {
1568        self.0
1569    }
1570    /// Constructor from an ICU4C `UIndicSyllabicCategory` value.
1571    pub const fn from_icu4c_value(value: u8) -> Self {
1572        Self(value)
1573    }
1574}
1575
create_const_array! {
// Named values of the `IndicSyllabicCategory` open enum. The wrapped number is the one
// that `to_icu4c_value` returns. Wrapping the impl in `create_const_array!` also
// collects the (name, value) pairs, as described in that macro's documentation.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
}
1619
// Hooks `IndicSyllabicCategory` up as the enumerated property "Indic_Syllabic_Category"
// (short name "InSC"), naming its data marker and baked singleton and using `u8` as the
// ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm the
// exact items it generates there.
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
}
1628
/// Enumerated property Joining_Type.
///
/// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of
/// each property value.
///
/// This is an open enum: a `u8` newtype whose named values, such as
/// `JoiningType::DualJoining`, are associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::JoiningType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('ؠ'),
///     JoiningType::DualJoining
/// ); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('𐫍'),
///     JoiningType::LeftJoining
/// ); // U+10ACD: Manichaean Letter Heth
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)] // same layout as the wrapped `u8`
pub struct JoiningType(pub(crate) u8);
1653
1654impl JoiningType {
1655    /// Returns an ICU4C `UJoiningType` value.
1656    pub const fn to_icu4c_value(self) -> u8 {
1657        self.0
1658    }
1659    /// Constructor from an ICU4C `UJoiningType` value.
1660    pub const fn from_icu4c_value(value: u8) -> Self {
1661        Self(value)
1662    }
1663}
1664
create_const_array! {
// Named values of the `JoiningType` open enum. The wrapped number is the one that
// `to_icu4c_value` returns; the trailing `name="..."` comments record the one-letter
// alias noted for each value. Wrapping the impl in `create_const_array!` also collects
// the (name, value) pairs, as described in that macro's documentation.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
1677
// Hooks `JoiningType` up as the enumerated property "Joining_Type" (short name "jt"),
// naming its data marker and baked singleton and using `u8` as the ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm the
// exact items it generates there.
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8;
}
1686
/// Property Vertical_Orientation.
///
/// See UTR #50:
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// This is an open enum: a `u8` newtype whose named values
/// (`Rotated`, `TransformedRotated`, `TransformedUpright`, `Upright`) are
/// associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::VerticalOrientation, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('a'),
///     VerticalOrientation::Rotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('§'),
///     VerticalOrientation::Upright
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x2329),
///     VerticalOrientation::TransformedRotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x3001),
///     VerticalOrientation::TransformedUpright
/// );
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)] // same layout as the wrapped `u8`
pub struct VerticalOrientation(pub(crate) u8);
1719
1720impl VerticalOrientation {
1721    /// Returns an ICU4C `UVerticalOrientation` value.
1722    pub const fn to_icu4c_value(self) -> u8 {
1723        self.0
1724    }
1725    /// Constructor from an ICU4C `UVerticalOrientation` value.
1726    pub const fn from_icu4c_value(value: u8) -> Self {
1727        Self(value)
1728    }
1729}
1730
create_const_array! {
// Named values of the `VerticalOrientation` open enum. The wrapped number is the one
// that `to_icu4c_value` returns; the trailing `name="..."` comments record the short
// alias noted for each value. Wrapping the impl in `create_const_array!` also collects
// the (name, value) pairs, as described in that macro's documentation.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
}
1741
// Hooks `VerticalOrientation` up as the enumerated property "Vertical_Orientation"
// (short name "vo"), naming its data marker and baked singleton and using `u8` as the
// ULE type.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm the
// exact items it generates there.
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8;
}
1750
1751pub use crate::code_point_set::BinaryProperty;
1752
// Generates the public marker type for a single Unicode binary property.
//
// Expected input, in this exact order:
//   name:        long property name (string literal)
//   short_name:  short property name (string literal)
//   ident:       identifier of the unit struct to declare
//   data_marker: type used as `BinaryProperty::DataMarker`
//   singleton:   name of the item on `crate::provider::Baked` referenced by `SINGLETON`
// followed by one or more attributes (normally `///` doc comments), which are attached
// to the generated struct.
macro_rules! make_binary_property {
    (
        name: $long:literal;
        short_name: $short:literal;
        ident: $prop:ident;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $prop;

        impl crate::private::Sealed for $prop {}

        impl BinaryProperty for $prop {
            const NAME: &'static [u8] = $long.as_bytes();
            const SHORT_NAME: &'static [u8] = $short.as_bytes();
            type DataMarker = $marker;
            // This constant is only compiled in when the `compiled_data` feature is on.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$baked;
        }
    };
}
1779
// Declares the `AsciiHexDigit` marker struct for the binary property "ASCII_Hex_Digit"
// (short name "AHex"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1802
// Declares the `Alnum` marker struct for the binary property "Alnum" (short name
// "Alnum"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Alnum";
    short_name: "Alnum";
    ident: Alnum;
    data_marker: crate::provider::PropertyBinaryAlnumV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
}
1813
// Declares the `Alphabetic` marker struct for the binary property "Alphabetic" (short
// name "Alpha"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
    /// Alphabetic characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

}
1837
// Declares the `BidiControl` marker struct for the binary property "Bidi_Control"
// (short name "Bidi_C"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

}
1860
// Declares the `BidiMirrored` marker struct for the binary property "Bidi_Mirrored"
// (short name "Bidi_M"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
    /// Characters that are mirrored in bidirectional text.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

}
1884
// Declares the `Blank` marker struct for the binary property "Blank" (short name
// "Blank"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Blank";
    short_name: "Blank";
    ident: Blank;
    data_marker: crate::provider::PropertyBinaryBlankV1;
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
    /// Horizontal whitespace characters.

}
1894
// Declares the `Cased` marker struct for the binary property "Cased" (short name
// "Cased"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::PropertyBinaryCasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

}
1916
// Declares the `CaseIgnorable` marker struct for the binary property "Case_Ignorable"
// (short name "CI"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
    /// Characters which are ignored for casing purposes.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
    /// ```

}
1938
// Declares the `FullCompositionExclusion` marker struct for the binary property
// "Full_Composition_Exclusion" (short name "Comp_Ex"); the trailing `///` lines become
// the struct's documentation.
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>

}
1950
// Declares the `ChangesWhenCasefolded` marker struct for the binary property
// "Changes_When_Casefolded" (short name "CWCF"); the trailing `///` lines become the
// struct's documentation.
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

}
1972
// Declares the `ChangesWhenCasemapped` marker struct for the binary property
// "Changes_When_Casemapped" (short name "CWCM"); the trailing `///` lines become the
// struct's documentation.
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
    /// Characters which may change when they undergo case mapping.

}
1982
// Declares the `ChangesWhenNfkcCasefolded` marker struct for the binary property
// "Changes_When_NFKC_Casefolded" (short name "CWKCF"); the trailing `///` lines become
// the struct's documentation.
make_binary_property! {
    name: "Changes_When_NFKC_Casefolded";
    short_name: "CWKCF";
    ident: ChangesWhenNfkcCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
    ///
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

}
2004
// Declares the `ChangesWhenLowercased` marker struct for the binary property
// "Changes_When_Lowercased" (short name "CWL"); the trailing `///` lines become the
// struct's documentation.
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

}
2026
// Declares the `ChangesWhenTitlecased` marker struct for the binary property
// "Changes_When_Titlecased" (short name "CWT"); the trailing `///` lines become the
// struct's documentation.
make_binary_property! {
    name: "Changes_When_Titlecased";
    short_name: "CWT";
    ident: ChangesWhenTitlecased;
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenTitlecased;
    ///
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

}
2048
// Declares the `ChangesWhenUppercased` marker struct for the binary property
// "Changes_When_Uppercased" (short name "CWU"); the trailing `///` lines become the
// struct's documentation.
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

}
2070
// Declares the `Dash` marker struct for the binary property "Dash" (short name "Dash");
// the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::PropertyBinaryDashV1;
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```

}
2094
// Declares the `Deprecated` marker struct for the binary property "Deprecated" (short
// name "Dep"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Deprecated";
    short_name: "Dep";
    ident: Deprecated;
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
    /// Deprecated characters.
    ///
    /// No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Deprecated;
    ///
    /// let deprecated = CodePointSetData::new::<Deprecated>();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

}
2119
// Declares the `DefaultIgnorableCodePoint` marker struct for the binary property
// "Default_Ignorable_Code_Point" (short name "DI"); the trailing `///` lines become the
// struct's documentation.
make_binary_property! {
    name: "Default_Ignorable_Code_Point";
    short_name: "DI";
    ident: DefaultIgnorableCodePoint;
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
    /// For programmatic determination of default ignorable code points.
    ///
    /// New characters that
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
    /// ranges, permitting programs to correctly handle the default rendering of such
    /// characters when not otherwise supported.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::DefaultIgnorableCodePoint;
    ///
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
    ///
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

}
2146
// Declares the `Diacritic` marker struct for the binary property "Diacritic" (short
// name "Dia"); the trailing `///` lines become the struct's documentation.
make_binary_property! {
    name: "Diacritic";
    short_name: "Dia";
    ident: Diacritic;
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
    /// Characters that linguistically modify the meaning of another character to which they apply.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Diacritic;
    ///
    /// let diacritic = CodePointSetData::new::<Diacritic>();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```

}
2168
// Declares the `EmojiModifierBase` marker struct for the binary property
// "Emoji_Modifier_Base" (short name "EBase"); the trailing `///` lines become the
// struct's documentation.
make_binary_property! {
    name: "Emoji_Modifier_Base";
    short_name: "EBase";
    ident: EmojiModifierBase;
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
    /// Characters that can serve as a base for emoji modifiers.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifierBase;
    ///
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```

}
2190
2191make_binary_property! {
2192    name: "Emoji_Component";
2193    short_name: "EComp";
2194    ident: EmojiComponent;
2195    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
2196    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
2197    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
2198    /// separate choices, such as base characters for emoji keycaps.
2199    ///
2200    /// # Example
2201    ///
2202    /// ```
2203    /// use icu::properties::CodePointSetData;
2204    /// use icu::properties::props::EmojiComponent;
2205    ///
2206    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
2207    ///
2208    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2209    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
2210    /// assert!(emoji_component.contains('7'));  // U+0037 DIGIT SEVEN (a base character for an emoji keycap)
2211    /// assert!(!emoji_component.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
2212    /// ```
2213
2214}
2215
2216make_binary_property! {
2217    name: "Emoji_Modifier";
2218    short_name: "EMod";
2219    ident: EmojiModifier;
2220    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
2221    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
2222    /// Characters that are emoji modifiers (see also [`EmojiModifierBase`]).
2223    ///
2224    /// # Example
2225    ///
2226    /// ```
2227    /// use icu::properties::CodePointSetData;
2228    /// use icu::properties::props::EmojiModifier;
2229    ///
2230    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
2231    ///
2232    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
2233    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2234    /// ```
2235
2236}
2237
2238make_binary_property! {
2239    name: "Emoji";
2240    short_name: "Emoji";
2241    ident: Emoji;
2242    data_marker: crate::provider::PropertyBinaryEmojiV1;
2243    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
2244    /// Characters that are emoji.
2245    ///
2246    /// # Example
2247    ///
2248    /// ```
2249    /// use icu::properties::CodePointSetData;
2250    /// use icu::properties::props::Emoji;
2251    ///
2252    /// let emoji = CodePointSetData::new::<Emoji>();
2253    ///
2254    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
2255    /// assert!(!emoji.contains('V'));  // U+0056 LATIN CAPITAL LETTER V
2256    /// ```
2257
2258}
2259
2260make_binary_property! {
2261    name: "Emoji_Presentation";
2262    short_name: "EPres";
2263    ident: EmojiPresentation;
2264    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
2265    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
2266    /// Characters that have emoji presentation by default.
2267    ///
2268    /// # Example
2269    ///
2270    /// ```
2271    /// use icu::properties::CodePointSetData;
2272    /// use icu::properties::props::EmojiPresentation;
2273    ///
2274    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2275    ///
2276    /// assert!(emoji_presentation.contains('🦬'));  // U+1F9AC BISON
2277    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2278    /// ```
2279
2280}
2281
2282make_binary_property! {
2283    name: "Extender";
2284    short_name: "Ext";
2285    ident: Extender;
2286    data_marker: crate::provider::PropertyBinaryExtenderV1;
2287    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
2288    /// Characters whose principal function is to extend the value of a preceding alphabetic
2289    /// character or to extend the shape of adjacent characters. Typical of these are length and iteration marks.
2290    ///
2291    /// # Example
2292    ///
2293    /// ```
2294    /// use icu::properties::CodePointSetData;
2295    /// use icu::properties::props::Extender;
2296    ///
2297    /// let extender = CodePointSetData::new::<Extender>();
2298    ///
2299    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
2300    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2301    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
2302    /// ```
2303
2304}
2305
2306make_binary_property! {
2307    name: "Extended_Pictographic";
2308    short_name: "ExtPict";
2309    ident: ExtendedPictographic;
2310    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
2311    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
2312    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2313    /// emoji characters.
2314    ///
2315    /// # Example
2316    ///
2317    /// ```
2318    /// use icu::properties::CodePointSetData;
2319    /// use icu::properties::props::ExtendedPictographic;
2320    ///
2321    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2322    ///
2323    /// assert!(extended_pictographic.contains('🥳'));  // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2324    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2325    /// ```
2326
2327}
2328
2329make_binary_property! {
2330    name: "Graph";
2331    short_name: "Graph";
2332    ident: Graph;
2333    data_marker: crate::provider::PropertyBinaryGraphV1;
2334    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2335    /// Visible characters.
2336    ///
2337    /// This is defined for POSIX compatibility (the `graph` character class).
2338
2339}
2340
2341make_binary_property! {
2342    name: "Grapheme_Base";
2343    short_name: "Gr_Base";
2344    ident: GraphemeBase;
2345    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
2346    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
2347    /// Property used together with the definition of Standard Korean Syllable Block to define
2348    /// "Grapheme base".
2349    ///
2350    /// See D58 in Chapter 3, Conformance in the Unicode Standard. See also [`GraphemeExtend`].
2351    ///
2352    /// # Example
2353    ///
2354    /// ```
2355    /// use icu::properties::CodePointSetData;
2356    /// use icu::properties::props::GraphemeBase;
2357    ///
2358    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2359    ///
2360    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2361    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2362    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2363    /// ```
2364
2365}
2366
2367make_binary_property! {
2368    name: "Grapheme_Extend";
2369    short_name: "Gr_Ext";
2370    ident: GraphemeExtend;
2371    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
2372    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
2373    /// Property used to define "Grapheme extender".
2374    ///
2375    /// See D59 in Chapter 3, Conformance in the Unicode Standard.
2376    /// See also [`GraphemeBase`].
2377    ///
2378    /// # Example
2379    ///
2380    /// ```
2381    /// use icu::properties::CodePointSetData;
2382    /// use icu::properties::props::GraphemeExtend;
2383    ///
2384    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2385    ///
2386    /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2387    /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2388    /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2389    /// ```
2390
2391}
2392
2393make_binary_property! {
2394    name: "Grapheme_Link";
2395    short_name: "Gr_Link";
2396    ident: GraphemeLink;
2397    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
2398    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
2399    /// Deprecated Unicode property.
2400    ///
2401    /// It was formerly proposed for programmatic determination of grapheme
2402    /// cluster boundaries.
2403}
2404
2405make_binary_property! {
2406    name: "Hex_Digit";
2407    short_name: "Hex";
2408    ident: HexDigit;
2409    data_marker: crate::provider::PropertyBinaryHexDigitV1;
2410    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
2411    /// Characters commonly used for the representation of hexadecimal numbers, plus their
2412    /// compatibility equivalents.
2413    ///
2414    /// # Example
2415    ///
2416    /// ```
2417    /// use icu::properties::CodePointSetData;
2418    /// use icu::properties::props::HexDigit;
2419    ///
2420    /// let hex_digit = CodePointSetData::new::<HexDigit>();
2421    ///
2422    /// assert!(hex_digit.contains('0'));
2423    /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
2424    /// assert!(hex_digit.contains('f'));
2425    /// assert!(hex_digit.contains('ｆ'));  // U+FF46 FULLWIDTH LATIN SMALL LETTER F
2426    /// assert!(hex_digit.contains('Ｆ'));  // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
2427    /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2428    /// ```
2429}
2430
2431make_binary_property! {
2432    name: "Hyphen";
2433    short_name: "Hyphen";
2434    ident: Hyphen;
2435    data_marker: crate::provider::PropertyBinaryHyphenV1;
2436    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
2437    /// Deprecated property, supplanted by the `Line_Break` property.
2438    ///
2439    /// Dashes which are used to mark connections between pieces of
2440    /// words, plus the Katakana middle dot.
2441}
2442
2443make_binary_property! {
2444    name: "Id_Continue";
2445    short_name: "IDC";
2446    ident: IdContinue;
2447    data_marker: crate::provider::PropertyBinaryIdContinueV1;
2448    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
2449    /// Characters that can come after the first character in an identifier.
2450    ///
2451    /// If using NFKC to
2452    /// fold differences between characters, use [`XidContinue`] instead.  See
2453    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2454    /// more details. See also [`IdStart`].
2455    ///
2456    /// # Example
2457    ///
2458    /// ```
2459    /// use icu::properties::CodePointSetData;
2460    /// use icu::properties::props::IdContinue;
2461    ///
2462    /// let id_continue = CodePointSetData::new::<IdContinue>();
2463    ///
2464    /// assert!(id_continue.contains('x'));
2465    /// assert!(id_continue.contains('1'));
2466    /// assert!(id_continue.contains('_'));  // U+005F LOW LINE
2467    /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2468    /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2469    /// assert!(id_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2470    /// ```
2471}
2472
2473make_binary_property! {
2474    name: "Ideographic";
2475    short_name: "Ideo";
2476    ident: Ideographic;
2477    data_marker: crate::provider::PropertyBinaryIdeographicV1;
2478    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
2479    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2480    /// ideographs, or related siniform ideographs.
2481    ///
2482    /// # Example
2483    ///
2484    /// ```
2485    /// use icu::properties::CodePointSetData;
2486    /// use icu::properties::props::Ideographic;
2487    ///
2488    /// let ideographic = CodePointSetData::new::<Ideographic>();
2489    ///
2490    /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2491    /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
2492    /// ```
2493}
2494
2495make_binary_property! {
2496    name: "Id_Start";
2497    short_name: "IDS";
2498    ident: IdStart;
2499    data_marker: crate::provider::PropertyBinaryIdStartV1;
2500    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
2501    /// Characters that can begin an identifier.
2502    ///
2503    /// If using NFKC to fold differences between
2504    /// characters, use [`XidStart`] instead.  See [`Unicode Standard Annex
2505    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. See also [`IdContinue`].
2506    ///
2507    /// # Example
2508    ///
2509    /// ```
2510    /// use icu::properties::CodePointSetData;
2511    /// use icu::properties::props::IdStart;
2512    ///
2513    /// let id_start = CodePointSetData::new::<IdStart>();
2514    ///
2515    /// assert!(id_start.contains('x'));
2516    /// assert!(!id_start.contains('1'));
2517    /// assert!(!id_start.contains('_'));  // U+005F LOW LINE
2518    /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2519    /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2520    /// assert!(id_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2521    /// ```
2522}
2523
2524make_binary_property! {
2525    name: "Ids_Binary_Operator";
2526    short_name: "IDSB";
2527    ident: IdsBinaryOperator;
2528    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
2529    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
2530    /// Binary operator characters used in Ideographic Description Sequences (see also [`IdsTrinaryOperator`]).
2531    ///
2532    /// # Example
2533    ///
2534    /// ```
2535    /// use icu::properties::CodePointSetData;
2536    /// use icu::properties::props::IdsBinaryOperator;
2537    ///
2538    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2539    ///
2540    /// assert!(ids_binary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2541    /// assert!(!ids_binary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2542    /// ```
2543}
2544
2545make_binary_property! {
2546    name: "Ids_Trinary_Operator";
2547    short_name: "IDST";
2548    ident: IdsTrinaryOperator;
2549    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
2550    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
2551    /// Trinary operator characters used in Ideographic Description Sequences (see also [`IdsBinaryOperator`]).
2552    ///
2553    /// # Example
2554    ///
2555    /// ```
2556    /// use icu::properties::CodePointSetData;
2557    /// use icu::properties::props::IdsTrinaryOperator;
2558    ///
2559    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2560    ///
2561    /// assert!(ids_trinary_operator.contains('\u{2FF2}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2562    /// assert!(ids_trinary_operator.contains('\u{2FF3}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2563    /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
2564    /// assert!(!ids_trinary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2565    /// assert!(!ids_trinary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2566    /// ```
2567}
2568
2569make_binary_property! {
2570    name: "Join_Control";
2571    short_name: "Join_C";
2572    ident: JoinControl;
2573    data_marker: crate::provider::PropertyBinaryJoinControlV1;
2574    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
2575    /// Format control characters which have specific functions for control of cursive joining
2576    /// and ligation.
2577    ///
2578    /// # Example
2579    ///
2580    /// ```
2581    /// use icu::properties::CodePointSetData;
2582    /// use icu::properties::props::JoinControl;
2583    ///
2584    /// let join_control = CodePointSetData::new::<JoinControl>();
2585    ///
2586    /// assert!(join_control.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2587    /// assert!(join_control.contains('\u{200D}'));  // ZERO WIDTH JOINER
2588    /// assert!(!join_control.contains('\u{200E}'));  // LEFT-TO-RIGHT MARK
2589    /// ```
2590}
2591
2592make_binary_property! {
2593    name: "Logical_Order_Exception";
2594    short_name: "LOE";
2595    ident: LogicalOrderException;
2596    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
2597    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
2598    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao, which use a visual order display model.
2599    ///
2600    /// # Example
2601    ///
2602    /// ```
2603    /// use icu::properties::CodePointSetData;
2604    /// use icu::properties::props::LogicalOrderException;
2605    ///
2606    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2607    ///
2608    /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
2609    /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
2610    /// ```
2611}
2612
2613make_binary_property! {
2614    name: "Lowercase";
2615    short_name: "Lower";
2616    ident: Lowercase;
2617    data_marker: crate::provider::PropertyBinaryLowercaseV1;
2618    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
2619    /// Lowercase characters.
2620    ///
2621    /// # Example
2622    ///
2623    /// ```
2624    /// use icu::properties::CodePointSetData;
2625    /// use icu::properties::props::Lowercase;
2626    ///
2627    /// let lowercase = CodePointSetData::new::<Lowercase>();
2628    ///
2629    /// assert!(lowercase.contains('a'));  // U+0061 LATIN SMALL LETTER A
2630    /// assert!(!lowercase.contains('A'));  // U+0041 LATIN CAPITAL LETTER A
2631    /// ```
2632}
2633
2634make_binary_property! {
2635    name: "Math";
2636    short_name: "Math";
2637    ident: Math;
2638    data_marker: crate::provider::PropertyBinaryMathV1;
2639    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
2640    /// Characters used in mathematical notation.
2641    ///
2642    /// # Example
2643    ///
2644    /// ```
2645    /// use icu::properties::CodePointSetData;
2646    /// use icu::properties::props::Math;
2647    ///
2648    /// let math = CodePointSetData::new::<Math>();
2649    ///
2650    /// assert!(math.contains('='));
2651    /// assert!(math.contains('+'));
2652    /// assert!(!math.contains('-'));  // U+002D HYPHEN-MINUS
2653    /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
2654    /// assert!(!math.contains('/'));  // U+002F SOLIDUS
2655    /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
2656    /// ```
2657}
2658
2659make_binary_property! {
2660    name: "Noncharacter_Code_Point";
2661    short_name: "NChar";
2662    ident: NoncharacterCodePoint;
2663    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
2664    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
2665    /// Code points permanently reserved for internal use.
2666    ///
2667    /// # Example
2668    ///
2669    /// ```
2670    /// use icu::properties::CodePointSetData;
2671    /// use icu::properties::props::NoncharacterCodePoint;
2672    ///
2673    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2674    ///
2675    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));  // first code point of the U+FDD0..U+FDEF noncharacter range
2676    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));  // last code point of the Basic Multilingual Plane
2677    /// assert!(!noncharacter_code_point.contains('\u{10000}'));  // LINEAR B SYLLABLE B008 A
2678    /// ```
2679}
2680
2681make_binary_property! {
2682    name: "NFC_Inert";
2683    short_name: "NFC_Inert";
2684    ident: NfcInert;
2685    data_marker: crate::provider::PropertyBinaryNfcInertV1;
2686    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
2687    /// Characters that are inert under NFC (Normalization Form C), i.e., they do not interact with adjacent characters.
2688}
2689
2690make_binary_property! {
2691    name: "NFD_Inert";
2692    short_name: "NFD_Inert";
2693    ident: NfdInert;
2694    data_marker: crate::provider::PropertyBinaryNfdInertV1;
2695    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
2696    /// Characters that are inert under NFD (Normalization Form D), i.e., they do not interact with adjacent characters.
2697}
2698
2699make_binary_property! {
2700    name: "NFKC_Inert";
2701    short_name: "NFKC_Inert";
2702    ident: NfkcInert;
2703    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
2704    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
2705    /// Characters that are inert under NFKC (Normalization Form KC), i.e., they do not interact with adjacent characters.
2706}
2707
2708make_binary_property! {
2709    name: "NFKD_Inert";
2710    short_name: "NFKD_Inert";
2711    ident: NfkdInert;
2712    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
2713    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
2714    /// Characters that are inert under NFKD (Normalization Form KD), i.e., they do not interact with adjacent characters.
2715}
2716
2717make_binary_property! {
2718    name: "Pattern_Syntax";
2719    short_name: "Pat_Syn";
2720    ident: PatternSyntax;
2721    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
2722    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
2723    /// Characters used as syntax in patterns (such as regular expressions).
2724    ///
2725    /// See [`Unicode
2726    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2727    /// details.
2728    ///
2729    /// # Example
2730    ///
2731    /// ```
2732    /// use icu::properties::CodePointSetData;
2733    /// use icu::properties::props::PatternSyntax;
2734    ///
2735    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2736    ///
2737    /// assert!(pattern_syntax.contains('{'));  // U+007B LEFT CURLY BRACKET
2738    /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
2739    /// assert!(!pattern_syntax.contains('0'));  // U+0030 DIGIT ZERO
2740    /// ```
2741}
2742
2743make_binary_property! {
2744    name: "Pattern_White_Space";
2745    short_name: "Pat_WS";
2746    ident: PatternWhiteSpace;
2747    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
2748    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
2749    /// Characters used as whitespace in patterns (such as regular expressions).
2750    ///
2751    /// See
2752    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2753    /// more details.
2754    ///
2755    /// # Example
2756    ///
2757    /// ```
2758    /// use icu::properties::CodePointSetData;
2759    /// use icu::properties::props::PatternWhiteSpace;
2760    ///
2761    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2762    ///
2763    /// assert!(pattern_white_space.contains(' '));  // U+0020 SPACE
2764    /// assert!(pattern_white_space.contains('\u{2029}'));  // PARAGRAPH SEPARATOR
2765    /// assert!(pattern_white_space.contains('\u{000A}'));  // NEW LINE
2766    /// assert!(!pattern_white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
2767    /// ```
2768}
2769
2770make_binary_property! {
2771    name: "Prepended_Concatenation_Mark";
2772    short_name: "PCM";
2773    ident: PrependedConcatenationMark;
2774    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
2775    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
2776    /// A small class of visible format controls, which precede and then span a sequence of
2777    /// other characters, usually digits (for example, U+0600 ARABIC NUMBER SIGN).
2778}
2779
2780make_binary_property! {
2781    name: "Print";
2782    short_name: "Print";
2783    ident: Print;
2784    data_marker: crate::provider::PropertyBinaryPrintV1;
2785    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
2786    /// Printable characters (visible characters and whitespace).
2787    ///
2788    /// This is defined for POSIX compatibility (the `print` character class).
2789}
2790
2791make_binary_property! {
2792    name: "Quotation_Mark";
2793    short_name: "QMark";
2794    ident: QuotationMark;
2795    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
2796    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
2797    /// Punctuation characters that function as quotation marks.
2798    ///
2799    /// # Example
2800    ///
2801    /// ```
2802    /// use icu::properties::CodePointSetData;
2803    /// use icu::properties::props::QuotationMark;
2804    ///
2805    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2806    ///
2807    /// assert!(quotation_mark.contains('\''));  // U+0027 APOSTROPHE
2808    /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
2809    /// assert!(!quotation_mark.contains('<'));  // U+003C LESS-THAN SIGN
2810    /// ```
2811}
2812
2813make_binary_property! {
2814    name: "Radical";
2815    short_name: "Radical";
2816    ident: Radical;
2817    data_marker: crate::provider::PropertyBinaryRadicalV1;
2818    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
2819    /// Characters used in the definition of Ideographic Description Sequences.
2820    ///
2821    /// # Example
2822    ///
2823    /// ```
2824    /// use icu::properties::CodePointSetData;
2825    /// use icu::properties::props::Radical;
2826    ///
2827    /// let radical = CodePointSetData::new::<Radical>();
2828    ///
2829    /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
2830    /// assert!(!radical.contains('\u{F95E}'));  // CJK COMPATIBILITY IDEOGRAPH-F95E (escaped so text normalization cannot alter it)
2831    /// ```
2832}
2833
2834make_binary_property! {
2835    name: "Regional_Indicator";
2836    short_name: "RI";
2837    ident: RegionalIndicator;
2838    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
2839    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
2840    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
2841    ///
2842    /// # Example
2843    ///
2844    /// ```
2845    /// use icu::properties::CodePointSetData;
2846    /// use icu::properties::props::RegionalIndicator;
2847    ///
2848    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
2849    ///
2850    /// assert!(regional_indicator.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2851    /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
2852    /// assert!(!regional_indicator.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
2853    /// ```
2854}
2855
2856make_binary_property! {
2857    name: "Soft_Dotted";
2858    short_name: "SD";
2859    ident: SoftDotted;
2860    data_marker: crate::provider::PropertyBinarySoftDottedV1;
2861    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
2862    /// Characters with a "soft dot", like i or j.
2863    ///
2864    /// An accent placed on these characters causes
2865    /// the dot to disappear.
2866    ///
2867    /// # Example
2868    ///
2869    /// ```
2870    /// use icu::properties::CodePointSetData;
2871    /// use icu::properties::props::SoftDotted;
2872    ///
2873    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
2874    ///
2875    /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
2876    /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
2877    /// ```
2878}
2879
2880make_binary_property! {
2881    name: "Segment_Starter";
2882    short_name: "Segment_Starter";
2883    ident: SegmentStarter;
2884    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
2885    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
2886    /// Characters that are starters in terms of Unicode normalization and combining character
2887    /// sequences. This is an ICU-specific property.
2888}
2889
2890make_binary_property! {
2891    name: "Case_Sensitive";
2892    short_name: "Case_Sensitive";
2893    ident: CaseSensitive;
2894    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
2895    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
2896    /// Characters that are either the source of a case mapping or in the target of a case
2897    /// mapping. Not the same as the general category `Cased_Letter`.
2898}
2899
2900make_binary_property! {
2901    name: "Sentence_Terminal";
2902    short_name: "STerm";
2903    ident: SentenceTerminal;
2904    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
2905    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
2906    /// Punctuation characters that generally mark the end of sentences.
2907    ///
2908    /// # Example
2909    ///
2910    /// ```
2911    /// use icu::properties::CodePointSetData;
2912    /// use icu::properties::props::SentenceTerminal;
2913    ///
2914    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
2915    ///
2916    /// assert!(sentence_terminal.contains('.'));  // U+002E FULL STOP
2917    /// assert!(sentence_terminal.contains('?'));  // U+003F QUESTION MARK
2918    /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2919    /// assert!(!sentence_terminal.contains(','));  // U+002C COMMA
2920    /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2921    /// ```
2922}
2923
2924make_binary_property! {
2925    name: "Terminal_Punctuation";
2926    short_name: "Term";
2927    ident: TerminalPunctuation;
2928    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
2929    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
2930    /// Punctuation characters that generally mark the end of textual units.
2931    ///
2932    /// # Example
2933    ///
2934    /// ```
2935    /// use icu::properties::CodePointSetData;
2936    /// use icu::properties::props::TerminalPunctuation;
2937    ///
2938    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
2939    ///
2940    /// assert!(terminal_punctuation.contains('.'));  // U+002E FULL STOP
2941    /// assert!(terminal_punctuation.contains('?'));  // U+003F QUESTION MARK
2942    /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2943    /// assert!(terminal_punctuation.contains(','));  // U+002C COMMA
2944    /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2945    /// ```
2946}
2947
2948make_binary_property! {
2949    name: "Unified_Ideograph";
2950    short_name: "UIdeo";
2951    ident: UnifiedIdeograph;
2952    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
2953    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
2954    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
2955    ///
2956    /// # Example
2957    ///
2958    /// ```
2959    /// use icu::properties::CodePointSetData;
2960    /// use icu::properties::props::UnifiedIdeograph;
2961    ///
2962    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
2963    ///
2964    /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2965    /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
2966    /// assert!(!unified_ideograph.contains('𛅸'));  // U+1B178 NUSHU CHARACTER-1B178 (not a unified CJK ideograph)
2967    /// ```
2968}
2969
2970make_binary_property! {
2971    name: "Uppercase";
2972    short_name: "Upper";
2973    ident: Uppercase;
2974    data_marker: crate::provider::PropertyBinaryUppercaseV1;
2975    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
2976    /// Uppercase characters.
2977    ///
2978    /// # Example
2979    ///
2980    /// ```
2981    /// use icu::properties::CodePointSetData;
2982    /// use icu::properties::props::Uppercase;
2983    ///
2984    /// let uppercase = CodePointSetData::new::<Uppercase>();
2985    ///
2986    /// assert!(uppercase.contains('U'));  // U+0055 LATIN CAPITAL LETTER U
2987    /// assert!(!uppercase.contains('u'));  // U+0075 LATIN SMALL LETTER U
2988    /// ```
2989}
2990
2991make_binary_property! {
2992    name: "Variation_Selector";
2993    short_name: "VS";
2994    ident: VariationSelector;
2995    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
2996    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
2997    /// Characters that are Variation Selectors.
2998    ///
2999    /// # Example
3000    ///
3001    /// ```
3002    /// use icu::properties::CodePointSetData;
3003    /// use icu::properties::props::VariationSelector;
3004    ///
3005    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
3006    ///
3007    /// assert!(variation_selector.contains('\u{180D}'));  // MONGOLIAN FREE VARIATION SELECTOR THREE
3008    /// assert!(!variation_selector.contains('\u{303E}'));  // IDEOGRAPHIC VARIATION INDICATOR (a symbol, despite its name)
3009    /// assert!(variation_selector.contains('\u{FE0F}'));  // VARIATION SELECTOR-16
3010    /// assert!(!variation_selector.contains('\u{FE10}'));  // PRESENTATION FORM FOR VERTICAL COMMA (the code point right after VARIATION SELECTOR-16)
3011    /// assert!(variation_selector.contains('\u{E01EF}'));  // VARIATION SELECTOR-256
3012    /// ```
3013}
3014
3015make_binary_property! {
3016    name: "White_Space";
3017    short_name: "space";
3018    ident: WhiteSpace;
3019    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
3020    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
3021    /// Spaces, separator characters and other control characters which should be treated by
3022    /// programming languages as "white space" for the purpose of parsing elements.
3023    ///
3024    /// # Example
3025    ///
3026    /// ```
3027    /// use icu::properties::CodePointSetData;
3028    /// use icu::properties::props::WhiteSpace;
3029    ///
3030    /// let white_space = CodePointSetData::new::<WhiteSpace>();
3031    ///
3032    /// assert!(white_space.contains(' '));  // U+0020 SPACE
3033    /// assert!(white_space.contains('\u{000A}'));  // NEW LINE
3034    /// assert!(white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
3035    /// assert!(!white_space.contains('\u{200B}'));  // ZERO WIDTH SPACE
3036    /// ```
3037}
3038
3039make_binary_property! {
3040    name: "Xdigit";
3041    short_name: "Xdigit";
3042    ident: Xdigit;
3043    data_marker: crate::provider::PropertyBinaryXdigitV1;
3044    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
3045    /// Hexadecimal digits.
3046    ///
3047    /// This is defined for POSIX compatibility.
3048}
3049
3050make_binary_property! {
3051    name: "XID_Continue";
3052    short_name: "XIDC";
3053    ident: XidContinue;
3054    data_marker: crate::provider::PropertyBinaryXidContinueV1;
3055    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
3056    /// Characters that can come after the first character in an identifier.
3057    ///
3058    /// See [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/tr31-35.html)
3059    /// for more details.
3060    ///
3061    /// # Example
3062    ///
3063    /// ```
3064    /// use icu::properties::CodePointSetData;
3065    /// use icu::properties::props::XidContinue;
3066    ///
3067    /// let xid_continue = CodePointSetData::new::<XidContinue>();
3068    ///
3069    /// assert!(xid_continue.contains('x'));
3070    /// assert!(xid_continue.contains('1'));
3071    /// assert!(xid_continue.contains('_'));
3072    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
3073    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
3074    /// assert!(!xid_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
3075    /// ```
3076}
3077
3078make_binary_property! {
3079    name: "XID_Start";
3080    short_name: "XIDS";
3081    ident: XidStart;
3082    data_marker: crate::provider::PropertyBinaryXidStartV1;
3083    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
3084    /// Characters that can begin an identifier.
3085    ///
3086    /// See [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/tr31-35.html)
3087    /// for more details. Characters allowed after the first one are covered by
3088    /// [`XidContinue`].
3089    ///
3090    /// # Example
3091    ///
3092    /// ```
3093    /// use icu::properties::CodePointSetData;
3094    /// use icu::properties::props::XidStart;
3095    ///
3096    /// let xid_start = CodePointSetData::new::<XidStart>();
3097    ///
3098    /// assert!(xid_start.contains('x'));
3099    /// assert!(!xid_start.contains('1'));
3100    /// assert!(!xid_start.contains('_'));
3101    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
3102    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
3103    /// assert!(!xid_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
3104    /// ```
3105}
3106
3107pub use crate::emoji::EmojiSet;
3108
/// Declares a marker type that implements [`EmojiSet`].
///
/// The invocation supplies, in this order: `ident` (the type name), `data_marker`
/// (used as `EmojiSet::DataMarker`), `singleton` (the name of an associated constant on
/// `crate::provider::Baked`), followed by one or more attributes, typically doc comments.
///
/// The expansion is a `pub`, `#[non_exhaustive]` unit struct deriving `Debug`, an impl of
/// `crate::private::Sealed`, and an impl of [`EmojiSet`].
3109macro_rules! make_emoji_set {
3110    (
3111        ident: $ident:ident;
3112        data_marker: $data_marker:ty;
3113        singleton: $singleton:ident;
        // `+`: at least one attribute (e.g. a doc comment line) must be supplied.
3114        $(#[$doc:meta])+
3115    ) => {
        // The caller's attributes are forwarded onto the generated struct.
3116        $(#[$doc])+
3117        #[derive(Debug)]
3118        #[non_exhaustive]
3119        pub struct $ident;
3120
3121        impl crate::private::Sealed for $ident {}
3122
3123        impl EmojiSet for $ident {
3124            type DataMarker = $data_marker;
            // `SINGLETON` is only defined when the `compiled_data` feature is enabled.
3125            #[cfg(feature = "compiled_data")]
3126            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
3127                &crate::provider::Baked::$singleton;
3128        }
3129    }
3130}
3131
3132make_emoji_set! {
3133    ident: BasicEmoji;
3134    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
3135    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
3136    /// Characters and character sequences intended for general-purpose, independent, direct input.
3137    ///
3138    /// See [Unicode Technical Standard #51](https://unicode.org/reports/tr51/) for more
3139    /// details.
3140    ///
3141    /// # Example
3142    ///
3143    /// ```
3144    /// use icu::properties::EmojiSetData;
3145    /// use icu::properties::props::BasicEmoji;
3146    ///
3147    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
3148    ///
3149    /// assert!(!basic_emoji.contains('\u{0020}'));  // U+0020 SPACE
3150    /// assert!(!basic_emoji.contains('\n'));  // U+000A LINE FEED
3151    /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
3152    /// assert!(basic_emoji.contains_str("\u{1F983}"));
3153    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
3154    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
3155    /// ```
3156}
3157
/// Tests that the `ALL_VALUES` constants of several enumerated properties agree with the
/// values present in the baked name-parsing data.
3158#[cfg(test)]
3159mod test_enumerated_property_completeness {
3160    use super::*;
3161    use std::collections::BTreeMap;
3162
    /// Compares the numeric values found in `lookup` with those of `consts`.
    ///
    /// `lookup` is the name-to-value map for the property; `consts` are the property's
    /// constants, each converted to its numeric value through `u16::from`.
    ///
    /// # Panics
    ///
    /// Panics if a constant's value is absent from `lookup`, or if `lookup` holds a value
    /// that no constant matched. The message has one line per mismatch, prefixed with
    /// `Consts` or `Data` to show which side the entry came from.
3163    fn check_enum<'a, T: NamedEnumeratedProperty>(
3164        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
3165        consts: impl IntoIterator<Item = &'a T>,
3166    ) where
3167        u16: From<T>,
3168    {
        // Index the data side by numeric value. The map is keyed by value, so names that
        // share a value collapse into a single entry.
3169        let mut data: BTreeMap<_, _> = lookup
3170            .map
3171            .iter()
3172            .map(|(name, value)| (value, (name, "Data")))
3173            .collect();
3174
        // Pair each constant with its numeric value and the name reported by
        // `PropertyNamesLong`, falling back to "<unknown>" when there is none.
3175        let names = crate::PropertyNamesLong::<T>::new();
3176        let consts = consts.into_iter().map(|value| {
3177            (
3178                u16::from(*value) as usize,
3179                (
3180                    names.get(*value).unwrap_or("<unknown>").to_string(),
3181                    "Consts",
3182                ),
3183            )
3184        });
3185
        // A constant whose value has no data entry is a mismatch; matched entries are
        // removed from `data` as they are found.
3186        let mut diff = Vec::new();
3187        for t @ (value, _) in consts {
3188            if data.remove(&value).is_none() {
3189                diff.push(t);
3190            }
3191        }
        // Anything still left in `data` was not matched by any constant.
3192        diff.extend(data);
3193
        // Render one line per mismatch for the assertion message.
3194        let mut fmt_diff = String::new();
3195        for (value, (name, source)) in diff {
3196            fmt_diff.push_str(&format!("{source}:\t{name} = {value:?}\n"));
3197        }
3198
3199        assert!(
3200            fmt_diff.is_empty(),
3201            "Values defined in data do not match values defined in consts. Difference:\n{}",
3202            fmt_diff
3203        );
3204    }
3205
    // East_Asian_Width
3206    #[test]
3207    fn test_ea() {
3208        check_enum(
3209            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
3210            EastAsianWidth::ALL_VALUES,
3211        );
3212    }
3213
    // Canonical_Combining_Class
3214    #[test]
3215    fn test_ccc() {
3216        check_enum(
3217            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
3218            CanonicalCombiningClass::ALL_VALUES,
3219        );
3220    }
3221
    // Joining_Type
3222    #[test]
3223    fn test_jt() {
3224        check_enum(
3225            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
3226            JoiningType::ALL_VALUES,
3227        );
3228    }
3229
    // Indic_Syllabic_Category
3230    #[test]
3231    fn test_insc() {
3232        check_enum(
3233            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
3234            IndicSyllabicCategory::ALL_VALUES,
3235        );
3236    }
3237
    // Sentence_Break
3238    #[test]
3239    fn test_sb() {
3240        check_enum(
3241            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
3242            SentenceBreak::ALL_VALUES,
3243        );
3244    }
3245
    // Word_Break
3246    #[test]
3247    fn test_wb() {
3248        check_enum(
3249            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
3250            WordBreak::ALL_VALUES,
3251        );
3252    }
3253
    // Bidi_Class
3254    #[test]
3255    fn test_bc() {
3256        check_enum(
3257            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
3258            BidiClass::ALL_VALUES,
3259        );
3260    }
3261
    // Hangul_Syllable_Type
3262    #[test]
3263    fn test_hst() {
3264        check_enum(
3265            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
3266            HangulSyllableType::ALL_VALUES,
3267        );
3268    }
3269
    // Vertical_Orientation
3270    #[test]
3271    fn test_vo() {
3272        check_enum(
3273            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
3274            VerticalOrientation::ALL_VALUES,
3275        );
3276    }
3277}