icu_properties/props.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module defines all available properties.
6//!
7//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8//! and implement [`EnumeratedProperty`].
9//!
10//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12//!
13//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16//! can be constructed.
17//!
18//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Wraps an `impl` block of associated constants for an open-enum newtype and
/// re-emits it together with some generated items.
///
/// See [`test_enumerated_property_completeness`] for usage.
///
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
///
/// Besides the constants themselves (emitted unchanged), this produces:
/// - `pub const ALL_VALUES: &'static [EastAsianWidth]`, listing every declared
///   constant in declaration order;
/// - with the `datagen` feature, a `databake::Bake` impl that bakes a value as
///   its named constant when one matches and otherwise as a
///   `from_icu4c_value(..)` call;
/// - `impl From<EastAsianWidth> for u16`, which casts the first field of the
///   struct to `u16`.
macro_rules! create_const_array {
    (
        $( #[$impl_attr:meta] )*
        impl $prop:ident {
            $(
                $( #[$item_attr:meta] )*
                $vis:vis const $name:ident: $ty:ty = $value:expr;
            )*
        }
    ) => {
        // The caller's block, passed through verbatim, plus `ALL_VALUES`.
        $( #[$impl_attr] )*
        impl $prop {
            $(
                $( #[$item_attr] )*
                $vis const $name: $ty = $value;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$prop] = &[
                $( $prop::$name ),*
            ];
        }

        #[cfg(feature = "datagen")]
        impl databake::Bake for $prop {
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_properties");
                match *self {
                    // Known values are baked by name...
                    $(
                        Self::$name => databake::quote!(icu_properties::props::$prop::$name),
                    )*
                    // ...anything else falls back to its raw integer.
                    Self(v) => databake::quote!(icu_properties::props::$prop::from_icu4c_value(#v)),
                }
            }
        }

        impl From<$prop> for u16 {
            fn from(other: $prop) -> Self {
                other.0 as u16
            }
        }
    }
}
77
78pub use crate::code_point_map::EnumeratedProperty;
79
/// Implements the property traits for an enumerated property value type.
///
/// Expands to:
/// - `impl crate::private::Sealed`;
/// - `impl EnumeratedProperty`, filling in the data marker, the baked
///   singleton (only with the `compiled_data` feature), and the long and
///   short property names as byte strings;
/// - when `ule_ty` is supplied, `impl zerovec::ule::AsULE` that converts
///   through the type's `.0` field, so `ule_ty` only fits tuple newtypes.
macro_rules! make_enumerated_property {
    (
        name: $long:literal;
        short_name: $short:literal;
        ident: $prop:path;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $( ule_ty: $ule:ty; )?
    ) => {
        impl crate::private::Sealed for $prop {}

        impl EnumeratedProperty for $prop {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$baked;
            const NAME: &'static [u8] = $long.as_bytes();
            const SHORT_NAME: &'static [u8] = $short.as_bytes();
        }

        // Only emitted when the caller passed `ule_ty: ...;`.
        $(
            impl zerovec::ule::AsULE for $prop {
                type ULE = $ule;

                fn to_unaligned(self) -> Self::ULE {
                    self.0.to_unaligned()
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
114
/// Enumerated property Bidi_Class
///
/// These are the categories required by the Unicode Bidirectional Algorithm.
/// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values).
/// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// This is an open enum: a newtype over the ICU4C `UBidiClass` integer, with the
/// known values exposed as associated constants such as [`BidiClass::LeftToRight`].
///
/// # Example
///
/// ```
/// use icu::properties::{props::BidiClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('y'),
///     BidiClass::LeftToRight
/// ); // U+0079
/// assert_eq!(
///     CodePointMapData::<BidiClass>::new().get('ع'),
///     BidiClass::ArabicLetter
/// ); // U+0639
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct BidiClass(pub(crate) u8);
140
141impl BidiClass {
142 /// Returns an ICU4C `UBidiClass` value.
143 pub const fn to_icu4c_value(self) -> u8 {
144 self.0
145 }
146 /// Constructor from an ICU4C `UBidiClass` value.
147 pub const fn from_icu4c_value(value: u8) -> Self {
148 Self(value)
149 }
150}
151
// Named `BidiClass` values. Going through `create_const_array!` also generates
// `BidiClass::ALL_VALUES` (in the order written here) and
// `impl From<BidiClass> for u16`.
create_const_array! {
#[allow(non_upper_case_globals)]
impl BidiClass {
    /// (`L`) any strong left-to-right character
    pub const LeftToRight: BidiClass = BidiClass(0);
    /// (`R`) any strong right-to-left (non-Arabic-type) character
    pub const RightToLeft: BidiClass = BidiClass(1);
    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
    pub const EuropeanNumber: BidiClass = BidiClass(2);
    /// (`ES`) plus and minus signs
    pub const EuropeanSeparator: BidiClass = BidiClass(3);
    /// (`ET`) a terminator in a numeric format context, includes currency signs
    pub const EuropeanTerminator: BidiClass = BidiClass(4);
    /// (`AN`) any Arabic-Indic digit
    pub const ArabicNumber: BidiClass = BidiClass(5);
    /// (`CS`) commas, colons, and slashes
    pub const CommonSeparator: BidiClass = BidiClass(6);
    /// (`B`) various newline characters
    pub const ParagraphSeparator: BidiClass = BidiClass(7);
    /// (`S`) various segment-related control codes
    pub const SegmentSeparator: BidiClass = BidiClass(8);
    /// (`WS`) spaces
    pub const WhiteSpace: BidiClass = BidiClass(9);
    /// (`ON`) most other symbols and punctuation marks
    pub const OtherNeutral: BidiClass = BidiClass(10);
    /// (`LRE`) U+202A: the LR embedding control
    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
    /// (`LRO`) U+202D: the LR override control
    pub const LeftToRightOverride: BidiClass = BidiClass(12);
    /// (`AL`) any strong right-to-left (Arabic-type) character
    pub const ArabicLetter: BidiClass = BidiClass(13);
    /// (`RLE`) U+202B: the RL embedding control
    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
    /// (`RLO`) U+202E: the RL override control
    pub const RightToLeftOverride: BidiClass = BidiClass(15);
    /// (`PDF`) U+202C: terminates an embedding or override control
    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
    /// (`NSM`) any nonspacing mark
    pub const NonspacingMark: BidiClass = BidiClass(17);
    /// (`BN`) most format characters, control codes, or noncharacters
    pub const BoundaryNeutral: BidiClass = BidiClass(18);
    /// (`FSI`) U+2068: the first strong isolate control
    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
    /// (`LRI`) U+2066: the LR isolate control
    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
    /// (`RLI`) U+2067: the RL isolate control
    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
    /// (`PDI`) U+2069: terminates an isolate control
    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
}
}
203
// Implements `Sealed` and `EnumeratedProperty` for `BidiClass` under the
// property names `Bidi_Class` / `bc`. Passing `ule_ty: u8` additionally
// generates the `AsULE` impl, which converts through the wrapped `u8`.
make_enumerated_property! {
    name: "Bidi_Class";
    short_name: "bc";
    ident: BidiClass;
    data_marker: crate::provider::PropertyEnumBidiClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
    ule_ty: u8;
}
212
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
    /// crate::props::GeneralCategoryGroup).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
    ///
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
    ///     GeneralCategory::OtherLetter
    /// ); // U+6728
    /// assert_eq!(
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
    ///     GeneralCategory::OtherSymbol
    /// ); // U+1F383 JACK-O-LANTERN
    /// ```
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[repr(u8)]
    pub enum GeneralCategory {
        // NOTE: variants are grouped by major class, so the discriminants are not
        // strictly increasing in declaration order: `SpacingMark = 8` comes
        // before `EnclosingMark = 7`, and `InitialPunctuation = 28` /
        // `FinalPunctuation = 29` sit between 22 and 23. `FinalPunctuation` is
        // the largest discriminant. `GeneralCategoryGroup` uses each discriminant
        // as a bit index, so do not renumber.
        // NOTE(review): the values presumably mirror ICU4C's `UCharCategory` — confirm.
        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
316
317pub use gc::GeneralCategory;
318
319impl GeneralCategory {
320 /// All possible values of this enum
321 pub const ALL_VALUES: &'static [GeneralCategory] = &[
322 GeneralCategory::Unassigned,
323 GeneralCategory::UppercaseLetter,
324 GeneralCategory::LowercaseLetter,
325 GeneralCategory::TitlecaseLetter,
326 GeneralCategory::ModifierLetter,
327 GeneralCategory::OtherLetter,
328 GeneralCategory::NonspacingMark,
329 GeneralCategory::SpacingMark,
330 GeneralCategory::EnclosingMark,
331 GeneralCategory::DecimalNumber,
332 GeneralCategory::LetterNumber,
333 GeneralCategory::OtherNumber,
334 GeneralCategory::SpaceSeparator,
335 GeneralCategory::LineSeparator,
336 GeneralCategory::ParagraphSeparator,
337 GeneralCategory::Control,
338 GeneralCategory::Format,
339 GeneralCategory::PrivateUse,
340 GeneralCategory::Surrogate,
341 GeneralCategory::DashPunctuation,
342 GeneralCategory::OpenPunctuation,
343 GeneralCategory::ClosePunctuation,
344 GeneralCategory::ConnectorPunctuation,
345 GeneralCategory::InitialPunctuation,
346 GeneralCategory::FinalPunctuation,
347 GeneralCategory::OtherPunctuation,
348 GeneralCategory::MathSymbol,
349 GeneralCategory::CurrencySymbol,
350 GeneralCategory::ModifierSymbol,
351 GeneralCategory::OtherSymbol,
352 ];
353}
354
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
///
/// Returned when the `u8` being converted is out of bounds for
/// [`GeneralCategory`]. It carries no further information.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
359
360impl TryFrom<u8> for GeneralCategory {
361 type Error = GeneralCategoryOutOfBoundsError;
362 /// Construct this [`GeneralCategory`] from an integer, returning
363 /// an error if it is out of bounds
364 fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
365 GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
366 }
367}
368
// Implements `Sealed` and `EnumeratedProperty` for `GeneralCategory` under the
// property names `General_Category` / `gc`. No `ule_ty` is passed: the macro's
// optional `AsULE` impl converts through a `.0` field, which this fieldless
// enum does not have; the enum is instead annotated with
// `#[zerovec::make_ule(GeneralCategoryULE)]` at its definition.
make_enumerated_property! {
    name: "General_Category";
    short_name: "gc";
    ident: GeneralCategory;
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
}
376
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// A group is stored as a `u32` bit mask in which bit `n` is set when the
/// [`GeneralCategory`] whose discriminant is `n` belongs to the group.
/// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);

// Written by hand here; for `BidiClass` and `GeneralCategory` the equivalent
// impl is emitted by `make_enumerated_property!`.
impl crate::private::Sealed for GeneralCategoryGroup {}
398
399use GeneralCategory as GC;
400use GeneralCategoryGroup as GCG;
401
402#[allow(non_upper_case_globals)]
403impl GeneralCategoryGroup {
404 /// (`Lu`) An uppercase letter
405 pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
406 /// (`Ll`) A lowercase letter
407 pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
408 /// (`Lt`) A digraphic letter, with first part uppercase
409 pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
410 /// (`Lm`) A modifier letter
411 pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
412 /// (`Lo`) Other letters, including syllables and ideographs
413 pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
414 /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
415 pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
416 | (1 << (GC::LowercaseLetter as u32))
417 | (1 << (GC::TitlecaseLetter as u32)));
418 /// (`L`) The union of all letter categories
419 pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
420 | (1 << (GC::LowercaseLetter as u32))
421 | (1 << (GC::TitlecaseLetter as u32))
422 | (1 << (GC::ModifierLetter as u32))
423 | (1 << (GC::OtherLetter as u32)));
424
425 /// (`Mn`) A nonspacing combining mark (zero advance width)
426 pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
427 /// (`Mc`) A spacing combining mark (positive advance width)
428 pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
429 /// (`Me`) An enclosing combining mark
430 pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
431 /// (`M`) The union of all mark categories
432 pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
433 | (1 << (GC::EnclosingMark as u32))
434 | (1 << (GC::SpacingMark as u32)));
435
436 /// (`Nd`) A decimal digit
437 pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
438 /// (`Nl`) A letterlike numeric character
439 pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
440 /// (`No`) A numeric character of other type
441 pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
442 /// (`N`) The union of all number categories
443 pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
444 | (1 << (GC::LetterNumber as u32))
445 | (1 << (GC::OtherNumber as u32)));
446
447 /// (`Zs`) A space character (of various non-zero widths)
448 pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
449 /// (`Zl`) U+2028 LINE SEPARATOR only
450 pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
451 /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
452 pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
453 /// (`Z`) The union of all separator categories
454 pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
455 | (1 << (GC::LineSeparator as u32))
456 | (1 << (GC::ParagraphSeparator as u32)));
457
458 /// (`Cc`) A C0 or C1 control code
459 pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
460 /// (`Cf`) A format control character
461 pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
462 /// (`Co`) A private-use character
463 pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
464 /// (`Cs`) A surrogate code point
465 pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
466 /// (`Cn`) A reserved unassigned code point or a noncharacter
467 pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
468 /// (`C`) The union of all control code, reserved, and unassigned categories
469 pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
470 | (1 << (GC::Format as u32))
471 | (1 << (GC::PrivateUse as u32))
472 | (1 << (GC::Surrogate as u32))
473 | (1 << (GC::Unassigned as u32)));
474
475 /// (`Pd`) A dash or hyphen punctuation mark
476 pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
477 /// (`Ps`) An opening punctuation mark (of a pair)
478 pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
479 /// (`Pe`) A closing punctuation mark (of a pair)
480 pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
481 /// (`Pc`) A connecting punctuation mark, like a tie
482 pub const ConnectorPunctuation: GeneralCategoryGroup =
483 GCG(1 << (GC::ConnectorPunctuation as u32));
484 /// (`Pi`) An initial quotation mark
485 pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
486 /// (`Pf`) A final quotation mark
487 pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
488 /// (`Po`) A punctuation mark of other type
489 pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
490 /// (`P`) The union of all punctuation categories
491 pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
492 | (1 << (GC::OpenPunctuation as u32))
493 | (1 << (GC::ClosePunctuation as u32))
494 | (1 << (GC::ConnectorPunctuation as u32))
495 | (1 << (GC::OtherPunctuation as u32))
496 | (1 << (GC::InitialPunctuation as u32))
497 | (1 << (GC::FinalPunctuation as u32)));
498
499 /// (`Sm`) A symbol of mathematical use
500 pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
501 /// (`Sc`) A currency sign
502 pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
503 /// (`Sk`) A non-letterlike modifier symbol
504 pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
505 /// (`So`) A symbol of other type
506 pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
507 /// (`S`) The union of all symbol categories
508 pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
509 | (1 << (GC::CurrencySymbol as u32))
510 | (1 << (GC::ModifierSymbol as u32))
511 | (1 << (GC::OtherSymbol as u32)));
512
513 const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
514
515 /// Return whether the code point belongs in the provided multi-value category.
516 ///
517 /// ```
518 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
519 /// use icu::properties::CodePointMapData;
520 ///
521 /// let gc = CodePointMapData::<GeneralCategory>::new();
522 ///
523 /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
524 /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
525 ///
526 /// // U+0B1E ORIYA LETTER NYA
527 /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
528 /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
529 /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
530 ///
531 /// // U+0301 COMBINING ACUTE ACCENT
532 /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
533 /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
534 /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
535 ///
536 /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
537 /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
538 /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
539 ///
540 /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
541 /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
542 /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
543 ///
544 /// // U+2713 CHECK MARK
545 /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
546 /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
547 /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
548 ///
549 /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
550 /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
551 /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
552 ///
553 /// // U+E007F CANCEL TAG
554 /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
555 /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
556 /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
557 /// ```
558 pub const fn contains(self, val: GeneralCategory) -> bool {
559 0 != (1 << (val as u32)) & self.0
560 }
561
562 /// Produce a GeneralCategoryGroup that is the inverse of this one
563 ///
564 /// # Example
565 ///
566 /// ```rust
567 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
568 ///
569 /// let letter = GeneralCategoryGroup::Letter;
570 /// let not_letter = letter.complement();
571 ///
572 /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
573 /// assert!(!letter.contains(GeneralCategory::MathSymbol));
574 /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
575 /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
576 /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
577 /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
578 /// ```
579 pub const fn complement(self) -> Self {
580 // Mask off things not in Self::ALL to guarantee the mask
581 // values stay in-range
582 GeneralCategoryGroup(!self.0 & Self::ALL)
583 }
584
585 /// Return the group representing all GeneralCategory values
586 ///
587 /// # Example
588 ///
589 /// ```rust
590 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
591 ///
592 /// let all = GeneralCategoryGroup::all();
593 ///
594 /// assert!(all.contains(GeneralCategory::MathSymbol));
595 /// assert!(all.contains(GeneralCategory::OtherPunctuation));
596 /// assert!(all.contains(GeneralCategory::UppercaseLetter));
597 /// ```
598 pub const fn all() -> Self {
599 Self(Self::ALL)
600 }
601
602 /// Return the empty group
603 ///
604 /// # Example
605 ///
606 /// ```rust
607 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
608 ///
609 /// let empty = GeneralCategoryGroup::empty();
610 ///
611 /// assert!(!empty.contains(GeneralCategory::MathSymbol));
612 /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
613 /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
614 /// ```
615 pub const fn empty() -> Self {
616 Self(0)
617 }
618
619 /// Take the union of two groups
620 ///
621 /// # Example
622 ///
623 /// ```rust
624 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
625 ///
626 /// let letter = GeneralCategoryGroup::Letter;
627 /// let symbol = GeneralCategoryGroup::Symbol;
628 /// let union = letter.union(symbol);
629 ///
630 /// assert!(union.contains(GeneralCategory::MathSymbol));
631 /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
632 /// assert!(union.contains(GeneralCategory::UppercaseLetter));
633 /// ```
634 pub const fn union(self, other: Self) -> Self {
635 Self(self.0 | other.0)
636 }
637
638 /// Take the intersection of two groups
639 ///
640 /// # Example
641 ///
642 /// ```rust
643 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
644 ///
645 /// let letter = GeneralCategoryGroup::Letter;
646 /// let lu = GeneralCategoryGroup::UppercaseLetter;
647 /// let intersection = letter.intersection(lu);
648 ///
649 /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
650 /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
651 /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
652 /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
653 /// ```
654 pub const fn intersection(self, other: Self) -> Self {
655 Self(self.0 & other.0)
656 }
657}
658
659impl From<GeneralCategory> for GeneralCategoryGroup {
660 fn from(subcategory: GeneralCategory) -> Self {
661 GeneralCategoryGroup(1 << (subcategory as u32))
662 }
663}
664impl From<u32> for GeneralCategoryGroup {
665 fn from(mask: u32) -> Self {
666 // Mask off things not in Self::ALL to guarantee the mask
667 // values stay in-range
668 GeneralCategoryGroup(mask & Self::ALL)
669 }
670}
671impl From<GeneralCategoryGroup> for u32 {
672 fn from(group: GeneralCategoryGroup) -> Self {
673 group.0
674 }
675}
676
/// Enumerated property Script.
///
/// This is used with both the Script and Script_Extensions Unicode properties.
/// Each character is assigned a single Script, but characters that are used in
/// a particular subset of scripts will be in more than one Script_Extensions set.
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. If you are trying to
/// determine whether a code point belongs to a certain script, you should use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// This is an open enum: a newtype over the ICU4C `UScriptCode` integer, with the
/// known values exposed as associated constants such as [`Script::Latin`].
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);
704
705impl Script {
706 /// Returns an ICU4C `UScriptCode` value.
707 pub const fn to_icu4c_value(self) -> u16 {
708 self.0
709 }
710 /// Constructor from an ICU4C `UScriptCode` value.
711 pub const fn from_icu4c_value(value: u16) -> Self {
712 Self(value)
713 }
714}
715
716create_const_array! {
717#[allow(missing_docs)] // These constants don't need individual documentation.
718#[allow(non_upper_case_globals)]
719impl Script {
720 pub const Adlam: Script = Script(167);
721 pub const Ahom: Script = Script(161);
722 pub const AnatolianHieroglyphs: Script = Script(156);
723 pub const Arabic: Script = Script(2);
724 pub const Armenian: Script = Script(3);
725 pub const Avestan: Script = Script(117);
726 pub const Balinese: Script = Script(62);
727 pub const Bamum: Script = Script(130);
728 pub const BassaVah: Script = Script(134);
729 pub const Batak: Script = Script(63);
730 pub const Bengali: Script = Script(4);
731 pub const Bhaiksuki: Script = Script(168);
732 pub const Bopomofo: Script = Script(5);
733 pub const Brahmi: Script = Script(65);
734 pub const Braille: Script = Script(46);
735 pub const Buginese: Script = Script(55);
736 pub const Buhid: Script = Script(44);
737 pub const CanadianAboriginal: Script = Script(40);
738 pub const Carian: Script = Script(104);
739 pub const CaucasianAlbanian: Script = Script(159);
740 pub const Chakma: Script = Script(118);
741 pub const Cham: Script = Script(66);
742 pub const Cherokee: Script = Script(6);
743 pub const Chorasmian: Script = Script(189);
744 pub const Common: Script = Script(0);
745 pub const Coptic: Script = Script(7);
746 pub const Cuneiform: Script = Script(101);
747 pub const Cypriot: Script = Script(47);
748 pub const CyproMinoan: Script = Script(193);
749 pub const Cyrillic: Script = Script(8);
750 pub const Deseret: Script = Script(9);
751 pub const Devanagari: Script = Script(10);
752 pub const DivesAkuru: Script = Script(190);
753 pub const Dogra: Script = Script(178);
754 pub const Duployan: Script = Script(135);
755 pub const EgyptianHieroglyphs: Script = Script(71);
756 pub const Elbasan: Script = Script(136);
757 pub const Elymaic: Script = Script(185);
758 pub const Ethiopian: Script = Script(11);
759 pub const Georgian: Script = Script(12);
760 pub const Glagolitic: Script = Script(56);
761 pub const Gothic: Script = Script(13);
762 pub const Grantha: Script = Script(137);
763 pub const Greek: Script = Script(14);
764 pub const Gujarati: Script = Script(15);
765 pub const GunjalaGondi: Script = Script(179);
766 pub const Gurmukhi: Script = Script(16);
767 pub const Han: Script = Script(17);
768 pub const Hangul: Script = Script(18);
769 pub const HanifiRohingya: Script = Script(182);
770 pub const Hanunoo: Script = Script(43);
771 pub const Hatran: Script = Script(162);
772 pub const Hebrew: Script = Script(19);
773 pub const Hiragana: Script = Script(20);
774 pub const ImperialAramaic: Script = Script(116);
775 pub const Inherited: Script = Script(1);
776 pub const InscriptionalPahlavi: Script = Script(122);
777 pub const InscriptionalParthian: Script = Script(125);
778 pub const Javanese: Script = Script(78);
779 pub const Kaithi: Script = Script(120);
780 pub const Kannada: Script = Script(21);
781 pub const Katakana: Script = Script(22);
782 pub const Kawi: Script = Script(198);
783 pub const KayahLi: Script = Script(79);
784 pub const Kharoshthi: Script = Script(57);
785 pub const KhitanSmallScript: Script = Script(191);
786 pub const Khmer: Script = Script(23);
787 pub const Khojki: Script = Script(157);
788 pub const Khudawadi: Script = Script(145);
789 pub const Lao: Script = Script(24);
790 pub const Latin: Script = Script(25);
791 pub const Lepcha: Script = Script(82);
792 pub const Limbu: Script = Script(48);
793 pub const LinearA: Script = Script(83);
794 pub const LinearB: Script = Script(49);
795 pub const Lisu: Script = Script(131);
796 pub const Lycian: Script = Script(107);
797 pub const Lydian: Script = Script(108);
798 pub const Mahajani: Script = Script(160);
799 pub const Makasar: Script = Script(180);
800 pub const Malayalam: Script = Script(26);
801 pub const Mandaic: Script = Script(84);
802 pub const Manichaean: Script = Script(121);
803 pub const Marchen: Script = Script(169);
804 pub const MasaramGondi: Script = Script(175);
805 pub const Medefaidrin: Script = Script(181);
806 pub const MeeteiMayek: Script = Script(115);
807 pub const MendeKikakui: Script = Script(140);
808 pub const MeroiticCursive: Script = Script(141);
809 pub const MeroiticHieroglyphs: Script = Script(86);
810 pub const Miao: Script = Script(92);
811 pub const Modi: Script = Script(163);
812 pub const Mongolian: Script = Script(27);
813 pub const Mro: Script = Script(149);
814 pub const Multani: Script = Script(164);
815 pub const Myanmar: Script = Script(28);
816 pub const Nabataean: Script = Script(143);
817 pub const NagMundari: Script = Script(199);
818 pub const Nandinagari: Script = Script(187);
819 pub const Nastaliq: Script = Script(200);
820 pub const NewTaiLue: Script = Script(59);
821 pub const Newa: Script = Script(170);
822 pub const Nko: Script = Script(87);
823 pub const Nushu: Script = Script(150);
824 pub const NyiakengPuachueHmong: Script = Script(186);
825 pub const Ogham: Script = Script(29);
826 pub const OlChiki: Script = Script(109);
827 pub const OldHungarian: Script = Script(76);
828 pub const OldItalic: Script = Script(30);
829 pub const OldNorthArabian: Script = Script(142);
830 pub const OldPermic: Script = Script(89);
831 pub const OldPersian: Script = Script(61);
832 pub const OldSogdian: Script = Script(184);
833 pub const OldSouthArabian: Script = Script(133);
834 pub const OldTurkic: Script = Script(88);
835 pub const OldUyghur: Script = Script(194);
836 pub const Oriya: Script = Script(31);
837 pub const Osage: Script = Script(171);
838 pub const Osmanya: Script = Script(50);
839 pub const PahawhHmong: Script = Script(75);
840 pub const Palmyrene: Script = Script(144);
841 pub const PauCinHau: Script = Script(165);
842 pub const PhagsPa: Script = Script(90);
843 pub const Phoenician: Script = Script(91);
844 pub const PsalterPahlavi: Script = Script(123);
845 pub const Rejang: Script = Script(110);
846 pub const Runic: Script = Script(32);
847 pub const Samaritan: Script = Script(126);
848 pub const Saurashtra: Script = Script(111);
849 pub const Sharada: Script = Script(151);
850 pub const Shavian: Script = Script(51);
851 pub const Siddham: Script = Script(166);
852 pub const SignWriting: Script = Script(112);
853 pub const Sinhala: Script = Script(33);
854 pub const Sogdian: Script = Script(183);
855 pub const SoraSompeng: Script = Script(152);
856 pub const Soyombo: Script = Script(176);
857 pub const Sundanese: Script = Script(113);
858 pub const SylotiNagri: Script = Script(58);
859 pub const Syriac: Script = Script(34);
860 pub const Tagalog: Script = Script(42);
861 pub const Tagbanwa: Script = Script(45);
862 pub const TaiLe: Script = Script(52);
863 pub const TaiTham: Script = Script(106);
864 pub const TaiViet: Script = Script(127);
865 pub const Takri: Script = Script(153);
866 pub const Tamil: Script = Script(35);
867 pub const Tangsa: Script = Script(195);
868 pub const Tangut: Script = Script(154);
869 pub const Telugu: Script = Script(36);
870 pub const Thaana: Script = Script(37);
871 pub const Thai: Script = Script(38);
872 pub const Tibetan: Script = Script(39);
873 pub const Tifinagh: Script = Script(60);
874 pub const Tirhuta: Script = Script(158);
875 pub const Toto: Script = Script(196);
876 pub const Ugaritic: Script = Script(53);
877 pub const Unknown: Script = Script(103);
878 pub const Vai: Script = Script(99);
879 pub const Vithkuqi: Script = Script(197);
880 pub const Wancho: Script = Script(188);
881 pub const WarangCiti: Script = Script(146);
882 pub const Yezidi: Script = Script(192);
883 pub const Yi: Script = Script(41);
884 pub const ZanabazarSquare: Script = Script(177);
885}
886}
887
// Registers `Script` through `make_enumerated_property!` (defined elsewhere in this file).
// NOTE(review): presumably this generates the `EnumeratedProperty` impl that ties the
// type to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "Script";
    short_name: "sc";
    ident: Script;
    data_marker: crate::provider::PropertyEnumScriptV1;
    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
    // The ULE type here is the ULE form of `u16`; the properties that follow pass plain `u8`.
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
}
896
/// Enumerated property Hangul_Syllable_Type.
///
/// Arbitrary Hangul syllables can be written either as precomposed syllables or as
/// sequences of conjoining Jamo; the Unicode standard provides both. This property
/// classifies Hangul code points along those lines.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
///     HangulSyllableType::LeadingJamo
/// ); // U+1100
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
///     HangulSyllableType::LeadingVowelSyllable
/// ); // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Returns the wrapped number, which is the ICU4C `UHangulSyllableType` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UHangulSyllableType` number, stored exactly as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        HangulSyllableType(value)
    }
}
934
// `create_const_array!` re-emits the constants below as written and additionally
// generates `HangulSyllableType::ALL_VALUES`, documented there as all possible values
// of the enum in the Unicode version of this release.
create_const_array! {
// The constants imitate enum variants (open enum), hence the non-upper-case names.
#[allow(non_upper_case_globals)]
impl HangulSyllableType {
    /// (`NA`) not applicable (e.g. not a Hangul code point).
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
    /// (`L`) a conjoining leading consonant Jamo.
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
    /// (`V`) a conjoining vowel Jamo.
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
    /// (`T`) a conjoining trailing consonant Jamo.
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
}
}
952
// Registers `HangulSyllableType` through `make_enumerated_property!` (defined elsewhere
// in this file). NOTE(review): presumably this generates the `EnumeratedProperty` impl
// tied to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "Hangul_Syllable_Type";
    short_name: "hst";
    ident: HangulSyllableType;
    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
    ule_ty: u8;

}
962
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// The code points are written as escapes so that the halfwidth and the regular
/// Katakana letter cannot be confused (or normalized into each other) in source text.
///
/// ```
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('\u{FF71}'),
///     EastAsianWidth::Halfwidth
/// ); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('\u{30A2}'),
///     EastAsianWidth::Wide
/// ); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns the wrapped number, which is the ICU4C `UEastAsianWidth` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UEastAsianWidth` number, stored exactly as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        EastAsianWidth(value)
    }
}
998
// `create_const_array!` re-emits the constants below as written and additionally
// generates `EastAsianWidth::ALL_VALUES`, documented there as all possible values
// of the enum in the Unicode version of this release.
// The trailing `name="…"` comments record each value's short property-value name.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl EastAsianWidth {
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); //name="N"
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); //name="A"
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); //name="H"
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); //name="F"
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); //name="Na"
    pub const Wide: EastAsianWidth = EastAsianWidth(5); //name="W"
}
}
1011
// Registers `EastAsianWidth` through `make_enumerated_property!` (defined elsewhere
// in this file). NOTE(review): presumably this generates the `EnumeratedProperty` impl
// tied to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "East_Asian_Width";
    short_name: "ea";
    ident: EastAsianWidth;
    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
    ule_ty: u8;
}
1020
/// Enumerated property Line_Break.
///
/// The summary of each property value is given under "Line Breaking Properties"
/// in UAX #14: <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get(')'),
///     LineBreak::CloseParenthesis
/// ); // U+0029: Right Parenthesis
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
///     LineBreak::ConditionalJapaneseStarter
/// ); //U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Returns the wrapped number, which is the ICU4C `ULineBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `ULineBreak` number, stored exactly as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        LineBreak(value)
    }
}
1060
// `create_const_array!` re-emits the constants below as written and additionally
// generates `LineBreak::ALL_VALUES`, documented there as all possible values
// of the enum in the Unicode version of this release.
// The trailing `name="…"` comments record each value's short property-value name.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl LineBreak {
    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
    pub const Glue: LineBreak = LineBreak(12); // name="GL"
    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
    pub const Space: LineBreak = LineBreak(26); // name="SP"
    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
    pub const H2: LineBreak = LineBreak(31); // name="H2"
    pub const H3: LineBreak = LineBreak(32); // name="H3"
    pub const JL: LineBreak = LineBreak(33); // name="JL"
    pub const JT: LineBreak = LineBreak(34); // name="JT"
    pub const JV: LineBreak = LineBreak(35); // name="JV"
    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
    pub const EBase: LineBreak = LineBreak(40); // name="EB"
    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"

    // Added in ICU 74:
    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
    pub const Virama: LineBreak = LineBreak(47); // name="VI"
}
}
1117
// Registers `LineBreak` through `make_enumerated_property!` (defined elsewhere
// in this file). NOTE(review): presumably this generates the `EnumeratedProperty` impl
// tied to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "Line_Break";
    short_name: "lb";
    ident: LineBreak;
    data_marker: crate::provider::PropertyEnumLineBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
    ule_ty: u8;
}
1126
/// Enumerated property Grapheme_Cluster_Break.
///
/// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the
/// summary of each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// U+0E33 is written as an escape: it has a two-code-point compatibility
/// decomposition, so a literal glyph may silently stop being a single `char`
/// if the source text is ever normalized.
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
///     GraphemeClusterBreak::RegionalIndicator
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('\u{0E33}'),
///     GraphemeClusterBreak::SpacingMark
/// ); // U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // this type is stable
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Returns the wrapped number, which is the ICU4C `UGraphemeClusterBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UGraphemeClusterBreak` number, stored exactly as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        GraphemeClusterBreak(value)
    }
}
1165
// `create_const_array!` re-emits the constants below as written and additionally
// generates `GraphemeClusterBreak::ALL_VALUES`, documented there as all possible values
// of the enum in the Unicode version of this release.
// The trailing `name="…"` comments record each value's short property-value name.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl GraphemeClusterBreak {
    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
    /// This value is obsolete and unused.
    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
}
}
1194
// Registers `GraphemeClusterBreak` through `make_enumerated_property!` (defined elsewhere
// in this file). NOTE(review): presumably this generates the `EnumeratedProperty` impl
// tied to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "Grapheme_Cluster_Break";
    short_name: "GCB";
    ident: GraphemeClusterBreak;
    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
    ule_ty: u8;
}
1203
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// The fullwidth comma is written as an escape so that it cannot be mistaken for
/// (or normalized into) the ASCII comma in source text.
///
/// ```
/// use icu::properties::{props::WordBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('.'),
///     WordBreak::MidNumLet
/// ); // U+002E: Full Stop
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('\u{FF0C}'),
///     WordBreak::MidNum
/// ); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Returns the wrapped number, which is the ICU4C `UWordBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UWordBreak` number, stored exactly as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        WordBreak(value)
    }
}
1242
// `create_const_array!` re-emits the constants below as written and additionally
// generates `WordBreak::ALL_VALUES`, documented there as all possible values
// of the enum in the Unicode version of this release.
// The trailing `name="…"` comments record each value's short property-value name.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl WordBreak {
    pub const Other: WordBreak = WordBreak(0); // name="XX"
    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
    pub const Format: WordBreak = WordBreak(2); // name="FO"
    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
    pub const CR: WordBreak = WordBreak(8); // name="CR"
    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
    pub const LF: WordBreak = WordBreak(10); // name="LF"
    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
    pub const Newline: WordBreak = WordBreak(12); // name="NL"
    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
    pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
    /// This value is obsolete and unused.
    pub const EBase: WordBreak = WordBreak(17); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
}
}
1276
// Registers `WordBreak` through `make_enumerated_property!` (defined elsewhere
// in this file). NOTE(review): presumably this generates the `EnumeratedProperty` impl
// tied to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "Word_Break";
    short_name: "WB";
    ident: WordBreak;
    data_marker: crate::provider::PropertyEnumWordBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
    ule_ty: u8;
}
1285
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// The fullwidth digit is written as an escape so that it cannot be mistaken for
/// (or normalized into) the ASCII digit in source text.
///
/// ```
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get('\u{FF19}'),
///     SentenceBreak::Numeric
/// ); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get(','),
///     SentenceBreak::SContinue
/// ); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns the wrapped number, which is the ICU4C `USentenceBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `USentenceBreak` number, stored exactly as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        SentenceBreak(value)
    }
}
1324
// `create_const_array!` re-emits the constants below as written and additionally
// generates `SentenceBreak::ALL_VALUES`, documented there as all possible values
// of the enum in the Unicode version of this release.
// The trailing `name="…"` comments record each value's short property-value name.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl SentenceBreak {
    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
}
}
1346
// Registers `SentenceBreak` through `make_enumerated_property!` (defined elsewhere
// in this file). NOTE(review): presumably this generates the `EnumeratedProperty` impl
// tied to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "Sentence_Break";
    short_name: "SB";
    ident: SentenceBreak;
    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
    ule_ty: u8;
}
1355
/// Property Canonical_Combining_Class.
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the preferred API
/// to look up the Canonical_Combining_Class property by scalar value.
///
/// # Example
///
/// ```
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
///     CanonicalCombiningClass::NotReordered
/// ); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
///     CanonicalCombiningClass::Above
/// ); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);

impl CanonicalCombiningClass {
    /// Returns the wrapped number, which is the ICU4C `UCanonicalCombiningClass` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
    /// Builds a value from an ICU4C `UCanonicalCombiningClass` number, stored exactly as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        CanonicalCombiningClass(value)
    }
}
1401
// `create_const_array!` re-emits the constants below as written and additionally
// generates `CanonicalCombiningClass::ALL_VALUES`, documented there as all possible
// values of the enum in the Unicode version of this release.
// The trailing `name="…"` comments record each value's short property-value name.
// Note that the numeric values are not contiguous.
create_const_array! {
// These constant names come from PropertyValueAliases.txt
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
}
1467
// Registers `CanonicalCombiningClass` through `make_enumerated_property!` (defined elsewhere
// in this file). NOTE(review): presumably this generates the `EnumeratedProperty` impl
// tied to the data marker and singleton named below — confirm against the macro.
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1476
/// The Indic_Conjunct_Break property, described in UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
///
/// This is an open enum: a transparent wrapper around the raw `u8` value, with the known
/// values provided as associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicConjunctBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('a'),
///     IndicConjunctBreak::None
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'),
///     IndicConjunctBreak::Linker
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'),
///     IndicConjunctBreak::Consonant
/// );
/// assert_eq!(
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'),
///     IndicConjunctBreak::Extend
/// );
/// ```
#[doc(hidden)] // draft API in ICU4C
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicConjunctBreak(pub(crate) u8);

impl IndicConjunctBreak {
    /// Wraps a raw ICU4C `UIndicConjunctBreak` value; the value is stored as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicConjunctBreak(value)
    }

    /// Unwraps this value into the corresponding ICU4C `UIndicConjunctBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1520
// Known values of `IndicConjunctBreak`. They are declared through `create_const_array!` so
// that the macro can also emit an `ALL_VALUES` slice listing every constant below.
create_const_array! {
#[doc(hidden)] // draft API in ICU4C
#[allow(non_upper_case_globals)] // constants are named like enum variants
impl IndicConjunctBreak {
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
}
}
1531
// Declares `IndicConjunctBreak` as the enumerated property `Indic_Conjunct_Break`
// (short name `InCB`), naming the data marker and singleton that hold its data.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm what
// it generates.
make_enumerated_property! {
    name: "Indic_Conjunct_Break";
    short_name: "InCB";
    ident: IndicConjunctBreak;
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
    ule_ty: u8; // same width as the `u8` wrapped by `IndicConjunctBreak`
}
1540
/// The Indic_Syllabic_Category property, described in UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// This is an open enum: a transparent wrapper around the raw `u8` value, with the known
/// values provided as associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('a'),
///     IndicSyllabicCategory::Other
/// );
/// assert_eq!(
///     CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'),
///     IndicSyllabicCategory::Bindu
/// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicSyllabicCategory(pub(crate) u8);

impl IndicSyllabicCategory {
    /// Wraps a raw ICU4C `UIndicSyllabicCategory` value; the value is stored as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicSyllabicCategory(value)
    }

    /// Unwraps this value into the corresponding ICU4C `UIndicSyllabicCategory` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1575
// Known values of `IndicSyllabicCategory`. They are declared through `create_const_array!`
// so that the macro can also emit an `ALL_VALUES` slice listing every constant below.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)] // constants are named like enum variants
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
}
1619
// Declares `IndicSyllabicCategory` as the enumerated property `Indic_Syllabic_Category`
// (short name `InSC`), naming the data marker and singleton that hold its data.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm what
// it generates.
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8; // same width as the `u8` wrapped by `IndicSyllabicCategory`
}
1628
/// The Joining_Type enumerated property.
///
/// Each property value is summarized in Section 9.2, Arabic Cursive Joining, of
/// The Unicode Standard.
///
/// This is an open enum: a transparent wrapper around the raw `u8` value, with the known
/// values provided as associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::JoiningType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('ؠ'),
///     JoiningType::DualJoining
/// ); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(
///     CodePointMapData::<JoiningType>::new().get('𐫍'),
///     JoiningType::LeftJoining
/// ); // U+10ACD: Manichaean Letter Heth
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct JoiningType(pub(crate) u8);

impl JoiningType {
    /// Wraps a raw ICU4C `UJoiningType` value; the value is stored as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        JoiningType(value)
    }

    /// Unwraps this value into the corresponding ICU4C `UJoiningType` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1664
// Known values of `JoiningType`. They are declared through `create_const_array!` so that
// the macro can also emit an `ALL_VALUES` slice listing every constant below.
// NOTE(review): the trailing `name="…"` comments appear to record each value's short
// Unicode alias — confirm.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)] // constants are named like enum variants
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
1677
// Declares `JoiningType` as the enumerated property `Joining_Type` (short name `jt`),
// naming the data marker and singleton that hold its data.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm what
// it generates.
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8; // same width as the `u8` wrapped by `JoiningType`
}
1686
/// The Vertical_Orientation property, described in UTR #50:
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// This is an open enum: a transparent wrapper around the raw `u8` value, with the known
/// values provided as associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{props::VerticalOrientation, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('a'),
///     VerticalOrientation::Rotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get('§'),
///     VerticalOrientation::Upright
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x2329),
///     VerticalOrientation::TransformedRotated
/// );
/// assert_eq!(
///     CodePointMapData::<VerticalOrientation>::new().get32(0x3001),
///     VerticalOrientation::TransformedUpright
/// );
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct VerticalOrientation(pub(crate) u8);

impl VerticalOrientation {
    /// Wraps a raw ICU4C `UVerticalOrientation` value; the value is stored as given.
    pub const fn from_icu4c_value(value: u8) -> Self {
        VerticalOrientation(value)
    }

    /// Unwraps this value into the corresponding ICU4C `UVerticalOrientation` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1730
// Known values of `VerticalOrientation`. They are declared through `create_const_array!` so
// that the macro can also emit an `ALL_VALUES` slice listing every constant below.
// NOTE(review): the trailing `name="…"` comments appear to record each value's short
// Unicode alias — confirm.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)] // constants are named like enum variants
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
}
1741
// Declares `VerticalOrientation` as the enumerated property `Vertical_Orientation`
// (short name `vo`), naming the data marker and singleton that hold its data.
// NOTE(review): `make_enumerated_property!` is defined outside this excerpt — confirm what
// it generates.
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8; // same width as the `u8` wrapped by `VerticalOrientation`
}
1750
1751pub use crate::code_point_set::BinaryProperty;
1752
// Declares one binary property. For the given `ident` this expands to:
//   * a unit marker struct carrying the supplied attributes (at least one is required,
//     in practice the doc comment),
//   * a `Sealed` impl, and
//   * a `BinaryProperty` impl exposing the long/short names as bytes, the data marker, and
//     (only with the `compiled_data` feature) the baked singleton.
macro_rules! make_binary_property {
    (
        name: $name:literal;
        short_name: $short_name:literal;
        ident: $ident:ident;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $ident;

        impl crate::private::Sealed for $ident {}

        impl BinaryProperty for $ident {
            type DataMarker = $data_marker;
            const NAME: &'static [u8] = str::as_bytes($name);
            const SHORT_NAME: &'static [u8] = str::as_bytes($short_name);
            // The baked data only exists when compiled data is enabled.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$singleton;
        }
    };
}
1779
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// Property name `ASCII_Hex_Digit`, short name `AHex`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1802
make_binary_property! {
    name: "Alnum";
    short_name: "Alnum";
    ident: Alnum;
    data_marker: crate::provider::PropertyBinaryAlnumV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// Property name and short name are both `Alnum`.
}
1813
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
    /// Alphabetic characters.
    ///
    /// Property name `Alphabetic`, short name `Alpha`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1837
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// Property name `Bidi_Control`, short name `Bidi_C`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```
}
1860
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
    /// Characters that are mirrored in bidirectional text.
    ///
    /// Property name `Bidi_Mirrored`, short name `Bidi_M`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```
}
1884
make_binary_property! {
    name: "Blank";
    short_name: "Blank";
    ident: Blank;
    data_marker: crate::provider::PropertyBinaryBlankV1;
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
    /// Horizontal whitespace characters.
    ///
    /// Property name and short name are both `Blank`.
}
1894
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::PropertyBinaryCasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// Property name and short name are both `Cased`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```
}
1916
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
    /// Characters which are ignored for casing purposes.
    ///
    /// Property name `Case_Ignorable`, short name `CI`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
    /// ```
}
1938
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
    ///
    /// Property name `Full_Composition_Exclusion`, short name `Comp_Ex`.
}
1950
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// Property name `Changes_When_Casefolded`, short name `CWCF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```
}
1972
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
    /// Characters which may change when they undergo case mapping.
    ///
    /// Property name `Changes_When_Casemapped`, short name `CWCM`.
}
1982
make_binary_property! {
    name: "Changes_When_NFKC_Casefolded";
    short_name: "CWKCF";
    ident: ChangesWhenNfkcCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
    ///
    /// Property name `Changes_When_NFKC_Casefolded`, short name `CWKCF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
    ///
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```
}
2004
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// Property name `Changes_When_Lowercased`, short name `CWL`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```
}
2026
make_binary_property! {
    name: "Changes_When_Titlecased";
    short_name: "CWT";
    ident: ChangesWhenTitlecased;
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
    ///
    /// Property name `Changes_When_Titlecased`, short name `CWT`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenTitlecased;
    ///
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```
}
2048
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// Property name `Changes_When_Uppercased`, short name `CWU`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```
}
2070
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::PropertyBinaryDashV1;
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// Property name and short name are both `Dash`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```
}
2094
make_binary_property! {
    name: "Deprecated";
    short_name: "Dep";
    ident: Deprecated;
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
    /// Deprecated characters.
    ///
    /// No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// Property name `Deprecated`, short name `Dep`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Deprecated;
    ///
    /// let deprecated = CodePointSetData::new::<Deprecated>();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```
}
2119
make_binary_property! {
    name: "Default_Ignorable_Code_Point";
    short_name: "DI";
    ident: DefaultIgnorableCodePoint;
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
    /// For programmatic determination of default ignorable code points.
    ///
    /// New characters that should be ignored in rendering (unless explicitly supported)
    /// will be assigned in these ranges, permitting programs to correctly handle the
    /// default rendering of such characters when not otherwise supported.
    ///
    /// Property name `Default_Ignorable_Code_Point`, short name `DI`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::DefaultIgnorableCodePoint;
    ///
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
    ///
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```
}
2146
make_binary_property! {
    name: "Diacritic";
    short_name: "Dia";
    ident: Diacritic;
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
    /// Characters that linguistically modify the meaning of another character to which they apply.
    ///
    /// Property name `Diacritic`, short name `Dia`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Diacritic;
    ///
    /// let diacritic = CodePointSetData::new::<Diacritic>();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```
}
2168
make_binary_property! {
    name: "Emoji_Modifier_Base";
    short_name: "EBase";
    ident: EmojiModifierBase;
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
    /// Characters that can serve as a base for emoji modifiers.
    ///
    /// Property name `Emoji_Modifier_Base`, short name `EBase`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifierBase;
    ///
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```
}
2190
make_binary_property! {
    name: "Emoji_Component";
    short_name: "EComp";
    ident: EmojiComponent;
    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
    /// separate choices, such as base characters for emoji keycaps.
    ///
    /// Property name `Emoji_Component`, short name `EComp`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiComponent;
    ///
    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
    ///
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
    /// assert!(emoji_component.contains('7'));
    /// assert!(!emoji_component.contains('T'));
    /// ```
}
2215
make_binary_property! {
    name: "Emoji_Modifier";
    short_name: "EMod";
    ident: EmojiModifier;
    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
    /// Characters that are emoji modifiers.
    ///
    /// Property name `Emoji_Modifier`, short name `EMod`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifier;
    ///
    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
    ///
    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
    /// ```
}
2237
make_binary_property! {
    name: "Emoji";
    short_name: "Emoji";
    ident: Emoji;
    data_marker: crate::provider::PropertyBinaryEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
    /// Characters that are emoji.
    ///
    /// Property name and short name are both `Emoji`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Emoji;
    ///
    /// let emoji = CodePointSetData::new::<Emoji>();
    ///
    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
    /// assert!(!emoji.contains('V'));
    /// ```
}
2259
make_binary_property! {
    name: "Emoji_Presentation";
    short_name: "EPres";
    ident: EmojiPresentation;
    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
    /// Characters that have emoji presentation by default.
    ///
    /// Property name `Emoji_Presentation`, short name `EPres`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiPresentation;
    ///
    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
    ///
    /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
    /// ```
}
2281
make_binary_property! {
    name: "Extender";
    short_name: "Ext";
    ident: Extender;
    data_marker: crate::provider::PropertyBinaryExtenderV1;
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
    /// Characters whose principal function is to extend the value of a preceding alphabetic
    /// character or to extend the shape of adjacent characters.
    ///
    /// Property name `Extender`, short name `Ext`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Extender;
    ///
    /// let extender = CodePointSetData::new::<Extender>();
    ///
    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
    /// ```
}
2305
make_binary_property! {
    name: "Extended_Pictographic";
    short_name: "ExtPict";
    ident: ExtendedPictographic;
    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
    /// emoji characters.
    ///
    /// Property name `Extended_Pictographic`, short name `ExtPict`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ExtendedPictographic;
    ///
    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
    ///
    /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
    /// ```
}
2328
make_binary_property! {
    name: "Graph";
    short_name: "Graph";
    ident: Graph;
    data_marker: crate::provider::PropertyBinaryGraphV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
    /// Visible characters.
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// Property name and short name are both `Graph`.
}
2340
make_binary_property! {
    name: "Grapheme_Base";
    short_name: "Gr_Base";
    ident: GraphemeBase;
    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
    /// Property used together with the definition of Standard Korean Syllable Block to define
    /// "Grapheme base".
    ///
    /// See D58 in Chapter 3, Conformance in the Unicode Standard.
    ///
    /// Property name `Grapheme_Base`, short name `Gr_Base`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeBase;
    ///
    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
    ///
    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```
}
2366
make_binary_property! {
    name: "Grapheme_Extend";
    short_name: "Gr_Ext";
    ident: GraphemeExtend;
    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
    /// Property used to define "Grapheme extender".
    ///
    /// See D59 in Chapter 3, Conformance in the Unicode Standard.
    ///
    /// See also [`GraphemeBase`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::GraphemeExtend;
    ///
    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
    ///
    /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA
    /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
    /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
    /// ```
}
2392
// NOTE(review): "Deprecated" below presumably describes the property's status in the Unicode
// Standard; no `#[deprecated]` attribute is passed to the macro here — confirm.
make_binary_property! {
    name: "Grapheme_Link";
    short_name: "Gr_Link";
    ident: GraphemeLink;
    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
    /// Deprecated property.
    ///
    /// Formerly proposed for programmatic determination of grapheme
    /// cluster boundaries.
}
2404
make_binary_property! {
    name: "Hex_Digit";
    short_name: "Hex";
    ident: HexDigit;
    data_marker: crate::provider::PropertyBinaryHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
    /// Characters commonly used for the representation of hexadecimal numbers, plus their
    /// compatibility equivalents.
    ///
    /// See also [`Xdigit`], which is defined for POSIX compatibility.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::HexDigit;
    ///
    /// let hex_digit = CodePointSetData::new::<HexDigit>();
    ///
    /// assert!(hex_digit.contains('0'));
    /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(hex_digit.contains('f'));
    /// // The fullwidth forms are written as escapes so they cannot be confused with ASCII.
    /// assert!(hex_digit.contains('\u{FF46}')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F
    /// assert!(hex_digit.contains('\u{FF26}')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
    /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
2430
// NOTE(review): "Deprecated" below presumably describes the property's status in the Unicode
// Standard; no `#[deprecated]` attribute is passed to the macro here — confirm.
make_binary_property! {
    name: "Hyphen";
    short_name: "Hyphen";
    ident: Hyphen;
    data_marker: crate::provider::PropertyBinaryHyphenV1;
    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
    /// Deprecated property.
    ///
    /// Dashes which are used to mark connections between pieces of
    /// words, plus the Katakana middle dot.
}
2442
make_binary_property! {
    name: "Id_Continue";
    short_name: "IDC";
    ident: IdContinue;
    data_marker: crate::provider::PropertyBinaryIdContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// If using NFKC to fold differences between characters, use [`XidContinue`] instead.
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// See also [`IdStart`] for characters that can begin an identifier.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdContinue;
    ///
    /// let id_continue = CodePointSetData::new::<IdContinue>();
    ///
    /// assert!(id_continue.contains('x'));
    /// assert!(id_continue.contains('1'));
    /// assert!(id_continue.contains('_'));
    /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_continue.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
2472
make_binary_property! {
    name: "Ideographic";
    short_name: "Ideo";
    ident: Ideographic;
    data_marker: crate::provider::PropertyBinaryIdeographicV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
    /// ideographs, or related siniform ideographs.
    ///
    /// See also [`UnifiedIdeograph`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Ideographic;
    ///
    /// let ideographic = CodePointSetData::new::<Ideographic>();
    ///
    /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB
    /// ```
}
2494
make_binary_property! {
    name: "Id_Start";
    short_name: "IDS";
    ident: IdStart;
    data_marker: crate::provider::PropertyBinaryIdStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// If using NFKC to fold differences between characters, use [`XidStart`] instead.
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// See also [`IdContinue`] for characters that can come after the first character.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdStart;
    ///
    /// let id_start = CodePointSetData::new::<IdStart>();
    ///
    /// assert!(id_start.contains('x'));
    /// assert!(!id_start.contains('1'));
    /// assert!(!id_start.contains('_'));
    /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(id_start.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
2523
make_binary_property! {
    name: "Ids_Binary_Operator";
    short_name: "IDSB";
    ident: IdsBinaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
    /// Characters used in Ideographic Description Sequences.
    ///
    /// See also [`IdsTrinaryOperator`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsBinaryOperator;
    ///
    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
    ///
    /// assert!(ids_binary_operator.contains('\u{2FF5}')); // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_binary_operator.contains('\u{3006}')); // U+3006 IDEOGRAPHIC CLOSING MARK
    /// ```
}
2544
make_binary_property! {
    name: "Ids_Trinary_Operator";
    short_name: "IDST";
    ident: IdsTrinaryOperator;
    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
    /// Characters used in Ideographic Description Sequences.
    ///
    /// See also [`IdsBinaryOperator`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::IdsTrinaryOperator;
    ///
    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
    ///
    /// assert!(ids_trinary_operator.contains('\u{2FF2}')); // U+2FF2 IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
    /// assert!(ids_trinary_operator.contains('\u{2FF3}')); // U+2FF3 IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
    /// assert!(!ids_trinary_operator.contains('\u{2FF4}')); // U+2FF4 IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
    /// assert!(!ids_trinary_operator.contains('\u{2FF5}')); // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
    /// assert!(!ids_trinary_operator.contains('\u{3006}')); // U+3006 IDEOGRAPHIC CLOSING MARK
    /// ```
}
2568
make_binary_property! {
    name: "Join_Control";
    short_name: "Join_C";
    ident: JoinControl;
    data_marker: crate::provider::PropertyBinaryJoinControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
    /// Format control characters which have specific functions for control of cursive joining
    /// and ligation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::JoinControl;
    ///
    /// let join_control = CodePointSetData::new::<JoinControl>();
    ///
    /// assert!(join_control.contains('\u{200C}')); // U+200C ZERO WIDTH NON-JOINER
    /// assert!(join_control.contains('\u{200D}')); // U+200D ZERO WIDTH JOINER
    /// assert!(!join_control.contains('\u{200E}')); // U+200E LEFT-TO-RIGHT MARK
    /// ```
}
2591
make_binary_property! {
    name: "Logical_Order_Exception";
    short_name: "LOE";
    ident: LogicalOrderException;
    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts
    /// such as Thai and Lao.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::LogicalOrderException;
    ///
    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
    ///
    /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI
    /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A
    /// ```
}
2612
make_binary_property! {
    name: "Lowercase";
    short_name: "Lower";
    ident: Lowercase;
    data_marker: crate::provider::PropertyBinaryLowercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
    /// Lowercase characters.
    ///
    /// See also [`Uppercase`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Lowercase;
    ///
    /// let lowercase = CodePointSetData::new::<Lowercase>();
    ///
    /// assert!(lowercase.contains('a'));
    /// assert!(!lowercase.contains('A'));
    /// ```
}
2633
make_binary_property! {
    name: "Math";
    short_name: "Math";
    ident: Math;
    data_marker: crate::provider::PropertyBinaryMathV1;
    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
    /// Characters used in mathematical notation.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Math;
    ///
    /// let math = CodePointSetData::new::<Math>();
    ///
    /// assert!(math.contains('='));
    /// assert!(math.contains('+'));
    /// assert!(!math.contains('-')); // U+002D HYPHEN-MINUS
    /// assert!(math.contains('−')); // U+2212 MINUS SIGN
    /// assert!(!math.contains('/')); // U+002F SOLIDUS
    /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH
    /// ```
}
2658
make_binary_property! {
    name: "Noncharacter_Code_Point";
    short_name: "NChar";
    ident: NoncharacterCodePoint;
    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
    /// Code points permanently reserved for internal use.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::NoncharacterCodePoint;
    ///
    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
    ///
    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));
    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));
    /// assert!(!noncharacter_code_point.contains('\u{10000}')); // U+10000 LINEAR B SYLLABLE B008 A
    /// ```
}
2680
make_binary_property! {
    name: "NFC_Inert";
    short_name: "NFC_Inert";
    ident: NfcInert;
    data_marker: crate::provider::PropertyBinaryNfcInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
    ///
    /// See also [`NfdInert`], [`NfkcInert`], and [`NfkdInert`].
}
2689
make_binary_property! {
    name: "NFD_Inert";
    short_name: "NFD_Inert";
    ident: NfdInert;
    data_marker: crate::provider::PropertyBinaryNfdInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
    ///
    /// See also [`NfcInert`], [`NfkcInert`], and [`NfkdInert`].
}
2698
make_binary_property! {
    name: "NFKC_Inert";
    short_name: "NFKC_Inert";
    ident: NfkcInert;
    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
    ///
    /// See also [`NfcInert`], [`NfdInert`], and [`NfkdInert`].
}
2707
make_binary_property! {
    name: "NFKD_Inert";
    short_name: "NFKD_Inert";
    ident: NfkdInert;
    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
    ///
    /// See also [`NfcInert`], [`NfdInert`], and [`NfkcInert`].
}
2716
make_binary_property! {
    name: "Pattern_Syntax";
    short_name: "Pat_Syn";
    ident: PatternSyntax;
    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
    /// Characters used as syntax in patterns (such as regular expressions).
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// See also [`PatternWhiteSpace`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternSyntax;
    ///
    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
    ///
    /// assert!(pattern_syntax.contains('{'));
    /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW
    /// assert!(!pattern_syntax.contains('0'));
    /// ```
}
2742
make_binary_property! {
    name: "Pattern_White_Space";
    short_name: "Pat_WS";
    ident: PatternWhiteSpace;
    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
    /// Characters used as whitespace in patterns (such as regular expressions).
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// See also [`PatternSyntax`] and [`WhiteSpace`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::PatternWhiteSpace;
    ///
    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
    ///
    /// assert!(pattern_white_space.contains(' '));
    /// assert!(pattern_white_space.contains('\u{2029}')); // U+2029 PARAGRAPH SEPARATOR
    /// assert!(pattern_white_space.contains('\u{000A}')); // U+000A NEW LINE
    /// assert!(!pattern_white_space.contains('\u{00A0}')); // U+00A0 NO-BREAK SPACE
    /// ```
}
2769
// NOTE(review): no `# Example` doctest here, unlike most sibling properties in this file.
make_binary_property! {
    name: "Prepended_Concatenation_Mark";
    short_name: "PCM";
    ident: PrependedConcatenationMark;
    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
    /// A small class of visible format controls, which precede and then span a sequence of
    /// other characters, usually digits.
}
2779
make_binary_property! {
    name: "Print";
    short_name: "Print";
    ident: Print;
    data_marker: crate::provider::PropertyBinaryPrintV1;
    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
    /// Printable characters (visible characters and whitespace).
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// See also [`Graph`].
}
2790
make_binary_property! {
    name: "Quotation_Mark";
    short_name: "QMark";
    ident: QuotationMark;
    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
    /// Punctuation characters that function as quotation marks.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::QuotationMark;
    ///
    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
    ///
    /// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE
    /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK
    /// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN
    /// ```
}
2812
make_binary_property! {
    name: "Radical";
    short_name: "Radical";
    ident: Radical;
    data_marker: crate::provider::PropertyBinaryRadicalV1;
    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
    /// Characters used in the definition of Ideographic Description Sequences.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Radical;
    ///
    /// let radical = CodePointSetData::new::<Radical>();
    ///
    /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX
    /// // Written as an escape: the compatibility ideograph is visually indistinguishable from
    /// // the unified ideograph it maps to.
    /// assert!(!radical.contains('\u{F95E}')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
    /// ```
}
2833
make_binary_property! {
    name: "Regional_Indicator";
    short_name: "RI";
    ident: RegionalIndicator;
    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::RegionalIndicator;
    ///
    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
    ///
    /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T
    /// assert!(!regional_indicator.contains('T')); // U+0054 LATIN CAPITAL LETTER T
    /// ```
}
2855
make_binary_property! {
    name: "Soft_Dotted";
    short_name: "SD";
    ident: SoftDotted;
    data_marker: crate::provider::PropertyBinarySoftDottedV1;
    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
    /// Characters with a "soft dot", like i or j.
    ///
    /// An accent placed on these characters causes the dot to disappear.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SoftDotted;
    ///
    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
    ///
    /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I
    /// ```
}
2879
// NOTE(review): no `# Example` doctest here, unlike most sibling properties in this file.
make_binary_property! {
    name: "Segment_Starter";
    short_name: "Segment_Starter";
    ident: SegmentStarter;
    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
    /// Characters that are starters in terms of Unicode normalization and combining character
    /// sequences.
}
2889
// NOTE(review): no `# Example` doctest here, unlike most sibling properties in this file.
make_binary_property! {
    name: "Case_Sensitive";
    short_name: "Case_Sensitive";
    ident: CaseSensitive;
    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
    /// Characters that are either the source of a case mapping or in the target of a case
    /// mapping.
}
2899
make_binary_property! {
    name: "Sentence_Terminal";
    short_name: "STerm";
    ident: SentenceTerminal;
    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
    /// Punctuation characters that generally mark the end of sentences.
    ///
    /// See also [`TerminalPunctuation`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SentenceTerminal;
    ///
    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
    ///
    /// assert!(sentence_terminal.contains('.'));
    /// assert!(sentence_terminal.contains('?'));
    /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(!sentence_terminal.contains(',')); // U+002C COMMA
    /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```
}
2923
make_binary_property! {
    name: "Terminal_Punctuation";
    short_name: "Term";
    ident: TerminalPunctuation;
    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
    /// Punctuation characters that generally mark the end of textual units.
    ///
    /// See also [`SentenceTerminal`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::TerminalPunctuation;
    ///
    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
    ///
    /// assert!(terminal_punctuation.contains('.'));
    /// assert!(terminal_punctuation.contains('?'));
    /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(terminal_punctuation.contains(',')); // U+002C COMMA
    /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK
    /// ```
}
2947
make_binary_property! {
    name: "Unified_Ideograph";
    short_name: "UIdeo";
    ident: UnifiedIdeograph;
    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
    ///
    /// See also [`Ideographic`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::UnifiedIdeograph;
    ///
    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
    ///
    /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728
    /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178
    /// ```
}
2969
make_binary_property! {
    name: "Uppercase";
    short_name: "Upper";
    ident: Uppercase;
    data_marker: crate::provider::PropertyBinaryUppercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
    /// Uppercase characters.
    ///
    /// See also [`Lowercase`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Uppercase;
    ///
    /// let uppercase = CodePointSetData::new::<Uppercase>();
    ///
    /// assert!(uppercase.contains('U'));
    /// assert!(!uppercase.contains('u'));
    /// ```
}
2990
make_binary_property! {
    name: "Variation_Selector";
    short_name: "VS";
    ident: VariationSelector;
    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
    /// Characters that are Variation Selectors.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::VariationSelector;
    ///
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
    ///
    /// assert!(variation_selector.contains('\u{180D}')); // U+180D MONGOLIAN FREE VARIATION SELECTOR THREE
    /// assert!(!variation_selector.contains('\u{303E}')); // U+303E IDEOGRAPHIC VARIATION INDICATOR
    /// assert!(variation_selector.contains('\u{FE0F}')); // U+FE0F VARIATION SELECTOR-16
    /// assert!(!variation_selector.contains('\u{FE10}')); // U+FE10 PRESENTATION FORM FOR VERTICAL COMMA
    /// assert!(variation_selector.contains('\u{E01EF}')); // U+E01EF VARIATION SELECTOR-256
    /// ```
}
3014
make_binary_property! {
    name: "White_Space";
    short_name: "space";
    ident: WhiteSpace;
    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
    /// Spaces, separator characters and other control characters which should be treated by
    /// programming languages as "white space" for the purpose of parsing elements.
    ///
    /// See also [`PatternWhiteSpace`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' '));
    /// assert!(white_space.contains('\u{000A}')); // U+000A NEW LINE
    /// assert!(white_space.contains('\u{00A0}')); // U+00A0 NO-BREAK SPACE
    /// assert!(!white_space.contains('\u{200B}')); // U+200B ZERO WIDTH SPACE
    /// ```
}
3038
make_binary_property! {
    name: "Xdigit";
    short_name: "Xdigit";
    ident: Xdigit;
    data_marker: crate::provider::PropertyBinaryXdigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
    /// Hexadecimal digits.
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// See also [`HexDigit`].
}
3049
make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::PropertyBinaryXidContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
    /// Characters that can come after the first character in an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// See also [`IdContinue`] and [`XidStart`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));
    /// assert!(xid_continue.contains('1'));
    /// assert!(xid_continue.contains('_'));
    /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3077
make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::PropertyBinaryXidStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
    /// Characters that can begin an identifier.
    ///
    /// See [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html)
    /// for more details.
    ///
    /// See also [`IdStart`] and [`XidContinue`].
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3106
3107pub use crate::emoji::EmojiSet;
3108
/// Declares a unit struct and implements [`EmojiSet`] for it.
///
/// Input, in this order:
/// - `ident`: name of the struct to generate;
/// - `data_marker`: type used as the `DataMarker` associated type;
/// - `singleton`: name of the constant on `crate::provider::Baked` that backs `SINGLETON`
///   (only referenced when the `compiled_data` feature is enabled);
/// - one or more attributes (typically doc comments) to attach to the struct.
macro_rules! make_emoji_set {
    (
        ident: $set:ident;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $set;

        // The trait is sealed, so only types declared through this macro implement it.
        impl crate::private::Sealed for $set {}

        impl EmojiSet for $set {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked;
        }
    };
}
3131
make_emoji_set! {
    ident: BasicEmoji;
    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
    /// Characters and character sequences intended for general-purpose, independent, direct input.
    ///
    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
    /// details.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::EmojiSetData;
    /// use icu::properties::props::BasicEmoji;
    ///
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
    ///
    /// assert!(!basic_emoji.contains('\u{0020}')); // U+0020 SPACE
    /// assert!(!basic_emoji.contains('\n'));
    /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
    /// assert!(basic_emoji.contains_str("\u{1F983}")); // the same character, queried as a string
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3
    /// ```
}
3157
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;

    /// Checks that the numeric values present in the name-parsing data (`lookup`) are exactly
    /// the values listed in `consts`.
    ///
    /// Every value found in only one of the two sources is reported as
    /// `<source>:\t<name> = <value>`, where `<source>` is `Data` or `Consts`; the function
    /// panics if that report is non-empty.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Values known to the data, keyed by numeric value. Entries are removed as the
        // matching constants are seen, so whatever remains is data-only.
        let mut from_data: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        let long_names = crate::PropertyNamesLong::<T>::new();

        let mut mismatches = Vec::new();
        for constant in consts {
            let value = u16::from(*constant) as usize;
            let name = long_names
                .get(*constant)
                .unwrap_or("<unknown>")
                .to_string();
            if from_data.remove(&value).is_none() {
                // Constant without a counterpart in the data.
                mismatches.push((value, (name, "Consts")));
            }
        }
        // Data entries without a counterpart among the constants.
        mismatches.extend(from_data);

        let report: String = mismatches
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            report.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{}",
            report
        );
    }

    /// `EastAsianWidth` constants vs. baked name data.
    #[test]
    fn test_ea() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
            EastAsianWidth::ALL_VALUES,
        );
    }

    /// `CanonicalCombiningClass` constants vs. baked name data.
    #[test]
    fn test_ccc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    /// `JoiningType` constants vs. baked name data.
    #[test]
    fn test_jt() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
            JoiningType::ALL_VALUES,
        );
    }

    /// `IndicSyllabicCategory` constants vs. baked name data.
    #[test]
    fn test_insc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    /// `SentenceBreak` constants vs. baked name data.
    #[test]
    fn test_sb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
            SentenceBreak::ALL_VALUES,
        );
    }

    /// `WordBreak` constants vs. baked name data.
    #[test]
    fn test_wb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
            WordBreak::ALL_VALUES,
        );
    }

    /// `BidiClass` constants vs. baked name data.
    #[test]
    fn test_bc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
            BidiClass::ALL_VALUES,
        );
    }

    /// `HangulSyllableType` constants vs. baked name data.
    #[test]
    fn test_hst() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
            HangulSyllableType::ALL_VALUES,
        );
    }

    /// `VerticalOrientation` constants vs. baked name data.
    #[test]
    fn test_vo() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
            VerticalOrientation::ALL_VALUES,
        );
    }
}