icu_properties

Struct GeneralCategoryGroup

source
pub struct GeneralCategoryGroup(/* private fields */);
Expand description

Groupings of multiple General_Category property values.

Instances of GeneralCategoryGroup represent the defined multi-category values that are useful for users in certain contexts, such as regex. In other words, unlike GeneralCategory, this supports groups of general categories: for example, Letter is the union of UppercaseLetter, LowercaseLetter, etc.

See https://www.unicode.org/reports/tr44/ .

The discriminants correspond to the U_GC_XX_MASK constants in ICU4C. Unlike GeneralCategory, this supports groups of general categories: for example, Letter is the union of UppercaseLetter, LowercaseLetter, etc.

See UCharCategory and U_GET_GC_MASK in ICU4C.

Implementations§

source§

impl GeneralCategoryGroup

source

pub const UppercaseLetter: GeneralCategoryGroup = _

(Lu) An uppercase letter

source

pub const LowercaseLetter: GeneralCategoryGroup = _

(Ll) A lowercase letter

source

pub const TitlecaseLetter: GeneralCategoryGroup = _

(Lt) A digraphic letter, with first part uppercase

source

pub const ModifierLetter: GeneralCategoryGroup = _

(Lm) A modifier letter

source

pub const OtherLetter: GeneralCategoryGroup = _

(Lo) Other letters, including syllables and ideographs

source

pub const CasedLetter: GeneralCategoryGroup = _

(LC) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter

source

pub const Letter: GeneralCategoryGroup = _

(L) The union of all letter categories

source

pub const NonspacingMark: GeneralCategoryGroup = _

(Mn) A nonspacing combining mark (zero advance width)

source

pub const EnclosingMark: GeneralCategoryGroup = _

(Me) An enclosing combining mark

source

pub const SpacingMark: GeneralCategoryGroup = _

(Mc) A spacing combining mark (positive advance width)

source

pub const Mark: GeneralCategoryGroup = _

(M) The union of all mark categories

source

pub const DecimalNumber: GeneralCategoryGroup = _

(Nd) A decimal digit

source

pub const LetterNumber: GeneralCategoryGroup = _

(Nl) A letterlike numeric character

source

pub const OtherNumber: GeneralCategoryGroup = _

(No) A numeric character of other type

source

pub const Number: GeneralCategoryGroup = _

(N) The union of all number categories

source

pub const SpaceSeparator: GeneralCategoryGroup = _

(Zs) A space character (of various non-zero widths)

source

pub const LineSeparator: GeneralCategoryGroup = _

(Zl) U+2028 LINE SEPARATOR only

source

pub const ParagraphSeparator: GeneralCategoryGroup = _

(Zp) U+2029 PARAGRAPH SEPARATOR only

source

pub const Separator: GeneralCategoryGroup = _

(Z) The union of all separator categories

source

pub const Control: GeneralCategoryGroup = _

(Cc) A C0 or C1 control code

source

pub const Format: GeneralCategoryGroup = _

(Cf) A format control character

source

pub const PrivateUse: GeneralCategoryGroup = _

(Co) A private-use character

source

pub const Surrogate: GeneralCategoryGroup = _

(Cs) A surrogate code point

source

pub const Unassigned: GeneralCategoryGroup = _

(Cn) A reserved unassigned code point or a noncharacter

source

pub const Other: GeneralCategoryGroup = _

(C) The union of all control code, reserved, and unassigned categories

source

pub const DashPunctuation: GeneralCategoryGroup = _

(Pd) A dash or hyphen punctuation mark

source

pub const OpenPunctuation: GeneralCategoryGroup = _

(Ps) An opening punctuation mark (of a pair)

source

pub const ClosePunctuation: GeneralCategoryGroup = _

(Pe) A closing punctuation mark (of a pair)

source

pub const ConnectorPunctuation: GeneralCategoryGroup = _

(Pc) A connecting punctuation mark, like a tie

source

pub const InitialPunctuation: GeneralCategoryGroup = _

(Pi) An initial quotation mark

source

pub const FinalPunctuation: GeneralCategoryGroup = _

(Pf) A final quotation mark

source

pub const OtherPunctuation: GeneralCategoryGroup = _

(Po) A punctuation mark of other type

source

pub const Punctuation: GeneralCategoryGroup = _

(P) The union of all punctuation categories

source

pub const MathSymbol: GeneralCategoryGroup = _

(Sm) A symbol of mathematical use

source

pub const CurrencySymbol: GeneralCategoryGroup = _

(Sc) A currency sign

source

pub const ModifierSymbol: GeneralCategoryGroup = _

(Sk) A non-letterlike modifier symbol

source

pub const OtherSymbol: GeneralCategoryGroup = _

(So) A symbol of other type

source

pub const Symbol: GeneralCategoryGroup = _

(S) The union of all symbol categories

source

pub const fn contains(&self, val: GeneralCategory) -> bool

Return whether the given GeneralCategory value belongs to this multi-value category group.

use icu::properties::{maps, GeneralCategory, GeneralCategoryGroup};

let gc = maps::general_category();

assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));

// U+0B1E ORIYA LETTER NYA
assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));

// U+0301 COMBINING ACUTE ACCENT
assert_eq!(gc.get32(0x0301), GeneralCategory::NonspacingMark);
assert!(GeneralCategoryGroup::Mark.contains(gc.get32(0x0301)));
assert!(!GeneralCategoryGroup::Letter.contains(gc.get32(0x0301)));

assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));

assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));

// U+2713 CHECK MARK
assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));

assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));

// U+E007F CANCEL TAG
assert_eq!(gc.get32(0xE007F), GeneralCategory::Format);
assert!(GeneralCategoryGroup::Other.contains(gc.get32(0xE007F)));
assert!(!GeneralCategoryGroup::Separator.contains(gc.get32(0xE007F)));
source

pub const fn complement(self) -> Self

Produce a GeneralCategoryGroup that is the inverse of this one

§Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let letter = GeneralCategoryGroup::Letter;
let not_letter = letter.complement();

assert!(not_letter.contains(GeneralCategory::MathSymbol));
assert!(!letter.contains(GeneralCategory::MathSymbol));
assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
assert!(!letter.contains(GeneralCategory::OtherPunctuation));
assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
assert!(letter.contains(GeneralCategory::UppercaseLetter));
source

pub const fn all() -> Self

Return the group representing all GeneralCategory values

§Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let all = GeneralCategoryGroup::all();

assert!(all.contains(GeneralCategory::MathSymbol));
assert!(all.contains(GeneralCategory::OtherPunctuation));
assert!(all.contains(GeneralCategory::UppercaseLetter));
source

pub const fn empty() -> Self

Return the empty group

§Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let empty = GeneralCategoryGroup::empty();

assert!(!empty.contains(GeneralCategory::MathSymbol));
assert!(!empty.contains(GeneralCategory::OtherPunctuation));
assert!(!empty.contains(GeneralCategory::UppercaseLetter));
source

pub const fn union(self, other: Self) -> Self

Take the union of two groups

§Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let letter = GeneralCategoryGroup::Letter;
let symbol = GeneralCategoryGroup::Symbol;
let union = letter.union(symbol);

assert!(union.contains(GeneralCategory::MathSymbol));
assert!(!union.contains(GeneralCategory::OtherPunctuation));
assert!(union.contains(GeneralCategory::UppercaseLetter));
source

pub const fn intersection(self, other: Self) -> Self

Take the intersection of two groups

§Example
use icu::properties::{GeneralCategory, GeneralCategoryGroup};

let letter = GeneralCategoryGroup::Letter;
let lu = GeneralCategoryGroup::UppercaseLetter;
let intersection = letter.intersection(lu);

assert!(!intersection.contains(GeneralCategory::MathSymbol));
assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
assert!(intersection.contains(GeneralCategory::UppercaseLetter));
assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
source§

impl GeneralCategoryGroup

source

pub const fn name_to_enum_mapper() -> PropertyValueNameToEnumMapperBorrowed<'static, GeneralCategoryGroup>

Return a PropertyValueNameToEnumMapper, capable of looking up values from strings for the General_Category_Mask property.

Enabled with the compiled_data Cargo feature.

📚 Help choosing a constructor

§Example
use icu::properties::GeneralCategoryGroup;

let lookup = GeneralCategoryGroup::name_to_enum_mapper();
// short name for value
assert_eq!(lookup.get_strict("L"), Some(GeneralCategoryGroup::Letter));
assert_eq!(lookup.get_strict("LC"), Some(GeneralCategoryGroup::CasedLetter));
assert_eq!(lookup.get_strict("Lu"), Some(GeneralCategoryGroup::UppercaseLetter));
assert_eq!(lookup.get_strict("Zp"), Some(GeneralCategoryGroup::ParagraphSeparator));
assert_eq!(lookup.get_strict("P"), Some(GeneralCategoryGroup::Punctuation));
// long name for value
assert_eq!(lookup.get_strict("Letter"), Some(GeneralCategoryGroup::Letter));
assert_eq!(lookup.get_strict("Cased_Letter"), Some(GeneralCategoryGroup::CasedLetter));
assert_eq!(lookup.get_strict("Uppercase_Letter"), Some(GeneralCategoryGroup::UppercaseLetter));
// alias name
assert_eq!(lookup.get_strict("punct"), Some(GeneralCategoryGroup::Punctuation));
// name has incorrect casing
assert_eq!(lookup.get_strict("letter"), None);
// loose matching of name
assert_eq!(lookup.get_loose("letter"), Some(GeneralCategoryGroup::Letter));
// fake property
assert_eq!(lookup.get_strict("EverythingLol"), None);
source

pub fn get_name_to_enum_mapper( provider: &(impl DataProvider<GeneralCategoryMaskNameToValueV1Marker> + ?Sized), ) -> Result<PropertyValueNameToEnumMapper<GeneralCategoryGroup>, PropertiesError>

Trait Implementations§

source§

impl AsULE for GeneralCategoryGroup

source§

type ULE = RawBytesULE<2>

The ULE type corresponding to Self. Read more
source§

fn to_unaligned(self) -> Self::ULE

Converts from Self to Self::ULE. Read more
source§

fn from_unaligned(ule: Self::ULE) -> Self

Converts from Self::ULE to Self. Read more
source§

impl Clone for GeneralCategoryGroup

source§

fn clone(&self) -> GeneralCategoryGroup

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for GeneralCategoryGroup

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl From<GeneralCategory> for GeneralCategoryGroup

source§

fn from(subcategory: GeneralCategory) -> Self

Converts to this type from the input type.
source§

impl From<GeneralCategoryGroup> for u32

source§

fn from(group: GeneralCategoryGroup) -> Self

Converts to this type from the input type.
source§

impl From<u32> for GeneralCategoryGroup

source§

fn from(mask: u32) -> Self

Converts to this type from the input type.
source§

impl PartialEq for GeneralCategoryGroup

source§

fn eq(&self, other: &GeneralCategoryGroup) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl TrieValue for GeneralCategoryGroup

source§

type TryFromU32Error = TryFromIntError

The error type returned by try_from_u32() when a u32 cannot be converted to Self. Read more
source§

fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error>

A parsing function that is primarily motivated by deserialization contexts. When the serialization type width is smaller than 32 bits, then it is expected that the call site will widen the value to a u32 first.
source§

fn to_u32(self) -> u32

A method for converting back to a u32 that can roundtrip through Self::try_from_u32(). The default implementation of this trait method panics in debug mode and returns 0 in release mode. Read more
source§

impl Copy for GeneralCategoryGroup

source§

impl Eq for GeneralCategoryGroup

source§

impl StructuralPartialEq for GeneralCategoryGroup

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> CloneToUninit for T
where T: Clone,

source§

unsafe fn clone_to_uninit(&self, dst: *mut T)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

source§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

source§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<T> ErasedDestructor for T
where T: 'static,

source§

impl<T> MaybeSendSync for T