idna_adapter/
lib.rs

1// Copyright The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! This crate abstracts over a Unicode back end for the [`idna`][1]
10//! crate.
11//!
12//! To work around the lack of [`global-features`][2] in Cargo, this
13//! crate allows the top level `Cargo.lock` to choose an alternative
14//! Unicode back end for the `idna` crate by pinning a version of this
15//! crate.
16//!
17//! See the [README of the latest version][3] for more details.
18//!
19//! [1]: https://docs.rs/crate/idna/latest
20//! [2]: https://internals.rust-lang.org/t/pre-rfc-mutually-excusive-global-features/19618
21//! [3]: https://docs.rs/crate/idna_adapter/latest
22
23#![no_std]
24
25use icu_normalizer::properties::CanonicalCombiningClassMapBorrowed;
26use icu_normalizer::uts46::Uts46MapperBorrowed;
27use icu_properties::props::GeneralCategory;
28use icu_properties::CodePointMapDataBorrowed;
29
30/// Turns a joining type into a mask for comparing with multiple type at once.
31const fn joining_type_to_mask(jt: icu_properties::props::JoiningType) -> u32 {
32    1u32 << jt.to_icu4c_value()
33}
34
35/// Mask for checking for both left and dual joining.
36pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
37    joining_type_to_mask(icu_properties::props::JoiningType::LeftJoining)
38        | joining_type_to_mask(icu_properties::props::JoiningType::DualJoining),
39);
40
41/// Mask for checking for both left and dual joining.
42pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
43    joining_type_to_mask(icu_properties::props::JoiningType::RightJoining)
44        | joining_type_to_mask(icu_properties::props::JoiningType::DualJoining),
45);
46
47/// Turns a bidi class into a mask for comparing with multiple classes at once.
48const fn bidi_class_to_mask(bc: icu_properties::props::BidiClass) -> u32 {
49    1u32 << bc.to_icu4c_value()
50}
51
52/// Mask for checking if the domain is a bidi domain.
53pub const RTL_MASK: BidiClassMask = BidiClassMask(
54    bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
55        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter)
56        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicNumber),
57);
58
59/// Mask for allowable bidi classes in the first character of a label
60/// (either LTR or RTL) in a bidi domain.
61pub const FIRST_BC_MASK: BidiClassMask = BidiClassMask(
62    bidi_class_to_mask(icu_properties::props::BidiClass::LeftToRight)
63        | bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
64        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter),
65);
66
67// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
68// character in an LTR label in a bidi domain.
69pub const LAST_LTR_MASK: BidiClassMask = BidiClassMask(
70    bidi_class_to_mask(icu_properties::props::BidiClass::LeftToRight)
71        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber),
72);
73
74// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
75// character in an RTL label in a bidi domain.
76pub const LAST_RTL_MASK: BidiClassMask = BidiClassMask(
77    bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
78        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter)
79        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber)
80        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicNumber),
81);
82
83// Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain.
84pub const MIDDLE_LTR_MASK: BidiClassMask = BidiClassMask(
85    bidi_class_to_mask(icu_properties::props::BidiClass::LeftToRight)
86        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber)
87        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanSeparator)
88        | bidi_class_to_mask(icu_properties::props::BidiClass::CommonSeparator)
89        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanTerminator)
90        | bidi_class_to_mask(icu_properties::props::BidiClass::OtherNeutral)
91        | bidi_class_to_mask(icu_properties::props::BidiClass::BoundaryNeutral)
92        | bidi_class_to_mask(icu_properties::props::BidiClass::NonspacingMark),
93);
94
95// Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain.
96pub const MIDDLE_RTL_MASK: BidiClassMask = BidiClassMask(
97    bidi_class_to_mask(icu_properties::props::BidiClass::RightToLeft)
98        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicLetter)
99        | bidi_class_to_mask(icu_properties::props::BidiClass::ArabicNumber)
100        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanNumber)
101        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanSeparator)
102        | bidi_class_to_mask(icu_properties::props::BidiClass::CommonSeparator)
103        | bidi_class_to_mask(icu_properties::props::BidiClass::EuropeanTerminator)
104        | bidi_class_to_mask(icu_properties::props::BidiClass::OtherNeutral)
105        | bidi_class_to_mask(icu_properties::props::BidiClass::BoundaryNeutral)
106        | bidi_class_to_mask(icu_properties::props::BidiClass::NonspacingMark),
107);
108
109/// Turns a genecal category into a mask for comparing with multiple categories at once.
110const fn general_category_to_mask(gc: GeneralCategory) -> u32 {
111    1 << (gc as u32)
112}
113
114/// Mask for the disallowed general categories of the first character in a label.
115const MARK_MASK: u32 = general_category_to_mask(GeneralCategory::NonspacingMark)
116    | general_category_to_mask(GeneralCategory::SpacingMark)
117    | general_category_to_mask(GeneralCategory::EnclosingMark);
118
119/// Value for the Joining_Type Unicode property.
120#[repr(transparent)]
121#[derive(Clone, Copy)]
122pub struct JoiningType(icu_properties::props::JoiningType);
123
124impl JoiningType {
125    /// Returns the corresponding `JoiningTypeMask`.
126    #[inline(always)]
127    pub fn to_mask(self) -> JoiningTypeMask {
128        JoiningTypeMask(joining_type_to_mask(self.0))
129    }
130
131    // `true` iff this value is the Transparent value.
132    #[inline(always)]
133    pub fn is_transparent(self) -> bool {
134        self.0 == icu_properties::props::JoiningType::Transparent
135    }
136}
137
/// A bit mask representing a set of potentially multiple `JoiningType`
/// values.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct JoiningTypeMask(u32);

impl JoiningTypeMask {
    /// `true` iff the two masks have at least one `JoiningType` in common.
    #[inline(always)]
    pub fn intersects(self, other: JoiningTypeMask) -> bool {
        let Self(lhs) = self;
        let Self(rhs) = other;
        (lhs & rhs) != 0
    }
}
151
152/// Value for the Bidi_Class Unicode property.
153#[repr(transparent)]
154#[derive(Clone, Copy)]
155pub struct BidiClass(icu_properties::props::BidiClass);
156
157impl BidiClass {
158    /// Returns the corresponding `BidiClassMask`.
159    #[inline(always)]
160    pub fn to_mask(self) -> BidiClassMask {
161        BidiClassMask(bidi_class_to_mask(self.0))
162    }
163
164    /// `true` iff this value is Left_To_Right
165    #[inline(always)]
166    pub fn is_ltr(self) -> bool {
167        self.0 == icu_properties::props::BidiClass::LeftToRight
168    }
169
170    /// `true` iff this value is Nonspacing_Mark
171    #[inline(always)]
172    pub fn is_nonspacing_mark(self) -> bool {
173        self.0 == icu_properties::props::BidiClass::NonspacingMark
174    }
175
176    /// `true` iff this value is European_Number
177    #[inline(always)]
178    pub fn is_european_number(self) -> bool {
179        self.0 == icu_properties::props::BidiClass::EuropeanNumber
180    }
181
182    /// `true` iff this value is Arabic_Number
183    #[inline(always)]
184    pub fn is_arabic_number(self) -> bool {
185        self.0 == icu_properties::props::BidiClass::ArabicNumber
186    }
187}
188
/// A bit mask representing a set of potentially multiple `BidiClass`
/// values.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct BidiClassMask(u32);

impl BidiClassMask {
    /// `true` iff the two masks have at least one `BidiClass` in common.
    #[inline(always)]
    pub fn intersects(self, other: BidiClassMask) -> bool {
        let Self(lhs) = self;
        let Self(rhs) = other;
        (lhs & rhs) != 0
    }
}
202
/// An adapter between a Unicode back end and the `idna` crate.
///
/// Bundles the borrowed, `'static` data handles that the lookup and
/// normalization methods of this type consult.
pub struct Adapter {
    /// Mapper that `map_normalize` and `normalize_validate` delegate to.
    mapper: Uts46MapperBorrowed<'static>,
    /// Canonical_Combining_Class lookup, used by `is_virama`.
    canonical_combining_class: CanonicalCombiningClassMapBorrowed<'static>,
    /// General_Category lookup, used by `is_mark`.
    general_category: CodePointMapDataBorrowed<'static, GeneralCategory>,
    /// Bidi_Class lookup, used by `bidi_class`.
    bidi_class: CodePointMapDataBorrowed<'static, icu_properties::props::BidiClass>,
    /// Joining_Type lookup, used by `joining_type`.
    joining_type: CodePointMapDataBorrowed<'static, icu_properties::props::JoiningType>,
}
211
#[cfg(feature = "compiled_data")]
impl Default for Adapter {
    /// Same as [`Adapter::new`]: builds the adapter from data compiled
    /// into the binary.
    fn default() -> Self {
        Adapter::new()
    }
}
218
219impl Adapter {
220    /// Constructor using data compiled into the binary.
221    #[cfg(feature = "compiled_data")]
222    #[inline(always)]
223    pub const fn new() -> Self {
224        Self {
225            mapper: Uts46MapperBorrowed::new(),
226            canonical_combining_class: CanonicalCombiningClassMapBorrowed::new(),
227            general_category: icu_properties::CodePointMapData::<GeneralCategory>::new(),
228            bidi_class: icu_properties::CodePointMapData::<icu_properties::props::BidiClass>::new(),
229            joining_type:
230                icu_properties::CodePointMapData::<icu_properties::props::JoiningType>::new(),
231        }
232    }
233
234    /// `true` iff the Canonical_Combining_Class of `c` is Virama.
235    #[inline(always)]
236    pub fn is_virama(&self, c: char) -> bool {
237        self.canonical_combining_class.get_u8(c) == 9
238    }
239
240    /// `true` iff the General_Category of `c` is Mark, i.e. any of Nonspacing_Mark,
241    /// Spacing_Mark, or Enclosing_Mark.
242    #[inline(always)]
243    pub fn is_mark(&self, c: char) -> bool {
244        (general_category_to_mask(self.general_category.get(c)) & MARK_MASK) != 0
245    }
246
247    /// Returns the Bidi_Class of `c`.
248    #[inline(always)]
249    pub fn bidi_class(&self, c: char) -> BidiClass {
250        BidiClass(self.bidi_class.get(c))
251    }
252
253    /// Returns the Joining_Type of `c`.
254    #[inline(always)]
255    pub fn joining_type(&self, c: char) -> JoiningType {
256        JoiningType(self.joining_type.get(c))
257    }
258
259    /// See the [method of the same name in `icu_normalizer`][1] for the
260    /// exact semantics.
261    ///
262    /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.map_normalize
263    #[inline(always)]
264    pub fn map_normalize<'delegate, I: Iterator<Item = char> + 'delegate>(
265        &'delegate self,
266        iter: I,
267    ) -> impl Iterator<Item = char> + 'delegate {
268        self.mapper.map_normalize(iter)
269    }
270
271    /// See the [method of the same name in `icu_normalizer`][1] for the
272    /// exact semantics.
273    ///
274    /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.normalize_validate
275    #[inline(always)]
276    pub fn normalize_validate<'delegate, I: Iterator<Item = char> + 'delegate>(
277        &'delegate self,
278        iter: I,
279    ) -> impl Iterator<Item = char> + 'delegate {
280        self.mapper.normalize_validate(iter)
281    }
282}