icu_provider/
marker.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::fallback::{LocaleFallbackConfig, LocaleFallbackPriority};
6use crate::{DataError, DataErrorKind, DataLocale, DataProvider, DataProviderWithMarker};
7use core::fmt;
8use core::marker::PhantomData;
9use icu_locale_core::preferences::LocalePreferences;
10use yoke::Yokeable;
11use zerovec::ule::*;
12
/// Trait marker for data structs. All types delivered by the data provider must be associated with
/// something implementing this trait.
///
/// Data markers are normally generated with the [`data_marker`](crate::data_marker) macro.
///
/// Also see [`DataMarker`].
///
/// Note: `DynamicDataMarker`s are quasi-const-generic compile-time objects, and as such are expected
/// to be unit structs. As this is not something that can be enforced by the type system, we
/// currently only have a `'static` bound on them (which is needed by a lot of our code).
///
/// # Examples
///
/// Manually implementing `DynamicDataMarker` for a custom type:
///
/// ```
/// use icu_provider::prelude::*;
/// use std::borrow::Cow;
///
/// #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
/// struct MyDataStruct<'data> {
///     message: Cow<'data, str>,
/// }
///
/// struct MyDataStructMarker;
///
/// impl DynamicDataMarker for MyDataStructMarker {
///     type DataStruct = MyDataStruct<'static>;
/// }
///
/// // We can now wrap MyDataStruct in a DataPayload:
/// let s = MyDataStruct {
///     message: Cow::Owned("Hello World".into()),
/// };
/// let payload = DataPayload::<MyDataStructMarker>::from_owned(s);
/// assert_eq!(payload.get().message, "Hello World");
/// ```
pub trait DynamicDataMarker: 'static {
    /// A type that implements [`Yokeable`]. This should typically be the `'static` version of a
    /// data struct.
    type DataStruct: for<'a> Yokeable<'a>;
}
57
/// A [`DynamicDataMarker`] with a [`DataMarkerInfo`] attached.
///
/// Structs implementing this trait are normally generated with the [`data_marker!`] macro.
///
/// Implementing this trait enables this marker to be used with the main [`DataProvider`] trait.
/// Most markers should be associated with a specific [`DataMarkerInfo`] and should therefore
/// implement this trait.
///
/// [`BufferMarker`] is an example of a marker that does _not_ implement this trait.
///
/// Note: `DataMarker`s are quasi-const-generic compile-time objects, and as such are expected
/// to be unit structs. As this is not something that can be enforced by the type system, we
/// currently only have a `'static` bound on them (which is needed by a lot of our code).
///
/// [`data_marker!`]: crate::data_marker
/// [`DataProvider`]: crate::DataProvider
/// [`BufferMarker`]: crate::buf::BufferMarker
pub trait DataMarker: DynamicDataMarker {
    /// The single [`DataMarkerInfo`] associated with this marker.
    const INFO: DataMarkerInfo;
}
79
80/// Extension trait for methods on [`DataMarker`]
81pub trait DataMarkerExt: DataMarker + Sized {
82    /// Binds a [`DataMarker`] to a provider supporting it.
83    fn bind<P>(provider: P) -> DataProviderWithMarker<Self, P>
84    where
85        P: DataProvider<Self>;
86    /// Constructs a [`DataLocale`] using fallback preferences from this [`DataMarker`].
87    fn make_locale(locale: LocalePreferences) -> DataLocale;
88}
89
90impl<M: DataMarker + Sized> DataMarkerExt for M {
91    fn bind<P>(provider: P) -> DataProviderWithMarker<Self, P>
92    where
93        P: DataProvider<Self>,
94    {
95        DataProviderWithMarker::new(provider)
96    }
97
98    fn make_locale(locale: LocalePreferences) -> DataLocale {
99        M::INFO.make_locale(locale)
100    }
101}
102
/// A [`DynamicDataMarker`] that never returns data.
///
/// The type parameter `Y` is the data struct type of the marker. It is only carried as
/// [`PhantomData`], so a `NeverMarker` does not store a value of type `Y`.
///
/// All types that have non-blanket impls of `DataProvider<M>` are expected to explicitly
/// implement `DataProvider<NeverMarker<Y>>`, returning [`DataErrorKind::MarkerNotFound`].
/// See [`impl_data_provider_never_marker!`].
///
/// [`DataErrorKind::MarkerNotFound`]: crate::DataErrorKind::MarkerNotFound
/// [`impl_data_provider_never_marker!`]: crate::marker::impl_data_provider_never_marker
///
/// # Examples
///
/// ```
/// use icu_locale_core::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::marker::NeverMarker;
/// use icu_provider::prelude::*;
///
/// let buffer_provider = HelloWorldProvider.into_json_provider();
///
/// let result = DataProvider::<NeverMarker<HelloWorld<'static>>>::load(
///     &buffer_provider.as_deserializing(),
///     DataRequest {
///         id: DataIdentifierBorrowed::for_locale(&langid!("en").into()),
///         ..Default::default()
///     },
/// );
///
/// assert!(matches!(
///     result,
///     Err(DataError {
///         kind: DataErrorKind::MarkerNotFound,
///         ..
///     })
/// ));
/// ```
#[derive(Debug, Copy, Clone)]
pub struct NeverMarker<Y>(PhantomData<Y>);
140
141impl<Y> DynamicDataMarker for NeverMarker<Y>
142where
143    for<'a> Y: Yokeable<'a>,
144{
145    type DataStruct = Y;
146}
147
148impl<Y> DataMarker for NeverMarker<Y>
149where
150    for<'a> Y: Yokeable<'a>,
151{
152    const INFO: DataMarkerInfo = DataMarkerInfo::from_id(DataMarkerId {
153        #[cfg(any(feature = "export", debug_assertions))]
154        debug: "NeverMarker",
155        hash: *b"nevermar",
156    });
157}
158
/// Implements `DataProvider<NeverMarker<Y>>` on a struct.
///
/// The generated `load` never succeeds: it always returns a
/// [`DataErrorKind::MarkerNotFound`](crate::DataErrorKind::MarkerNotFound) error built from the
/// request.
///
/// For more information, see [`NeverMarker`].
///
/// # Examples
///
/// ```
/// use icu_locale_core::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::marker::NeverMarker;
/// use icu_provider::prelude::*;
///
/// struct MyProvider;
///
/// icu_provider::marker::impl_data_provider_never_marker!(MyProvider);
///
/// let result = DataProvider::<NeverMarker<HelloWorld<'static>>>::load(
///     &MyProvider,
///     DataRequest {
///         id: DataIdentifierBorrowed::for_locale(&langid!("und").into()),
///         ..Default::default()
///     },
/// );
///
/// assert!(matches!(
///     result,
///     Err(DataError {
///         kind: DataErrorKind::MarkerNotFound,
///         ..
///     })
/// ));
/// ```
#[doc(hidden)] // macro
#[macro_export]
macro_rules! __impl_data_provider_never_marker {
    // `$ty` is the provider type that receives the impl.
    ($ty:path) => {
        impl<Y> $crate::DataProvider<$crate::marker::NeverMarker<Y>> for $ty
        where
            for<'a> Y: $crate::prelude::yoke::Yokeable<'a>,
        {
            fn load(
                &self,
                req: $crate::DataRequest,
            ) -> Result<$crate::DataResponse<$crate::marker::NeverMarker<Y>>, $crate::DataError>
            {
                // Unconditional error: the provider state (`self`) is never consulted.
                Err($crate::DataErrorKind::MarkerNotFound.with_req(
                    <$crate::marker::NeverMarker<Y> as $crate::DataMarker>::INFO,
                    req,
                ))
            }
        }
    };
}
// Re-exported under its public name inside this module.
#[doc(inline)]
pub use __impl_data_provider_never_marker as impl_data_provider_never_marker;
214
/// A compact hash of a [`DataMarkerInfo`]. Useful for keys in maps.
///
/// The hash will be stable over time within major releases.
///
/// A value of this type can be obtained with [`DataMarkerId::hashed`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, ULE)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct DataMarkerIdHash([u8; 4]);
222
223impl DataMarkerIdHash {
224    /// Magic bytes to locate [`DataMarkerIdHash`]es in binaries.
225    pub const LEADING_TAG: &[u8] = b"tdmh";
226
227    /// Gets the hash value as a byte array.
228    pub const fn to_bytes(self) -> [u8; 4] {
229        self.0
230    }
231}
232
/// Computes the 32-bit FxHash of a byte slice. Usable in `const` contexts.
///
/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our
/// use case since the strings being hashed originate from a trusted source (the ICU4X
/// components), and the hashes are computed at compile time, so we can check for collisions.
///
/// We could have considered a SHA or other cryptographic hash function. However, we are using
/// FxHash because:
///
/// 1. There is precedent for this algorithm in Rust
/// 2. The algorithm is easy to implement as a const function
/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree
/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits,
///    such that truncation would be required in order to fit into a u32, partially reducing
///    the benefit of a cryptographically secure algorithm
///
/// The input is consumed as little-endian 4-byte words, followed by at most one 2-byte word
/// and at most one single byte for the remainder.
const fn fxhash_32(bytes: &[u8]) -> u32 {
    // This code is adapted from https://github.com/rust-lang/rustc-hash,
    // whose license text is reproduced below.
    //
    // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
    // file at the top-level directory of this distribution and at
    // http://rust-lang.org/COPYRIGHT.
    //
    // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
    // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
    // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
    // option. This file may not be copied, modified, or distributed
    // except according to those terms.

    /// Mixes one word into the running hash: rotate, xor, multiply.
    #[inline]
    const fn hash_word_32(hash: u32, word: u32) -> u32 {
        const ROTATE: u32 = 5;
        const SEED32: u32 = 0x9e_37_79_b9;
        (hash.rotate_left(ROTATE) ^ word).wrapping_mul(SEED32)
    }

    let mut hash = 0u32;
    let mut rest = bytes;

    // Full 4-byte words. Slice patterns peel the bytes off, so no index arithmetic is needed.
    while let [b0, b1, b2, b3, tail @ ..] = rest {
        hash = hash_word_32(hash, u32::from_le_bytes([*b0, *b1, *b2, *b3]));
        rest = tail;
    }

    // At most three bytes are left: one optional 2-byte word...
    if let [b0, b1, tail @ ..] = rest {
        hash = hash_word_32(hash, u16::from_le_bytes([*b0, *b1]) as u32);
        rest = tail;
    }

    // ...and one optional trailing byte.
    if let [b0, ..] = rest {
        hash = hash_word_32(hash, *b0 as u32);
    }

    hash
}
301
// Lets `DataMarkerIdHash` be stored in `ZeroMap`s; requires the `alloc` Cargo feature.
#[cfg(feature = "alloc")]
impl<'a> zerovec::maps::ZeroMapKV<'a> for DataMarkerIdHash {
    type Container = zerovec::ZeroVec<'a, Self>;
    type Slice = zerovec::ZeroSlice<Self>;
    type GetType = <Self as AsULE>::ULE;
    type OwnedType = Self;
}
309
310impl AsULE for DataMarkerIdHash {
311    type ULE = Self;
312    #[inline]
313    fn to_unaligned(self) -> Self::ULE {
314        self
315    }
316    #[inline]
317    fn from_unaligned(unaligned: Self::ULE) -> Self {
318        unaligned
319    }
320}
321
// SAFETY: the ULE type of `DataMarkerIdHash` is `Self`, and both `AsULE` conversions are the
// identity function.
unsafe impl EqULE for DataMarkerIdHash {}
324
/// The ID of a data marker.
///
/// This is generally a [`DataMarkerIdHash`]. If debug assertions or the `export` Cargo feature
/// are enabled, this also contains a human-readable string for an improved `Debug` implementation.
///
/// Equality, ordering and hashing only consider the hash bytes; the human-readable string does
/// not take part in any of them.
#[derive(Debug, Copy, Clone, Eq)]
pub struct DataMarkerId {
    // Human-readable marker name; only present with debug assertions or the `export` feature.
    #[cfg(any(feature = "export", debug_assertions))]
    debug: &'static str,
    // Identifying bytes. `from_name` fills these with `DataMarkerIdHash::LEADING_TAG` followed
    // by the little-endian FxHash of the name.
    hash: [u8; 8],
}
335
336impl PartialEq for DataMarkerId {
337    #[inline]
338    fn eq(&self, other: &Self) -> bool {
339        self.hash == other.hash
340    }
341}
342
343impl Ord for DataMarkerId {
344    #[inline]
345    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
346        self.hash.cmp(&other.hash)
347    }
348}
349
350impl PartialOrd for DataMarkerId {
351    #[inline]
352    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
353        Some(self.hash.cmp(&other.hash))
354    }
355}
356
357impl core::hash::Hash for DataMarkerId {
358    #[inline]
359    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
360        self.hash.hash(state)
361    }
362}
363
364impl DataMarkerId {
365    #[doc(hidden)]
366    // macro use
367    // Error is a str of the expected character class and the index where it wasn't encountered
368    // The indexing operations in this function have been reviewed in detail and won't panic.
369    pub const fn from_name(name: &'static str) -> Result<Self, (&'static str, usize)> {
370        #![allow(clippy::indexing_slicing)]
371        if !name.as_bytes()[name.len() - 1].is_ascii_digit() {
372            return Err(("[0-9]", name.len()));
373        }
374        let mut i = name.len() - 1;
375        while name.as_bytes()[i - 1].is_ascii_digit() {
376            i -= 1;
377        }
378        if name.as_bytes()[i - 1] != b'V' {
379            return Err(("V", i));
380        }
381
382        let magic = DataMarkerIdHash::LEADING_TAG;
383        let hash = fxhash_32(name.as_bytes()).to_le_bytes();
384
385        Ok(Self {
386            #[cfg(any(feature = "export", debug_assertions))]
387            debug: name,
388            hash: [
389                magic[0], magic[1], magic[2], magic[3], hash[0], hash[1], hash[2], hash[3],
390            ],
391        })
392    }
393
394    /// Gets a platform-independent hash of a [`DataMarkerId`].
395    ///
396    /// The hash is 4 bytes and allows for fast comparison.
397    ///
398    /// # Example
399    ///
400    /// ```
401    /// use icu_provider::prelude::*;
402    ///
403    /// icu_provider::data_marker!(FooV1, &'static str);
404    ///
405    /// assert_eq!(FooV1::INFO.id.hashed().to_bytes(), [198, 217, 86, 48]);
406    /// ```
407    #[inline]
408    pub const fn hashed(self) -> DataMarkerIdHash {
409        let [.., h1, h2, h3, h4] = self.hash;
410        DataMarkerIdHash([h1, h2, h3, h4])
411    }
412
413    /// Returns the marker name.
414    ///
415    /// For size reasons, this is only available with the `export` Cargo feature.
416    #[cfg(feature = "export")]
417    pub const fn name(self) -> &'static str {
418        self.debug
419    }
420}
421
/// Used for loading data from a dynamic ICU4X data provider.
///
/// A data marker is tightly coupled with the code that uses it to load data at runtime.
/// Executables can be searched for `DataMarkerInfo` instances to produce optimized data files.
/// Therefore, users should not generally create DataMarkerInfo instances; they should instead use
/// the ones exported by a component.
///
/// Note that equality compares every field, whereas ordering and hashing only consider
/// [`id`](Self::id).
#[derive(Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct DataMarkerInfo {
    /// The ID of this marker.
    pub id: DataMarkerId,
    /// Whether this data marker only has a single payload, not keyed by a data identifier.
    pub is_singleton: bool,
    /// Whether this data marker uses checksums for integrity purposes.
    pub has_checksum: bool,
    /// The fallback to use for this data marker.
    pub fallback_config: LocaleFallbackConfig,
    /// The attributes domain for this data marker. This can be used for filtering marker
    /// attributes during provider export.
    #[cfg(feature = "export")]
    pub attributes_domain: &'static str,
}
444
445impl PartialOrd for DataMarkerInfo {
446    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
447        Some(self.id.cmp(&other.id))
448    }
449}
450
451impl Ord for DataMarkerInfo {
452    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
453        self.id.cmp(&other.id)
454    }
455}
456
457impl core::hash::Hash for DataMarkerInfo {
458    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
459        self.id.hash(state)
460    }
461}
462
463impl DataMarkerInfo {
464    /// See [`Default::default`]
465    pub const fn from_id(id: DataMarkerId) -> Self {
466        Self {
467            id,
468            fallback_config: LocaleFallbackConfig::default(),
469            is_singleton: false,
470            has_checksum: false,
471            #[cfg(feature = "export")]
472            attributes_domain: "",
473        }
474    }
475
476    /// Returns [`Ok`] if this data marker matches the argument, or the appropriate error.
477    ///
478    /// Convenience method for data providers that support a single [`DataMarkerInfo`].
479    ///
480    /// # Examples
481    ///
482    /// ```
483    /// use icu_provider::hello_world::*;
484    /// use icu_provider::prelude::*;
485    ///
486    /// icu_provider::data_marker!(
487    ///     DummyV1,
488    ///     <HelloWorldV1 as DynamicDataMarker>::DataStruct
489    /// );
490    ///
491    /// assert!(matches!(
492    ///     HelloWorldV1::INFO.match_marker(HelloWorldV1::INFO),
493    ///     Ok(())
494    /// ));
495    /// assert!(matches!(
496    ///     HelloWorldV1::INFO.match_marker(DummyV1::INFO),
497    ///     Err(DataError {
498    ///         kind: DataErrorKind::MarkerNotFound,
499    ///         ..
500    ///     })
501    /// ));
502    ///
503    /// // The error context contains the argument:
504    /// assert_eq!(
505    ///     HelloWorldV1::INFO
506    ///         .match_marker(DummyV1::INFO)
507    ///         .unwrap_err()
508    ///         .marker,
509    ///     Some(DummyV1::INFO.id)
510    /// );
511    /// ```
512    pub fn match_marker(self, marker: Self) -> Result<(), DataError> {
513        if self == marker {
514            Ok(())
515        } else {
516            Err(DataErrorKind::MarkerNotFound.with_marker(marker))
517        }
518    }
519
520    /// Constructs a [`DataLocale`] for this [`DataMarkerInfo`].
521    pub fn make_locale(self, locale: LocalePreferences) -> DataLocale {
522        if self.fallback_config.priority == LocaleFallbackPriority::Region {
523            locale.to_data_locale_region_priority()
524        } else {
525            locale.to_data_locale_language_priority()
526        }
527    }
528}
529
/// Creates a data marker.
///
/// The macro declares a unit struct with the given name and implements
/// [`DynamicDataMarker`](crate::DynamicDataMarker) and [`DataMarker`](crate::DataMarker) for it.
/// Attributes (such as doc comments) written before the name are applied to the struct.
///
/// # Examples
///
/// ```
/// icu_provider::data_marker!(DummyV1, &'static str);
/// ```
///
/// The identifier needs to end with a `V` followed by one or more digits (the version number).
///
/// Invalid identifiers are compile-time errors (as [`data_marker!`](crate::data_marker) uses `const`).
///
/// ```compile_fail,E0080
/// icu_provider::data_marker!(Dummy, &'static str);
/// ```
///
/// Further `field = value` pairs may follow the data struct type. Each of them is assigned to
/// the field of that name in the generated [`DataMarkerInfo`](crate::DataMarkerInfo):
///
/// ```
/// use icu_provider::prelude::*;
///
/// icu_provider::data_marker!(DummySingletonV1, &'static str, is_singleton = true);
///
/// assert!(DummySingletonV1::INFO.is_singleton);
/// ```
#[macro_export] // canonical location is crate root
macro_rules! data_marker {
    // Shape: attributes, name, optional string literal, data struct type, then zero or more
    // (optionally attributed) `field = value` pairs and an optional trailing comma.
    ($(#[$doc:meta])* $name:ident, $($debug:literal,)? $struct:ty $(, $(#[$meta:meta])* $info_field:ident = $info_val:expr)* $(,)?) => {
        $(#[$doc])*
        #[non_exhaustive]
        pub struct $name;
        impl $crate::DynamicDataMarker for $name {
            type DataStruct = $struct;
        }
        impl $crate::DataMarker for $name {
            const INFO: $crate::DataMarkerInfo = {
                // Only emitted when the optional string literal was supplied. The literal is
                // used nowhere else: it ends up in a doc example on a private helper struct
                // that compares it, split on `/` and re-capitalized, with the identifier.
                $(
                    /// ```rust
                    #[doc = concat!("let ident = \"", stringify!($name), "\";")]
                    #[doc = concat!("let debug = \"", $debug, "\";")]
                    /// assert_eq!(
                    ///     debug.split('/').map(|s| {
                    ///         let mut b = s.to_ascii_lowercase().into_bytes();
                    ///         b[0] = b[0].to_ascii_uppercase();
                    ///         String::from_utf8(b).unwrap()
                    ///     })
                    ///     .collect::<Vec<_>>()
                    ///     .join(""),
                    ///     ident
                    /// );
                    /// ```
                    #[allow(dead_code)]
                    struct DebugTest;
                )?
                #[allow(unused_mut)]
                // Force evaluation even if marker is unused
                let mut info = const { $crate::DataMarkerInfo::from_id(
                     match $crate::marker::DataMarkerId::from_name(stringify!($name)) {
                        Ok(path) => path,
                        #[allow(clippy::panic)] // Const context
                        Err(_) => panic!(concat!("Invalid marker name: ", stringify!($name))),
                })};
                // One assignment per `field = value` pair, keeping any attributes that were
                // written in front of the pair.
                $(
                    $(#[$meta])*
                    {info.$info_field = $info_val;}
                )*
                info
            };
        }
    }
}
591
592impl fmt::Debug for DataMarkerInfo {
593    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
594        #[cfg(any(feature = "export", debug_assertions))]
595        return f.write_str(self.id.debug);
596        #[cfg(not(any(feature = "export", debug_assertions)))]
597        return write!(f, "{:?}", self.id);
598    }
599}
600
601/// A marker for the given `DataStruct`.
602#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
603pub struct ErasedMarker<DataStruct: for<'a> Yokeable<'a>>(PhantomData<DataStruct>);
604impl<DataStruct: for<'a> Yokeable<'a>> DynamicDataMarker for ErasedMarker<DataStruct> {
605    type DataStruct = DataStruct;
606}
607
#[test]
fn test_marker_syntax() {
    // Valid markers:
    for name in [
        "HelloWorldV1",
        "HelloWorldFooV1",
        "HelloWorldV999",
        "Hello485FooV1",
    ] {
        DataMarkerId::from_name(name).unwrap();
    }

    // Names that do not end in a digit: no version at all, a bare `V`, or trailing letters.
    // All of them report a missing digit at the end of the name.
    for name in [
        "HelloWorld",
        "HelloWorldV",
        "HelloWorldVFoo",
        "HelloWorldV1Foo",
    ] {
        assert_eq!(DataMarkerId::from_name(name), Err(("[0-9]", name.len())));
    }
}
635
// The `debug` field only exists in this configuration, so the test is compiled under the same
// condition. Without the gate, a release test build without `export` would not compile.
#[cfg(any(feature = "export", debug_assertions))]
#[test]
fn test_id_debug() {
    let id = DataMarkerId::from_name("BarV1").unwrap();
    assert_eq!(id.debug, "BarV1");
}
640
#[test]
fn test_hash_word_32() {
    // (expected hash, input) pairs covering every remainder length of the 4-byte word loop.
    let cases: [(u32, &str); 10] = [
        (0, ""),
        (0xF3051F19, "a"),
        (0x2F9DF119, "ab"),
        (0xCB1D9396, "abc"),
        (0x8628F119, "abcd"),
        (0xBEBDB56D, "abcde"),
        (0x1CE8476D, "abcdef"),
        (0xC0F176A4, "abcdefg"),
        (0x09AB476D, "abcdefgh"),
        (0xB72F5D88, "abcdefghi"),
    ];
    for (expected, input) in cases {
        assert_eq!(expected, fxhash_32(input.as_bytes()));
    }
}
654
#[test]
fn test_id_hash() {
    // Pins the compact hash of a known marker name.
    let id = DataMarkerId::from_name("BarV1").unwrap();
    let expected = DataMarkerIdHash([212, 77, 158, 241]);
    assert_eq!(id.hashed(), expected);
}