icu_provider/marker.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::fallback::{LocaleFallbackConfig, LocaleFallbackPriority};
6use crate::{DataError, DataErrorKind, DataLocale, DataProvider, DataProviderWithMarker};
7use core::fmt;
8use core::marker::PhantomData;
9use icu_locale_core::preferences::LocalePreferences;
10use yoke::Yokeable;
11use zerovec::ule::*;
12
/// Trait marker for data structs. All types delivered by the data provider must be associated with
/// something implementing this trait.
///
/// Data markers are normally generated with the [`data_marker`](crate::data_marker) macro.
///
/// Also see [`DataMarker`].
///
/// Note: `DynamicDataMarker`s are quasi-const-generic compile-time objects, and as such are expected
/// to be unit structs. As this is not something that can be enforced by the type system, we
/// currently only have a `'static` bound on them (which is needed by a lot of our code).
///
/// # Examples
///
/// Manually implementing `DynamicDataMarker` for a custom type:
///
/// ```
/// use icu_provider::prelude::*;
/// use std::borrow::Cow;
///
/// #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
/// struct MyDataStruct<'data> {
///     message: Cow<'data, str>,
/// }
///
/// struct MyDataStructMarker;
///
/// impl DynamicDataMarker for MyDataStructMarker {
///     type DataStruct = MyDataStruct<'static>;
/// }
///
/// // We can now use MyDataStruct with DataProvider:
/// let s = MyDataStruct {
///     message: Cow::Owned("Hello World".into()),
/// };
/// let payload = DataPayload::<MyDataStructMarker>::from_owned(s);
/// assert_eq!(payload.get().message, "Hello World");
/// ```
pub trait DynamicDataMarker: 'static {
    /// A type that implements [`Yokeable`]. This should typically be the `'static` version of a
    /// data struct.
    type DataStruct: for<'a> Yokeable<'a>;
}
57
/// A [`DynamicDataMarker`] with a [`DataMarkerInfo`] attached.
///
/// Structs implementing this trait are normally generated with the [`data_marker!`] macro.
///
/// Implementing this trait enables this marker to be used with the main [`DataProvider`] trait.
/// Most markers should be associated with a specific [`DataMarkerInfo`] and should therefore
/// implement this trait.
///
/// [`BufferMarker`] is an example of a marker that does _not_ implement this trait.
///
/// Note: `DataMarker`s are quasi-const-generic compile-time objects, and as such are expected
/// to be unit structs. As this is not something that can be enforced by the type system, we
/// currently only have a `'static` bound on them (which is needed by a lot of our code).
///
/// [`data_marker!`]: crate::data_marker
/// [`DataProvider`]: crate::DataProvider
/// [`BufferMarker`]: crate::buf::BufferMarker
pub trait DataMarker: DynamicDataMarker {
    /// The single [`DataMarkerInfo`] associated with this marker.
    const INFO: DataMarkerInfo;
}
79
/// Extension trait for methods on [`DataMarker`].
///
/// Both methods are associated functions (they take no `self`), so they are called on the
/// marker type itself, e.g. `MyMarkerV1::make_locale(prefs)`.
pub trait DataMarkerExt: DataMarker + Sized {
    /// Binds a [`DataMarker`] to a provider supporting it.
    ///
    /// The provider is taken by value and wrapped in a [`DataProviderWithMarker`].
    fn bind<P>(provider: P) -> DataProviderWithMarker<Self, P>
    where
        P: DataProvider<Self>;
    /// Constructs a [`DataLocale`] using fallback preferences from this [`DataMarker`].
    fn make_locale(locale: LocalePreferences) -> DataLocale;
}
89
90impl<M: DataMarker + Sized> DataMarkerExt for M {
91 fn bind<P>(provider: P) -> DataProviderWithMarker<Self, P>
92 where
93 P: DataProvider<Self>,
94 {
95 DataProviderWithMarker::new(provider)
96 }
97
98 fn make_locale(locale: LocalePreferences) -> DataLocale {
99 M::INFO.make_locale(locale)
100 }
101}
102
/// A [`DynamicDataMarker`] that never returns data.
///
/// All types that have non-blanket impls of `DataProvider<M>` are expected to explicitly
/// implement `DataProvider<NeverMarker<Y>>`, returning [`DataErrorKind::MarkerNotFound`].
/// See [`impl_data_provider_never_marker!`].
///
/// The type parameter `Y` is the data struct the marker pretends to deliver; it is only
/// carried as [`PhantomData`], so no value of `Y` is ever stored.
///
/// [`DataErrorKind::MarkerNotFound`]: crate::DataErrorKind::MarkerNotFound
/// [`impl_data_provider_never_marker!`]: crate::marker::impl_data_provider_never_marker
///
/// # Examples
///
/// ```
/// use icu_locale_core::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::marker::NeverMarker;
/// use icu_provider::prelude::*;
///
/// let buffer_provider = HelloWorldProvider.into_json_provider();
///
/// let result = DataProvider::<NeverMarker<HelloWorld<'static>>>::load(
///     &buffer_provider.as_deserializing(),
///     DataRequest {
///         id: DataIdentifierBorrowed::for_locale(&langid!("en").into()),
///         ..Default::default()
///     },
/// );
///
/// assert!(matches!(
///     result,
///     Err(DataError {
///         kind: DataErrorKind::MarkerNotFound,
///         ..
///     })
/// ));
/// ```
#[derive(Debug, Copy, Clone)]
pub struct NeverMarker<Y>(PhantomData<Y>);
140
141impl<Y> DynamicDataMarker for NeverMarker<Y>
142where
143 for<'a> Y: Yokeable<'a>,
144{
145 type DataStruct = Y;
146}
147
148impl<Y> DataMarker for NeverMarker<Y>
149where
150 for<'a> Y: Yokeable<'a>,
151{
152 const INFO: DataMarkerInfo = DataMarkerInfo::from_id(DataMarkerId {
153 #[cfg(any(feature = "export", debug_assertions))]
154 debug: "NeverMarker",
155 hash: *b"nevermar",
156 });
157}
158
/// Implements `DataProvider<NeverMarker<Y>>` on a struct.
///
/// The generated `load` never produces data: it always returns
/// `DataErrorKind::MarkerNotFound`, annotated with the marker info and the request.
///
/// For more information, see [`NeverMarker`].
///
/// # Examples
///
/// ```
/// use icu_locale_core::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::marker::NeverMarker;
/// use icu_provider::prelude::*;
///
/// struct MyProvider;
///
/// icu_provider::marker::impl_data_provider_never_marker!(MyProvider);
///
/// let result = DataProvider::<NeverMarker<HelloWorld<'static>>>::load(
///     &MyProvider,
///     DataRequest {
///         id: DataIdentifierBorrowed::for_locale(&langid!("und").into()),
///         ..Default::default()
///     },
/// );
///
/// assert!(matches!(
///     result,
///     Err(DataError {
///         kind: DataErrorKind::MarkerNotFound,
///         ..
///     })
/// ));
/// ```
#[doc(hidden)] // macro
#[macro_export]
macro_rules! __impl_data_provider_never_marker {
    // `$ty` is the provider type that receives the impl.
    ($ty:path) => {
        impl<Y> $crate::DataProvider<$crate::marker::NeverMarker<Y>> for $ty
        where
            for<'a> Y: $crate::prelude::yoke::Yokeable<'a>,
        {
            fn load(
                &self,
                req: $crate::DataRequest,
            ) -> Result<$crate::DataResponse<$crate::marker::NeverMarker<Y>>, $crate::DataError>
            {
                // Unconditionally fail; the error carries the marker info and the request.
                Err($crate::DataErrorKind::MarkerNotFound.with_req(
                    <$crate::marker::NeverMarker<Y> as $crate::DataMarker>::INFO,
                    req,
                ))
            }
        }
    };
}
// Re-export the `#[macro_export]`ed macro under its public name inside this module.
#[doc(inline)]
pub use __impl_data_provider_never_marker as impl_data_provider_never_marker;
214
/// A compact hash of a [`DataMarkerInfo`]'s [`DataMarkerId`]. Useful for keys in maps.
///
/// Values are obtained from [`DataMarkerId::hashed`] and consist of exactly four bytes.
///
/// The hash will be stable over time within major releases.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, ULE)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct DataMarkerIdHash([u8; 4]);
222
223impl DataMarkerIdHash {
224 /// Magic bytes to locate [`DataMarkerIdHash`]es in binaries.
225 pub const LEADING_TAG: &[u8] = b"tdmh";
226
227 /// Gets the hash value as a byte array.
228 pub const fn to_bytes(self) -> [u8; 4] {
229 self.0
230 }
231}
232
/// Const function to compute the FxHash of a byte array.
///
/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our
/// use case since the strings being hashed originate from a trusted source (the ICU4X
/// components), and the hashes are computed at compile time, so we can check for collisions.
///
/// We could have considered a SHA or other cryptographic hash function. However, we are using
/// FxHash because:
///
/// 1. There is precedent for this algorithm in Rust
/// 2. The algorithm is easy to implement as a const function
/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree
/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits,
///    such that truncation would be required in order to fit into a u32, partially reducing
///    the benefit of a cryptographically secure algorithm
///
/// The input is consumed as little-endian 4-byte words; a trailing 2-byte group and then a
/// trailing single byte (if present) are each mixed in as one more word.
const fn fxhash_32(bytes: &[u8]) -> u32 {
    // This code is adapted from https://github.com/rust-lang/rustc-hash,
    // whose license text is reproduced below.
    //
    // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
    // file at the top-level directory of this distribution and at
    // http://rust-lang.org/COPYRIGHT.
    //
    // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
    // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
    // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
    // option. This file may not be copied, modified, or distributed
    // except according to those terms.

    /// Mixes one 32-bit word into the running hash: rotate, xor, multiply.
    #[inline]
    const fn hash_word_32(hash: u32, word: u32) -> u32 {
        const ROTATE: u32 = 5;
        const SEED32: u32 = 0x9e_37_79_b9;
        (hash.rotate_left(ROTATE) ^ word).wrapping_mul(SEED32)
    }

    // Slice patterns peel bytes off the front, so no explicit indexing is needed.
    let mut rest = bytes;
    let mut hash: u32 = 0;

    // Full 4-byte words.
    while let [b0, b1, b2, b3, tail @ ..] = rest {
        hash = hash_word_32(hash, u32::from_le_bytes([*b0, *b1, *b2, *b3]));
        rest = tail;
    }

    // At most three bytes remain: first a 2-byte group, if available...
    if let [b0, b1, tail @ ..] = rest {
        hash = hash_word_32(hash, u16::from_le_bytes([*b0, *b1]) as u32);
        rest = tail;
    }

    // ...then a final single byte, if available.
    if let [b0, ..] = rest {
        hash = hash_word_32(hash, *b0 as u32);
    }

    hash
}
301
// Implements `ZeroMapKV` (only with the `alloc` feature) so that `DataMarkerIdHash` can be
// used with the `zerovec::maps` types; the hash is stored in a plain `ZeroVec`/`ZeroSlice`.
#[cfg(feature = "alloc")]
impl<'a> zerovec::maps::ZeroMapKV<'a> for DataMarkerIdHash {
    type Container = zerovec::ZeroVec<'a, DataMarkerIdHash>;
    type Slice = zerovec::ZeroSlice<DataMarkerIdHash>;
    type GetType = <DataMarkerIdHash as AsULE>::ULE;
    type OwnedType = DataMarkerIdHash;
}
309
// `DataMarkerIdHash` serves as its own unaligned (`ULE`) representation, so both
// conversions below are identity functions.
impl AsULE for DataMarkerIdHash {
    type ULE = Self;
    /// Returns `self` unchanged.
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self
    }
    /// Returns `unaligned` unchanged.
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        unaligned
    }
}
321
// Safety: `DataMarkerIdHash` is its own ULE type (`<Self as AsULE>::ULE = Self`), so the
// aligned and unaligned representations are the very same type.
unsafe impl EqULE for DataMarkerIdHash {}
324
/// The ID of a data marker.
///
/// This is generally a [`DataMarkerIdHash`]. If debug assertions or the `export` Cargo feature
/// are enabled, this also contains a human-readable string for an improved `Debug` implementation.
///
/// Equality, ordering and hashing are implemented by hand and look at the `hash` bytes only;
/// the optional `debug` string never takes part in comparisons.
#[derive(Debug, Copy, Clone, Eq)]
pub struct DataMarkerId {
    // Human-readable name; only present with the `export` feature or debug assertions.
    #[cfg(any(feature = "export", debug_assertions))]
    debug: &'static str,
    // Eight bytes; `from_name` fills them with `DataMarkerIdHash::LEADING_TAG` followed by
    // the little-endian FxHash of the name.
    hash: [u8; 8],
}
335
336impl PartialEq for DataMarkerId {
337 #[inline]
338 fn eq(&self, other: &Self) -> bool {
339 self.hash == other.hash
340 }
341}
342
343impl Ord for DataMarkerId {
344 #[inline]
345 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
346 self.hash.cmp(&other.hash)
347 }
348}
349
350impl PartialOrd for DataMarkerId {
351 #[inline]
352 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
353 Some(self.hash.cmp(&other.hash))
354 }
355}
356
357impl core::hash::Hash for DataMarkerId {
358 #[inline]
359 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
360 self.hash.hash(state)
361 }
362}
363
364impl DataMarkerId {
365 #[doc(hidden)]
366 // macro use
367 // Error is a str of the expected character class and the index where it wasn't encountered
368 // The indexing operations in this function have been reviewed in detail and won't panic.
369 pub const fn from_name(name: &'static str) -> Result<Self, (&'static str, usize)> {
370 #![allow(clippy::indexing_slicing)]
371 if !name.as_bytes()[name.len() - 1].is_ascii_digit() {
372 return Err(("[0-9]", name.len()));
373 }
374 let mut i = name.len() - 1;
375 while name.as_bytes()[i - 1].is_ascii_digit() {
376 i -= 1;
377 }
378 if name.as_bytes()[i - 1] != b'V' {
379 return Err(("V", i));
380 }
381
382 let magic = DataMarkerIdHash::LEADING_TAG;
383 let hash = fxhash_32(name.as_bytes()).to_le_bytes();
384
385 Ok(Self {
386 #[cfg(any(feature = "export", debug_assertions))]
387 debug: name,
388 hash: [
389 magic[0], magic[1], magic[2], magic[3], hash[0], hash[1], hash[2], hash[3],
390 ],
391 })
392 }
393
394 /// Gets a platform-independent hash of a [`DataMarkerId`].
395 ///
396 /// The hash is 4 bytes and allows for fast comparison.
397 ///
398 /// # Example
399 ///
400 /// ```
401 /// use icu_provider::prelude::*;
402 ///
403 /// icu_provider::data_marker!(FooV1, &'static str);
404 ///
405 /// assert_eq!(FooV1::INFO.id.hashed().to_bytes(), [198, 217, 86, 48]);
406 /// ```
407 #[inline]
408 pub const fn hashed(self) -> DataMarkerIdHash {
409 let [.., h1, h2, h3, h4] = self.hash;
410 DataMarkerIdHash([h1, h2, h3, h4])
411 }
412
413 /// Returns the marker name.
414 ///
415 /// For size reasons, this is only available with the `export` Cargo feature.
416 #[cfg(feature = "export")]
417 pub const fn name(self) -> &'static str {
418 self.debug
419 }
420}
421
/// Used for loading data from a dynamic ICU4X data provider.
///
/// A data marker is tightly coupled with the code that uses it to load data at runtime.
/// Executables can be searched for `DataMarkerInfo` instances to produce optimized data files.
/// Therefore, users should not generally create `DataMarkerInfo` instances; they should instead
/// use the ones exported by a component.
///
/// Note that ordering and hashing are implemented by hand and consider only [`id`](Self::id),
/// whereas the derived equality compares every field.
#[derive(Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct DataMarkerInfo {
    /// The ID of this marker.
    pub id: DataMarkerId,
    /// Whether this data marker only has a single payload, not keyed by a data identifier.
    pub is_singleton: bool,
    /// Whether this data marker uses checksums for integrity purposes.
    pub has_checksum: bool,
    /// The fallback to use for this data marker.
    pub fallback_config: LocaleFallbackConfig,
    /// The attributes domain for this data marker. This can be used for filtering marker
    /// attributes during provider export.
    #[cfg(feature = "export")]
    pub attributes_domain: &'static str,
}
444
445impl PartialOrd for DataMarkerInfo {
446 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
447 Some(self.id.cmp(&other.id))
448 }
449}
450
451impl Ord for DataMarkerInfo {
452 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
453 self.id.cmp(&other.id)
454 }
455}
456
457impl core::hash::Hash for DataMarkerInfo {
458 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
459 self.id.hash(state)
460 }
461}
462
463impl DataMarkerInfo {
464 /// See [`Default::default`]
465 pub const fn from_id(id: DataMarkerId) -> Self {
466 Self {
467 id,
468 fallback_config: LocaleFallbackConfig::default(),
469 is_singleton: false,
470 has_checksum: false,
471 #[cfg(feature = "export")]
472 attributes_domain: "",
473 }
474 }
475
476 /// Returns [`Ok`] if this data marker matches the argument, or the appropriate error.
477 ///
478 /// Convenience method for data providers that support a single [`DataMarkerInfo`].
479 ///
480 /// # Examples
481 ///
482 /// ```
483 /// use icu_provider::hello_world::*;
484 /// use icu_provider::prelude::*;
485 ///
486 /// icu_provider::data_marker!(
487 /// DummyV1,
488 /// <HelloWorldV1 as DynamicDataMarker>::DataStruct
489 /// );
490 ///
491 /// assert!(matches!(
492 /// HelloWorldV1::INFO.match_marker(HelloWorldV1::INFO),
493 /// Ok(())
494 /// ));
495 /// assert!(matches!(
496 /// HelloWorldV1::INFO.match_marker(DummyV1::INFO),
497 /// Err(DataError {
498 /// kind: DataErrorKind::MarkerNotFound,
499 /// ..
500 /// })
501 /// ));
502 ///
503 /// // The error context contains the argument:
504 /// assert_eq!(
505 /// HelloWorldV1::INFO
506 /// .match_marker(DummyV1::INFO)
507 /// .unwrap_err()
508 /// .marker,
509 /// Some(DummyV1::INFO.id)
510 /// );
511 /// ```
512 pub fn match_marker(self, marker: Self) -> Result<(), DataError> {
513 if self == marker {
514 Ok(())
515 } else {
516 Err(DataErrorKind::MarkerNotFound.with_marker(marker))
517 }
518 }
519
520 /// Constructs a [`DataLocale`] for this [`DataMarkerInfo`].
521 pub fn make_locale(self, locale: LocalePreferences) -> DataLocale {
522 if self.fallback_config.priority == LocaleFallbackPriority::Region {
523 locale.to_data_locale_region_priority()
524 } else {
525 locale.to_data_locale_language_priority()
526 }
527 }
528}
529
/// Creates a data marker.
///
/// The macro takes, in order: optional attributes for the generated struct, the marker
/// name, an optional string literal followed by a comma, the data struct type, and then any
/// number of `field = value` pairs that are assigned onto the generated `DataMarkerInfo`.
///
/// It expands to a `pub` unit struct with that name plus `DynamicDataMarker` and `DataMarker`
/// implementations for it.
///
/// # Examples
///
/// ```
/// icu_provider::data_marker!(DummyV1, &'static str);
/// ```
///
/// The identifier needs to end with a `V` followed by one or more digits (the version number).
///
/// Invalid identifiers are compile-time errors (as [`data_marker!`](crate::data_marker) uses `const`).
///
/// ```compile_fail,E0080
/// icu_provider::data_marker!(Dummy, &'static str);
/// ```
#[macro_export] // canonical location is crate root
macro_rules! data_marker {
    ($(#[$doc:meta])* $name:ident, $($debug:literal,)? $struct:ty $(, $(#[$meta:meta])* $info_field:ident = $info_val:expr)* $(,)?) => {
        $(#[$doc])*
        #[non_exhaustive]
        pub struct $name;
        impl $crate::DynamicDataMarker for $name {
            type DataStruct = $struct;
        }
        impl $crate::DataMarker for $name {
            const INFO: $crate::DataMarkerInfo = {
                // Only emitted when the optional literal is supplied: a local item whose doc
                // attributes spell out an assertion relating the literal to the marker name.
                $(
                    /// ```rust
                    #[doc = concat!("let ident = \"", stringify!($name), "\";")]
                    #[doc = concat!("let debug = \"", $debug, "\";")]
                    /// assert_eq!(
                    /// debug.split('/').map(|s| {
                    /// let mut b = s.to_ascii_lowercase().into_bytes();
                    /// b[0] = b[0].to_ascii_uppercase();
                    /// String::from_utf8(b).unwrap()
                    /// })
                    /// .collect::<Vec<_>>()
                    /// .join(""),
                    /// ident
                    /// );
                    /// ```
                    #[allow(dead_code)]
                    struct DebugTest;
                )?
                // `info` is only mutated when `field = value` overrides are supplied.
                #[allow(unused_mut)]
                // Force evaluation even if marker is unused
                let mut info = const { $crate::DataMarkerInfo::from_id(
                     match $crate::marker::DataMarkerId::from_name(stringify!($name)) {
                        Ok(path) => path,
                        #[allow(clippy::panic)] // Const context
                        Err(_) => panic!(concat!("Invalid marker name: ", stringify!($name))),
                })};
                // Apply each `field = value` override, keeping any attributes given for it.
                $(
                    $(#[$meta])*
                    {info.$info_field = $info_val;}
                )*
                info
            };
        }
    }
}
591
592impl fmt::Debug for DataMarkerInfo {
593 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
594 #[cfg(any(feature = "export", debug_assertions))]
595 return f.write_str(self.id.debug);
596 #[cfg(not(any(feature = "export", debug_assertions)))]
597 return write!(f, "{:?}", self.id);
598 }
599}
600
/// A marker for the given `DataStruct`.
///
/// The data struct type is only carried as [`PhantomData`]; no value of it is stored.
/// This type implements [`DynamicDataMarker`] with `DataStruct` as the associated data
/// struct.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct ErasedMarker<DataStruct: for<'a> Yokeable<'a>>(PhantomData<DataStruct>);
604impl<DataStruct: for<'a> Yokeable<'a>> DynamicDataMarker for ErasedMarker<DataStruct> {
605 type DataStruct = DataStruct;
606}
607
#[test]
fn test_marker_syntax() {
    // Valid markers:
    for valid in [
        "HelloWorldV1",
        "HelloWorldFooV1",
        "HelloWorldV999",
        "Hello485FooV1",
    ] {
        DataMarkerId::from_name(valid).unwrap();
    }

    // Names that do not end in a digit (no version, bare `V`, or trailing letters) are all
    // rejected with the digit class at the end-of-string index.
    for invalid in [
        "HelloWorld",
        "HelloWorldV",
        "HelloWorldVFoo",
        "HelloWorldV1Foo",
    ] {
        assert_eq!(
            DataMarkerId::from_name(invalid),
            Err(("[0-9]", invalid.len()))
        );
    }
}
635
// The `debug` field only exists when the `export` feature or debug assertions are enabled,
// so the test is compiled under the same condition; otherwise release-mode test builds
// without `export` would fail to compile.
#[cfg(any(feature = "export", debug_assertions))]
#[test]
fn test_id_debug() {
    assert_eq!(DataMarkerId::from_name("BarV1").unwrap().debug, "BarV1");
}
640
#[test]
fn test_hash_word_32() {
    // Expected hashes for the prefixes of "abcdefghi" of length 0, 1, ..., 9.
    let alphabet = b"abcdefghi";
    let expected: [u32; 10] = [
        0,
        0xF3051F19,
        0x2F9DF119,
        0xCB1D9396,
        0x8628F119,
        0xBEBDB56D,
        0x1CE8476D,
        0xC0F176A4,
        0x09AB476D,
        0xB72F5D88,
    ];
    for (len, want) in expected.iter().enumerate() {
        assert_eq!(*want, fxhash_32(&alphabet[..len]));
    }
}
654
#[test]
fn test_id_hash() {
    // The compact hash of a known name must stay stable.
    let id = DataMarkerId::from_name("BarV1").unwrap();
    let expected = DataMarkerIdHash([212, 77, 158, 241]);
    assert_eq!(id.hashed(), expected);
}
661}