half/
bfloat.rs

1#[cfg(all(feature = "serde", feature = "alloc"))]
2#[allow(unused_imports)]
3use alloc::string::ToString;
4#[cfg(feature = "bytemuck")]
5use bytemuck::{Pod, Zeroable};
6use core::{
7    cmp::Ordering,
8    iter::{Product, Sum},
9    num::FpCategory,
10    ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign},
11};
12#[cfg(not(target_arch = "spirv"))]
13use core::{
14    fmt::{
15        Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
16    },
17    num::ParseFloatError,
18    str::FromStr,
19};
20#[cfg(feature = "serde")]
21use serde::{Deserialize, Serialize};
22#[cfg(feature = "zerocopy")]
23use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
24
25pub(crate) mod convert;
26
/// A 16-bit floating point type implementing the [`bfloat16`] format.
///
/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard
/// `binary32`, a.k.a. [`f32`]. [`struct@bf16`] has approximately the same dynamic range as [`f32`]
/// but a lower precision than [`struct@f16`][crate::f16]: while [`struct@f16`][crate::f16] has a
/// precision of 11 bits, [`struct@bf16`] has a precision of only 8 bits.
///
/// The value is stored as its raw bit pattern in a single [`u16`] field, and the type is
/// `#[repr(transparent)]` over that field. The [`Default`] value is the all-zero bit pattern.
///
/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv", rkyv(resolver = Bf16Resolver))]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(
    feature = "zerocopy",
    derive(FromBytes, Immutable, IntoBytes, KnownLayout)
)]
#[cfg_attr(kani, derive(kani::Arbitrary))]
pub struct bf16(u16);
51
52impl bf16 {
53    /// Constructs a [`struct@bf16`] value from the raw bits.
54    #[inline]
55    #[must_use]
56    pub const fn from_bits(bits: u16) -> bf16 {
57        bf16(bits)
58    }
59
60    /// Constructs a [`struct@bf16`] value from a 32-bit floating point value.
61    ///
62    /// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
63    /// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
64    /// other values are truncated and rounded to the nearest representable value.
65    #[inline]
66    #[must_use]
67    pub fn from_f32(value: f32) -> bf16 {
68        Self::from_f32_const(value)
69    }
70
71    /// Constructs a [`struct@bf16`] value from a 32-bit floating point value.
72    ///
73    /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware
74    /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred
75    /// in any non-`const` context.
76    ///
77    /// This operation is lossy. If the 32-bit value is too large to fit, ±∞ will result. NaN values
78    /// are preserved. Subnormal values that are too tiny to be represented will result in ±0. All
79    /// other values are truncated and rounded to the nearest representable value.
80    #[inline]
81    #[must_use]
82    pub const fn from_f32_const(value: f32) -> bf16 {
83        bf16(convert::f32_to_bf16(value))
84    }
85
86    /// Constructs a [`struct@bf16`] value from a 64-bit floating point value.
87    ///
88    /// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
89    /// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
90    /// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
91    /// values are truncated and rounded to the nearest representable value.
92    #[inline]
93    #[must_use]
94    pub fn from_f64(value: f64) -> bf16 {
95        Self::from_f64_const(value)
96    }
97
98    /// Constructs a [`struct@bf16`] value from a 64-bit floating point value.
99    ///
100    /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware
101    /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred
102    /// in any non-`const` context.
103    ///
104    /// This operation is lossy. If the 64-bit value is to large to fit, ±∞ will result. NaN values
105    /// are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0.
106    /// Exponents that underflow the minimum exponent will result in subnormals or ±0. All other
107    /// values are truncated and rounded to the nearest representable value.
108    #[inline]
109    #[must_use]
110    pub const fn from_f64_const(value: f64) -> bf16 {
111        bf16(convert::f64_to_bf16(value))
112    }
113
114    /// Converts a [`struct@bf16`] into the underlying bit representation.
115    #[inline]
116    #[must_use]
117    pub const fn to_bits(self) -> u16 {
118        self.0
119    }
120
121    /// Returns the memory representation of the underlying bit representation as a byte array in
122    /// little-endian byte order.
123    ///
124    /// # Examples
125    ///
126    /// ```rust
127    /// # use half::prelude::*;
128    /// let bytes = bf16::from_f32(12.5).to_le_bytes();
129    /// assert_eq!(bytes, [0x48, 0x41]);
130    /// ```
131    #[inline]
132    #[must_use]
133    pub const fn to_le_bytes(self) -> [u8; 2] {
134        self.0.to_le_bytes()
135    }
136
137    /// Returns the memory representation of the underlying bit representation as a byte array in
138    /// big-endian (network) byte order.
139    ///
140    /// # Examples
141    ///
142    /// ```rust
143    /// # use half::prelude::*;
144    /// let bytes = bf16::from_f32(12.5).to_be_bytes();
145    /// assert_eq!(bytes, [0x41, 0x48]);
146    /// ```
147    #[inline]
148    #[must_use]
149    pub const fn to_be_bytes(self) -> [u8; 2] {
150        self.0.to_be_bytes()
151    }
152
153    /// Returns the memory representation of the underlying bit representation as a byte array in
154    /// native byte order.
155    ///
156    /// As the target platform's native endianness is used, portable code should use
157    /// [`to_be_bytes`][bf16::to_be_bytes] or [`to_le_bytes`][bf16::to_le_bytes], as appropriate,
158    /// instead.
159    ///
160    /// # Examples
161    ///
162    /// ```rust
163    /// # use half::prelude::*;
164    /// let bytes = bf16::from_f32(12.5).to_ne_bytes();
165    /// assert_eq!(bytes, if cfg!(target_endian = "big") {
166    ///     [0x41, 0x48]
167    /// } else {
168    ///     [0x48, 0x41]
169    /// });
170    /// ```
171    #[inline]
172    #[must_use]
173    pub const fn to_ne_bytes(self) -> [u8; 2] {
174        self.0.to_ne_bytes()
175    }
176
177    /// Creates a floating point value from its representation as a byte array in little endian.
178    ///
179    /// # Examples
180    ///
181    /// ```rust
182    /// # use half::prelude::*;
183    /// let value = bf16::from_le_bytes([0x48, 0x41]);
184    /// assert_eq!(value, bf16::from_f32(12.5));
185    /// ```
186    #[inline]
187    #[must_use]
188    pub const fn from_le_bytes(bytes: [u8; 2]) -> bf16 {
189        bf16::from_bits(u16::from_le_bytes(bytes))
190    }
191
192    /// Creates a floating point value from its representation as a byte array in big endian.
193    ///
194    /// # Examples
195    ///
196    /// ```rust
197    /// # use half::prelude::*;
198    /// let value = bf16::from_be_bytes([0x41, 0x48]);
199    /// assert_eq!(value, bf16::from_f32(12.5));
200    /// ```
201    #[inline]
202    #[must_use]
203    pub const fn from_be_bytes(bytes: [u8; 2]) -> bf16 {
204        bf16::from_bits(u16::from_be_bytes(bytes))
205    }
206
207    /// Creates a floating point value from its representation as a byte array in native endian.
208    ///
209    /// As the target platform's native endianness is used, portable code likely wants to use
210    /// [`from_be_bytes`][bf16::from_be_bytes] or [`from_le_bytes`][bf16::from_le_bytes], as
211    /// appropriate instead.
212    ///
213    /// # Examples
214    ///
215    /// ```rust
216    /// # use half::prelude::*;
217    /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") {
218    ///     [0x41, 0x48]
219    /// } else {
220    ///     [0x48, 0x41]
221    /// });
222    /// assert_eq!(value, bf16::from_f32(12.5));
223    /// ```
224    #[inline]
225    #[must_use]
226    pub const fn from_ne_bytes(bytes: [u8; 2]) -> bf16 {
227        bf16::from_bits(u16::from_ne_bytes(bytes))
228    }
229
230    /// Converts a [`struct@bf16`] value into an [`f32`] value.
231    ///
232    /// This conversion is lossless as all values can be represented exactly in [`f32`].
233    #[inline]
234    #[must_use]
235    pub fn to_f32(self) -> f32 {
236        self.to_f32_const()
237    }
238
239    /// Converts a [`struct@bf16`] value into an [`f32`] value.
240    ///
241    /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware
242    /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred
243    /// in any non-`const` context.
244    ///
245    /// This conversion is lossless as all values can be represented exactly in [`f32`].
246    #[inline]
247    #[must_use]
248    pub const fn to_f32_const(self) -> f32 {
249        convert::bf16_to_f32(self.0)
250    }
251
252    /// Converts a [`struct@bf16`] value into an [`f64`] value.
253    ///
254    /// This conversion is lossless as all values can be represented exactly in [`f64`].
255    #[inline]
256    #[must_use]
257    pub fn to_f64(self) -> f64 {
258        self.to_f64_const()
259    }
260
261    /// Converts a [`struct@bf16`] value into an [`f64`] value.
262    ///
263    /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware
264    /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred
265    /// in any non-`const` context.
266    ///
267    /// This conversion is lossless as all values can be represented exactly in [`f64`].
268    #[inline]
269    #[must_use]
270    pub const fn to_f64_const(self) -> f64 {
271        convert::bf16_to_f64(self.0)
272    }
273
274    /// Returns `true` if this value is NaN and `false` otherwise.
275    ///
276    /// # Examples
277    ///
278    /// ```rust
279    /// # use half::prelude::*;
280    ///
281    /// let nan = bf16::NAN;
282    /// let f = bf16::from_f32(7.0_f32);
283    ///
284    /// assert!(nan.is_nan());
285    /// assert!(!f.is_nan());
286    /// ```
287    #[inline]
288    #[must_use]
289    pub const fn is_nan(self) -> bool {
290        self.0 & 0x7FFFu16 > 0x7F80u16
291    }
292
293    /// Returns `true` if this value is ±∞ and `false` otherwise.
294    ///
295    /// # Examples
296    ///
297    /// ```rust
298    /// # use half::prelude::*;
299    ///
300    /// let f = bf16::from_f32(7.0f32);
301    /// let inf = bf16::INFINITY;
302    /// let neg_inf = bf16::NEG_INFINITY;
303    /// let nan = bf16::NAN;
304    ///
305    /// assert!(!f.is_infinite());
306    /// assert!(!nan.is_infinite());
307    ///
308    /// assert!(inf.is_infinite());
309    /// assert!(neg_inf.is_infinite());
310    /// ```
311    #[inline]
312    #[must_use]
313    pub const fn is_infinite(self) -> bool {
314        self.0 & 0x7FFFu16 == 0x7F80u16
315    }
316
317    /// Returns `true` if this number is neither infinite nor NaN.
318    ///
319    /// # Examples
320    ///
321    /// ```rust
322    /// # use half::prelude::*;
323    ///
324    /// let f = bf16::from_f32(7.0f32);
325    /// let inf = bf16::INFINITY;
326    /// let neg_inf = bf16::NEG_INFINITY;
327    /// let nan = bf16::NAN;
328    ///
329    /// assert!(f.is_finite());
330    ///
331    /// assert!(!nan.is_finite());
332    /// assert!(!inf.is_finite());
333    /// assert!(!neg_inf.is_finite());
334    /// ```
335    #[inline]
336    #[must_use]
337    pub const fn is_finite(self) -> bool {
338        self.0 & 0x7F80u16 != 0x7F80u16
339    }
340
341    /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN.
342    ///
343    /// # Examples
344    ///
345    /// ```rust
346    /// # use half::prelude::*;
347    ///
348    /// let min = bf16::MIN_POSITIVE;
349    /// let max = bf16::MAX;
350    /// let lower_than_min = bf16::from_f32(1.0e-39_f32);
351    /// let zero = bf16::from_f32(0.0_f32);
352    ///
353    /// assert!(min.is_normal());
354    /// assert!(max.is_normal());
355    ///
356    /// assert!(!zero.is_normal());
357    /// assert!(!bf16::NAN.is_normal());
358    /// assert!(!bf16::INFINITY.is_normal());
359    /// // Values between 0 and `min` are subnormal.
360    /// assert!(!lower_than_min.is_normal());
361    /// ```
362    #[inline]
363    #[must_use]
364    pub const fn is_normal(self) -> bool {
365        let exp = self.0 & 0x7F80u16;
366        exp != 0x7F80u16 && exp != 0
367    }
368
369    /// Returns the floating point category of the number.
370    ///
371    /// If only one property is going to be tested, it is generally faster to use the specific
372    /// predicate instead.
373    ///
374    /// # Examples
375    ///
376    /// ```rust
377    /// use std::num::FpCategory;
378    /// # use half::prelude::*;
379    ///
380    /// let num = bf16::from_f32(12.4_f32);
381    /// let inf = bf16::INFINITY;
382    ///
383    /// assert_eq!(num.classify(), FpCategory::Normal);
384    /// assert_eq!(inf.classify(), FpCategory::Infinite);
385    /// ```
386    #[must_use]
387    pub const fn classify(self) -> FpCategory {
388        let exp = self.0 & 0x7F80u16;
389        let man = self.0 & 0x007Fu16;
390        match (exp, man) {
391            (0, 0) => FpCategory::Zero,
392            (0, _) => FpCategory::Subnormal,
393            (0x7F80u16, 0) => FpCategory::Infinite,
394            (0x7F80u16, _) => FpCategory::Nan,
395            _ => FpCategory::Normal,
396        }
397    }
398
399    /// Returns a number that represents the sign of `self`.
400    ///
401    /// * 1.0 if the number is positive, +0.0 or [`INFINITY`][bf16::INFINITY]
402    /// * −1.0 if the number is negative, −0.0` or [`NEG_INFINITY`][bf16::NEG_INFINITY]
403    /// * [`NAN`][bf16::NAN] if the number is NaN
404    ///
405    /// # Examples
406    ///
407    /// ```rust
408    /// # use half::prelude::*;
409    ///
410    /// let f = bf16::from_f32(3.5_f32);
411    ///
412    /// assert_eq!(f.signum(), bf16::from_f32(1.0));
413    /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0));
414    ///
415    /// assert!(bf16::NAN.signum().is_nan());
416    /// ```
417    #[must_use]
418    pub const fn signum(self) -> bf16 {
419        if self.is_nan() {
420            self
421        } else if self.0 & 0x8000u16 != 0 {
422            Self::NEG_ONE
423        } else {
424            Self::ONE
425        }
426    }
427
428    /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a
429    /// positive sign bit and +∞.
430    ///
431    /// # Examples
432    ///
433    /// ```rust
434    /// # use half::prelude::*;
435    ///
436    /// let nan = bf16::NAN;
437    /// let f = bf16::from_f32(7.0_f32);
438    /// let g = bf16::from_f32(-7.0_f32);
439    ///
440    /// assert!(f.is_sign_positive());
441    /// assert!(!g.is_sign_positive());
442    /// // NaN can be either positive or negative
443    /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
444    /// ```
445    #[inline]
446    #[must_use]
447    pub const fn is_sign_positive(self) -> bool {
448        self.0 & 0x8000u16 == 0
449    }
450
451    /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a
452    /// negative sign bit and −∞.
453    ///
454    /// # Examples
455    ///
456    /// ```rust
457    /// # use half::prelude::*;
458    ///
459    /// let nan = bf16::NAN;
460    /// let f = bf16::from_f32(7.0f32);
461    /// let g = bf16::from_f32(-7.0f32);
462    ///
463    /// assert!(!f.is_sign_negative());
464    /// assert!(g.is_sign_negative());
465    /// // NaN can be either positive or negative
466    /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
467    /// ```
468    #[inline]
469    #[must_use]
470    pub const fn is_sign_negative(self) -> bool {
471        self.0 & 0x8000u16 != 0
472    }
473
474    /// Returns a number composed of the magnitude of `self` and the sign of `sign`.
475    ///
476    /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`.
477    /// If `self` is NaN, then NaN with the sign of `sign` is returned.
478    ///
479    /// # Examples
480    ///
481    /// ```
482    /// # use half::prelude::*;
483    /// let f = bf16::from_f32(3.5);
484    ///
485    /// assert_eq!(f.copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5));
486    /// assert_eq!(f.copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5));
487    /// assert_eq!((-f).copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5));
488    /// assert_eq!((-f).copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5));
489    ///
490    /// assert!(bf16::NAN.copysign(bf16::from_f32(1.0)).is_nan());
491    /// ```
492    #[inline]
493    #[must_use]
494    pub const fn copysign(self, sign: bf16) -> bf16 {
495        bf16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16))
496    }
497
498    /// Returns the maximum of the two numbers.
499    ///
500    /// If one of the arguments is NaN, then the other argument is returned.
501    ///
502    /// # Examples
503    ///
504    /// ```
505    /// # use half::prelude::*;
506    /// let x = bf16::from_f32(1.0);
507    /// let y = bf16::from_f32(2.0);
508    ///
509    /// assert_eq!(x.max(y), y);
510    /// ```
511    #[inline]
512    #[must_use]
513    pub fn max(self, other: bf16) -> bf16 {
514        if other > self && !other.is_nan() {
515            other
516        } else {
517            self
518        }
519    }
520
521    /// Returns the minimum of the two numbers.
522    ///
523    /// If one of the arguments is NaN, then the other argument is returned.
524    ///
525    /// # Examples
526    ///
527    /// ```
528    /// # use half::prelude::*;
529    /// let x = bf16::from_f32(1.0);
530    /// let y = bf16::from_f32(2.0);
531    ///
532    /// assert_eq!(x.min(y), x);
533    /// ```
534    #[inline]
535    #[must_use]
536    pub fn min(self, other: bf16) -> bf16 {
537        if other < self && !other.is_nan() {
538            other
539        } else {
540            self
541        }
542    }
543
544    /// Restrict a value to a certain interval unless it is NaN.
545    ///
546    /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`.
547    /// Otherwise this returns `self`.
548    ///
549    /// Note that this function returns NaN if the initial value was NaN as well.
550    ///
551    /// # Panics
552    /// Panics if `min > max`, `min` is NaN, or `max` is NaN.
553    ///
554    /// # Examples
555    ///
556    /// ```
557    /// # use half::prelude::*;
558    /// assert!(bf16::from_f32(-3.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(-2.0));
559    /// assert!(bf16::from_f32(0.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(0.0));
560    /// assert!(bf16::from_f32(2.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(1.0));
561    /// assert!(bf16::NAN.clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)).is_nan());
562    /// ```
563    #[inline]
564    #[must_use]
565    pub fn clamp(self, min: bf16, max: bf16) -> bf16 {
566        assert!(min <= max);
567        let mut x = self;
568        if x < min {
569            x = min;
570        }
571        if x > max {
572            x = max;
573        }
574        x
575    }
576
577    /// Returns the ordering between `self` and `other`.
578    ///
579    /// Unlike the standard partial comparison between floating point numbers,
580    /// this comparison always produces an ordering in accordance to
581    /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
582    /// floating point standard. The values are ordered in the following sequence:
583    ///
584    /// - negative quiet NaN
585    /// - negative signaling NaN
586    /// - negative infinity
587    /// - negative numbers
588    /// - negative subnormal numbers
589    /// - negative zero
590    /// - positive zero
591    /// - positive subnormal numbers
592    /// - positive numbers
593    /// - positive infinity
594    /// - positive signaling NaN
595    /// - positive quiet NaN.
596    ///
597    /// The ordering established by this function does not always agree with the
598    /// [`PartialOrd`] and [`PartialEq`] implementations of `bf16`. For example,
599    /// they consider negative and positive zero equal, while `total_cmp`
600    /// doesn't.
601    ///
602    /// The interpretation of the signaling NaN bit follows the definition in
603    /// the IEEE 754 standard, which may not match the interpretation by some of
604    /// the older, non-conformant (e.g. MIPS) hardware implementations.
605    ///
606    /// # Examples
607    /// ```
608    /// # use half::bf16;
609    /// let mut v: Vec<bf16> = vec![];
610    /// v.push(bf16::ONE);
611    /// v.push(bf16::INFINITY);
612    /// v.push(bf16::NEG_INFINITY);
613    /// v.push(bf16::NAN);
614    /// v.push(bf16::MAX_SUBNORMAL);
615    /// v.push(-bf16::MAX_SUBNORMAL);
616    /// v.push(bf16::ZERO);
617    /// v.push(bf16::NEG_ZERO);
618    /// v.push(bf16::NEG_ONE);
619    /// v.push(bf16::MIN_POSITIVE);
620    ///
621    /// v.sort_by(|a, b| a.total_cmp(&b));
622    ///
623    /// assert!(v
624    ///     .into_iter()
625    ///     .zip(
626    ///         [
627    ///             bf16::NEG_INFINITY,
628    ///             bf16::NEG_ONE,
629    ///             -bf16::MAX_SUBNORMAL,
630    ///             bf16::NEG_ZERO,
631    ///             bf16::ZERO,
632    ///             bf16::MAX_SUBNORMAL,
633    ///             bf16::MIN_POSITIVE,
634    ///             bf16::ONE,
635    ///             bf16::INFINITY,
636    ///             bf16::NAN
637    ///         ]
638    ///         .iter()
639    ///     )
640    ///     .all(|(a, b)| a.to_bits() == b.to_bits()));
641    /// ```
642    // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp
643    #[inline]
644    #[must_use]
645    pub fn total_cmp(&self, other: &Self) -> Ordering {
646        let mut left = self.to_bits() as i16;
647        let mut right = other.to_bits() as i16;
648        left ^= (((left >> 15) as u16) >> 1) as i16;
649        right ^= (((right >> 15) as u16) >> 1) as i16;
650        left.cmp(&right)
651    }
652
653    /// Alternate serialize adapter for serializing as a float.
654    ///
655    /// By default, [`struct@bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
656    /// implementation that serializes as an [`f32`] value. It is designed for use with
657    /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by
658    /// the default deserialize implementation.
659    ///
660    /// # Examples
661    ///
662    /// A demonstration on how to use this adapater:
663    ///
664    /// ```
665    /// use serde::{Serialize, Deserialize};
666    /// use half::bf16;
667    ///
668    /// #[derive(Serialize, Deserialize)]
669    /// struct MyStruct {
670    ///     #[serde(serialize_with = "bf16::serialize_as_f32")]
671    ///     value: bf16 // Will be serialized as f32 instead of u16
672    /// }
673    /// ```
674    #[cfg(feature = "serde")]
675    pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
676        serializer.serialize_f32(self.to_f32())
677    }
678
679    /// Alternate serialize adapter for serializing as a string.
680    ///
681    /// By default, [`struct@bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize
682    /// implementation that serializes as a string value. It is designed for use with
683    /// `serialize_with` serde attributes. Deserialization from string values is already supported
684    /// by the default deserialize implementation.
685    ///
686    /// # Examples
687    ///
688    /// A demonstration on how to use this adapater:
689    ///
690    /// ```
691    /// use serde::{Serialize, Deserialize};
692    /// use half::bf16;
693    ///
694    /// #[derive(Serialize, Deserialize)]
695    /// struct MyStruct {
696    ///     #[serde(serialize_with = "bf16::serialize_as_string")]
697    ///     value: bf16 // Will be serialized as a string instead of u16
698    /// }
699    /// ```
700    #[cfg(all(feature = "serde", feature = "alloc"))]
701    pub fn serialize_as_string<S: serde::Serializer>(
702        &self,
703        serializer: S,
704    ) -> Result<S::Ok, S::Error> {
705        serializer.serialize_str(&self.to_string())
706    }
707
    /// Approximate number of [`struct@bf16`] significant digits in base 10
    pub const DIGITS: u32 = 2;
    /// [`struct@bf16`]
    /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value
    ///
    /// This is the difference between 1.0 and the next largest representable number.
    pub const EPSILON: bf16 = bf16(0x3C00u16);
    /// [`struct@bf16`] positive infinity (+∞)
    pub const INFINITY: bf16 = bf16(0x7F80u16);
    /// Number of [`struct@bf16`] significant digits in base 2
    pub const MANTISSA_DIGITS: u32 = 8;
    /// Largest finite [`struct@bf16`] value
    pub const MAX: bf16 = bf16(0x7F7F);
    /// Maximum possible [`struct@bf16`] power of 10 exponent
    pub const MAX_10_EXP: i32 = 38;
    /// Maximum possible [`struct@bf16`] power of 2 exponent
    pub const MAX_EXP: i32 = 128;
    /// Smallest (most negative) finite [`struct@bf16`] value
    pub const MIN: bf16 = bf16(0xFF7F);
    /// Minimum possible normal [`struct@bf16`] power of 10 exponent
    pub const MIN_10_EXP: i32 = -37;
    /// One greater than the minimum possible normal [`struct@bf16`] power of 2 exponent
    pub const MIN_EXP: i32 = -125;
    /// Smallest positive normal [`struct@bf16`] value
    pub const MIN_POSITIVE: bf16 = bf16(0x0080u16);
    /// [`struct@bf16`] Not a Number (NaN)
    pub const NAN: bf16 = bf16(0x7FC0u16);
    /// [`struct@bf16`] negative infinity (−∞)
    pub const NEG_INFINITY: bf16 = bf16(0xFF80u16);
    /// The radix or base of the internal representation of [`struct@bf16`]
    pub const RADIX: u32 = 2;

    /// Minimum positive subnormal [`struct@bf16`] value
    pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16);
    /// Maximum subnormal [`struct@bf16`] value
    pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16);

    /// [`struct@bf16`] 1
    pub const ONE: bf16 = bf16(0x3F80u16);
    /// [`struct@bf16`] 0
    pub const ZERO: bf16 = bf16(0x0000u16);
    /// [`struct@bf16`] −0
    pub const NEG_ZERO: bf16 = bf16(0x8000u16);
    /// [`struct@bf16`] −1
    pub const NEG_ONE: bf16 = bf16(0xBF80u16);

    /// [`struct@bf16`] Euler's number (ℯ)
    pub const E: bf16 = bf16(0x402Eu16);
    /// [`struct@bf16`] Archimedes' constant (π)
    pub const PI: bf16 = bf16(0x4049u16);
    /// [`struct@bf16`] 1/π
    pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16);
    /// [`struct@bf16`] 1/√2
    pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16);
    /// [`struct@bf16`] 2/π
    pub const FRAC_2_PI: bf16 = bf16(0x3F23u16);
    /// [`struct@bf16`] 2/√π
    pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16);
    /// [`struct@bf16`] π/2
    pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16);
    /// [`struct@bf16`] π/3
    pub const FRAC_PI_3: bf16 = bf16(0x3F86u16);
    /// [`struct@bf16`] π/4
    pub const FRAC_PI_4: bf16 = bf16(0x3F49u16);
    /// [`struct@bf16`] π/6
    pub const FRAC_PI_6: bf16 = bf16(0x3F06u16);
    /// [`struct@bf16`] π/8
    pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16);
    /// [`struct@bf16`] 𝗅𝗇 10
    pub const LN_10: bf16 = bf16(0x4013u16);
    /// [`struct@bf16`] 𝗅𝗇 2
    pub const LN_2: bf16 = bf16(0x3F31u16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₁₀ℯ
    pub const LOG10_E: bf16 = bf16(0x3EDEu16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₁₀2
    pub const LOG10_2: bf16 = bf16(0x3E9Au16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₂ℯ
    pub const LOG2_E: bf16 = bf16(0x3FB9u16);
    /// [`struct@bf16`] 𝗅𝗈𝗀₂10
    pub const LOG2_10: bf16 = bf16(0x4055u16);
    /// [`struct@bf16`] √2
    pub const SQRT_2: bf16 = bf16(0x3FB5u16);
790}
791
792impl From<bf16> for f32 {
793    #[inline]
794    fn from(x: bf16) -> f32 {
795        x.to_f32()
796    }
797}
798
799impl From<bf16> for f64 {
800    #[inline]
801    fn from(x: bf16) -> f64 {
802        x.to_f64()
803    }
804}
805
806impl From<i8> for bf16 {
807    #[inline]
808    fn from(x: i8) -> bf16 {
809        // Convert to f32, then to bf16
810        bf16::from_f32(f32::from(x))
811    }
812}
813
814impl From<u8> for bf16 {
815    #[inline]
816    fn from(x: u8) -> bf16 {
817        // Convert to f32, then to f16
818        bf16::from_f32(f32::from(x))
819    }
820}
821
822impl PartialEq for bf16 {
823    fn eq(&self, other: &bf16) -> bool {
824        if self.is_nan() || other.is_nan() {
825            false
826        } else {
827            (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0)
828        }
829    }
830}
831
832impl PartialOrd for bf16 {
833    fn partial_cmp(&self, other: &bf16) -> Option<Ordering> {
834        if self.is_nan() || other.is_nan() {
835            None
836        } else {
837            let neg = self.0 & 0x8000u16 != 0;
838            let other_neg = other.0 & 0x8000u16 != 0;
839            match (neg, other_neg) {
840                (false, false) => Some(self.0.cmp(&other.0)),
841                (false, true) => {
842                    if (self.0 | other.0) & 0x7FFFu16 == 0 {
843                        Some(Ordering::Equal)
844                    } else {
845                        Some(Ordering::Greater)
846                    }
847                }
848                (true, false) => {
849                    if (self.0 | other.0) & 0x7FFFu16 == 0 {
850                        Some(Ordering::Equal)
851                    } else {
852                        Some(Ordering::Less)
853                    }
854                }
855                (true, true) => Some(other.0.cmp(&self.0)),
856            }
857        }
858    }
859
860    fn lt(&self, other: &bf16) -> bool {
861        if self.is_nan() || other.is_nan() {
862            false
863        } else {
864            let neg = self.0 & 0x8000u16 != 0;
865            let other_neg = other.0 & 0x8000u16 != 0;
866            match (neg, other_neg) {
867                (false, false) => self.0 < other.0,
868                (false, true) => false,
869                (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0,
870                (true, true) => self.0 > other.0,
871            }
872        }
873    }
874
875    fn le(&self, other: &bf16) -> bool {
876        if self.is_nan() || other.is_nan() {
877            false
878        } else {
879            let neg = self.0 & 0x8000u16 != 0;
880            let other_neg = other.0 & 0x8000u16 != 0;
881            match (neg, other_neg) {
882                (false, false) => self.0 <= other.0,
883                (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0,
884                (true, false) => true,
885                (true, true) => self.0 >= other.0,
886            }
887        }
888    }
889
890    fn gt(&self, other: &bf16) -> bool {
891        if self.is_nan() || other.is_nan() {
892            false
893        } else {
894            let neg = self.0 & 0x8000u16 != 0;
895            let other_neg = other.0 & 0x8000u16 != 0;
896            match (neg, other_neg) {
897                (false, false) => self.0 > other.0,
898                (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0,
899                (true, false) => false,
900                (true, true) => self.0 < other.0,
901            }
902        }
903    }
904
905    fn ge(&self, other: &bf16) -> bool {
906        if self.is_nan() || other.is_nan() {
907            false
908        } else {
909            let neg = self.0 & 0x8000u16 != 0;
910            let other_neg = other.0 & 0x8000u16 != 0;
911            match (neg, other_neg) {
912                (false, false) => self.0 >= other.0,
913                (false, true) => true,
914                (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0,
915                (true, true) => self.0 <= other.0,
916            }
917        }
918    }
919}
920
921#[cfg(not(target_arch = "spirv"))]
922impl FromStr for bf16 {
923    type Err = ParseFloatError;
924    fn from_str(src: &str) -> Result<bf16, ParseFloatError> {
925        f32::from_str(src).map(bf16::from_f32)
926    }
927}
928
929#[cfg(not(target_arch = "spirv"))]
930impl Debug for bf16 {
931    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
932        Debug::fmt(&self.to_f32(), f)
933    }
934}
935
936#[cfg(not(target_arch = "spirv"))]
937impl Display for bf16 {
938    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
939        Display::fmt(&self.to_f32(), f)
940    }
941}
942
943#[cfg(not(target_arch = "spirv"))]
944impl LowerExp for bf16 {
945    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
946        write!(f, "{:e}", self.to_f32())
947    }
948}
949
950#[cfg(not(target_arch = "spirv"))]
951impl UpperExp for bf16 {
952    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
953        write!(f, "{:E}", self.to_f32())
954    }
955}
956
957#[cfg(not(target_arch = "spirv"))]
958impl Binary for bf16 {
959    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
960        write!(f, "{:b}", self.0)
961    }
962}
963
964#[cfg(not(target_arch = "spirv"))]
965impl Octal for bf16 {
966    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
967        write!(f, "{:o}", self.0)
968    }
969}
970
971#[cfg(not(target_arch = "spirv"))]
972impl LowerHex for bf16 {
973    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
974        write!(f, "{:x}", self.0)
975    }
976}
977
978#[cfg(not(target_arch = "spirv"))]
979impl UpperHex for bf16 {
980    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
981        write!(f, "{:X}", self.0)
982    }
983}
984
985impl Neg for bf16 {
986    type Output = Self;
987
988    fn neg(self) -> Self::Output {
989        Self(self.0 ^ 0x8000)
990    }
991}
992
993impl Neg for &bf16 {
994    type Output = <bf16 as Neg>::Output;
995
996    #[inline]
997    fn neg(self) -> Self::Output {
998        Neg::neg(*self)
999    }
1000}
1001
1002impl Add for bf16 {
1003    type Output = Self;
1004
1005    fn add(self, rhs: Self) -> Self::Output {
1006        Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs))
1007    }
1008}
1009
1010impl Add<&bf16> for bf16 {
1011    type Output = <bf16 as Add<bf16>>::Output;
1012
1013    #[inline]
1014    fn add(self, rhs: &bf16) -> Self::Output {
1015        self.add(*rhs)
1016    }
1017}
1018
1019impl Add<&bf16> for &bf16 {
1020    type Output = <bf16 as Add<bf16>>::Output;
1021
1022    #[inline]
1023    fn add(self, rhs: &bf16) -> Self::Output {
1024        (*self).add(*rhs)
1025    }
1026}
1027
1028impl Add<bf16> for &bf16 {
1029    type Output = <bf16 as Add<bf16>>::Output;
1030
1031    #[inline]
1032    fn add(self, rhs: bf16) -> Self::Output {
1033        (*self).add(rhs)
1034    }
1035}
1036
1037impl AddAssign for bf16 {
1038    #[inline]
1039    fn add_assign(&mut self, rhs: Self) {
1040        *self = (*self).add(rhs);
1041    }
1042}
1043
1044impl AddAssign<&bf16> for bf16 {
1045    #[inline]
1046    fn add_assign(&mut self, rhs: &bf16) {
1047        *self = (*self).add(rhs);
1048    }
1049}
1050
1051impl Sub for bf16 {
1052    type Output = Self;
1053
1054    fn sub(self, rhs: Self) -> Self::Output {
1055        Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs))
1056    }
1057}
1058
1059impl Sub<&bf16> for bf16 {
1060    type Output = <bf16 as Sub<bf16>>::Output;
1061
1062    #[inline]
1063    fn sub(self, rhs: &bf16) -> Self::Output {
1064        self.sub(*rhs)
1065    }
1066}
1067
1068impl Sub<&bf16> for &bf16 {
1069    type Output = <bf16 as Sub<bf16>>::Output;
1070
1071    #[inline]
1072    fn sub(self, rhs: &bf16) -> Self::Output {
1073        (*self).sub(*rhs)
1074    }
1075}
1076
1077impl Sub<bf16> for &bf16 {
1078    type Output = <bf16 as Sub<bf16>>::Output;
1079
1080    #[inline]
1081    fn sub(self, rhs: bf16) -> Self::Output {
1082        (*self).sub(rhs)
1083    }
1084}
1085
1086impl SubAssign for bf16 {
1087    #[inline]
1088    fn sub_assign(&mut self, rhs: Self) {
1089        *self = (*self).sub(rhs);
1090    }
1091}
1092
1093impl SubAssign<&bf16> for bf16 {
1094    #[inline]
1095    fn sub_assign(&mut self, rhs: &bf16) {
1096        *self = (*self).sub(rhs);
1097    }
1098}
1099
1100impl Mul for bf16 {
1101    type Output = Self;
1102
1103    fn mul(self, rhs: Self) -> Self::Output {
1104        Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs))
1105    }
1106}
1107
1108impl Mul<&bf16> for bf16 {
1109    type Output = <bf16 as Mul<bf16>>::Output;
1110
1111    #[inline]
1112    fn mul(self, rhs: &bf16) -> Self::Output {
1113        self.mul(*rhs)
1114    }
1115}
1116
1117impl Mul<&bf16> for &bf16 {
1118    type Output = <bf16 as Mul<bf16>>::Output;
1119
1120    #[inline]
1121    fn mul(self, rhs: &bf16) -> Self::Output {
1122        (*self).mul(*rhs)
1123    }
1124}
1125
1126impl Mul<bf16> for &bf16 {
1127    type Output = <bf16 as Mul<bf16>>::Output;
1128
1129    #[inline]
1130    fn mul(self, rhs: bf16) -> Self::Output {
1131        (*self).mul(rhs)
1132    }
1133}
1134
1135impl MulAssign for bf16 {
1136    #[inline]
1137    fn mul_assign(&mut self, rhs: Self) {
1138        *self = (*self).mul(rhs);
1139    }
1140}
1141
1142impl MulAssign<&bf16> for bf16 {
1143    #[inline]
1144    fn mul_assign(&mut self, rhs: &bf16) {
1145        *self = (*self).mul(rhs);
1146    }
1147}
1148
1149impl Div for bf16 {
1150    type Output = Self;
1151
1152    fn div(self, rhs: Self) -> Self::Output {
1153        Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs))
1154    }
1155}
1156
1157impl Div<&bf16> for bf16 {
1158    type Output = <bf16 as Div<bf16>>::Output;
1159
1160    #[inline]
1161    fn div(self, rhs: &bf16) -> Self::Output {
1162        self.div(*rhs)
1163    }
1164}
1165
1166impl Div<&bf16> for &bf16 {
1167    type Output = <bf16 as Div<bf16>>::Output;
1168
1169    #[inline]
1170    fn div(self, rhs: &bf16) -> Self::Output {
1171        (*self).div(*rhs)
1172    }
1173}
1174
1175impl Div<bf16> for &bf16 {
1176    type Output = <bf16 as Div<bf16>>::Output;
1177
1178    #[inline]
1179    fn div(self, rhs: bf16) -> Self::Output {
1180        (*self).div(rhs)
1181    }
1182}
1183
1184impl DivAssign for bf16 {
1185    #[inline]
1186    fn div_assign(&mut self, rhs: Self) {
1187        *self = (*self).div(rhs);
1188    }
1189}
1190
1191impl DivAssign<&bf16> for bf16 {
1192    #[inline]
1193    fn div_assign(&mut self, rhs: &bf16) {
1194        *self = (*self).div(rhs);
1195    }
1196}
1197
1198impl Rem for bf16 {
1199    type Output = Self;
1200
1201    fn rem(self, rhs: Self) -> Self::Output {
1202        Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs))
1203    }
1204}
1205
1206impl Rem<&bf16> for bf16 {
1207    type Output = <bf16 as Rem<bf16>>::Output;
1208
1209    #[inline]
1210    fn rem(self, rhs: &bf16) -> Self::Output {
1211        self.rem(*rhs)
1212    }
1213}
1214
1215impl Rem<&bf16> for &bf16 {
1216    type Output = <bf16 as Rem<bf16>>::Output;
1217
1218    #[inline]
1219    fn rem(self, rhs: &bf16) -> Self::Output {
1220        (*self).rem(*rhs)
1221    }
1222}
1223
1224impl Rem<bf16> for &bf16 {
1225    type Output = <bf16 as Rem<bf16>>::Output;
1226
1227    #[inline]
1228    fn rem(self, rhs: bf16) -> Self::Output {
1229        (*self).rem(rhs)
1230    }
1231}
1232
1233impl RemAssign for bf16 {
1234    #[inline]
1235    fn rem_assign(&mut self, rhs: Self) {
1236        *self = (*self).rem(rhs);
1237    }
1238}
1239
1240impl RemAssign<&bf16> for bf16 {
1241    #[inline]
1242    fn rem_assign(&mut self, rhs: &bf16) {
1243        *self = (*self).rem(rhs);
1244    }
1245}
1246
1247impl Product for bf16 {
1248    #[inline]
1249    fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
1250        bf16::from_f32(iter.map(|f| f.to_f32()).product())
1251    }
1252}
1253
1254impl<'a> Product<&'a bf16> for bf16 {
1255    #[inline]
1256    fn product<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1257        bf16::from_f32(iter.map(|f| f.to_f32()).product())
1258    }
1259}
1260
1261impl Sum for bf16 {
1262    #[inline]
1263    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
1264        bf16::from_f32(iter.map(|f| f.to_f32()).sum())
1265    }
1266}
1267
1268impl<'a> Sum<&'a bf16> for bf16 {
1269    #[inline]
1270    fn sum<I: Iterator<Item = &'a bf16>>(iter: I) -> Self {
1271        bf16::from_f32(iter.map(|f| f.to_f32()).sum())
1272    }
1273}
1274
// Stateless serde visitor used by the `Deserialize` impl for `bf16`; it accepts a newtype
// struct wrapping the raw `u16` bits, a float string, or an `f32`/`f64` value.
#[cfg(feature = "serde")]
struct Visitor;
1277
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for bf16 {
    /// Deserializes a value by asking the deserializer for a newtype struct named `"bf16"`
    /// and letting the private `Visitor` interpret whatever it is handed.
    fn deserialize<D>(deserializer: D) -> Result<bf16, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        serde::de::Deserializer::deserialize_newtype_struct(deserializer, "bf16", Visitor)
    }
}
1287
#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for Visitor {
    type Value = bf16;

    /// Writes the description of the expected input used in serde error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        formatter.write_str("tuple struct bf16")
    }

    /// Reads the wrapped `u16` and uses it directly as the raw bit pattern.
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let bits = <u16 as Deserialize>::deserialize(deserializer)?;
        Ok(bf16(bits))
    }

    /// Parses the string with the `FromStr` impl; a parse failure is reported as an
    /// invalid-value error that echoes the offending string.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        match v.parse() {
            Ok(value) => Ok(value),
            Err(_) => Err(serde::de::Error::invalid_value(
                serde::de::Unexpected::Str(v),
                &"a float string",
            )),
        }
    }

    /// Converts an `f32` input with `bf16::from_f32`.
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let converted = bf16::from_f32(v);
        Ok(converted)
    }

    /// Converts an `f64` input with `bf16::from_f64`.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let converted = bf16::from_f64(v);
        Ok(converted)
    }
}
1326
1327#[allow(
1328    clippy::cognitive_complexity,
1329    clippy::float_cmp,
1330    clippy::neg_cmp_op_on_partial_ord
1331)]
1332#[cfg(test)]
1333mod test {
1334    use super::*;
1335    #[allow(unused_imports)]
1336    use core::cmp::Ordering;
1337    #[cfg(feature = "num-traits")]
1338    use num_traits::{AsPrimitive, FromBytes, FromPrimitive, ToBytes, ToPrimitive};
1339    use quickcheck_macros::quickcheck;
1340
1341    #[cfg(feature = "num-traits")]
1342    #[test]
1343    fn as_primitive() {
1344        let two = bf16::from_f32(2.0);
1345        assert_eq!(<i32 as AsPrimitive<bf16>>::as_(2), two);
1346        assert_eq!(<bf16 as AsPrimitive<i32>>::as_(two), 2);
1347
1348        assert_eq!(<f32 as AsPrimitive<bf16>>::as_(2.0), two);
1349        assert_eq!(<bf16 as AsPrimitive<f32>>::as_(two), 2.0);
1350
1351        assert_eq!(<f64 as AsPrimitive<bf16>>::as_(2.0), two);
1352        assert_eq!(<bf16 as AsPrimitive<f64>>::as_(two), 2.0);
1353    }
1354
1355    #[cfg(feature = "num-traits")]
1356    #[test]
1357    fn to_primitive() {
1358        let two = bf16::from_f32(2.0);
1359        assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32);
1360        assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32);
1361        assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64);
1362    }
1363
1364    #[cfg(feature = "num-traits")]
1365    #[test]
1366    fn from_primitive() {
1367        let two = bf16::from_f32(2.0);
1368        assert_eq!(<bf16 as FromPrimitive>::from_i32(2).unwrap(), two);
1369        assert_eq!(<bf16 as FromPrimitive>::from_f32(2.0).unwrap(), two);
1370        assert_eq!(<bf16 as FromPrimitive>::from_f64(2.0).unwrap(), two);
1371    }
1372
1373    #[cfg(feature = "num-traits")]
1374    #[test]
1375    fn to_and_from_bytes() {
1376        let two = bf16::from_f32(2.0);
1377        assert_eq!(<bf16 as ToBytes>::to_le_bytes(&two), [0, 64]);
1378        assert_eq!(<bf16 as FromBytes>::from_le_bytes(&[0, 64]), two);
1379        assert_eq!(<bf16 as ToBytes>::to_be_bytes(&two), [64, 0]);
1380        assert_eq!(<bf16 as FromBytes>::from_be_bytes(&[64, 0]), two);
1381    }
1382
1383    #[test]
1384    fn test_bf16_consts_from_f32() {
1385        let one = bf16::from_f32(1.0);
1386        let zero = bf16::from_f32(0.0);
1387        let neg_zero = bf16::from_f32(-0.0);
1388        let neg_one = bf16::from_f32(-1.0);
1389        let inf = bf16::from_f32(core::f32::INFINITY);
1390        let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY);
1391        let nan = bf16::from_f32(core::f32::NAN);
1392
1393        assert_eq!(bf16::ONE, one);
1394        assert_eq!(bf16::ZERO, zero);
1395        assert!(zero.is_sign_positive());
1396        assert_eq!(bf16::NEG_ZERO, neg_zero);
1397        assert!(neg_zero.is_sign_negative());
1398        assert_eq!(bf16::NEG_ONE, neg_one);
1399        assert!(neg_one.is_sign_negative());
1400        assert_eq!(bf16::INFINITY, inf);
1401        assert_eq!(bf16::NEG_INFINITY, neg_inf);
1402        assert!(nan.is_nan());
1403        assert!(bf16::NAN.is_nan());
1404
1405        let e = bf16::from_f32(core::f32::consts::E);
1406        let pi = bf16::from_f32(core::f32::consts::PI);
1407        let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI);
1408        let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2);
1409        let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI);
1410        let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI);
1411        let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2);
1412        let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3);
1413        let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4);
1414        let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6);
1415        let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8);
1416        let ln_10 = bf16::from_f32(core::f32::consts::LN_10);
1417        let ln_2 = bf16::from_f32(core::f32::consts::LN_2);
1418        let log10_e = bf16::from_f32(core::f32::consts::LOG10_E);
1419        // core::f32::consts::LOG10_2 requires rustc 1.43.0
1420        let log10_2 = bf16::from_f32(2f32.log10());
1421        let log2_e = bf16::from_f32(core::f32::consts::LOG2_E);
1422        // core::f32::consts::LOG2_10 requires rustc 1.43.0
1423        let log2_10 = bf16::from_f32(10f32.log2());
1424        let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2);
1425
1426        assert_eq!(bf16::E, e);
1427        assert_eq!(bf16::PI, pi);
1428        assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
1429        assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
1430        assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
1431        assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
1432        assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
1433        assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
1434        assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
1435        assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
1436        assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
1437        assert_eq!(bf16::LN_10, ln_10);
1438        assert_eq!(bf16::LN_2, ln_2);
1439        assert_eq!(bf16::LOG10_E, log10_e);
1440        assert_eq!(bf16::LOG10_2, log10_2);
1441        assert_eq!(bf16::LOG2_E, log2_e);
1442        assert_eq!(bf16::LOG2_10, log2_10);
1443        assert_eq!(bf16::SQRT_2, sqrt_2);
1444    }
1445
1446    #[test]
1447    fn test_bf16_consts_from_f64() {
1448        let one = bf16::from_f64(1.0);
1449        let zero = bf16::from_f64(0.0);
1450        let neg_zero = bf16::from_f64(-0.0);
1451        let inf = bf16::from_f64(core::f64::INFINITY);
1452        let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY);
1453        let nan = bf16::from_f64(core::f64::NAN);
1454
1455        assert_eq!(bf16::ONE, one);
1456        assert_eq!(bf16::ZERO, zero);
1457        assert_eq!(bf16::NEG_ZERO, neg_zero);
1458        assert_eq!(bf16::INFINITY, inf);
1459        assert_eq!(bf16::NEG_INFINITY, neg_inf);
1460        assert!(nan.is_nan());
1461        assert!(bf16::NAN.is_nan());
1462
1463        let e = bf16::from_f64(core::f64::consts::E);
1464        let pi = bf16::from_f64(core::f64::consts::PI);
1465        let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI);
1466        let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2);
1467        let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI);
1468        let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI);
1469        let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2);
1470        let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3);
1471        let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4);
1472        let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6);
1473        let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8);
1474        let ln_10 = bf16::from_f64(core::f64::consts::LN_10);
1475        let ln_2 = bf16::from_f64(core::f64::consts::LN_2);
1476        let log10_e = bf16::from_f64(core::f64::consts::LOG10_E);
1477        // core::f64::consts::LOG10_2 requires rustc 1.43.0
1478        let log10_2 = bf16::from_f64(2f64.log10());
1479        let log2_e = bf16::from_f64(core::f64::consts::LOG2_E);
1480        // core::f64::consts::LOG2_10 requires rustc 1.43.0
1481        let log2_10 = bf16::from_f64(10f64.log2());
1482        let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2);
1483
1484        assert_eq!(bf16::E, e);
1485        assert_eq!(bf16::PI, pi);
1486        assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
1487        assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
1488        assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
1489        assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
1490        assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
1491        assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
1492        assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
1493        assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
1494        assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
1495        assert_eq!(bf16::LN_10, ln_10);
1496        assert_eq!(bf16::LN_2, ln_2);
1497        assert_eq!(bf16::LOG10_E, log10_e);
1498        assert_eq!(bf16::LOG10_2, log10_2);
1499        assert_eq!(bf16::LOG2_E, log2_e);
1500        assert_eq!(bf16::LOG2_10, log2_10);
1501        assert_eq!(bf16::SQRT_2, sqrt_2);
1502    }
1503
1504    #[test]
1505    fn test_nan_conversion_to_smaller() {
1506        let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
1507        let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
1508        let nan32 = f32::from_bits(0x7F80_0001u32);
1509        let neg_nan32 = f32::from_bits(0xFF80_0001u32);
1510        let nan32_from_64 = nan64 as f32;
1511        let neg_nan32_from_64 = neg_nan64 as f32;
1512        let nan16_from_64 = bf16::from_f64(nan64);
1513        let neg_nan16_from_64 = bf16::from_f64(neg_nan64);
1514        let nan16_from_32 = bf16::from_f32(nan32);
1515        let neg_nan16_from_32 = bf16::from_f32(neg_nan32);
1516
1517        assert!(nan64.is_nan() && nan64.is_sign_positive());
1518        assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
1519        assert!(nan32.is_nan() && nan32.is_sign_positive());
1520        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
1521
1522        // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
1523        assert!(neg_nan32_from_64.is_nan());
1524        assert!(nan32_from_64.is_nan());
1525        assert!(nan16_from_64.is_nan());
1526        assert!(neg_nan16_from_64.is_nan());
1527        assert!(nan16_from_32.is_nan());
1528        assert!(neg_nan16_from_32.is_nan());
1529    }
1530
1531    #[test]
1532    fn test_nan_conversion_to_larger() {
1533        let nan16 = bf16::from_bits(0x7F81u16);
1534        let neg_nan16 = bf16::from_bits(0xFF81u16);
1535        let nan32 = f32::from_bits(0x7F80_0001u32);
1536        let neg_nan32 = f32::from_bits(0xFF80_0001u32);
1537        let nan32_from_16 = f32::from(nan16);
1538        let neg_nan32_from_16 = f32::from(neg_nan16);
1539        let nan64_from_16 = f64::from(nan16);
1540        let neg_nan64_from_16 = f64::from(neg_nan16);
1541        let nan64_from_32 = f64::from(nan32);
1542        let neg_nan64_from_32 = f64::from(neg_nan32);
1543
1544        assert!(nan16.is_nan() && nan16.is_sign_positive());
1545        assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
1546        assert!(nan32.is_nan() && nan32.is_sign_positive());
1547        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
1548
1549        // // f32/f64 NaN conversion sign is non-deterministic: https://github.com/starkat99/half-rs/issues/103
1550        assert!(nan32_from_16.is_nan());
1551        assert!(neg_nan32_from_16.is_nan());
1552        assert!(nan64_from_16.is_nan());
1553        assert!(neg_nan64_from_16.is_nan());
1554        assert!(nan64_from_32.is_nan());
1555        assert!(neg_nan64_from_32.is_nan());
1556    }
1557
1558    #[test]
1559    fn test_bf16_to_f32() {
1560        let f = bf16::from_f32(7.0);
1561        assert_eq!(f.to_f32(), 7.0f32);
1562
1563        // 7.1 is NOT exactly representable in 16-bit, it's rounded
1564        let f = bf16::from_f32(7.1);
1565        let diff = (f.to_f32() - 7.1f32).abs();
1566        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
1567        assert!(diff <= 4.0 * bf16::EPSILON.to_f32());
1568
1569        let tiny32 = f32::from_bits(0x0001_0000u32);
1570        assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32);
1571        assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32);
1572
1573        assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32));
1574        assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32));
1575    }
1576
1577    #[test]
1578    fn test_bf16_to_f64() {
1579        let f = bf16::from_f64(7.0);
1580        assert_eq!(f.to_f64(), 7.0f64);
1581
1582        // 7.1 is NOT exactly representable in 16-bit, it's rounded
1583        let f = bf16::from_f64(7.1);
1584        let diff = (f.to_f64() - 7.1f64).abs();
1585        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
1586        assert!(diff <= 4.0 * bf16::EPSILON.to_f64());
1587
1588        let tiny64 = 2.0f64.powi(-133);
1589        assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64);
1590        assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64);
1591
1592        assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64));
1593        assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64));
1594    }
1595
1596    #[test]
1597    fn test_comparisons() {
1598        let zero = bf16::from_f64(0.0);
1599        let one = bf16::from_f64(1.0);
1600        let neg_zero = bf16::from_f64(-0.0);
1601        let neg_one = bf16::from_f64(-1.0);
1602
1603        assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
1604        assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
1605        assert!(zero == neg_zero);
1606        assert!(neg_zero == zero);
1607        assert!(!(zero != neg_zero));
1608        assert!(!(neg_zero != zero));
1609        assert!(!(zero < neg_zero));
1610        assert!(!(neg_zero < zero));
1611        assert!(zero <= neg_zero);
1612        assert!(neg_zero <= zero);
1613        assert!(!(zero > neg_zero));
1614        assert!(!(neg_zero > zero));
1615        assert!(zero >= neg_zero);
1616        assert!(neg_zero >= zero);
1617
1618        assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater));
1619        assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less));
1620        assert!(!(one == neg_zero));
1621        assert!(!(neg_zero == one));
1622        assert!(one != neg_zero);
1623        assert!(neg_zero != one);
1624        assert!(!(one < neg_zero));
1625        assert!(neg_zero < one);
1626        assert!(!(one <= neg_zero));
1627        assert!(neg_zero <= one);
1628        assert!(one > neg_zero);
1629        assert!(!(neg_zero > one));
1630        assert!(one >= neg_zero);
1631        assert!(!(neg_zero >= one));
1632
1633        assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater));
1634        assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less));
1635        assert!(!(one == neg_one));
1636        assert!(!(neg_one == one));
1637        assert!(one != neg_one);
1638        assert!(neg_one != one);
1639        assert!(!(one < neg_one));
1640        assert!(neg_one < one);
1641        assert!(!(one <= neg_one));
1642        assert!(neg_one <= one);
1643        assert!(one > neg_one);
1644        assert!(!(neg_one > one));
1645        assert!(one >= neg_one);
1646        assert!(!(neg_one >= one));
1647    }
1648
1649    #[test]
1650    #[allow(clippy::erasing_op, clippy::identity_op)]
1651    fn round_to_even_f32() {
1652        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
1653        let min_sub = bf16::from_bits(1);
1654        let min_sub_f = (-133f32).exp2();
1655        assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
1656        assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());
1657
1658        // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
1659        // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
1660        // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
1661        assert_eq!(
1662            bf16::from_f32(min_sub_f * 0.49).to_bits(),
1663            min_sub.to_bits() * 0
1664        );
1665        assert_eq!(
1666            bf16::from_f32(min_sub_f * 0.50).to_bits(),
1667            min_sub.to_bits() * 0
1668        );
1669        assert_eq!(
1670            bf16::from_f32(min_sub_f * 0.51).to_bits(),
1671            min_sub.to_bits() * 1
1672        );
1673
1674        // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
1675        // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
1676        // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
1677        assert_eq!(
1678            bf16::from_f32(min_sub_f * 1.49).to_bits(),
1679            min_sub.to_bits() * 1
1680        );
1681        assert_eq!(
1682            bf16::from_f32(min_sub_f * 1.50).to_bits(),
1683            min_sub.to_bits() * 2
1684        );
1685        assert_eq!(
1686            bf16::from_f32(min_sub_f * 1.51).to_bits(),
1687            min_sub.to_bits() * 2
1688        );
1689
1690        // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
1691        // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
1692        // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
1693        assert_eq!(
1694            bf16::from_f32(min_sub_f * 2.49).to_bits(),
1695            min_sub.to_bits() * 2
1696        );
1697        assert_eq!(
1698            bf16::from_f32(min_sub_f * 2.50).to_bits(),
1699            min_sub.to_bits() * 2
1700        );
1701        assert_eq!(
1702            bf16::from_f32(min_sub_f * 2.51).to_bits(),
1703            min_sub.to_bits() * 3
1704        );
1705
1706        assert_eq!(
1707            bf16::from_f32(250.49f32).to_bits(),
1708            bf16::from_f32(250.0).to_bits()
1709        );
1710        assert_eq!(
1711            bf16::from_f32(250.50f32).to_bits(),
1712            bf16::from_f32(250.0).to_bits()
1713        );
1714        assert_eq!(
1715            bf16::from_f32(250.51f32).to_bits(),
1716            bf16::from_f32(251.0).to_bits()
1717        );
1718        assert_eq!(
1719            bf16::from_f32(251.49f32).to_bits(),
1720            bf16::from_f32(251.0).to_bits()
1721        );
1722        assert_eq!(
1723            bf16::from_f32(251.50f32).to_bits(),
1724            bf16::from_f32(252.0).to_bits()
1725        );
1726        assert_eq!(
1727            bf16::from_f32(251.51f32).to_bits(),
1728            bf16::from_f32(252.0).to_bits()
1729        );
1730        assert_eq!(
1731            bf16::from_f32(252.49f32).to_bits(),
1732            bf16::from_f32(252.0).to_bits()
1733        );
1734        assert_eq!(
1735            bf16::from_f32(252.50f32).to_bits(),
1736            bf16::from_f32(252.0).to_bits()
1737        );
1738        assert_eq!(
1739            bf16::from_f32(252.51f32).to_bits(),
1740            bf16::from_f32(253.0).to_bits()
1741        );
1742    }
1743
1744    #[test]
1745    #[allow(clippy::erasing_op, clippy::identity_op)]
1746    fn round_to_even_f64() {
1747        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
1748        let min_sub = bf16::from_bits(1);
1749        let min_sub_f = (-133f64).exp2();
1750        assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
1751        assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());
1752
1753        // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
1754        // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
1755        // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
1756        assert_eq!(
1757            bf16::from_f64(min_sub_f * 0.49).to_bits(),
1758            min_sub.to_bits() * 0
1759        );
1760        assert_eq!(
1761            bf16::from_f64(min_sub_f * 0.50).to_bits(),
1762            min_sub.to_bits() * 0
1763        );
1764        assert_eq!(
1765            bf16::from_f64(min_sub_f * 0.51).to_bits(),
1766            min_sub.to_bits() * 1
1767        );
1768
1769        // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
1770        // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
1771        // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
1772        assert_eq!(
1773            bf16::from_f64(min_sub_f * 1.49).to_bits(),
1774            min_sub.to_bits() * 1
1775        );
1776        assert_eq!(
1777            bf16::from_f64(min_sub_f * 1.50).to_bits(),
1778            min_sub.to_bits() * 2
1779        );
1780        assert_eq!(
1781            bf16::from_f64(min_sub_f * 1.51).to_bits(),
1782            min_sub.to_bits() * 2
1783        );
1784
1785        // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
1786        // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
1787        // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
1788        assert_eq!(
1789            bf16::from_f64(min_sub_f * 2.49).to_bits(),
1790            min_sub.to_bits() * 2
1791        );
1792        assert_eq!(
1793            bf16::from_f64(min_sub_f * 2.50).to_bits(),
1794            min_sub.to_bits() * 2
1795        );
1796        assert_eq!(
1797            bf16::from_f64(min_sub_f * 2.51).to_bits(),
1798            min_sub.to_bits() * 3
1799        );
1800
1801        assert_eq!(
1802            bf16::from_f64(250.49f64).to_bits(),
1803            bf16::from_f64(250.0).to_bits()
1804        );
1805        assert_eq!(
1806            bf16::from_f64(250.50f64).to_bits(),
1807            bf16::from_f64(250.0).to_bits()
1808        );
1809        assert_eq!(
1810            bf16::from_f64(250.51f64).to_bits(),
1811            bf16::from_f64(251.0).to_bits()
1812        );
1813        assert_eq!(
1814            bf16::from_f64(251.49f64).to_bits(),
1815            bf16::from_f64(251.0).to_bits()
1816        );
1817        assert_eq!(
1818            bf16::from_f64(251.50f64).to_bits(),
1819            bf16::from_f64(252.0).to_bits()
1820        );
1821        assert_eq!(
1822            bf16::from_f64(251.51f64).to_bits(),
1823            bf16::from_f64(252.0).to_bits()
1824        );
1825        assert_eq!(
1826            bf16::from_f64(252.49f64).to_bits(),
1827            bf16::from_f64(252.0).to_bits()
1828        );
1829        assert_eq!(
1830            bf16::from_f64(252.50f64).to_bits(),
1831            bf16::from_f64(252.0).to_bits()
1832        );
1833        assert_eq!(
1834            bf16::from_f64(252.51f64).to_bits(),
1835            bf16::from_f64(253.0).to_bits()
1836        );
1837    }
1838
1839    #[cfg(feature = "std")]
1840    #[test]
1841    fn formatting() {
1842        let f = bf16::from_f32(0.1152344);
1843
1844        assert_eq!(format!("{:.3}", f), "0.115");
1845        assert_eq!(format!("{:.4}", f), "0.1152");
1846        assert_eq!(format!("{:+.4}", f), "+0.1152");
1847        assert_eq!(format!("{:>+10.4}", f), "   +0.1152");
1848
1849        assert_eq!(format!("{:.3?}", f), "0.115");
1850        assert_eq!(format!("{:.4?}", f), "0.1152");
1851        assert_eq!(format!("{:+.4?}", f), "+0.1152");
1852        assert_eq!(format!("{:>+10.4?}", f), "   +0.1152");
1853    }
1854
1855    impl quickcheck::Arbitrary for bf16 {
1856        fn arbitrary(g: &mut quickcheck::Gen) -> Self {
1857            bf16(u16::arbitrary(g))
1858        }
1859    }
1860
1861    #[quickcheck]
1862    fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool {
1863        let roundtrip = bf16::from_f32(f.to_f32());
1864        if f.is_nan() {
1865            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1866        } else {
1867            f.0 == roundtrip.0
1868        }
1869    }
1870
1871    #[quickcheck]
1872    fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool {
1873        let roundtrip = bf16::from_f64(f.to_f64());
1874        if f.is_nan() {
1875            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1876        } else {
1877            f.0 == roundtrip.0
1878        }
1879    }
1880}