libm/math/support/
float_traits.rs

1#![allow(unknown_lints)] // FIXME(msrv) we shouldn't need this
2
3use core::{fmt, mem, ops};
4
5use super::int_traits::{CastFrom, Int, MinInt};
6
7/// Trait for some basic operations on floats
8// #[allow(dead_code)]
9pub trait Float:
10    Copy
11    + fmt::Debug
12    + PartialEq
13    + PartialOrd
14    + ops::AddAssign
15    + ops::MulAssign
16    + ops::Add<Output = Self>
17    + ops::Sub<Output = Self>
18    + ops::Mul<Output = Self>
19    + ops::Div<Output = Self>
20    + ops::Rem<Output = Self>
21    + ops::Neg<Output = Self>
22    + 'static
23{
24    /// A uint of the same width as the float
25    type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
26
27    /// A int of the same width as the float
28    type SignedInt: Int
29        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
30        + ops::Neg<Output = Self::SignedInt>;
31
32    const ZERO: Self;
33    const NEG_ZERO: Self;
34    const ONE: Self;
35    const NEG_ONE: Self;
36    const INFINITY: Self;
37    const NEG_INFINITY: Self;
38    const NAN: Self;
39    const NEG_NAN: Self;
40    const MAX: Self;
41    const MIN: Self;
42    const EPSILON: Self;
43    const PI: Self;
44    const NEG_PI: Self;
45    const FRAC_PI_2: Self;
46
47    const MIN_POSITIVE_NORMAL: Self;
48
49    /// The bitwidth of the float type
50    const BITS: u32;
51
52    /// The bitwidth of the significand
53    const SIG_BITS: u32;
54
55    /// The bitwidth of the exponent
56    const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
57
58    /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
59    /// representation.
60    ///
61    /// This shifted fully right, use `EXP_MASK` for the shifted value.
62    const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
63
64    /// The exponent bias value
65    const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
66
67    /// Maximum unbiased exponent value.
68    const EXP_MAX: i32 = Self::EXP_BIAS as i32;
69
70    /// Minimum *NORMAL* unbiased exponent value.
71    const EXP_MIN: i32 = -(Self::EXP_MAX - 1);
72
73    /// Minimum subnormal exponent value.
74    const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32;
75
76    /// A mask for the sign bit
77    const SIGN_MASK: Self::Int;
78
79    /// A mask for the significand
80    const SIG_MASK: Self::Int;
81
82    /// A mask for the exponent
83    const EXP_MASK: Self::Int;
84
85    /// The implicit bit of the float format
86    const IMPLICIT_BIT: Self::Int;
87
88    /// Returns `self` transmuted to `Self::Int`
89    fn to_bits(self) -> Self::Int;
90
91    /// Returns `self` transmuted to `Self::SignedInt`
92    #[allow(dead_code)]
93    fn to_bits_signed(self) -> Self::SignedInt {
94        self.to_bits().signed()
95    }
96
97    /// Check bitwise equality.
98    #[allow(dead_code)]
99    fn biteq(self, rhs: Self) -> bool {
100        self.to_bits() == rhs.to_bits()
101    }
102
103    /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
104    /// represented in multiple different ways.
105    ///
106    /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
107    /// if `NaN` should not be treated separately.
108    #[allow(dead_code)]
109    fn eq_repr(self, rhs: Self) -> bool {
110        if self.is_nan() && rhs.is_nan() {
111            true
112        } else {
113            self.biteq(rhs)
114        }
115    }
116
117    /// Returns true if the value is NaN.
118    fn is_nan(self) -> bool;
119
120    /// Returns true if the value is +inf or -inf.
121    fn is_infinite(self) -> bool;
122
123    /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN.
124    fn is_sign_negative(self) -> bool;
125
126    /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN.
127    fn is_sign_positive(self) -> bool {
128        !self.is_sign_negative()
129    }
130
131    /// Returns if `self` is subnormal.
132    #[allow(dead_code)]
133    fn is_subnormal(self) -> bool {
134        (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
135    }
136
137    /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
138    fn ex(self) -> u32 {
139        u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT
140    }
141
142    /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
143    fn exp_unbiased(self) -> i32 {
144        self.ex().signed() - (Self::EXP_BIAS as i32)
145    }
146
147    /// Returns the significand with no implicit bit (or the "fractional" part)
148    #[allow(dead_code)]
149    fn frac(self) -> Self::Int {
150        self.to_bits() & Self::SIG_MASK
151    }
152
153    /// Returns a `Self::Int` transmuted back to `Self`
154    fn from_bits(a: Self::Int) -> Self;
155
156    /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
157    fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self {
158        let sign = if negative {
159            Self::Int::ONE
160        } else {
161            Self::Int::ZERO
162        };
163        Self::from_bits(
164            (sign << (Self::BITS - 1))
165                | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS)
166                | (significand & Self::SIG_MASK),
167        )
168    }
169
170    #[allow(dead_code)]
171    fn abs(self) -> Self;
172
173    /// Returns a number composed of the magnitude of self and the sign of sign.
174    fn copysign(self, other: Self) -> Self;
175
176    /// Fused multiply add, rounding once.
177    fn fma(self, y: Self, z: Self) -> Self;
178
179    /// Returns (normalized exponent, normalized significand)
180    #[allow(dead_code)]
181    fn normalize(significand: Self::Int) -> (i32, Self::Int);
182
183    /// Returns a number that represents the sign of self.
184    #[allow(dead_code)]
185    fn signum(self) -> Self {
186        if self.is_nan() {
187            self
188        } else {
189            Self::ONE.copysign(self)
190        }
191    }
192}
193
/// Shorthand for the `Int` type associated with the float `F`, i.e. `<F as Float>::Int`.
///
/// Spelling the projection once here avoids ambiguous-associated-type errors at use sites.
pub type IntTy<F> = <F as Float>::Int;
196
/// Implements [`Float`] for one primitive float type.
///
/// Arguments, in order: the float type, its same-width unsigned and signed integer types, the
/// total bit width, the significand bit width, `from_bits` and `to_bits` functions that can be
/// called in constant initializers, the fallback fma function (resolved in the grandparent
/// module), and the `core::intrinsics` fma intrinsic used when `intrinsics_enabled` is set.
macro_rules! float_impl {
    (
        $fty:ident,
        $uty:ident,
        $sty:ident,
        $width:expr,
        $sig_width:expr,
        $bits_to_float:path,
        $float_to_bits:path,
        $soft_fma:ident,
        $intrinsic_fma:ident
    ) => {
        impl Float for $fty {
            type Int = $uty;
            type SignedInt = $sty;

            // Format layout.
            const BITS: u32 = $width;
            const SIG_BITS: u32 = $sig_width;

            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;

            // Common values. Inherent associated items take priority over trait items, so the
            // `<$fty>::NAME` paths below name the primitive type's own constants.
            const ZERO: Self = 0.0;
            const NEG_ZERO: Self = -0.0;
            const ONE: Self = 1.0;
            const NEG_ONE: Self = -1.0;
            const INFINITY: Self = <$fty>::INFINITY;
            const NEG_INFINITY: Self = <$fty>::NEG_INFINITY;
            const NAN: Self = <$fty>::NAN;
            // NAN isn't guaranteed to be positive but it usually is. We only use this for
            // tests.
            const NEG_NAN: Self = $bits_to_float($float_to_bits(Self::NAN) | Self::SIGN_MASK);
            const MAX: Self = -Self::MIN;
            // All bits set except the lowest exponent bit: sign bit set, saturated mantissa,
            // and a saturated exponent with its last bit zeroed.
            const MIN: Self = $bits_to_float(Self::Int::MAX & !(1 << Self::SIG_BITS));
            const EPSILON: Self = <$fty>::EPSILON;

            // Only the lowest exponent bit is set.
            const MIN_POSITIVE_NORMAL: Self = $bits_to_float(1 << Self::SIG_BITS);

            const PI: Self = core::$fty::consts::PI;
            const NEG_PI: Self = -Self::PI;
            const FRAC_PI_2: Self = core::$fty::consts::FRAC_PI_2;

            // The following methods forward to the primitive type's inherent methods, which
            // path resolution prefers over the trait methods of the same name.
            fn to_bits(self) -> Self::Int {
                <$fty>::to_bits(self)
            }
            fn is_nan(self) -> bool {
                <$fty>::is_nan(self)
            }
            fn is_infinite(self) -> bool {
                <$fty>::is_infinite(self)
            }
            fn is_sign_negative(self) -> bool {
                <$fty>::is_sign_negative(self)
            }
            fn from_bits(a: Self::Int) -> Self {
                <$fty>::from_bits(a)
            }
            fn abs(self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `abs` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        <$fty>::abs(self)
                    } else {
                        super::super::generic::fabs(self)
                    }
                }
            }
            fn copysign(self, other: Self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `copysign` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        <$fty>::copysign(self, other)
                    } else {
                        super::super::generic::copysign(self, other)
                    }
                }
            }
            fn fma(self, y: Self, z: Self) -> Self {
                cfg_if! {
                    // fma is not yet available in `core`
                    if #[cfg(intrinsics_enabled)] {
                        // NOTE(review): presumably the intrinsic has no preconditions on its
                        // float arguments -- confirm and record as a SAFETY comment.
                        unsafe { core::intrinsics::$intrinsic_fma(self, y, z) }
                    } else {
                        super::super::$soft_fma(self, y, z)
                    }
                }
            }
            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                // Shift that leaves exactly `EXP_BITS` leading zeros in the significand.
                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                let exponent = 1i32.wrapping_sub(shift as i32);
                let shifted = significand << (shift as Self::Int);
                (exponent, shifted)
            }
        }
    };
}
298
// Arguments to `float_impl!`, in order: float type, unsigned integer type, signed integer type,
// total bit width, significand bit width, `from_bits` function, `to_bits` function (both are
// called in constant initializers), fallback fma function (looked up in the grandparent module),
// and the `core::intrinsics` fma intrinsic.
#[cfg(f16_enabled)]
float_impl!(
    f16,
    u16,
    i16,
    16,
    10,
    f16::from_bits,
    f16::to_bits,
    fmaf16,
    fmaf16
);
float_impl!(
    f32,
    u32,
    i32,
    32,
    23,
    f32_from_bits,
    f32_to_bits,
    fmaf,
    fmaf32
);
float_impl!(
    f64,
    u64,
    i64,
    64,
    52,
    f64_from_bits,
    f64_to_bits,
    fma,
    fmaf64
);
#[cfg(f128_enabled)]
float_impl!(
    f128,
    u128,
    i128,
    128,
    112,
    f128::from_bits,
    f128::to_bits,
    fmaf128,
    fmaf128
);
345
346/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
347
/// `const` stand-in for `f32::from_bits`: reinterprets the bit pattern `bits` as an `f32`
/// without changing any bits.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f32_from_bits(bits: u32) -> f32 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<u32, f32>(bits) }
}
354
/// `const` stand-in for `f32::to_bits`: reinterprets `x` as its raw `u32` bit pattern without
/// changing any bits.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f32_to_bits(x: f32) -> u32 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<f32, u32>(x) }
}
361
/// `const` stand-in for `f64::from_bits`: reinterprets the bit pattern `bits` as an `f64`
/// without changing any bits.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f64_from_bits(bits: u64) -> f64 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<u64, f64>(bits) }
}
368
/// `const` stand-in for `f64::to_bits`: reinterprets `x` as its raw `u64` bit pattern without
/// changing any bits.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f64_to_bits(x: f64) -> u64 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<f64, u64>(x) }
}
375
/// Trait for floats twice the bit width of another float.
pub trait DFloat: Float {
    /// Float that is half the bit width of the float this trait is implemented for.
    ///
    /// The bound ties the pair together: `H`'s own double-width type must be `Self`.
    type H: HFloat<D = Self>;

    /// Narrow the float type, converting `self` to the half-width type `Self::H`.
    fn narrow(self) -> Self::H;
}
384
/// Trait for floats half the bit width of another float.
pub trait HFloat: Float {
    /// Float that is double the bit width of the float this trait is implemented for.
    ///
    /// The bound ties the pair together: `D`'s own half-width type must be `Self`.
    type D: DFloat<H = Self>;

    /// Widen the float type, converting `self` to the double-width type `Self::D`.
    fn widen(self) -> Self::D;
}
393
/// Implements `DFloat` for the second type of every `narrow wide` pair, using the first type
/// as its half-width counterpart. Narrowing is a plain `as` cast.
macro_rules! impl_d_float {
    ($($half:ident $double:ident),*) => {
        $(
            impl DFloat for $double {
                type H = $half;

                fn narrow(self) -> $half {
                    self as $half
                }
            }
        )*
    };
}
407
/// Implements `HFloat` for the first type of every `narrow wide` pair, using the second type
/// as its double-width counterpart. Widening is a plain `as` cast.
macro_rules! impl_h_float {
    ($($half:ident $double:ident),*) => {
        $(
            impl HFloat for $half {
                type D = $double;

                fn widen(self) -> $double {
                    self as $double
                }
            }
        )*
    };
}
421
// Each invocation names a `narrow wide` pair: `impl_d_float!` implements `DFloat` for the wide
// type and `impl_h_float!` implements `HFloat` for the narrow type.
impl_d_float!(f32 f64);
#[cfg(f16_enabled)]
impl_d_float!(f16 f32);
#[cfg(f128_enabled)]
impl_d_float!(f64 f128);

impl_h_float!(f32 f64);
#[cfg(f16_enabled)]
impl_h_float!(f16 f32);
#[cfg(f128_enabled)]
impl_h_float!(f64 f128);
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Layout constants and bit-level helpers for `f16`.
    #[test]
    #[cfg(f16_enabled)]
    fn check_f16() {
        // Derived exponent constants
        assert_eq!(f16::EXP_SAT, 0x1f);
        assert_eq!(f16::EXP_BIAS, 15);
        assert_eq!(f16::EXP_MAX, 15);
        assert_eq!(f16::EXP_MIN, -14);
        assert_eq!(f16::EXP_MIN_SUBNORM, -24);

        // `exp_unbiased` on normal values
        let half = 0.5f16;
        assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(half.exp_unbiased(), -1);
        assert_eq!(f16::MAX.exp_unbiased(), 15);
        assert_eq!(f16::MIN.exp_unbiased(), 15);
        assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14);

        // `exp_unbiased` is a convenience accessor rather than ldexp: zero and subnormals simply
        // report the unbiased value of an all-zero exponent field.
        let smallest_subnormal = f16::from_bits(0x1);
        assert_eq!(f16::ZERO.exp_unbiased(), -15);
        assert_eq!(smallest_subnormal.exp_unbiased(), -15);
        assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL);

        // `from_parts`
        let neg_one = f16::from_parts(true, f16::EXP_BIAS, 0);
        assert_biteq!(neg_one, -1.0f16);
        let lowest_bit = f16::from_parts(false, 0, 1);
        assert_biteq!(lowest_bit, f16::from_bits(0x1));
    }

    /// Layout constants and bit-level helpers for `f32`.
    #[test]
    fn check_f32() {
        // Derived exponent constants
        assert_eq!(f32::EXP_SAT, 0xff);
        assert_eq!(f32::EXP_BIAS, 127);
        assert_eq!(f32::EXP_MAX, 127);
        assert_eq!(f32::EXP_MIN, -126);
        assert_eq!(f32::EXP_MIN_SUBNORM, -149);

        // `exp_unbiased` on normal values
        let half = 0.5f32;
        assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(half.exp_unbiased(), -1);
        assert_eq!(f32::MAX.exp_unbiased(), 127);
        assert_eq!(f32::MIN.exp_unbiased(), 127);
        assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126);

        // `exp_unbiased` is a convenience accessor rather than ldexp: zero and subnormals simply
        // report the unbiased value of an all-zero exponent field.
        let smallest_subnormal = f32::from_bits(0x1);
        assert_eq!(f32::ZERO.exp_unbiased(), -127);
        assert_eq!(smallest_subnormal.exp_unbiased(), -127);
        assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL);

        // `from_parts`
        let neg_one = f32::from_parts(true, f32::EXP_BIAS, 0);
        assert_biteq!(neg_one, -1.0f32);
        let two_pow_ten = f32::from_parts(false, 10 + f32::EXP_BIAS, 0);
        assert_biteq!(two_pow_ten, hf32!("0x1p10"));
        let lowest_bit = f32::from_parts(false, 0, 1);
        assert_biteq!(lowest_bit, f32::from_bits(0x1));
    }

    /// Layout constants and bit-level helpers for `f64`.
    #[test]
    fn check_f64() {
        // Derived exponent constants
        assert_eq!(f64::EXP_SAT, 0x7ff);
        assert_eq!(f64::EXP_BIAS, 1023);
        assert_eq!(f64::EXP_MAX, 1023);
        assert_eq!(f64::EXP_MIN, -1022);
        assert_eq!(f64::EXP_MIN_SUBNORM, -1074);

        // `exp_unbiased` on normal values
        let half = 0.5f64;
        assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(half.exp_unbiased(), -1);
        assert_eq!(f64::MAX.exp_unbiased(), 1023);
        assert_eq!(f64::MIN.exp_unbiased(), 1023);
        assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022);

        // `exp_unbiased` is a convenience accessor rather than ldexp: zero and subnormals simply
        // report the unbiased value of an all-zero exponent field.
        let smallest_subnormal = f64::from_bits(0x1);
        assert_eq!(f64::ZERO.exp_unbiased(), -1023);
        assert_eq!(smallest_subnormal.exp_unbiased(), -1023);
        assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL);

        // `from_parts`
        let neg_one = f64::from_parts(true, f64::EXP_BIAS, 0);
        assert_biteq!(neg_one, -1.0f64);
        let two_pow_ten = f64::from_parts(false, 10 + f64::EXP_BIAS, 0);
        assert_biteq!(two_pow_ten, hf64!("0x1p10"));
        let lowest_bit = f64::from_parts(false, 0, 1);
        assert_biteq!(lowest_bit, f64::from_bits(0x1));
    }

    /// Layout constants and bit-level helpers for `f128`.
    #[test]
    #[cfg(f128_enabled)]
    fn check_f128() {
        // Derived exponent constants
        assert_eq!(f128::EXP_SAT, 0x7fff);
        assert_eq!(f128::EXP_BIAS, 16383);
        assert_eq!(f128::EXP_MAX, 16383);
        assert_eq!(f128::EXP_MIN, -16382);
        assert_eq!(f128::EXP_MIN_SUBNORM, -16494);

        // `exp_unbiased` on normal values
        let half = 0.5f128;
        assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(half.exp_unbiased(), -1);
        assert_eq!(f128::MAX.exp_unbiased(), 16383);
        assert_eq!(f128::MIN.exp_unbiased(), 16383);
        assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382);

        // `exp_unbiased` is a convenience accessor rather than ldexp: zero and subnormals simply
        // report the unbiased value of an all-zero exponent field.
        let smallest_subnormal = f128::from_bits(0x1);
        assert_eq!(f128::ZERO.exp_unbiased(), -16383);
        assert_eq!(smallest_subnormal.exp_unbiased(), -16383);
        assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL);

        // `from_parts`
        let neg_one = f128::from_parts(true, f128::EXP_BIAS, 0);
        assert_biteq!(neg_one, -1.0f128);
        let lowest_bit = f128::from_parts(false, 0, 1);
        assert_biteq!(lowest_bit, f128::from_bits(0x1));
    }
}