libm/math/sqrtf.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/// The square root of `x` (f32).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn sqrtf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.sqrt` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return if x < 0.0 {
::core::f32::NAN
} else {
unsafe { ::core::intrinsics::sqrtf32(x) }
}
}
}
#[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
{
// Note: This path is unlikely since LLVM will usually have already
// optimized sqrt calls into hardware instructions if sse is available,
// but if someone does end up here they'll appreciate the speed increase.
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
unsafe {
let m = _mm_set_ss(x);
let m_sqrt = _mm_sqrt_ss(m);
_mm_cvtss_f32(m_sqrt)
}
}
#[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))]
{
const TINY: f32 = 1.0e-30;
let mut z: f32;
let sign: i32 = 0x80000000u32 as i32;
let mut ix: i32;
let mut s: i32;
let mut q: i32;
let mut m: i32;
let mut t: i32;
let mut i: i32;
let mut r: u32;
ix = x.to_bits() as i32;
/* take care of Inf and NaN */
if (ix as u32 & 0x7f800000) == 0x7f800000 {
return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
}
/* take care of zero */
if ix <= 0 {
if (ix & !sign) == 0 {
return x; /* sqrt(+-0) = +-0 */
}
if ix < 0 {
return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
}
}
/* normalize x */
m = ix >> 23;
if m == 0 {
/* subnormal x */
i = 0;
while ix & 0x00800000 == 0 {
ix <<= 1;
i = i + 1;
}
m -= i - 1;
}
m -= 127; /* unbias exponent */
ix = (ix & 0x007fffff) | 0x00800000;
if m & 1 == 1 {
/* odd m, double x to make it even */
ix += ix;
}
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix += ix;
q = 0;
s = 0;
r = 0x01000000; /* r = moving bit from right to left */
while r != 0 {
t = s + r as i32;
if t <= ix {
s = t + r as i32;
ix -= t;
q += r as i32;
}
ix += ix;
r >>= 1;
}
/* use floating add to find out rounding direction */
if ix != 0 {
z = 1.0 - TINY; /* raise inexact flag */
if z >= 1.0 {
z = 1.0 + TINY;
if z > 1.0 {
q += 2;
} else {
q += q & 1;
}
}
}
ix = (q >> 1) + 0x3f000000;
ix += m << 23;
f32::from_bits(ix as u32)
}
}
// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
#[cfg(not(target_arch = "powerpc64"))]
#[cfg(test)]
mod tests {
use core::f32::*;
use super::*;
#[test]
fn sanity_check() {
assert_eq!(sqrtf(100.0), 10.0);
assert_eq!(sqrtf(4.0), 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
#[test]
fn spec_tests() {
// Not Asserted: FE_INVALID exception is raised if argument is negative.
assert!(sqrtf(-1.0).is_nan());
assert!(sqrtf(NAN).is_nan());
for f in [0.0, -0.0, INFINITY].iter().copied() {
assert_eq!(sqrtf(f), f);
}
}
#[test]
fn conformance_tests() {
let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY];
let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32];
for i in 0..values.len() {
let bits = f32::to_bits(sqrtf(values[i]));
assert_eq!(results[i], bits);
}
}
}