1use core::{f32, f64};
2
3use super::scalbn;
4
5const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
6
7struct Num {
8 m: u64,
9 e: i32,
10 sign: i32,
11}
12
13fn normalize(x: f64) -> Num {
14 let x1p63: f64 = f64::from_bits(0x43e0000000000000); let mut ix: u64 = x.to_bits();
17 let mut e: i32 = (ix >> 52) as i32;
18 let sign: i32 = e & 0x800;
19 e &= 0x7ff;
20 if e == 0 {
21 ix = (x * x1p63).to_bits();
22 e = (ix >> 52) as i32 & 0x7ff;
23 e = if e != 0 { e - 63 } else { 0x800 };
24 }
25 ix &= (1 << 52) - 1;
26 ix |= 1 << 52;
27 ix <<= 1;
28 e -= 0x3ff + 52 + 1;
29 Num { m: ix, e, sign }
30}
31
32#[inline]
33fn mul(x: u64, y: u64) -> (u64, u64) {
34 let t = (x as u128).wrapping_mul(y as u128);
35 ((t >> 64) as u64, t as u64)
36}
37
38#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
44pub fn fma(x: f64, y: f64, z: f64) -> f64 {
45 let x1p63: f64 = f64::from_bits(0x43e0000000000000); let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); let nx = normalize(x);
50 let ny = normalize(y);
51 let nz = normalize(z);
52
53 if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
54 return x * y + z;
55 }
56 if nz.e >= ZEROINFNAN {
57 if nz.e > ZEROINFNAN {
58 return x * y + z;
60 }
61 return z;
62 }
63
64 let zhi: u64;
66 let zlo: u64;
67 let (mut rhi, mut rlo) = mul(nx.m, ny.m);
68 let mut e: i32 = nx.e + ny.e;
72 let mut d: i32 = nz.e - e;
73 if d > 0 {
75 if d < 64 {
76 zlo = nz.m << d;
77 zhi = nz.m >> (64 - d);
78 } else {
79 zlo = 0;
80 zhi = nz.m;
81 e = nz.e - 64;
82 d -= 64;
83 if d == 0 {
84 } else if d < 64 {
85 rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64;
86 rhi = rhi >> d;
87 } else {
88 rlo = 1;
89 rhi = 0;
90 }
91 }
92 } else {
93 zhi = 0;
94 d = -d;
95 if d == 0 {
96 zlo = nz.m;
97 } else if d < 64 {
98 zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64;
99 } else {
100 zlo = 1;
101 }
102 }
103
104 let mut sign: i32 = nx.sign ^ ny.sign;
106 let samesign: bool = (sign ^ nz.sign) == 0;
107 let mut nonzero: i32 = 1;
108 if samesign {
109 rlo = rlo.wrapping_add(zlo);
111 rhi += zhi + (rlo < zlo) as u64;
112 } else {
113 let (res, borrow) = rlo.overflowing_sub(zlo);
115 rlo = res;
116 rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64));
117 if (rhi >> 63) != 0 {
118 rlo = (rlo as i64).wrapping_neg() as u64;
119 rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64;
120 sign = (sign == 0) as i32;
121 }
122 nonzero = (rhi != 0) as i32;
123 }
124
125 if nonzero != 0 {
127 e += 64;
128 d = rhi.leading_zeros() as i32 - 1;
129 rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64;
131 } else if rlo != 0 {
132 d = rlo.leading_zeros() as i32 - 1;
133 if d < 0 {
134 rhi = rlo >> 1 | (rlo & 1);
135 } else {
136 rhi = rlo << d;
137 }
138 } else {
139 return x * y + z;
141 }
142 e -= d;
143
144 let mut i: i64 = rhi as i64; if sign != 0 {
147 i = -i;
148 }
149 let mut r: f64 = i as f64; if e < -1022 - 62 {
152 if e == -1022 - 63 {
154 let mut c: f64 = x1p63;
155 if sign != 0 {
156 c = -c;
157 }
158 if r == c {
159 let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
163 return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
164 }
165 if (rhi << 53) != 0 {
168 i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
169 if sign != 0 {
170 i = -i;
171 }
172 r = i as f64;
173 r = 2. * r - c; {
178 let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
179 r += (tiny * tiny) * (r - r);
180 }
181 }
182 } else {
183 d = 10;
185 i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
186 if sign != 0 {
187 i = -i;
188 }
189 r = i as f64;
190 }
191 }
192 scalbn(r, e)
193}
194
195#[cfg(test)]
196mod tests {
197 use super::*;
198 #[test]
199 fn fma_segfault() {
200 assert_eq!(
202 fma(
203 -0.0000000000000002220446049250313,
204 -0.0000000000000002220446049250313,
205 -0.0000000000000002220446049250313
206 ),
207 -0.00000000000000022204460492503126,
208 );
209
210 let result = fma(-0.992, -0.992, -0.992);
211 #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
213 let result = force_eval!(result);
214 assert_eq!(result, -0.007936000000000007,);
215 }
216
217 #[test]
218 fn fma_sbb() {
219 assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277);
220 }
221
222 #[test]
223 fn fma_underflow() {
224 assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,);
225 }
226}