ring/arithmetic/limbs/x86_64/
mont.rs

1// Copyright 2015-2025 Brian Smith.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15#![cfg(target_arch = "x86_64")]
16
17use super::super::super::{
18    inout::{AliasingSlices2, AliasingSlices3},
19    n0::N0,
20    LimbSliceError, MAX_LIMBS,
21};
22use crate::{
23    c,
24    cpu::intel::{Adx, Bmi1, Bmi2},
25    error::LenMismatchError,
26    limb::{LeakyWindow, Limb, Window},
27    polyfill::slice::{AsChunks, AsChunksMut},
28};
29use core::num::NonZeroUsize;
30
31const _512_IS_LIMB_BITS_TIMES_8: () = assert!(8 * Limb::BITS == 512);
32
33#[inline]
34pub(in super::super::super) fn mul_mont5(
35    mut r: AsChunksMut<Limb, 8>,
36    a: AsChunks<Limb, 8>,
37    b: AsChunks<Limb, 8>,
38    m: AsChunks<Limb, 8>,
39    n0: &N0,
40    maybe_adx_bmi2: Option<(Adx, Bmi2)>,
41) -> Result<(), LimbSliceError> {
42    mul_mont5_4x(
43        (r.as_flattened_mut(), a.as_flattened(), b.as_flattened()),
44        m.into(),
45        n0,
46        maybe_adx_bmi2,
47    )
48}
49
/// First element of the length tuple that `mul_mont5_4x` hands to
/// `bn_mul_mont_ffi!`; presumably the minimum number of limbs accepted by the
/// 4x routines -- TODO confirm against the macro definition.
pub const MIN_4X: usize = 8;
51
52#[inline]
53pub(in super::super::super) fn mul_mont5_4x(
54    in_out: impl AliasingSlices3<Limb>,
55    n: AsChunks<Limb, 4>,
56    n0: &N0,
57    maybe_adx_bmi2: Option<(Adx, Bmi2)>,
58) -> Result<(), LimbSliceError> {
59    const MOD_4X: usize = 4;
60    let n = n.as_flattened();
61    if let Some(cpu) = maybe_adx_bmi2 {
62        bn_mul_mont_ffi!(in_out, n, n0, cpu, unsafe {
63            (MIN_4X, MOD_4X, (Adx, Bmi2)) => bn_mulx4x_mont
64        })
65    } else {
66        bn_mul_mont_ffi!(in_out, n, n0, (), unsafe {
67            (MIN_4X, MOD_4X, ()) => bn_mul4x_mont
68        })
69    }
70}
71
72#[inline]
73pub(in super::super::super) fn sqr_mont5(
74    mut in_out: AsChunksMut<Limb, 8>,
75    n: AsChunks<Limb, 8>,
76    n0: &N0,
77    maybe_adx_bmi2: Option<(Adx, Bmi2)>,
78) -> Result<(), LimbSliceError> {
79    prefixed_extern! {
80        // `r` and/or 'a' may alias.
81        // XXX: BoringSSL declares this to return `int`.
82        // `num` must be a non-zero multiple of 8.
83        fn bn_sqr8x_mont(
84            rp: *mut Limb,
85            ap: *const Limb,
86            mulx_adx_capable: Limb,
87            np: *const Limb,
88            n0: &N0,
89            num: c::NonZero_size_t);
90    }
91
92    let in_out = in_out.as_flattened_mut();
93    let n = n.as_flattened();
94    let num_limbs = NonZeroUsize::new(n.len()).ok_or_else(|| LimbSliceError::too_short(n.len()))?;
95
96    // Avoid stack overflow from the alloca inside.
97    if num_limbs.get() > MAX_LIMBS {
98        return Err(LimbSliceError::too_long(num_limbs.get()));
99    }
100
101    // `Limb::from(mulx_adx.is_some())`, but intentionally branchy.
102    let mulx_adx_capable = match maybe_adx_bmi2 {
103        Some(_) => Limb::from(true),
104        None => Limb::from(false),
105    };
106
107    in_out
108        .with_non_dangling_non_null_pointers_ra(num_limbs, |r, a| {
109            let n = n.as_ptr(); // Non-dangling because num_limbs > 0.
110            unsafe { bn_sqr8x_mont(r, a, mulx_adx_capable, n, n0, num_limbs) };
111        })
112        .map_err(LimbSliceError::len_mismatch)
113}
114
115#[inline(always)]
116pub(in super::super::super) fn scatter5(
117    a: AsChunks<Limb, 8>,
118    mut table: AsChunksMut<Limb, 8>,
119    power: LeakyWindow,
120) -> Result<(), LimbSliceError> {
121    prefixed_extern! {
122        // Upstream uses `num: c::size_t` too, and `power: c::size_t`; see
123        // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`.
124        fn bn_scatter5(
125            inp: *const Limb,
126            num: c::NonZero_size_t,
127            table: *mut Limb,
128            power: LeakyWindow,
129        );
130    }
131    let num_limbs = check_common(a, table.as_ref())?;
132    let a = a.as_flattened();
133    let table = table.as_flattened_mut();
134    assert!(power < 32);
135    unsafe { bn_scatter5(a.as_ptr(), num_limbs, table.as_mut_ptr(), power) };
136    Ok(())
137}
138
139// SAFETY: `power` must be less than 32.
140#[inline(always)]
141pub(in super::super::super) unsafe fn gather5(
142    mut r: AsChunksMut<Limb, 8>,
143    table: AsChunks<Limb, 8>,
144    power: Window,
145) -> Result<(), LimbSliceError> {
146    prefixed_extern! {
147        // Upstream uses `num: c::size_t` too, and `power: c::size_t`; see
148        // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`.
149        fn bn_gather5(
150            out: *mut Limb,
151            num: c::NonZero_size_t,
152            table: *const Limb,
153            power: Window);
154    }
155    let num_limbs = check_common(r.as_ref(), table)?;
156    let r = r.as_flattened_mut();
157    let table = table.as_flattened();
158    // SAFETY: We cannot assert that `power` is in range because it is secret.
159    // TODO: Create a `Window5` type that is guaranteed to be in range.
160    unsafe { bn_gather5(r.as_mut_ptr(), num_limbs, table.as_ptr(), power) };
161    Ok(())
162}
163
164// SAFETY: `power` must be less than 32.
165#[inline(always)]
166pub(in super::super::super) unsafe fn mul_mont_gather5_amm(
167    mut r: AsChunksMut<Limb, 8>,
168    a: AsChunks<Limb, 8>,
169    table: AsChunks<Limb, 8>,
170    n: AsChunks<Limb, 8>,
171    n0: &N0,
172    power: Window,
173    maybe_adx_bmi1_bmi2: Option<(Adx, Bmi1, Bmi2)>,
174) -> Result<(), LimbSliceError> {
175    prefixed_extern! {
176        // Upstream has `num: c_int` and `power: c_int`; see
177        // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`.
178        fn bn_mul4x_mont_gather5(
179            rp: *mut Limb,
180            ap: *const Limb,
181            table: *const Limb,
182            np: *const Limb,
183            n0: &N0,
184            num: c::NonZero_size_t,
185            power: Window,
186        );
187        // Upstream has `num: c_int` and `power: c_int`; see
188        // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`.
189        fn bn_mulx4x_mont_gather5(
190            rp: *mut Limb,
191            ap: *const Limb,
192            table: *const Limb,
193            np: *const Limb,
194            n0: &N0,
195            num: c::NonZero_size_t,
196            power: Window,
197        );
198    }
199    let num_limbs = check_common_with_n(r.as_ref(), table, n)?;
200    let a = a.as_flattened();
201    if a.len() != num_limbs.get() {
202        return Err(LimbSliceError::len_mismatch(LenMismatchError::new(a.len())));
203    }
204    let r = r.as_flattened_mut();
205    let r = r.as_mut_ptr();
206    let a = a.as_ptr();
207    let table = table.as_flattened();
208    let table = table.as_ptr();
209    let n = n.as_flattened();
210    let n = n.as_ptr();
211    // SAFETY: We cannot assert that `power` is in range because it is secret.
212    // TODO: Create a `Window5` type that is guaranteed to be in range.
213    if maybe_adx_bmi1_bmi2.is_some() {
214        unsafe { bn_mulx4x_mont_gather5(r, a, table, n, n0, num_limbs, power) }
215    } else {
216        unsafe { bn_mul4x_mont_gather5(r, a, table, n, n0, num_limbs, power) }
217    };
218    Ok(())
219}
220
221// SAFETY: `power` must be less than 32.
222#[inline(always)]
223pub(in super::super::super) unsafe fn power5_amm(
224    mut in_out: AsChunksMut<Limb, 8>,
225    table: AsChunks<Limb, 8>,
226    n: AsChunks<Limb, 8>,
227    n0: &N0,
228    power: Window,
229    maybe_adx_bmi1_bmi2: Option<(Adx, Bmi1, Bmi2)>,
230) -> Result<(), LimbSliceError> {
231    prefixed_extern! {
232        // Upstream has `num: c_int` and `power: c_int`; see
233        // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`.
234        fn bn_power5_nohw(
235            rp: *mut Limb,
236            ap: *const Limb,
237            table: *const Limb,
238            np: *const Limb,
239            n0: &N0,
240            num: c::NonZero_size_t,
241            power: Window,
242        );
243        // Upstream has `num: c_int` and `power: c_int`; see
244        // `_MAX_LIMBS_ADDRESSES_MEMORY_SAFETY_ISSUES`.
245        fn bn_powerx5(
246            rp: *mut Limb,
247            ap: *const Limb,
248            table: *const Limb,
249            np: *const Limb,
250            n0: &N0,
251            num: c::NonZero_size_t,
252            power: Window,
253        );
254    }
255    let num_limbs = check_common_with_n(in_out.as_ref(), table, n)?;
256    let in_out = in_out.as_flattened_mut();
257    let r = in_out.as_mut_ptr();
258    let a = in_out.as_ptr();
259    let table = table.as_flattened();
260    let table = table.as_ptr();
261    let n = n.as_flattened();
262    let n = n.as_ptr();
263    // SAFETY: We cannot assert that `power` is in range because it is secret.
264    // TODO: Create a `Window5` type that is guaranteed to be in range.
265    if maybe_adx_bmi1_bmi2.is_some() {
266        unsafe { bn_powerx5(r, a, table, n, n0, num_limbs, power) }
267    } else {
268        unsafe { bn_power5_nohw(r, a, table, n, n0, num_limbs, power) }
269    };
270    Ok(())
271}
272
273// Helps the compiler will be able to hoist all of these checks out of the
274// loops in the caller. Try to help the compiler by doing the checks
275// consistently in each function and also by inlining this function and all the
276// callers.
277#[inline(always)]
278fn check_common(
279    a: AsChunks<Limb, 8>,
280    table: AsChunks<Limb, 8>,
281) -> Result<NonZeroUsize, LimbSliceError> {
282    assert_eq!((table.as_ptr() as usize) % 16, 0); // According to BoringSSL.
283    let a = a.as_flattened();
284    let table = table.as_flattened();
285    let num_limbs = NonZeroUsize::new(a.len()).ok_or_else(|| LimbSliceError::too_short(a.len()))?;
286    if num_limbs.get() > MAX_LIMBS {
287        return Err(LimbSliceError::too_long(a.len()));
288    }
289    if num_limbs.get() * 32 != table.len() {
290        return Err(LimbSliceError::len_mismatch(LenMismatchError::new(
291            table.len(),
292        )));
293    };
294    Ok(num_limbs)
295}
296
297#[inline(always)]
298fn check_common_with_n(
299    a: AsChunks<Limb, 8>,
300    table: AsChunks<Limb, 8>,
301    n: AsChunks<Limb, 8>,
302) -> Result<NonZeroUsize, LimbSliceError> {
303    // Choose `a` instead of `n` so that every function starts with
304    // `check_common` passing the exact same arguments, so that the compiler
305    // can easily de-dupe the checks.
306    let num_limbs = check_common(a, table)?;
307    let n = n.as_flattened();
308    if n.len() != num_limbs.get() {
309        return Err(LimbSliceError::len_mismatch(LenMismatchError::new(n.len())));
310    }
311    Ok(num_limbs)
312}