1#![allow(non_snake_case)]
28
// Blend masks selecting the 32-bit lanes belonging to each of the four
// field elements A, B, C, D within a u32x8. Per the digit layout
// (a_2i, b_2i, a_2i_1, b_2i_1, c_2i, d_2i, c_2i_1, d_2i_1), the A digits
// sit in lanes 0 and 2, B in lanes 1 and 3, C in lanes 4 and 6, D in 5 and 7.
const A_LANES: u8 = 0b0000_0101;
const B_LANES: u8 = 0b0000_1010;
const C_LANES: u8 = 0b0101_0000;
const D_LANES: u8 = 0b1010_0000;

// The same selections for the u64x4 view of a register: each 64-bit lane
// covers two 32-bit blend positions, so each element occupies two adjacent bits.
#[allow(unused)]
const A_LANES64: u8 = 0b00_00_00_11;
#[allow(unused)]
const B_LANES64: u8 = 0b00_00_11_00;
#[allow(unused)]
const C_LANES64: u8 = 0b00_11_00_00;
#[allow(unused)]
const D_LANES64: u8 = 0b11_00_00_00;
42
43use crate::backend::vector::packed_simd::{u32x8, u64x4};
44use core::ops::{Add, Mul, Neg};
45
46use crate::backend::serial::u64::field::FieldElement51;
47use crate::backend::vector::avx2::constants::{
48 P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO,
49};
50
51use curve25519_dalek_derive::unsafe_target_feature;
52
/// Unpack 32-bit lanes into spread 64-bit lanes:
///
/// input  `(a0, b0, a1, b1, c0, d0, c1, d1)` becomes
/// `a = (a0, 0, b0, 0, c0, 0, d0, 0)` and `b = (a1, 0, b1, 0, c1, 0, d1, 0)`,
/// i.e. the even and odd digits of each element, zero-extended so that
/// `mul32` can form 32x32 -> 64-bit products on them.
#[unsafe_target_feature("avx2")]
#[inline(always)]
fn unpack_pair(src: u32x8) -> (u32x8, u32x8) {
    let a: u32x8;
    let b: u32x8;
    let zero = u32x8::splat(0);
    // SAFETY: these intrinsics require AVX2, which is guaranteed by the
    // #[unsafe_target_feature("avx2")] attribute on this function.
    unsafe {
        use core::arch::x86_64::_mm256_unpackhi_epi32;
        use core::arch::x86_64::_mm256_unpacklo_epi32;
        // Interleaving with zero spreads the low (resp. high) 32-bit words
        // of each 128-bit half into zero-extended 64-bit lanes.
        a = _mm256_unpacklo_epi32(src.into(), zero.into()).into();
        b = _mm256_unpackhi_epi32(src.into(), zero.into()).into();
    }
    (a, b)
}
76
/// Repack spread 64-bit lanes into 32-bit lanes — the inverse of `unpack_pair`:
///
/// `x = (a0, 0, b0, 0, c0, 0, d0, 0)`, `y = (a1, 0, b1, 0, c1, 0, d1, 0)`
/// becomes `(a0, b0, a1, b1, c0, d0, c1, d1)`.
#[unsafe_target_feature("avx2")]
#[inline(always)]
fn repack_pair(x: u32x8, y: u32x8) -> u32x8 {
    // SAFETY: these intrinsics require AVX2, which is guaranteed by the
    // #[unsafe_target_feature("avx2")] attribute on this function.
    unsafe {
        use core::arch::x86_64::_mm256_blend_epi32;
        use core::arch::x86_64::_mm256_shuffle_epi32;

        // Gather the non-zero words of x into positions 0,1 (per 128-bit
        // half) and those of y into positions 2,3, then blend.
        let x_shuffled = _mm256_shuffle_epi32(x.into(), 0b11_01_10_00);
        let y_shuffled = _mm256_shuffle_epi32(y.into(), 0b10_00_11_01);

        // Mask 0b11001100: take words 0,1,4,5 from x_shuffled and
        // words 2,3,6,7 from y_shuffled.
        _mm256_blend_epi32(x_shuffled, y_shuffled, 0b11001100).into()
    }
}
105
/// A subset of the lanes `A, B, C, D` of a `FieldElement2625x4`, used to
/// select which of the four elements a `blend` operation replaces.
#[allow(clippy::upper_case_acronyms)]
#[derive(Copy, Clone, Debug)]
pub enum Lanes {
    C,
    D,
    AB,
    AC,
    CD,
    AD,
    BC,
    ABCD,
}
124
/// A permutation of the lanes `(A, B, C, D)` of a `FieldElement2625x4`:
/// the variant named `XYZW` produces the output `(X, Y, Z, W)` from the
/// input `(A, B, C, D)`.
#[allow(clippy::upper_case_acronyms)]
#[derive(Copy, Clone, Debug)]
pub enum Shuffle {
    AAAA,
    BBBB,
    CACA,
    DBBD,
    ADDA,
    CBCB,
    ABAB,
    BADC,
    BACD,
    ABDC,
}
144
/// A vector of four field elements in radix-2^25.5 representation.
///
/// Stored as five `u32x8`s: vector `i` holds digit pair (2i, 2i+1) of all
/// four elements, in the lane order
/// `(a_2i, b_2i, a_2i_1, b_2i_1, c_2i, d_2i, c_2i_1, d_2i_1)`
/// (see `split`/`new` for the packing and unpacking).
#[derive(Clone, Copy, Debug)]
pub struct FieldElement2625x4(pub(crate) [u32x8; 5]);
154
155use subtle::Choice;
156use subtle::ConditionallySelectable;
157
// Constant-time selection between two vectors of field elements: the
// choice bit is expanded to a full bitmask and applied with XOR/AND, so
// there are no data-dependent branches or loads.
#[unsafe_target_feature("avx2")]
impl ConditionallySelectable for FieldElement2625x4 {
    fn conditional_select(
        a: &FieldElement2625x4,
        b: &FieldElement2625x4,
        choice: Choice,
    ) -> FieldElement2625x4 {
        // mask = 0xFFFF_FFFF when choice == 1, 0x0000_0000 when choice == 0.
        let mask = (-(choice.unwrap_u8() as i32)) as u32;
        let mask_vec = u32x8::splat(mask);
        // a ^ (mask & (a ^ b)) yields b where the mask is set, a otherwise.
        FieldElement2625x4([
            a.0[0] ^ (mask_vec & (a.0[0] ^ b.0[0])),
            a.0[1] ^ (mask_vec & (a.0[1] ^ b.0[1])),
            a.0[2] ^ (mask_vec & (a.0[2] ^ b.0[2])),
            a.0[3] ^ (mask_vec & (a.0[3] ^ b.0[3])),
            a.0[4] ^ (mask_vec & (a.0[4] ^ b.0[4])),
        ])
    }

    fn conditional_assign(&mut self, other: &FieldElement2625x4, choice: Choice) {
        // Same masking identity as conditional_select, applied in place.
        let mask = (-(choice.unwrap_u8() as i32)) as u32;
        let mask_vec = u32x8::splat(mask);
        self.0[0] ^= mask_vec & (self.0[0] ^ other.0[0]);
        self.0[1] ^= mask_vec & (self.0[1] ^ other.0[1]);
        self.0[2] ^= mask_vec & (self.0[2] ^ other.0[2]);
        self.0[3] ^= mask_vec & (self.0[3] ^ other.0[3]);
        self.0[4] ^= mask_vec & (self.0[4] ^ other.0[4]);
    }
}
186
#[unsafe_target_feature("avx2")]
impl FieldElement2625x4 {
    /// The vector of four zero field elements.
    pub const ZERO: FieldElement2625x4 = FieldElement2625x4([u32x8::splat_const::<0>(); 5]);

    /// Split this vector into an array of four serial `FieldElement51`s.
    ///
    /// Inverse of `new()`: within limb-pair vector `i` the lanes are
    /// `(a_2i, b_2i, a_2i_1, b_2i_1, c_2i, d_2i, c_2i_1, d_2i_1)`; each
    /// even/odd digit pair is recombined into one radix-2^51 limb.
    #[rustfmt::skip] pub fn split(&self) -> [FieldElement51; 4] {
        let mut out = [FieldElement51::ZERO; 4];
        for i in 0..5 {
            // Extract the eight 32-bit digits of limb-pair i.
            let a_2i = self.0[i].extract::<0>() as u64;
            let b_2i = self.0[i].extract::<1>() as u64;
            let a_2i_1 = self.0[i].extract::<2>() as u64;
            let b_2i_1 = self.0[i].extract::<3>() as u64;
            let c_2i = self.0[i].extract::<4>() as u64;
            let d_2i = self.0[i].extract::<5>() as u64;
            let c_2i_1 = self.0[i].extract::<6>() as u64;
            let d_2i_1 = self.0[i].extract::<7>() as u64;
            // Recombine digit pairs: limb_i = digit_2i + 2^26 * digit_{2i+1}.
            out[0].0[i] = a_2i + (a_2i_1 << 26);
            out[1].0[i] = b_2i + (b_2i_1 << 26);
            out[2].0[i] = c_2i + (c_2i_1 << 26);
            out[3].0[i] = d_2i + (d_2i_1 << 26);
        }

        out
    }

    /// Rearrange the element lanes `(A, B, C, D)` according to `control`;
    /// e.g. `Shuffle::BADC` produces the element vector `(B, A, D, C)`.
    /// The same permutation is applied to all five limb-pair vectors.
    #[inline]
    pub fn shuffle(&self, control: Shuffle) -> FieldElement2625x4 {
        #[inline(always)]
        fn shuffle_lanes(x: u32x8, control: Shuffle) -> u32x8 {
            // SAFETY: vpermd requires AVX2, which is guaranteed by the
            // #[unsafe_target_feature("avx2")] attribute on the impl.
            unsafe {
                use core::arch::x86_64::_mm256_permutevar8x32_epi32;

                // Index vectors for vpermd; indices refer to the digit
                // layout (a_2i, b_2i, a_2i_1, b_2i_1, c_2i, d_2i, c_2i_1, d_2i_1),
                // so e.g. AAAA gathers the a-digits (lanes 0, 2) everywhere.
                let c: u32x8 = match control {
                    Shuffle::AAAA => u32x8::new(0, 0, 2, 2, 0, 0, 2, 2),
                    Shuffle::BBBB => u32x8::new(1, 1, 3, 3, 1, 1, 3, 3),
                    Shuffle::CACA => u32x8::new(4, 0, 6, 2, 4, 0, 6, 2),
                    Shuffle::DBBD => u32x8::new(5, 1, 7, 3, 1, 5, 3, 7),
                    Shuffle::ADDA => u32x8::new(0, 5, 2, 7, 5, 0, 7, 2),
                    Shuffle::CBCB => u32x8::new(4, 1, 6, 3, 4, 1, 6, 3),
                    Shuffle::ABAB => u32x8::new(0, 1, 2, 3, 0, 1, 2, 3),
                    Shuffle::BADC => u32x8::new(1, 0, 3, 2, 5, 4, 7, 6),
                    Shuffle::BACD => u32x8::new(1, 0, 3, 2, 4, 5, 6, 7),
                    Shuffle::ABDC => u32x8::new(0, 1, 2, 3, 5, 4, 7, 6),
                };
                _mm256_permutevar8x32_epi32(x.into(), c.into()).into()
            }
        }

        FieldElement2625x4([
            shuffle_lanes(self.0[0], control),
            shuffle_lanes(self.0[1], control),
            shuffle_lanes(self.0[2], control),
            shuffle_lanes(self.0[3], control),
            shuffle_lanes(self.0[4], control),
        ])
    }

    /// Blend `other` into `self`, replacing the element lanes selected by
    /// `control` and keeping the rest of `self` unchanged.
    #[inline]
    pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 {
        #[inline(always)]
        fn blend_lanes(x: u32x8, y: u32x8, control: Lanes) -> u32x8 {
            // SAFETY: vpblendd requires AVX2, which is guaranteed by the
            // #[unsafe_target_feature("avx2")] attribute on the impl.
            unsafe {
                use core::arch::x86_64::_mm256_blend_epi32;

                // The immediate masks are built from the per-element lane
                // constants A_LANES..D_LANES defined at the top of the file.
                match control {
                    Lanes::C => _mm256_blend_epi32(x.into(), y.into(), C_LANES as i32).into(),
                    Lanes::D => _mm256_blend_epi32(x.into(), y.into(), D_LANES as i32).into(),
                    Lanes::AD => {
                        _mm256_blend_epi32(x.into(), y.into(), (A_LANES | D_LANES) as i32).into()
                    }
                    Lanes::AB => {
                        _mm256_blend_epi32(x.into(), y.into(), (A_LANES | B_LANES) as i32).into()
                    }
                    Lanes::AC => {
                        _mm256_blend_epi32(x.into(), y.into(), (A_LANES | C_LANES) as i32).into()
                    }
                    Lanes::CD => {
                        _mm256_blend_epi32(x.into(), y.into(), (C_LANES | D_LANES) as i32).into()
                    }
                    Lanes::BC => {
                        _mm256_blend_epi32(x.into(), y.into(), (B_LANES | C_LANES) as i32).into()
                    }
                    Lanes::ABCD => _mm256_blend_epi32(
                        x.into(),
                        y.into(),
                        (A_LANES | B_LANES | C_LANES | D_LANES) as i32,
                    )
                    .into(),
                }
            }
        }

        FieldElement2625x4([
            blend_lanes(self.0[0], other.0[0], control),
            blend_lanes(self.0[1], other.0[1], control),
            blend_lanes(self.0[2], other.0[2], control),
            blend_lanes(self.0[3], other.0[3], control),
            blend_lanes(self.0[4], other.0[4], control),
        ])
    }

    /// Broadcast a single field element into all four lanes.
    pub fn splat(x: &FieldElement51) -> FieldElement2625x4 {
        FieldElement2625x4::new(x, x, x, x)
    }

    /// Pack four serial field elements into the vectorized representation.
    ///
    /// Each radix-2^51 limb is split into a low 26-bit digit and a high
    /// digit (the remaining bits), interleaved as
    /// `(a_2i, b_2i, a_2i_1, b_2i_1, c_2i, d_2i, c_2i_1, d_2i_1)`.
    #[rustfmt::skip] pub fn new(
        x0: &FieldElement51,
        x1: &FieldElement51,
        x2: &FieldElement51,
        x3: &FieldElement51,
    ) -> FieldElement2625x4 {
        let mut buf = [u32x8::splat(0); 5];
        let low_26_bits = (1 << 26) - 1;
        #[allow(clippy::needless_range_loop)]
        for i in 0..5 {
            let a_2i = (x0.0[i] & low_26_bits) as u32;
            let a_2i_1 = (x0.0[i] >> 26) as u32;
            let b_2i = (x1.0[i] & low_26_bits) as u32;
            let b_2i_1 = (x1.0[i] >> 26) as u32;
            let c_2i = (x2.0[i] & low_26_bits) as u32;
            let c_2i_1 = (x2.0[i] >> 26) as u32;
            let d_2i = (x3.0[i] & low_26_bits) as u32;
            let d_2i_1 = (x3.0[i] >> 26) as u32;

            buf[i] = u32x8::new(a_2i, b_2i, a_2i_1, b_2i_1, c_2i, d_2i, c_2i_1, d_2i_1);
        }

        // The high digits (limb >> 26) may exceed their nominal 25/26-bit
        // radix if the input limbs were not fully reduced, so normalize.
        FieldElement2625x4(buf).reduce()
    }

    /// Negate lazily: compute `2p - self` digit-wise, without reducing.
    ///
    /// Digit-pair 0 subtracts from the distinct low-digit constant
    /// `P_TIMES_2_LO`; the remaining pairs subtract from `P_TIMES_2_HI`
    /// (constants defined in `avx2::constants`).
    #[inline]
    pub fn negate_lazy(&self) -> FieldElement2625x4 {
        FieldElement2625x4([
            P_TIMES_2_LO - self.0[0],
            P_TIMES_2_HI - self.0[1],
            P_TIMES_2_HI - self.0[2],
            P_TIMES_2_HI - self.0[3],
            P_TIMES_2_HI - self.0[4],
        ])
    }

    /// Given `(A, B, C, D)`, compute `(B - A, B + A, D - C, D + C)`.
    ///
    /// The differences use `negate_lazy`, so the output is not reduced.
    #[inline]
    pub fn diff_sum(&self) -> FieldElement2625x4 {
        // tmp1 = (B, A, D, C)
        let tmp1 = self.shuffle(Shuffle::BADC);
        // tmp2 = (-A, B, -C, D), with lazy (unreduced) negations
        let tmp2 = self.blend(self.negate_lazy(), Lanes::AC);
        // (B - A, B + A, D - C, D + C)
        tmp1 + tmp2
    }

    /// Carry-reduce all digits back within their radix (26 bits for even
    /// digits, 25 for odd), wrapping the carry out of the top digit into
    /// digit 0 multiplied by 19 (since 2^255 = 19 mod p).
    #[inline]
    pub fn reduce(&self) -> FieldElement2625x4 {
        // Per-position shift amounts and masks: the digit widths alternate
        // (26, 26, 25, 25) within each 128-bit half, matching the
        // (even, even, odd, odd) digit layout.
        let shifts = u32x8::new(26, 26, 25, 25, 26, 26, 25, 25);
        let masks = u32x8::new(
            (1 << 26) - 1,
            (1 << 26) - 1,
            (1 << 25) - 1,
            (1 << 25) - 1,
            (1 << 26) - 1,
            (1 << 26) - 1,
            (1 << 25) - 1,
            (1 << 25) - 1,
        );

        // Carry out of each digit, with the 64-bit sub-lanes of each
        // 128-bit half swapped so that each digit's carry sits in the
        // position of the digit it propagates into.
        let rotated_carryout = |v: u32x8| -> u32x8 {
            // SAFETY: requires AVX2, guaranteed by #[unsafe_target_feature].
            unsafe {
                use core::arch::x86_64::_mm256_shuffle_epi32;
                use core::arch::x86_64::_mm256_srlv_epi32;

                let c = _mm256_srlv_epi32(v.into(), shifts.into());
                _mm256_shuffle_epi32(c, 0b01_00_11_10).into()
            }
        };

        // Merge two rotated carry vectors: mask 0b11_00_11_00 takes the
        // odd-digit positions from v_hi and the even-digit positions
        // from v_lo.
        let combine = |v_lo: u32x8, v_hi: u32x8| -> u32x8 {
            // SAFETY: requires AVX2, guaranteed by #[unsafe_target_feature].
            unsafe {
                use core::arch::x86_64::_mm256_blend_epi32;
                _mm256_blend_epi32(v_lo.into(), v_hi.into(), 0b11_00_11_00).into()
            }
        };

        let mut v = self.0;

        // Sequential carry chain over the five digit-pair vectors:
        // c_{2i+1,2i} holds the carries out of digits 2i and 2i+1.
        let c10 = rotated_carryout(v[0]);
        v[0] = (v[0] & masks) + combine(u32x8::splat(0), c10);

        let c32 = rotated_carryout(v[1]);
        v[1] = (v[1] & masks) + combine(c10, c32);

        let c54 = rotated_carryout(v[2]);
        v[2] = (v[2] & masks) + combine(c32, c54);

        let c76 = rotated_carryout(v[3]);
        v[3] = (v[3] & masks) + combine(c54, c76);

        let c98 = rotated_carryout(v[4]);
        v[4] = (v[4] & masks) + combine(c76, c98);

        // Multiply the carry out of digit 9 by 19 so it can wrap around
        // into digit 0.
        let c9_19: u32x8 = unsafe {
            use core::arch::x86_64::_mm256_mul_epu32;
            use core::arch::x86_64::_mm256_shuffle_epi32;

            // Spread the c9 words into the even positions of 64-bit lanes
            // so vpmuludq can form the 32x32 -> 64-bit products by 19.
            let c9_spread = _mm256_shuffle_epi32(c98.into(), 0b11_01_10_00);

            let c9_19_spread = _mm256_mul_epu32(c9_spread, u64x4::splat(19).into());

            // Move the products back to their original word positions.
            _mm256_shuffle_epi32(c9_19_spread, 0b11_01_10_00).into()
        };

        v[0] += c9_19;

        FieldElement2625x4(v)
    }

    /// Reduce an array of ten 64-bit digit vectors (the output of a
    /// multiplication or squaring) back to the packed 32-bit representation.
    #[inline]
    #[rustfmt::skip] fn reduce64(mut z: [u64x4; 10]) -> FieldElement2625x4 {
        // Digit-width masks: odd digits are 25 bits, even digits 26 bits.
        let LOW_25_BITS: u64x4 = u64x4::splat((1 << 25) - 1);
        let LOW_26_BITS: u64x4 = u64x4::splat((1 << 26) - 1);

        // Carry the excess of digit i into digit i+1.
        let carry = |z: &mut [u64x4; 10], i: usize| {
            debug_assert!(i < 9);
            if i % 2 == 0 {
                z[i + 1] += z[i].shr::<26>();
                z[i] &= LOW_26_BITS;
            } else {
                z[i + 1] += z[i].shr::<25>();
                z[i] &= LOW_25_BITS;
            }
        };

        // Two interleaved carry chains (0..4 and 4..8) cover all digits.
        carry(&mut z, 0); carry(&mut z, 4);
        carry(&mut z, 1); carry(&mut z, 5);
        carry(&mut z, 2); carry(&mut z, 6);
        carry(&mut z, 3); carry(&mut z, 7);
        carry(&mut z, 4); carry(&mut z, 8);

        // Wrap the carry out of the top digit back around, multiplied by
        // 19 (2^255 = 19 mod p). It is split into a low 26-bit part (into
        // digit 0) and a high part (into digit 1).
        let c = z[9].shr::<25>();
        z[9] &= LOW_25_BITS;
        let mut c0: u64x4 = c & LOW_26_BITS;
        let mut c1: u64x4 = c.shr::<26>();
        let x19 = u64x4::splat(19);
        // mul32 multiplies the low 32-bit word of each 64-bit lane.
        c0 = u32x8::from(c0).mul32(u32x8::from(x19));
        c1 = u32x8::from(c1).mul32(u32x8::from(x19));

        z[0] += c0;
        z[1] += c1;
        // One more carry renormalizes digit 0 after the wrap-around add.
        carry(&mut z, 0);

        // Repack each pair of spread 64-bit digit vectors into the
        // interleaved 32-bit layout.
        FieldElement2625x4([
            repack_pair(z[0].into(), z[1].into()),
            repack_pair(z[2].into(), z[3].into()),
            repack_pair(z[4].into(), z[5].into()),
            repack_pair(z[6].into(), z[7].into()),
            repack_pair(z[8].into(), z[9].into()),
        ])
    }

    /// Square each element and negate the result in the D lane:
    /// given `(A, B, C, D)`, returns `(A^2, B^2, C^2, -D^2)`.
    ///
    /// NOTE(review): assumes the inputs are bounded so that the u64x4
    /// digit accumulators below cannot overflow — TODO confirm bounds.
    #[rustfmt::skip] pub fn square_and_negate_D(&self) -> FieldElement2625x4 {
        // 32x32 -> 64-bit lane-wise product.
        #[inline(always)]
        fn m(x: u32x8, y: u32x8) -> u64x4 {
            x.mul32(y)
        }

        // Same product, reinterpreted as packed 32-bit lanes.
        #[inline(always)]
        fn m_lo(x: u32x8, y: u32x8) -> u32x8 {
            x.mul32(y).into()
        }

        let v19 = u32x8::new(19, 0, 19, 0, 19, 0, 19, 0);

        // Split each digit-pair vector into even/odd digit vectors x0..x9.
        let (x0, x1) = unpack_pair(self.0[0]);
        let (x2, x3) = unpack_pair(self.0[1]);
        let (x4, x5) = unpack_pair(self.0[2]);
        let (x6, x7) = unpack_pair(self.0[3]);
        let (x8, x9) = unpack_pair(self.0[4]);

        // Doubled digits for the symmetric cross terms of the square.
        let x0_2 = x0.shl::<1>();
        let x1_2 = x1.shl::<1>();
        let x2_2 = x2.shl::<1>();
        let x3_2 = x3.shl::<1>();
        let x4_2 = x4.shl::<1>();
        let x5_2 = x5.shl::<1>();
        let x6_2 = x6.shl::<1>();
        let x7_2 = x7.shl::<1>();

        // Wrap-around multiples x_i * 19 for terms that cross 2^255.
        let x5_19 = m_lo(v19, x5);
        let x6_19 = m_lo(v19, x6);
        let x7_19 = m_lo(v19, x7);
        let x8_19 = m_lo(v19, x8);
        let x9_19 = m_lo(v19, x9);

        // Schoolbook squaring terms for the digits z0..z9 of x^2.
        let mut z0 = m(x0, x0) + m(x2_2, x8_19) + m(x4_2, x6_19) + ((m(x1_2, x9_19) + m(x3_2, x7_19) + m(x5, x5_19)).shl::<1>());
        let mut z1 = m(x0_2, x1) + m(x3_2, x8_19) + m(x5_2, x6_19) + ((m(x2, x9_19) + m(x4, x7_19)).shl::<1>());
        let mut z2 = m(x0_2, x2) + m(x1_2, x1) + m(x4_2, x8_19) + m(x6, x6_19) + ((m(x3_2, x9_19) + m(x5_2, x7_19)).shl::<1>());
        let mut z3 = m(x0_2, x3) + m(x1_2, x2) + m(x5_2, x8_19) + ((m(x4, x9_19) + m(x6, x7_19)).shl::<1>());
        let mut z4 = m(x0_2, x4) + m(x1_2, x3_2) + m(x2, x2) + m(x6_2, x8_19) + ((m(x5_2, x9_19) + m(x7, x7_19)).shl::<1>());
        let mut z5 = m(x0_2, x5) + m(x1_2, x4) + m(x2_2, x3) + m(x7_2, x8_19) + ((m(x6, x9_19)).shl::<1>());
        let mut z6 = m(x0_2, x6) + m(x1_2, x5_2) + m(x2_2, x4) + m(x3_2, x3) + m(x8, x8_19) + ((m(x7_2, x9_19)).shl::<1>());
        let mut z7 = m(x0_2, x7) + m(x1_2, x6) + m(x2_2, x5) + m(x3_2, x4) + ((m(x8, x9_19)).shl::<1>());
        let mut z8 = m(x0_2, x8) + m(x1_2, x7_2) + m(x2_2, x6) + m(x3_2, x5_2) + m(x4, x4) + ((m(x9, x9_19)).shl::<1>());
        let mut z9 = m(x0_2, x9) + m(x1_2, x8) + m(x2_2, x7) + m(x3_2, x6) + m(x4_2, x5);

        // Digit-wise multiples of p shifted left by 37, used so the D lane
        // can be negated as (p_digit - digit) without going negative:
        // 0x3ffffed = 2^26 - 19 (digit 0), 0x3ffffff = 2^26 - 1 (even
        // digits), 0x1ffffff = 2^25 - 1 (odd digits).
        let low__p37 = u64x4::splat(0x3ffffed << 37);
        let even_p37 = u64x4::splat(0x3ffffff << 37);
        let odd__p37 = u64x4::splat(0x1ffffff << 37);

        // Replace only the D-lane digits (mask D_LANES64) by p - digit.
        let negate_D = |x: u64x4, p: u64x4| -> u64x4 {
            // SAFETY: requires AVX2, guaranteed by #[unsafe_target_feature].
            unsafe {
                use core::arch::x86_64::_mm256_blend_epi32;
                _mm256_blend_epi32(x.into(), (p - x).into(), D_LANES64 as i32).into()
            }
        };

        z0 = negate_D(z0, low__p37);
        z1 = negate_D(z1, odd__p37);
        z2 = negate_D(z2, even_p37);
        z3 = negate_D(z3, odd__p37);
        z4 = negate_D(z4, even_p37);
        z5 = negate_D(z5, odd__p37);
        z6 = negate_D(z6, even_p37);
        z7 = negate_D(z7, odd__p37);
        z8 = negate_D(z8, even_p37);
        z9 = negate_D(z9, odd__p37);

        FieldElement2625x4::reduce64([z0, z1, z2, z3, z4, z5, z6, z7, z8, z9])
    }
}
683
684#[unsafe_target_feature("avx2")]
685impl Neg for FieldElement2625x4 {
686 type Output = FieldElement2625x4;
687
688 #[inline]
701 fn neg(self) -> FieldElement2625x4 {
702 FieldElement2625x4([
703 P_TIMES_16_LO - self.0[0],
704 P_TIMES_16_HI - self.0[1],
705 P_TIMES_16_HI - self.0[2],
706 P_TIMES_16_HI - self.0[3],
707 P_TIMES_16_HI - self.0[4],
708 ])
709 .reduce()
710 }
711}
712
713#[unsafe_target_feature("avx2")]
714impl Add<FieldElement2625x4> for FieldElement2625x4 {
715 type Output = FieldElement2625x4;
716 #[inline]
718 fn add(self, rhs: FieldElement2625x4) -> FieldElement2625x4 {
719 FieldElement2625x4([
720 self.0[0] + rhs.0[0],
721 self.0[1] + rhs.0[1],
722 self.0[2] + rhs.0[2],
723 self.0[3] + rhs.0[3],
724 self.0[4] + rhs.0[4],
725 ])
726 }
727}
728
729#[unsafe_target_feature("avx2")]
730impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 {
731 type Output = FieldElement2625x4;
732 #[inline]
738 fn mul(self, scalars: (u32, u32, u32, u32)) -> FieldElement2625x4 {
739 let consts = u32x8::new(scalars.0, 0, scalars.1, 0, scalars.2, 0, scalars.3, 0);
740
741 let (b0, b1) = unpack_pair(self.0[0]);
742 let (b2, b3) = unpack_pair(self.0[1]);
743 let (b4, b5) = unpack_pair(self.0[2]);
744 let (b6, b7) = unpack_pair(self.0[3]);
745 let (b8, b9) = unpack_pair(self.0[4]);
746
747 FieldElement2625x4::reduce64([
748 b0.mul32(consts),
749 b1.mul32(consts),
750 b2.mul32(consts),
751 b3.mul32(consts),
752 b4.mul32(consts),
753 b5.mul32(consts),
754 b6.mul32(consts),
755 b7.mul32(consts),
756 b8.mul32(consts),
757 b9.mul32(consts),
758 ])
759 }
760}
761
#[unsafe_target_feature("avx2")]
impl Mul<&FieldElement2625x4> for &FieldElement2625x4 {
    type Output = FieldElement2625x4;

    /// Lane-wise field multiplication:
    /// `(A1, B1, C1, D1) * (A2, B2, C2, D2) = (A1 A2, B1 B2, C1 C2, D1 D2)`.
    ///
    /// Schoolbook multiplication in radix 2^25.5 with the wrap-around terms
    /// pre-multiplied by 19 (since 2^255 = 19 mod p); the 64-bit digit
    /// products are then carried and repacked by `reduce64`.
    ///
    /// NOTE(review): assumes the inputs are bounded so that the u64x4
    /// digit accumulators below cannot overflow — TODO confirm bounds.
    #[rustfmt::skip] #[inline]
    fn mul(self, rhs: &FieldElement2625x4) -> FieldElement2625x4 {
        // 32x32 -> 64-bit lane-wise product.
        #[inline(always)]
        fn m(x: u32x8, y: u32x8) -> u64x4 {
            x.mul32(y)
        }

        // Same product, reinterpreted as packed 32-bit lanes.
        #[inline(always)]
        fn m_lo(x: u32x8, y: u32x8) -> u32x8 {
            x.mul32(y).into()
        }

        // Split both operands into even/odd digit vectors x0..x9, y0..y9.
        let (x0, x1) = unpack_pair(self.0[0]);
        let (x2, x3) = unpack_pair(self.0[1]);
        let (x4, x5) = unpack_pair(self.0[2]);
        let (x6, x7) = unpack_pair(self.0[3]);
        let (x8, x9) = unpack_pair(self.0[4]);

        let (y0, y1) = unpack_pair(rhs.0[0]);
        let (y2, y3) = unpack_pair(rhs.0[1]);
        let (y4, y5) = unpack_pair(rhs.0[2]);
        let (y6, y7) = unpack_pair(rhs.0[3]);
        let (y8, y9) = unpack_pair(rhs.0[4]);

        let v19 = u32x8::new(19, 0, 19, 0, 19, 0, 19, 0);

        // Wrap-around multiples y_i * 19 for terms that cross 2^255.
        let y1_19 = m_lo(v19, y1);
        let y2_19 = m_lo(v19, y2);
        let y3_19 = m_lo(v19, y3);
        let y4_19 = m_lo(v19, y4);
        let y5_19 = m_lo(v19, y5);
        let y6_19 = m_lo(v19, y6);
        let y7_19 = m_lo(v19, y7);
        let y8_19 = m_lo(v19, y8);
        let y9_19 = m_lo(v19, y9);

        // Doubled odd digits of x, needed for the radix-2^25.5 cross terms.
        let x1_2 = x1 + x1;
        let x3_2 = x3 + x3;
        let x5_2 = x5 + x5;
        let x7_2 = x7 + x7;
        let x9_2 = x9 + x9;

        // Schoolbook digit products z0..z9 of x * y.
        let z0 = m(x0, y0) + m(x1_2, y9_19) + m(x2, y8_19) + m(x3_2, y7_19) + m(x4, y6_19) + m(x5_2, y5_19) + m(x6, y4_19) + m(x7_2, y3_19) + m(x8, y2_19) + m(x9_2, y1_19);
        let z1 = m(x0, y1) + m(x1, y0) + m(x2, y9_19) + m(x3, y8_19) + m(x4, y7_19) + m(x5, y6_19) + m(x6, y5_19) + m(x7, y4_19) + m(x8, y3_19) + m(x9, y2_19);
        let z2 = m(x0, y2) + m(x1_2, y1) + m(x2, y0) + m(x3_2, y9_19) + m(x4, y8_19) + m(x5_2, y7_19) + m(x6, y6_19) + m(x7_2, y5_19) + m(x8, y4_19) + m(x9_2, y3_19);
        let z3 = m(x0, y3) + m(x1, y2) + m(x2, y1) + m(x3, y0) + m(x4, y9_19) + m(x5, y8_19) + m(x6, y7_19) + m(x7, y6_19) + m(x8, y5_19) + m(x9, y4_19);
        let z4 = m(x0, y4) + m(x1_2, y3) + m(x2, y2) + m(x3_2, y1) + m(x4, y0) + m(x5_2, y9_19) + m(x6, y8_19) + m(x7_2, y7_19) + m(x8, y6_19) + m(x9_2, y5_19);
        let z5 = m(x0, y5) + m(x1, y4) + m(x2, y3) + m(x3, y2) + m(x4, y1) + m(x5, y0) + m(x6, y9_19) + m(x7, y8_19) + m(x8, y7_19) + m(x9, y6_19);
        let z6 = m(x0, y6) + m(x1_2, y5) + m(x2, y4) + m(x3_2, y3) + m(x4, y2) + m(x5_2, y1) + m(x6, y0) + m(x7_2, y9_19) + m(x8, y8_19) + m(x9_2, y7_19);
        let z7 = m(x0, y7) + m(x1, y6) + m(x2, y5) + m(x3, y4) + m(x4, y3) + m(x5, y2) + m(x6, y1) + m(x7, y0) + m(x8, y9_19) + m(x9, y8_19);
        let z8 = m(x0, y8) + m(x1_2, y7) + m(x2, y6) + m(x3_2, y5) + m(x4, y4) + m(x5_2, y3) + m(x6, y2) + m(x7_2, y1) + m(x8, y0) + m(x9_2, y9_19);
        let z9 = m(x0, y9) + m(x1, y8) + m(x2, y7) + m(x3, y6) + m(x4, y5) + m(x5, y4) + m(x6, y3) + m(x7, y2) + m(x8, y1) + m(x9, y0);

        FieldElement2625x4::reduce64([z0, z1, z2, z3, z4, z5, z6, z7, z8, z9])
    }
}
872
// Tests check the vectorized implementation against the serial
// FieldElement51 arithmetic; they only compile when AVX2 is enabled.
#[cfg(target_feature = "avx2")]
#[cfg(test)]
mod test {
    use super::*;

    // Scaling (1, 1, 1, 1) by four small scalars yields those scalars in
    // the low limb of each lane.
    #[test]
    fn scale_by_curve_constants() {
        let mut x = FieldElement2625x4::splat(&FieldElement51::ONE);

        x = x * (121666, 121666, 2 * 121666, 2 * 121665);

        let xs = x.split();
        assert_eq!(xs[0], FieldElement51([121666, 0, 0, 0, 0]));
        assert_eq!(xs[1], FieldElement51([121666, 0, 0, 0, 0]));
        assert_eq!(xs[2], FieldElement51([2 * 121666, 0, 0, 0, 0]));
        assert_eq!(xs[3], FieldElement51([2 * 121665, 0, 0, 0, 0]));
    }

    // diff_sum on (A, B, C, D) should produce (B-A, B+A, D-C, D+C).
    #[test]
    fn diff_sum_vs_serial() {
        let x0 = FieldElement51([10000, 10001, 10002, 10003, 10004]);
        let x1 = FieldElement51([10100, 10101, 10102, 10103, 10104]);
        let x2 = FieldElement51([10200, 10201, 10202, 10203, 10204]);
        let x3 = FieldElement51([10300, 10301, 10302, 10303, 10304]);

        let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3).diff_sum();

        let result = vec.split();

        assert_eq!(result[0], &x1 - &x0);
        assert_eq!(result[1], &x1 + &x0);
        assert_eq!(result[2], &x3 - &x2);
        assert_eq!(result[3], &x3 + &x2);
    }

    // square_and_negate_D should produce (A^2, B^2, C^2, -D^2).
    #[test]
    fn square_vs_serial() {
        let x0 = FieldElement51([10000, 10001, 10002, 10003, 10004]);
        let x1 = FieldElement51([10100, 10101, 10102, 10103, 10104]);
        let x2 = FieldElement51([10200, 10201, 10202, 10203, 10204]);
        let x3 = FieldElement51([10300, 10301, 10302, 10303, 10304]);

        let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3);

        let result = vec.square_and_negate_D().split();

        assert_eq!(result[0], &x0 * &x0);
        assert_eq!(result[1], &x1 * &x1);
        assert_eq!(result[2], &x2 * &x2);
        assert_eq!(result[3], -&(&x3 * &x3));
    }

    // Vectorized lane-wise multiplication should match serial products.
    #[test]
    fn multiply_vs_serial() {
        let x0 = FieldElement51([10000, 10001, 10002, 10003, 10004]);
        let x1 = FieldElement51([10100, 10101, 10102, 10103, 10104]);
        let x2 = FieldElement51([10200, 10201, 10202, 10203, 10204]);
        let x3 = FieldElement51([10300, 10301, 10302, 10303, 10304]);

        let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3);
        let vecprime = vec.clone();

        let result = (&vec * &vecprime).split();

        assert_eq!(result[0], &x0 * &x0);
        assert_eq!(result[1], &x1 * &x1);
        assert_eq!(result[2], &x2 * &x2);
        assert_eq!(result[3], &x3 * &x3);
    }

    // unpack_pair should spread even/odd digits into zero-extended lanes,
    // and repack_pair should exactly invert it.
    #[test]
    fn test_unpack_repack_pair() {
        let x0 = FieldElement51([10000 + (10001 << 26), 0, 0, 0, 0]);
        let x1 = FieldElement51([10100 + (10101 << 26), 0, 0, 0, 0]);
        let x2 = FieldElement51([10200 + (10201 << 26), 0, 0, 0, 0]);
        let x3 = FieldElement51([10300 + (10301 << 26), 0, 0, 0, 0]);

        let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3);

        let src = vec.0[0];

        let (a, b) = unpack_pair(src);

        let expected_a = u32x8::new(10000, 0, 10100, 0, 10200, 0, 10300, 0);
        let expected_b = u32x8::new(10001, 0, 10101, 0, 10201, 0, 10301, 0);

        assert_eq!(a, expected_a);
        assert_eq!(b, expected_b);

        let expected_src = repack_pair(a, b);

        assert_eq!(src, expected_src);
    }

    // new() followed by split() should round-trip arbitrary field elements.
    #[test]
    fn new_split_roundtrips() {
        let x0 = FieldElement51::from_bytes(&[0x10; 32]);
        let x1 = FieldElement51::from_bytes(&[0x11; 32]);
        let x2 = FieldElement51::from_bytes(&[0x12; 32]);
        let x3 = FieldElement51::from_bytes(&[0x13; 32]);

        let vec = FieldElement2625x4::new(&x0, &x1, &x2, &x3);

        let splits = vec.split();

        assert_eq!(x0, splits[0]);
        assert_eq!(x1, splits[1]);
        assert_eq!(x2, splits[2]);
        assert_eq!(x3, splits[3]);
    }
}
983}