1#![allow(non_upper_case_globals)]
2
3use core::ptr;
4use digest::generic_array::{typenum::U64, GenericArray};
5use simd::{dispatch, vec128_storage, AndNot, Machine, Swap64, VZip, Vec2};
6
/// Expands `$step` seven times in a row, without a runtime loop.
///
/// Each copy lives in its own block in which `$idx` is declared as a
/// `const usize` holding 0, 1, ..., 6 (in that order), so the body may use the
/// index anywhere a compile-time constant is required.
#[rustfmt::skip]
macro_rules! unroll7 {
    ($idx:ident, $step:block) => {
        {
            const $idx: usize = 0;
            $step
        }
        {
            const $idx: usize = 1;
            $step
        }
        {
            const $idx: usize = 2;
            $step
        }
        {
            const $idx: usize = 3;
            $step
        }
        {
            const $idx: usize = 4;
            $step
        }
        {
            const $idx: usize = 5;
            $step
        }
        {
            const $idx: usize = 6;
            $step
        }
    };
}
19
20#[repr(C)]
21#[derive(Copy, Clone)]
22struct X8<M: Machine>(
23 M::u128x1,
24 M::u128x1,
25 M::u128x1,
26 M::u128x1,
27 M::u128x1,
28 M::u128x1,
29 M::u128x1,
30 M::u128x1,
31);
32
33impl<M: Machine> X8<M> {
34 #[inline(always)]
35 fn zip(self) -> (M::u128x2, M::u128x2, M::u128x2, M::u128x2) {
36 (
37 [self.0, self.1].vzip(),
38 [self.2, self.3].vzip(),
39 [self.4, self.5].vzip(),
40 [self.6, self.7].vzip(),
41 )
42 }
43
44 #[inline(always)]
45 fn unzip((a, b, c, d): (M::u128x2, M::u128x2, M::u128x2, M::u128x2)) -> Self {
46 X8(
47 a.extract(0),
48 a.extract(1),
49 b.extract(0),
50 b.extract(1),
51 c.extract(0),
52 c.extract(1),
53 d.extract(0),
54 d.extract(1),
55 )
56 }
57}
58
59#[inline(always)]
61fn ss<M: Machine>(state: X8<M>, mut k: M::u128x2) -> X8<M> {
62 let mut m = state.zip();
63 m.3 = !m.3;
65 m.0 ^= m.2.andnot(k);
66 k ^= m.0 & m.1;
67 m.0 ^= m.3 & m.2;
68 m.3 ^= m.1.andnot(m.2);
69 m.1 ^= m.0 & m.2;
70 m.2 ^= m.3.andnot(m.0);
71 m.0 ^= m.1 | m.3;
72 m.3 ^= m.1 & m.2;
73 m.2 ^= k;
74 m.1 ^= k & m.0;
75 X8::unzip(m)
76}
77
78#[inline(always)]
79fn l<M: Machine>(mut y: X8<M>) -> X8<M> {
80 y.1 ^= y.2;
81 y.3 ^= y.4;
82 y.5 ^= y.6 ^ y.0;
83 y.7 ^= y.0;
84 y.0 ^= y.3;
85 y.2 ^= y.5;
86 y.4 ^= y.7 ^ y.1;
87 y.6 ^= y.1;
88 y
89}
90
91union X2Bytes<M: Machine> {
92 x2: M::u128x2,
93 bytes: [u8; 32],
94}
95
96#[inline(always)]
97#[doc(hidden)]
98pub fn f8_impl<M: Machine>(mach: M, state: &mut [vec128_storage; 8], data: *const u8) {
99 #[allow(clippy::cast_ptr_alignment)]
100 let data = data as *const M::u128x1;
101 let mut y = X8::<M>(
102 mach.unpack(state[0]),
103 mach.unpack(state[1]),
104 mach.unpack(state[2]),
105 mach.unpack(state[3]),
106 mach.unpack(state[4]),
107 mach.unpack(state[5]),
108 mach.unpack(state[6]),
109 mach.unpack(state[7]),
110 );
111 unsafe {
112 y.0 ^= ptr::read_unaligned(data);
113 y.1 ^= ptr::read_unaligned(data.offset(1));
114 y.2 ^= ptr::read_unaligned(data.offset(2));
115 y.3 ^= ptr::read_unaligned(data.offset(3));
116 }
117 for rc in crate::consts::E8_BITSLICE_ROUNDCONSTANT.chunks_exact(7) {
118 unroll7!(j, {
119 y = ss(y, unsafe { X2Bytes::<M> { bytes: rc[j] }.x2 });
120 y = l(y);
121 let f = match j {
122 0 => M::u128x1::swap1,
123 1 => M::u128x1::swap2,
124 2 => M::u128x1::swap4,
125 3 => M::u128x1::swap8,
126 4 => M::u128x1::swap16,
127 5 => M::u128x1::swap32,
128 6 => M::u128x1::swap64,
129 _ => unreachable!(),
130 };
131 y = X8(y.0, f(y.1), y.2, f(y.3), y.4, f(y.5), y.6, f(y.7));
132 });
133 }
134 unsafe {
135 y.4 ^= ptr::read_unaligned(data);
136 y.5 ^= ptr::read_unaligned(data.offset(1));
137 y.6 ^= ptr::read_unaligned(data.offset(2));
138 y.7 ^= ptr::read_unaligned(data.offset(3));
139 }
140 *state = [
141 y.0.into(),
142 y.1.into(),
143 y.2.into(),
144 y.3.into(),
145 y.4.into(),
146 y.5.into(),
147 y.6.into(),
148 y.7.into(),
149 ];
150}
151
// Defines `f8(state, data)`, the entry point `Compressor::update` calls.
// The body simply forwards to the generic `f8_impl` with the `mach` value
// supplied by `dispatch!`.
// NOTE(review): presumably `dispatch!` (from the `simd` crate) selects a
// concrete `Machine` implementation for the running CPU and binds it to
// `mach` / `M`; confirm against that crate's documentation.
// `data` must satisfy the same read-validity requirement as in `f8_impl`.
dispatch!(mach, M, {
    fn f8(state: &mut [vec128_storage; 8], data: *const u8) {
        f8_impl(mach, state, data);
    }
});
157
158pub(crate) union Compressor {
159 cv: [vec128_storage; 8],
160 bytes: [u8; 128],
161}
162
163impl Compressor {
164 #[inline]
165 pub(crate) fn new(bytes: [u8; 128]) -> Self {
166 Compressor { bytes }
167 }
168
169 #[inline]
170 pub(crate) fn update(&mut self, data: &GenericArray<u8, U64>) {
171 f8(unsafe { &mut self.cv }, data.as_ptr());
172 }
173
174 #[inline]
175 pub(crate) fn finalize(&self) -> &[u8; 128] {
176 unsafe { &self.bytes }
177 }
178}
179
180impl Clone for Compressor {
181 fn clone(&self) -> Self {
182 Self {
183 bytes: unsafe { self.bytes },
184 }
185 }
186}