jh/
compressor.rs

1#![allow(non_upper_case_globals)]
2
3use core::ptr;
4use digest::generic_array::{typenum::U64, GenericArray};
5use simd::{dispatch, vec128_storage, AndNot, Machine, Swap64, VZip, Vec2};
6
/// Expands `$blk` seven times back to back. Each copy sits in its own scope
/// in which `$idx` is declared as a `const usize` equal to the copy's
/// position, counting from 0 up to 6.
///
/// Since the index is a constant rather than a loop variable, the block may
/// `match` on it or use it wherever a constant is accepted.
#[rustfmt::skip]
macro_rules! unroll7 {
    ($idx:ident, $blk:block) => {
        {
            const $idx: usize = 0;
            $blk
        }
        {
            const $idx: usize = 1;
            $blk
        }
        {
            const $idx: usize = 2;
            $blk
        }
        {
            const $idx: usize = 3;
            $blk
        }
        {
            const $idx: usize = 4;
            $blk
        }
        {
            const $idx: usize = 5;
            $blk
        }
        {
            const $idx: usize = 6;
            $blk
        }
    };
}
19
20#[repr(C)]
21#[derive(Copy, Clone)]
22struct X8<M: Machine>(
23    M::u128x1,
24    M::u128x1,
25    M::u128x1,
26    M::u128x1,
27    M::u128x1,
28    M::u128x1,
29    M::u128x1,
30    M::u128x1,
31);
32
33impl<M: Machine> X8<M> {
34    #[inline(always)]
35    fn zip(self) -> (M::u128x2, M::u128x2, M::u128x2, M::u128x2) {
36        (
37            [self.0, self.1].vzip(),
38            [self.2, self.3].vzip(),
39            [self.4, self.5].vzip(),
40            [self.6, self.7].vzip(),
41        )
42    }
43
44    #[inline(always)]
45    fn unzip((a, b, c, d): (M::u128x2, M::u128x2, M::u128x2, M::u128x2)) -> Self {
46        X8(
47            a.extract(0),
48            a.extract(1),
49            b.extract(0),
50            b.extract(1),
51            c.extract(0),
52            c.extract(1),
53            d.extract(0),
54            d.extract(1),
55        )
56    }
57}
58
59/// two Sboxes computed in parallel; each Sbox implements S0 and S1, selected by a constant bit
60#[inline(always)]
61fn ss<M: Machine>(state: X8<M>, mut k: M::u128x2) -> X8<M> {
62    let mut m = state.zip();
63    // TODO: replace ! with andnot ops?
64    m.3 = !m.3;
65    m.0 ^= m.2.andnot(k);
66    k ^= m.0 & m.1;
67    m.0 ^= m.3 & m.2;
68    m.3 ^= m.1.andnot(m.2);
69    m.1 ^= m.0 & m.2;
70    m.2 ^= m.3.andnot(m.0);
71    m.0 ^= m.1 | m.3;
72    m.3 ^= m.1 & m.2;
73    m.2 ^= k;
74    m.1 ^= k & m.0;
75    X8::unzip(m)
76}
77
78#[inline(always)]
79fn l<M: Machine>(mut y: X8<M>) -> X8<M> {
80    y.1 ^= y.2;
81    y.3 ^= y.4;
82    y.5 ^= y.6 ^ y.0;
83    y.7 ^= y.0;
84    y.0 ^= y.3;
85    y.2 ^= y.5;
86    y.4 ^= y.7 ^ y.1;
87    y.6 ^= y.1;
88    y
89}
90
91union X2Bytes<M: Machine> {
92    x2: M::u128x2,
93    bytes: [u8; 32],
94}
95
96#[inline(always)]
97#[doc(hidden)]
98pub fn f8_impl<M: Machine>(mach: M, state: &mut [vec128_storage; 8], data: *const u8) {
99    #[allow(clippy::cast_ptr_alignment)]
100    let data = data as *const M::u128x1;
101    let mut y = X8::<M>(
102        mach.unpack(state[0]),
103        mach.unpack(state[1]),
104        mach.unpack(state[2]),
105        mach.unpack(state[3]),
106        mach.unpack(state[4]),
107        mach.unpack(state[5]),
108        mach.unpack(state[6]),
109        mach.unpack(state[7]),
110    );
111    unsafe {
112        y.0 ^= ptr::read_unaligned(data);
113        y.1 ^= ptr::read_unaligned(data.offset(1));
114        y.2 ^= ptr::read_unaligned(data.offset(2));
115        y.3 ^= ptr::read_unaligned(data.offset(3));
116    }
117    for rc in crate::consts::E8_BITSLICE_ROUNDCONSTANT.chunks_exact(7) {
118        unroll7!(j, {
119            y = ss(y, unsafe { X2Bytes::<M> { bytes: rc[j] }.x2 });
120            y = l(y);
121            let f = match j {
122                0 => M::u128x1::swap1,
123                1 => M::u128x1::swap2,
124                2 => M::u128x1::swap4,
125                3 => M::u128x1::swap8,
126                4 => M::u128x1::swap16,
127                5 => M::u128x1::swap32,
128                6 => M::u128x1::swap64,
129                _ => unreachable!(),
130            };
131            y = X8(y.0, f(y.1), y.2, f(y.3), y.4, f(y.5), y.6, f(y.7));
132        });
133    }
134    unsafe {
135        y.4 ^= ptr::read_unaligned(data);
136        y.5 ^= ptr::read_unaligned(data.offset(1));
137        y.6 ^= ptr::read_unaligned(data.offset(2));
138        y.7 ^= ptr::read_unaligned(data.offset(3));
139    }
140    *state = [
141        y.0.into(),
142        y.1.into(),
143        y.2.into(),
144        y.3.into(),
145        y.4.into(),
146        y.5.into(),
147        y.6.into(),
148        y.7.into(),
149    ];
150}
151
// Defines `f8(state, data)`, which forwards both arguments to `f8_impl`
// together with the `mach` value that `dispatch!` brings into scope (`M` names
// its type). Callers such as `Compressor::update` therefore pass no machine.
// NOTE(review): how `dispatch!` chooses the concrete `Machine` is defined in
// the `simd` crate, not in this file — confirm there.
dispatch!(mach, M, {
    fn f8(state: &mut [vec128_storage; 8], data: *const u8) {
        f8_impl(mach, state, data);
    }
});
157
158pub(crate) union Compressor {
159    cv: [vec128_storage; 8],
160    bytes: [u8; 128],
161}
162
163impl Compressor {
164    #[inline]
165    pub(crate) fn new(bytes: [u8; 128]) -> Self {
166        Compressor { bytes }
167    }
168
169    #[inline]
170    pub(crate) fn update(&mut self, data: &GenericArray<u8, U64>) {
171        f8(unsafe { &mut self.cv }, data.as_ptr());
172    }
173
174    #[inline]
175    pub(crate) fn finalize(&self) -> &[u8; 128] {
176        unsafe { &self.bytes }
177    }
178}
179
180impl Clone for Compressor {
181    fn clone(&self) -> Self {
182        Self {
183            bytes: unsafe { self.bytes },
184        }
185    }
186}