ring/aead/gcm.rs

// Copyright 2018 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use super::{
    aes_gcm,
    block::{Block, BLOCK_LEN},
    Aad,
};
use crate::{
    bits::{BitLength, FromUsizeBytes},
    cpu, error,
    polyfill::ArraySplitMap,
};
use core::ops::BitXorAssign;

mod gcm_nohw;

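/// A GHASH key: the hash subkey `H`, expanded into the precomputed table used
/// by whichever GHASH implementation is selected for the current CPU.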
#[derive(Clone)]
pub struct Key {
    h_table: HTable,
}

impl Key {
    pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
        let h: [u64; 2] = h_be.as_ref().array_split_map(u64::from_be_bytes);

        let mut key = Self {
            h_table: HTable {
                Htable: [u128 { hi: 0, lo: 0 }; HTABLE_LEN],
            },
        };
        let h_table = &mut key.h_table;

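        // Fill in the table using the implementation selected for this CPU;
        // the table's contents and layout depend on which implementation
        // initialized it.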
        match detect_implementation(cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
                prefixed_extern! {
                    fn gcm_init_avx(HTable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_avx(h_table, &h);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_clmul(h_table, &h);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_neon(h_table, &h);
                }
            }

            Implementation::Fallback => {
                h_table.Htable[0] = gcm_nohw::init(h);
            }
        }

        key
    }
}

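/// The state of an in-progress GHASH computation: the running hash value, a
/// copy of the key's precomputed table, and the AAD and input lengths needed
/// for the final length block.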
pub struct Context {
    inner: ContextInner,
    aad_len: BitLength<u64>,
    in_out_len: BitLength<u64>,
    cpu_features: cpu::Features,
}

impl Context {
    pub(crate) fn new(
        key: &Key,
        aad: Aad<&[u8]>,
        in_out_len: usize,
        cpu_features: cpu::Features,
    ) -> Result<Self, error::Unspecified> {
        if in_out_len > aes_gcm::MAX_IN_OUT_LEN {
            return Err(error::Unspecified);
        }

        // NIST SP800-38D Section 5.2.1.1 says that the maximum AAD length is
        // 2**64 - 1 bits, i.e. BitLength<u64>::MAX, so we don't need to do an
        // explicit check here.

        let mut ctx = Self {
            inner: ContextInner {
                Xi: Xi(Block::zero()),
                Htable: key.h_table.clone(),
            },
            aad_len: BitLength::from_usize_bytes(aad.as_ref().len())?,
            in_out_len: BitLength::from_usize_bytes(in_out_len)?,
            cpu_features,
        };

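        // Absorb the AAD into the GHASH state, zero-padding the final block
        // if it is only partially filled.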
        for ad in aad.0.chunks(BLOCK_LEN) {
            let mut block = Block::zero();
            block.overwrite_part_at(0, ad);
            ctx.update_block(block);
        }

        Ok(ctx)
    }

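    /// The length, in bits, of the whole blocks of input, with the bits of
    /// any trailing partial block masked off.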
    #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))]
    pub(super) fn in_out_whole_block_bits(&self) -> BitLength<usize> {
        use crate::polyfill::usize_from_u64;
        const WHOLE_BLOCK_BITS_MASK: usize = !0b111_1111;
        const _WHOLE_BLOCK_BITS_MASK_CORRECT: () =
            assert!(WHOLE_BLOCK_BITS_MASK == !((BLOCK_LEN * 8) - 1));
        BitLength::from_usize_bits(
            usize_from_u64(self.in_out_len.as_bits()) & WHOLE_BLOCK_BITS_MASK,
        )
    }

    /// Access to `inner` for the integrated AES-GCM implementations only.
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    #[inline]
    pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) {
        (&self.inner.Htable, &mut self.inner.Xi)
    }

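    /// Absorbs whole blocks of input into the GHASH state. `input.len()` must
    /// be a non-zero multiple of `BLOCK_LEN`.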
    pub fn update_blocks(&mut self, input: &[u8]) {
        // The assembly functions take the input length in bytes, not blocks.
        let input_bytes = input.len();

        debug_assert_eq!(input_bytes % BLOCK_LEN, 0);
        debug_assert!(input_bytes > 0);

        let input = input.as_ptr().cast::<[u8; BLOCK_LEN]>();
        // SAFETY:
        // - `[[u8; BLOCK_LEN]]` has the same bit validity as `[u8]`.
        // - `[[u8; BLOCK_LEN]]` has the same alignment requirement as `[u8]`.
        // - `input_bytes / BLOCK_LEN` ensures that the total length in bytes of
        //   the new `[[u8; BLOCK_LEN]]` will not be longer than the original
        //   `[u8]`.
        let input = unsafe { core::slice::from_raw_parts(input, input_bytes / BLOCK_LEN) };

        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(self.cpu_features) => {
                prefixed_extern! {
                    fn gcm_ghash_avx(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_avx(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_ghash_clmul(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_clmul(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_ghash_neon(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_neon(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            Implementation::Fallback => {
                gcm_nohw::ghash(xi, h_table.Htable[0], input);
            }
        }
    }

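    /// Absorbs a single block into the GHASH state:
    /// `Xi = (Xi ^ a) * H` in GF(2^128).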
    pub fn update_block(&mut self, a: Block) {
        self.inner.Xi.bitxor_assign(a);

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_clmul(xi, h_table);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_neon(xi, h_table);
                }
            }

            Implementation::Fallback => {
                gcm_nohw::gmult(xi, h_table.Htable[0]);
            }
        }
    }

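    /// Absorbs the final length block (the AAD length and the input length,
    /// in bits, as two big-endian 64-bit values), then passes the resulting
    /// GHASH value to `f` to produce the tag.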
    pub(super) fn pre_finish<F>(mut self, f: F) -> super::Tag
    where
        F: FnOnce(Block, cpu::Features) -> super::Tag,
    {
        self.update_block(Block::from(
            [self.aad_len.as_bits(), self.in_out_len.as_bits()].map(u64::to_be_bytes),
        ));

        f(self.inner.Xi.0, self.cpu_features)
    }

    #[cfg(target_arch = "x86_64")]
    pub(super) fn is_avx(&self) -> bool {
        match detect_implementation(self.cpu_features) {
            Implementation::CLMUL => has_avx_movbe(self.cpu_features),
            _ => false,
        }
    }

    #[cfg(target_arch = "aarch64")]
    pub(super) fn is_clmul(&self) -> bool {
        matches!(
            detect_implementation(self.cpu_features),
            Implementation::CLMUL
        )
    }
}

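// The key's precomputed GHASH values. The exact contents and layout depend on
// which implementation initialized the table.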
// The alignment is required by non-Rust code that uses `GCM128_CONTEXT`.
#[derive(Clone)]
#[repr(C, align(16))]
pub(super) struct HTable {
    Htable: [u128; HTABLE_LEN],
}

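// One 128-bit table entry, stored as two 64-bit halves; `#[repr(C)]` keeps the
// layout compatible with the non-Rust code that reads the table.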
#[derive(Clone, Copy)]
#[repr(C)]
struct u128 {
    hi: u64,
    lo: u64,
}

const HTABLE_LEN: usize = 16;

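/// The running GHASH value (the `Xi` accumulator).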
#[repr(transparent)]
pub struct Xi(Block);

impl BitXorAssign<Block> for Xi {
    #[inline]
    fn bitxor_assign(&mut self, a: Block) {
        self.0 ^= a;
    }
}

impl From<Xi> for Block {
    #[inline]
    fn from(Xi(block): Xi) -> Self {
        block
    }
}

// This corresponds roughly to the `GCM128_CONTEXT` structure in BoringSSL.
// Some assembly language code, in particular the MOVBE+AVX2 X86-64
// implementation, requires this exact layout.
#[repr(C, align(16))]
struct ContextInner {
    Xi: Xi,
    Htable: HTable,
}

#[allow(clippy::upper_case_acronyms)]
enum Implementation {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    CLMUL,

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    NEON,

    Fallback,
}

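// Selects the best GHASH implementation available on the current CPU:
// carry-less multiplication (PMULL / PCLMULQDQ) when available, then NEON,
// then the portable fallback.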
#[inline]
fn detect_implementation(cpu_features: cpu::Features) -> Implementation {
    // `cpu_features` is only used for specific platforms.
    #[cfg(not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    )))]
    let _cpu_features = cpu_features;

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    {
        if cpu::arm::PMULL.available(cpu_features) {
            return Implementation::CLMUL;
        }
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    {
        if cpu::intel::FXSR.available(cpu_features) && cpu::intel::PCLMULQDQ.available(cpu_features)
        {
            return Implementation::CLMUL;
        }
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    {
        if cpu::arm::NEON.available(cpu_features) {
            return Implementation::NEON;
        }
    }

    Implementation::Fallback
}

#[cfg(target_arch = "x86_64")]
fn has_avx_movbe(cpu_features: cpu::Features) -> bool {
    cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features)
}