| /* Copyright 2024 The ChromiumOS Authors |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #include "2common.h" |
| #include "2rsa.h" |
| |
| /** |
| * Montgomery c[] = d[] - e[] if d[] > e[], c[] = d[] - e[] + n[] otherwise. |
| * Uses "Subtract with Carry" and "Add with Carry" instructions to optimize BigNum |
| * arithmetic. e[] will be overwritten with intermediate results. |
| */ |
| static void sub_mod(uint32_t *c, uint32_t *ed, const uint32_t *n, const uint32_t arrsize) |
| { |
| uint32_t borrow, tmp1, tmp2, i; |
| |
| /* e[] = d[] - e[] */ |
| uint32_t size_clobber = arrsize; |
| uint32_t *ed_clobber = ed; |
| asm ( |
| "subs wzr, wzr, wzr\n\t" /* init carry flag for subtraction */ |
| "1:\n\t" |
| "ldp %w[e], %w[d], [%[ed_ptr]]\n\t" |
| "sbcs %w[e], %w[d], %w[e]\n\t" |
| "str %w[e], [%[ed_ptr]], #8\n\t" |
| "sub %w[size], %w[size], #1\n\t" |
| "cbnz %w[size], 1b\n\t" |
| "cset %w[e], cc\n\t" /* "borrow" = carry flag is 0 (cleared) */ |
| : [e] "=r" (borrow), |
| [d] "=r" (tmp1), |
| [size] "+r" (size_clobber), |
| [ed_ptr] "+r" (ed_clobber) |
| :: "cc", "memory" |
| ); |
| |
| if (borrow) { |
| /* e[] = e[] + n[] */ |
| size_clobber = arrsize; |
| ed_clobber = ed; |
| asm volatile ( |
| "adds wzr, wzr, wzr\n\t" /* init carry flag for addition */ |
| "1:\n\t" |
| "ldr %w[e], [%[ed_ptr]]\n\t" |
| "ldr %w[n], [%[n_ptr]], #4\n\t" |
| "adcs %w[e], %w[e], %w[n]\n\t" |
| "str %w[e], [%[ed_ptr]], #8\n\t" |
| "sub %w[size], %w[size], #1\n\t" |
| "cbnz %w[size], 1b\n\t" |
| : [e] "=r" (tmp1), |
| [n] "=r" (tmp2), |
| [size] "+r" (size_clobber), |
| [ed_ptr] "+r" (ed_clobber), |
| [n_ptr] "+r" (n) |
| :: "cc", "memory" |
| ); |
| } |
| |
| /* c[] = e[] */ |
| for (i = 0; i < arrsize; i++) |
| c[i] = ed[i * 2]; |
| } |
| |
| /** |
| * Montgomery c[] = a[] * b[] / R % mod (`ed` is a local scratch buffer) |
| * |
| * Algorithm according to https://eprint.iacr.org/2013/519.pdf and |
| * https://chromium-review.googlesource.com/5055251. |
| */ |
| static void mont_mult(uint32_t *c, |
| const uint32_t *a, |
| const uint32_t *b, |
| const uint32_t *n, |
| uint32_t *ed, |
| const uint32_t mu, |
| const uint32_t arrsize) |
| { |
| const uint32_t mub0 = mu * b[0]; |
| uint32_t i; |
| |
| memset(ed, 0, arrsize * sizeof(uint32_t) * 2); |
| |
| for (i = 0; i < arrsize; i++) { |
| const uint32_t c0 = ed[1] - ed[0]; |
| const uint32_t muc0 = mu * c0; |
| const uint32_t a_i = a[i]; |
| const uint32_t q = muc0 + mub0 * a_i; |
| const uint32_t *n_clobber = n; |
| const uint32_t *b_clobber = b; |
| void *ed_clobber = ed; |
| uint32_t size_clobber = arrsize - 1; |
| asm volatile ( |
| /* v4.2d = always contains [0, 0] (for idempotent Add High Narrow) */ |
| "movi v4.2d, #0\n\t" |
| /* v3.2s = "mul" = [q, a[i]] */ |
| "fmov s3, %w[q]\n\t" |
| "mov v3.s[1], %w[a_i]\n\t" |
| /* v1.2s = "bmod" = [n[0], b[0]] */ |
| "ldr s1, [%[n]], #4\n\t" |
| "ld1 {v1.s}[1], [%[b]], #4\n\t" |
| /* v2.2s = [e, d] */ |
| "ldr d2, [%[ed]]\n\t" |
| "uxtl v2.2d, v2.2s\n\t" |
| /* v2.2d = "p01" = ed + bmod * mul */ |
| "umlal v2.2d, v1.2s, v3.2s\n\t" |
| /* v2.2d = "t01" = MSB-half(p01) */ |
| "addhn v2.2s, v2.2d, v4.2d\n\t" |
| /* for (j = 1; j < arrsize - 1; j++) */ |
| "1:" |
| /* v0.2d = zero-extend(ed + t01) */ |
| "ldr d0, [%[ed], #8]\n\t" |
| "uaddl v0.2d, v0.2s, v2.2s\n\t" |
| /* v1.2s = "bmod" = [n[j], b[j]] */ |
| "ldr s1, [%[n]], #4\n\t" |
| "ld1 {v1.s}[1], [%[b]], #4\n\t" |
| /* v0.2d = "p01" = ed[j] + t01 + bmod * mul */ |
| "umlal v0.2d, v1.2s, v3.2s\n\t" |
| /* v2.2s = "t01" = MSB-half(p01) */ |
| "addhn v2.2s, v0.2d, v4.2d\n\t" |
| /* store ed[j - 1] = LSB-half(p01) */ |
| "xtn v0.2s, v0.2d\n\t" |
| "str d0, [%[ed]], #8\n\t" |
| "subs %w[size], %w[size], #1\n\t" |
| "b.hi 1b\n\t" |
| /* store ed[arrsize - 1] = final t01 */ |
| "str d2, [%[ed]]\n\t" |
| : [ed] "+r" (ed_clobber), |
| [n] "+r" (n_clobber), |
| [b] "+r" (b_clobber), |
| [size] "+r" (size_clobber) |
| : [q] "r" (q), |
| [a_i] "r" (a_i) |
| : "v0", "v1","v2", "v3", "v4", "cc", "memory" |
| ); |
| } |
| |
| sub_mod(c, ed, n, arrsize); |
| } |
| |
| static void swap_bignumber_endianness(const void *in, void *out, size_t size_bytes) |
| { |
| const void *in_end = in + size_bytes; |
| |
| /* REV64 can only swap within each 8-byte half of the 16-byte register, so use a |
| transposed STP to do the final swap of the two halves afterwards. */ |
| asm volatile ( |
| "1:\n\t" |
| "ldr q0, [%[in], #-16]!\n\t" |
| "rev64 v0.16b, v0.16b\n\t" |
| "mov d1, v0.d[1]\n\t" |
| "stp d1, d0, [%[out]], #16\n\t" |
| "subs %[size], %[size], #16\n\t" |
| "b.hi 1b\n\t" |
| : [in] "+r" (in_end), |
| [out] "+r" (out), |
| [size] "+r" (size_bytes) |
| :: "v0", "v1", "cc", "memory" |
| ); |
| } |
| |
| vb2_error_t vb2ex_hwcrypto_modexp(const struct vb2_public_key *key, |
| uint8_t *inout, void *workbuf, |
| size_t workbuf_size, int exp) |
| { |
| const uint32_t mu = -key->n0inv; |
| const uint32_t *n = key->n; |
| const uint32_t arrsize = key->arrsize; |
| uint32_t *a = workbuf; |
| uint32_t *aR = (void *)inout; /* Re-use location. */ |
| uint32_t *aaR = a + arrsize; |
| uint32_t *aaa = aaR; /* Re-use location. */ |
| uint32_t *ed = aaR + arrsize; /* 8-byte align guaranteed by VB2_WORKBUF_ALIGN */ |
| uint32_t i; |
| |
| if (exp != 65537 || arrsize % 16 != 0 || |
| (void *)&ed[arrsize * 2] - workbuf > workbuf_size) |
| return VB2_ERROR_EX_HWCRYPTO_UNSUPPORTED; |
| |
| /* Convert from big endian byte array to little endian word array. */ |
| swap_bignumber_endianness(inout, a, arrsize * sizeof(uint32_t)); |
| |
| mont_mult(aR, a, key->rr, n, ed, mu, arrsize); /* aR = a * RR / R mod M */ |
| for (i = 0; i < 16; i += 2) { |
| mont_mult(aaR, aR, aR, n, ed, mu, arrsize); /* aaR = aR * aR / R mod M */ |
| mont_mult(aR, aaR, aaR, n, ed, mu, arrsize); /* aR = aaR * aaR / R mod M */ |
| } |
| mont_mult(aaa, aR, a, n, ed, mu, arrsize); /* aaa = aR * a / R mod M */ |
| |
| /* Convert back to bigendian byte array */ |
| swap_bignumber_endianness(aaa, inout, arrsize * sizeof(uint32_t)); |
| |
| return VB2_SUCCESS; |
| } |