2023-11-06 23:58:23 -08:00

370 lines
9.1 KiB
C

/* $OpenBSD: bn_arch.h,v 1.13 2023/07/24 10:21:29 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <openssl/bn.h>
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
#if defined(__GNUC__)
#define HAVE_BN_CLZW
/*
 * bn_clzw() returns the number of leading zero bits in the word w, as
 * produced by a single clz instruction.
 */
static inline int
bn_clzw(BN_ULONG w)
{
	BN_ULONG lz;

	__asm__ ("clz %[n], %[w]"
	    : [n]"=r"(lz)
	    : [w]"r"(w));

	return (int)lz;
}
#define HAVE_BN_ADDW
/*
 * bn_addw() computes (r1:r0) = a + b for single word inputs. The low word of
 * the sum is stored via out_r0 and the carry out (0 or 1) via out_r1.
 */
static inline void
bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG sum, cy;

	/* adds sets the carry flag on unsigned overflow; cset captures it. */
	__asm__ (
	    "adds %[r0], %[a], %[b] \n"
	    "cset %[carry], cs \n"
	    : [carry]"=r"(cy), [r0]"=r"(sum)
	    : [a]"r"(a), [b]"r"(b)
	    : "cc");

	*out_r1 = cy;
	*out_r0 = sum;
}
#define HAVE_BN_ADDW_ADDW
/*
 * bn_addw_addw() computes (r1:r0) = a + b + c for single word inputs. The low
 * word is stored via out_r0 and the accumulated carry (0, 1 or 2) via out_r1.
 */
static inline void
bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG sum, cy;

	/*
	 * Both outputs are written before c has been read, hence the
	 * early-clobber constraints on them.
	 */
	__asm__ (
	    "adds %[r0], %[a], %[b] \n"
	    "cset %[carry], cs \n"
	    "adds %[r0], %[r0], %[c] \n"
	    "cinc %[carry], %[carry], cs \n"
	    : [carry]"=&r"(cy), [r0]"=&r"(sum)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
	    : "cc");

	*out_r1 = cy;
	*out_r0 = sum;
}
#define HAVE_BN_QWADDQW
/*
 * bn_qwaddqw() computes (r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry,
 * i.e. a four word addition with carry in. The four result words are stored
 * via out_r3..out_r0 and the carry out (0 or 1) via out_carry.
 */
static inline void
bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r3, r2, r1, r0;
/*
 * The first instruction primes the carry flag from the incoming carry:
 * carry + (-1) produces a carry out exactly when carry is non-zero, and the
 * sum itself is discarded into xzr. The adcs chain then propagates the flag
 * through the four words and the final cset turns it back into a 0/1 value.
 *
 * carry is a read-write operand (read first, overwritten last); the result
 * registers are early-clobber since they are written while inputs are still
 * to be read.
 */
__asm__ (
"adds xzr, %[carry], #-1 \n"
"adcs %[r0], %[a0], %[b0] \n"
"adcs %[r1], %[a1], %[b1] \n"
"adcs %[r2], %[a2], %[b2] \n"
"adcs %[r3], %[a3], %[b3] \n"
"cset %[carry], cs \n"
: [carry]"+r"(carry), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
: "cc");
*out_carry = carry;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_MULW
/*
 * bn_mulw() computes (r1:r0) = a * b for single word inputs, producing a
 * double word result: the high word via out_r1 and the low word via out_r0.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG hi, lo;

	/*
	 * umulh yields the high word of the unsigned product and mul the low
	 * word. The high word is written before mul reads a and b, so it must
	 * be early-clobber.
	 */
	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    : [r1]"=&r"(hi), [r0]"=r"(lo)
	    : [a]"r"(a), [b]"r"(b));

	*out_r1 = hi;
	*out_r0 = lo;
}
#define HAVE_BN_MULW_ADDW
/*
 * bn_mulw_addw() computes (r1:r0) = a * b + c for single word inputs,
 * producing a double word result: the high word via out_r1 and the low word
 * via out_r0.
 */
static inline void
bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
    BN_ULONG *out_r0)
{
	BN_ULONG hi, lo;

	/*
	 * Form the double word product, add c to the low word and fold the
	 * resulting carry into the high word.
	 */
	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    "adds %[r0], %[r0], %[c] \n"
	    "adc %[r1], %[r1], xzr \n"
	    : [r1]"=&r"(hi), [r0]"=&r"(lo)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
	    : "cc");

	*out_r1 = hi;
	*out_r0 = lo;
}
#define HAVE_BN_MULW_ADDW_ADDW
/*
 * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d for single word
 * inputs, producing a double word result: the high word via out_r1 and the
 * low word via out_r0.
 */
static inline void
bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
    BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG hi, lo;

	/*
	 * Form the double word product, then add c and d to the low word in
	 * turn, folding each carry into the high word.
	 */
	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    "adds %[r0], %[r0], %[c] \n"
	    "adc %[r1], %[r1], xzr \n"
	    "adds %[r0], %[r0], %[d] \n"
	    "adc %[r1], %[r1], xzr \n"
	    : [r1]"=&r"(hi), [r0]"=&r"(lo)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d)
	    : "cc");

	*out_r1 = hi;
	*out_r0 = lo;
}
#define HAVE_BN_MULW_ADDTW
/*
 * bn_mulw_addtw() computes (r2:r1:r0) = a * b + (c2:c1:c0), adding the double
 * word product of a and b to a triple word value. The result words are stored
 * via out_r2, out_r1 and out_r0; any carry out of the top word is discarded.
 */
static inline void
bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
    BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG top, mid, low;

	/*
	 * The product lands in (r1:r0); c0 and c1 are then added with carry
	 * propagation, and the final carry is absorbed into c2 to form r2.
	 */
	__asm__ (
	    "umulh %[r1], %[a], %[b] \n"
	    "mul %[r0], %[a], %[b] \n"
	    "adds %[r0], %[r0], %[c0] \n"
	    "adcs %[r1], %[r1], %[c1] \n"
	    "adc %[r2], xzr, %[c2] \n"
	    : [r2]"=&r"(top), [r1]"=&r"(mid), [r0]"=&r"(low)
	    : [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
	    : "cc");

	*out_r2 = top;
	*out_r1 = mid;
	*out_r0 = low;
}
#define HAVE_BN_MUL2_MULW_ADDTW
/*
 * bn_mul2_mulw_addtw() computes (r2:r1:r0) = 2 * a * b + (c2:c1:c0), adding
 * twice the double word product of a and b to a triple word value. The result
 * words are stored via out_r2, out_r1 and out_r0; any carry out of the top
 * word is discarded.
 */
static inline void
bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r2, r1, r0, x1, x0;
/*
 * The product is computed once into the scratch pair (x1:x0) and then added
 * twice rather than being shifted: the first add/adc sequence produces
 * c + a * b in (r2:r1:r0), the second adds the product again. x1 and x0 are
 * asm outputs only so that the compiler allocates scratch registers for them;
 * their values are not used afterwards.
 */
__asm__ (
"umulh %[x1], %[a], %[b] \n"
"mul %[x0], %[a], %[b] \n"
"adds %[r0], %[c0], %[x0] \n"
"adcs %[r1], %[c1], %[x1] \n"
"adc %[r2], xzr, %[c2] \n"
"adds %[r0], %[r0], %[x0] \n"
"adcs %[r1], %[r1], %[x1] \n"
"adc %[r2], xzr, %[r2] \n"
: [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0), [x1]"=&r"(x1),
[x0]"=&r"(x0)
: [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
: "cc");
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_QWMULW_ADDW
/*
 * bn_qwmulw_addw() computes (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c, i.e. a
 * four word value multiplied by a single word with a single word added. The
 * five result words are stored via out_r4..out_r0.
 */
static inline void
bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r4, r3, r2, r1, r0;
/*
 * Each partial product a[i] * b contributes its low word to r[i] and its
 * high word to r[i + 1]. Once c has been added into r0, its register is
 * reused as scratch for the low word of each following partial product,
 * which is why it is a read-write early-clobber operand. The carry flag set
 * by adds/adcs stays live across the interleaved umulh/mul instructions,
 * which do not alter the flags.
 */
__asm__ (
"umulh %[r1], %[a0], %[b] \n"
"mul %[r0], %[a0], %[b] \n"
"adds %[r0], %[r0], %[c] \n"
"umulh %[r2], %[a1], %[b] \n"
"mul %[c], %[a1], %[b] \n"
"adcs %[r1], %[r1], %[c] \n"
"umulh %[r3], %[a2], %[b] \n"
"mul %[c], %[a2], %[b] \n"
"adcs %[r2], %[r2], %[c] \n"
"umulh %[r4], %[a3], %[b] \n"
"mul %[c], %[a3], %[b] \n"
"adcs %[r3], %[r3], %[c] \n"
"adc %[r4], %[r4], xzr \n"
: [c]"+&r"(c), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b)
: "cc");
*out_r4 = r4;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_QWMULW_ADDQW_ADDW
/*
 * bn_qwmulw_addqw_addw() computes
 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d, i.e. a four word
 * value multiplied by a single word, with a four word value and a single word
 * added. The five result words are stored via out_r4..out_r0.
 */
static inline void
bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
BN_ULONG *out_r0)
{
BN_ULONG r4, r3, r2, r1, r0;
/*
 * The first carry chain (through the first adc) forms a * b + d: each partial
 * product a[i] * b contributes its low word to r[i] and its high word to
 * r[i + 1]. Once d has been added into r0, its register is reused as scratch
 * for the low word of each following partial product, which is why it is a
 * read-write early-clobber operand. The second carry chain then adds
 * (c3:c2:c1:c0), folding the final carry into r4.
 */
__asm__ (
"umulh %[r1], %[a0], %[b] \n"
"mul %[r0], %[a0], %[b] \n"
"adds %[r0], %[r0], %[d] \n"
"umulh %[r2], %[a1], %[b] \n"
"mul %[d], %[a1], %[b] \n"
"adcs %[r1], %[r1], %[d] \n"
"umulh %[r3], %[a2], %[b] \n"
"mul %[d], %[a2], %[b] \n"
"adcs %[r2], %[r2], %[d] \n"
"umulh %[r4], %[a3], %[b] \n"
"mul %[d], %[a3], %[b] \n"
"adcs %[r3], %[r3], %[d] \n"
"adc %[r4], %[r4], xzr \n"
"adds %[r0], %[r0], %[c0] \n"
"adcs %[r1], %[r1], %[c1] \n"
"adcs %[r2], %[r2], %[c2] \n"
"adcs %[r3], %[r3], %[c3] \n"
"adc %[r4], %[r4], xzr \n"
: [d]"+&r"(d), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b),
[c3]"r"(c3), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
: "cc");
*out_r4 = r4;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_SUBW
/*
 * bn_subw() computes r0 = a - b for single word inputs. The difference is
 * stored via out_r0 and the borrow (1 if a < b, otherwise 0) via out_borrow.
 */
static inline void
bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
{
	BN_ULONG diff, bw;

	/* subs clears the carry flag when a borrow occurs; cset captures it. */
	__asm__ (
	    "subs %[r0], %[a], %[b] \n"
	    "cset %[borrow], cc \n"
	    : [borrow]"=r"(bw), [r0]"=r"(diff)
	    : [a]"r"(a), [b]"r"(b)
	    : "cc");

	*out_borrow = bw;
	*out_r0 = diff;
}
#define HAVE_BN_SUBW_SUBW
/*
 * bn_subw_subw() computes r0 = a - b - c for single word inputs. The
 * difference is stored via out_r0 and the accumulated borrow (0, 1 or 2) via
 * out_borrow.
 */
static inline void
bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
    BN_ULONG *out_r0)
{
	BN_ULONG diff, bw;

	/*
	 * Both outputs are written before c has been read, hence the
	 * early-clobber constraints on them.
	 */
	__asm__ (
	    "subs %[r0], %[a], %[b] \n"
	    "cset %[borrow], cc \n"
	    "subs %[r0], %[r0], %[c] \n"
	    "cinc %[borrow], %[borrow], cc \n"
	    : [borrow]"=&r"(bw), [r0]"=&r"(diff)
	    : [a]"r"(a), [b]"r"(b), [c]"r"(c)
	    : "cc");

	*out_borrow = bw;
	*out_r0 = diff;
}
#define HAVE_BN_QWSUBQW
/*
 * bn_qwsubqw() computes (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow,
 * i.e. a four word subtraction with borrow in. The four result words are
 * stored via out_r3..out_r0 and the borrow out (0 or 1) via out_borrow.
 */
static inline void
bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r3, r2, r1, r0;
/*
 * For subtraction the carry flag is inverted: it is clear when a borrow is
 * pending. The first instruction primes it from the incoming borrow, since
 * 0 - borrow borrows (clearing the flag) exactly when borrow is non-zero; the
 * difference itself is discarded into xzr. The sbcs chain then propagates the
 * flag through the four words and the final cset converts "carry clear" back
 * into a 0/1 borrow value.
 *
 * borrow is a read-write operand (read first, overwritten last); the result
 * registers are early-clobber since they are written while inputs are still
 * to be read.
 */
__asm__ (
"subs xzr, xzr, %[borrow] \n"
"sbcs %[r0], %[a0], %[b0] \n"
"sbcs %[r1], %[a1], %[b1] \n"
"sbcs %[r2], %[a2], %[b2] \n"
"sbcs %[r3], %[a3], %[b3] \n"
"cset %[borrow], cc \n"
: [borrow]"+r"(borrow), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
: "cc");
*out_borrow = borrow;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#endif /* __GNUC__ */
#endif
#endif