check in v3.8.1 source

This commit is contained in:
2023-09-03 18:24:16 -07:00
parent fbb21ed921
commit b31c897352
1205 changed files with 561101 additions and 0 deletions

View File

@@ -0,0 +1,369 @@
/* $OpenBSD: bn_arch.h,v 1.13 2023/07/24 10:21:29 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <openssl/bn.h>
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
#if defined(__GNUC__)
#define HAVE_BN_CLZW
/*
 * bn_clzw() returns the result of the "clz" (count leading zeros)
 * instruction applied to the word w.
 */
static inline int
bn_clzw(BN_ULONG w)
{
BN_ULONG n;
/* A single instruction; no "cc" clobber is declared for this statement. */
__asm__ ("clz %[n], %[w]"
: [n]"=r"(n)
: [w]"r"(w));
/* n is a BN_ULONG and is implicitly converted to the int return type. */
return n;
}
#define HAVE_BN_ADDW
/*
 * bn_addw() adds the words a and b. The low word of the sum is stored in
 * *out_r0 and the carry out of the addition (0 or 1) in *out_r1.
 */
static inline void
bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG carry, r0;
/*
 * "adds" sets the carry flag on overflow and "cset ..., cs" turns that
 * flag into a 0/1 value. The flags are modified, hence the "cc" clobber.
 */
__asm__ (
"adds %[r0], %[a], %[b] \n"
"cset %[carry], cs \n"
: [carry]"=r"(carry), [r0]"=r"(r0)
: [a]"r"(a), [b]"r"(b)
: "cc");
*out_r1 = carry;
*out_r0 = r0;
}
#define HAVE_BN_ADDW_ADDW
/*
 * bn_addw_addw() computes a + b + c. The low word of the sum is stored in
 * *out_r0 and the accumulated carry (0, 1 or 2) in *out_r1.
 */
static inline void
bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
BN_ULONG *out_r0)
{
BN_ULONG carry, r0;
/*
 * The first adds/cset pair records the carry from a + b; the second adds
 * folds in c and "cinc" bumps the carry count if that addition also
 * carried. Both outputs are written before c is read, so they are
 * marked early-clobber ("=&r") to keep them out of c's register.
 */
__asm__ (
"adds %[r0], %[a], %[b] \n"
"cset %[carry], cs \n"
"adds %[r0], %[r0], %[c] \n"
"cinc %[carry], %[carry], cs \n"
: [carry]"=&r"(carry), [r0]"=&r"(r0)
: [a]"r"(a), [b]"r"(b), [c]"r"(c)
: "cc");
*out_r1 = carry;
*out_r0 = r0;
}
#define HAVE_BN_QWADDQW
/*
 * bn_qwaddqw() adds the four-word values (a3:a2:a1:a0) and (b3:b2:b1:b0)
 * together with an incoming carry. The four result words are stored in
 * *out_r3 .. *out_r0 (r0 is the least significant word) and the carry out of
 * the top word (0 or 1) in *out_carry.
 */
static inline void
bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r3, r2, r1, r0;
/*
 * "adds xzr, carry, #-1" adds all-ones to the incoming carry and discards
 * the sum; this sets the carry flag exactly when carry is non-zero, which
 * seeds the adcs chain. The final "cset" converts the outgoing carry flag
 * back into a 0/1 value in the read/write ("+r") carry operand.
 */
__asm__ (
"adds xzr, %[carry], #-1 \n"
"adcs %[r0], %[a0], %[b0] \n"
"adcs %[r1], %[a1], %[b1] \n"
"adcs %[r2], %[a2], %[b2] \n"
"adcs %[r3], %[a3], %[b3] \n"
"cset %[carry], cs \n"
: [carry]"+r"(carry), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
: "cc");
*out_carry = carry;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_MULW
/*
 * bn_mulw() multiplies the words a and b, producing a double-word result.
 * The high word is stored in *out_r1 and the low word in *out_r0.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/*
 * Unsigned multiplication using a umulh/mul pair. r1 is written by umulh
 * before mul reads a and b again, so it is marked early-clobber. No "cc"
 * clobber is declared for this statement.
 */
__asm__ (
"umulh %[r1], %[a], %[b] \n"
"mul %[r0], %[a], %[b] \n"
: [r1]"=&r"(r1), [r0]"=r"(r0)
: [a]"r"(a), [b]"r"(b));
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_MULW_ADDW
/*
 * bn_mulw_addw() computes a * b + c as a double-word result. The high word
 * is stored in *out_r1 and the low word in *out_r0.
 */
static inline void
bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/*
 * umulh/mul form the double-word product in r1:r0, "adds" adds c into the
 * low word and "adc ..., xzr" propagates the resulting carry into the
 * high word.
 */
__asm__ (
"umulh %[r1], %[a], %[b] \n"
"mul %[r0], %[a], %[b] \n"
"adds %[r0], %[r0], %[c] \n"
"adc %[r1], %[r1], xzr \n"
: [r1]"=&r"(r1), [r0]"=&r"(r0)
: [a]"r"(a), [b]"r"(b), [c]"r"(c)
: "cc");
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_MULW_ADDW_ADDW
/*
 * bn_mulw_addw_addw() computes a * b + c + d as a double-word result. The
 * high word is stored in *out_r1 and the low word in *out_r0.
 */
static inline void
bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/*
 * After forming the product in r1:r0, c and then d are each added to the
 * low word with "adds", with the carry of each addition propagated into
 * the high word by "adc ..., xzr".
 */
__asm__ (
"umulh %[r1], %[a], %[b] \n"
"mul %[r0], %[a], %[b] \n"
"adds %[r0], %[r0], %[c] \n"
"adc %[r1], %[r1], xzr \n"
"adds %[r0], %[r0], %[d] \n"
"adc %[r1], %[r1], xzr \n"
: [r1]"=&r"(r1), [r0]"=&r"(r0)
: [a]"r"(a), [b]"r"(b), [c]"r"(c), [d]"r"(d)
: "cc");
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_MULW_ADDTW
/*
 * bn_mulw_addtw() computes a * b + (c2:c1:c0), where (c2:c1:c0) is a
 * triple-word value with c0 least significant. The triple-word result is
 * stored in *out_r2, *out_r1 and *out_r0.
 */
static inline void
bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r2, r1, r0;
/*
 * The product is formed in r1:r0, then c0 and c1 are added with a carry
 * chain. The top word is c2 plus the final carry; it is computed with
 * "adc" (not "adcs"), so any carry out of the top word is not recorded.
 */
__asm__ (
"umulh %[r1], %[a], %[b] \n"
"mul %[r0], %[a], %[b] \n"
"adds %[r0], %[r0], %[c0] \n"
"adcs %[r1], %[r1], %[c1] \n"
"adc %[r2], xzr, %[c2] \n"
: [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0)
: [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
: "cc");
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_MUL2_MULW_ADDTW
/*
 * bn_mul2_mulw_addtw() computes 2 * a * b + (c2:c1:c0), where (c2:c1:c0) is
 * a triple-word value with c0 least significant. The triple-word result is
 * stored in *out_r2, *out_r1 and *out_r0.
 */
static inline void
bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r2, r1, r0, x1, x0;
/*
 * The double-word product a * b is computed once into the scratch pair
 * x1:x0 and then added to the accumulator twice, rather than doubling the
 * product itself. Each pass propagates its carry into r2 with "adc".
 */
__asm__ (
"umulh %[x1], %[a], %[b] \n"
"mul %[x0], %[a], %[b] \n"
"adds %[r0], %[c0], %[x0] \n"
"adcs %[r1], %[c1], %[x1] \n"
"adc %[r2], xzr, %[c2] \n"
"adds %[r0], %[r0], %[x0] \n"
"adcs %[r1], %[r1], %[x1] \n"
"adc %[r2], xzr, %[r2] \n"
: [r2]"=&r"(r2), [r1]"=&r"(r1), [r0]"=&r"(r0), [x1]"=&r"(x1),
[x0]"=&r"(x0)
: [a]"r"(a), [b]"r"(b), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
: "cc");
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_QWMULW_ADDW
/*
 * bn_qwmulw_addw() multiplies the four-word value (a3:a2:a1:a0) by the word
 * b and adds the word c. The five-word result is stored in *out_r4 ..
 * *out_r0, with r0 the least significant word.
 */
static inline void
bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r4, r3, r2, r1, r0;
/*
 * Once c has been added into r0, its register is reused as scratch for
 * the low half of each subsequent partial product (hence the "+&r"
 * constraint). The carry chain started by "adds" runs through the
 * adcs instructions, interleaved with the multiplies, and is terminated
 * by the final "adc" into r4.
 */
__asm__ (
"umulh %[r1], %[a0], %[b] \n"
"mul %[r0], %[a0], %[b] \n"
"adds %[r0], %[r0], %[c] \n"
"umulh %[r2], %[a1], %[b] \n"
"mul %[c], %[a1], %[b] \n"
"adcs %[r1], %[r1], %[c] \n"
"umulh %[r3], %[a2], %[b] \n"
"mul %[c], %[a2], %[b] \n"
"adcs %[r2], %[r2], %[c] \n"
"umulh %[r4], %[a3], %[b] \n"
"mul %[c], %[a3], %[b] \n"
"adcs %[r3], %[r3], %[c] \n"
"adc %[r4], %[r4], xzr \n"
: [c]"+&r"(c), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b)
: "cc");
*out_r4 = r4;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_QWMULW_ADDQW_ADDW
/*
 * bn_qwmulw_addqw_addw() multiplies the four-word value (a3:a2:a1:a0) by the
 * word b, then adds the four-word value (c3:c2:c1:c0) and the word d. The
 * five-word result is stored in *out_r4 .. *out_r0, with r0 the least
 * significant word.
 */
static inline void
bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
BN_ULONG *out_r0)
{
BN_ULONG r4, r3, r2, r1, r0;
/*
 * First pass: a * b + d, with d's register reused as scratch for the low
 * half of each partial product once d itself has been consumed ("+&r").
 * Second pass: a separate carry chain adds c0..c3 into r0..r3 and
 * propagates the final carry into r4.
 */
__asm__ (
"umulh %[r1], %[a0], %[b] \n"
"mul %[r0], %[a0], %[b] \n"
"adds %[r0], %[r0], %[d] \n"
"umulh %[r2], %[a1], %[b] \n"
"mul %[d], %[a1], %[b] \n"
"adcs %[r1], %[r1], %[d] \n"
"umulh %[r3], %[a2], %[b] \n"
"mul %[d], %[a2], %[b] \n"
"adcs %[r2], %[r2], %[d] \n"
"umulh %[r4], %[a3], %[b] \n"
"mul %[d], %[a3], %[b] \n"
"adcs %[r3], %[r3], %[d] \n"
"adc %[r4], %[r4], xzr \n"
"adds %[r0], %[r0], %[c0] \n"
"adcs %[r1], %[r1], %[c1] \n"
"adcs %[r2], %[r2], %[c2] \n"
"adcs %[r3], %[r3], %[c3] \n"
"adc %[r4], %[r4], xzr \n"
: [d]"+&r"(d), [r4]"=&r"(r4), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0), [b]"r"(b),
[c3]"r"(c3), [c2]"r"(c2), [c1]"r"(c1), [c0]"r"(c0)
: "cc");
*out_r4 = r4;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_SUBW
/*
 * bn_subw() computes a - b. The difference word is stored in *out_r0 and
 * the borrow (1 if the subtraction borrowed, otherwise 0) in *out_borrow.
 */
static inline void
bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
{
BN_ULONG borrow, r0;
/*
 * After "subs" a clear carry flag indicates a borrow, so the borrow is
 * materialised with the "cc" (carry clear) condition. Note that the "cc"
 * in the clobber list is unrelated: it declares that the flags change.
 */
__asm__ (
"subs %[r0], %[a], %[b] \n"
"cset %[borrow], cc \n"
: [borrow]"=r"(borrow), [r0]"=r"(r0)
: [a]"r"(a), [b]"r"(b)
: "cc");
*out_borrow = borrow;
*out_r0 = r0;
}
#define HAVE_BN_SUBW_SUBW
/*
 * bn_subw_subw() computes a - b - c. The difference word is stored in
 * *out_r0 and the accumulated borrow (0, 1 or 2) in *out_borrow.
 */
static inline void
bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
BN_ULONG *out_r0)
{
BN_ULONG borrow, r0;
/*
 * The first subs/cset pair records the borrow from a - b; the second
 * subs removes c and "cinc" bumps the borrow count if that subtraction
 * also borrowed (carry clear). Both outputs are written before c is
 * read, so they are marked early-clobber.
 */
__asm__ (
"subs %[r0], %[a], %[b] \n"
"cset %[borrow], cc \n"
"subs %[r0], %[r0], %[c] \n"
"cinc %[borrow], %[borrow], cc \n"
: [borrow]"=&r"(borrow), [r0]"=&r"(r0)
: [a]"r"(a), [b]"r"(b), [c]"r"(c)
: "cc");
*out_borrow = borrow;
*out_r0 = r0;
}
#define HAVE_BN_QWSUBQW
/*
 * bn_qwsubqw() subtracts the four-word value (b3:b2:b1:b0) and an incoming
 * borrow from the four-word value (a3:a2:a1:a0). The four result words are
 * stored in *out_r3 .. *out_r0 (r0 is the least significant word) and the
 * borrow out of the top word (0 or 1) in *out_borrow.
 */
static inline void
bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r3, r2, r1, r0;
/*
 * "subs xzr, xzr, borrow" computes 0 - borrow and discards the result;
 * this leaves the carry flag clear (meaning "borrow pending") exactly
 * when the incoming borrow is non-zero, which seeds the sbcs chain. The
 * final "cset ..., cc" converts the outgoing flag back to a 0/1 value.
 */
__asm__ (
"subs xzr, xzr, %[borrow] \n"
"sbcs %[r0], %[a0], %[b0] \n"
"sbcs %[r1], %[a1], %[b1] \n"
"sbcs %[r2], %[a2], %[b2] \n"
"sbcs %[r3], %[a3], %[b3] \n"
"cset %[borrow], cc \n"
: [borrow]"+r"(borrow), [r3]"=&r"(r3), [r2]"=&r"(r2),
[r1]"=&r"(r1), [r0]"=&r"(r0)
: [a3]"r"(a3), [a2]"r"(a2), [a1]"r"(a1), [a0]"r"(a0),
[b3]"r"(b3), [b2]"r"(b2), [b1]"r"(b1), [b0]"r"(b0)
: "cc");
*out_borrow = borrow;
*out_r3 = r3;
*out_r2 = r2;
*out_r1 = r1;
*out_r0 = r0;
}
#endif /* __GNUC__ */
#endif
#endif

View File

@@ -0,0 +1,44 @@
/* $OpenBSD: bn_arch.h,v 1.4 2023/02/16 10:41:03 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
#if 0 /* Needs testing and enabling. */
#if defined(__GNUC__)
#define HAVE_BN_MULW
/*
 * bn_mulw() multiplies the words a and b, producing a double-word result.
 * The high word is stored in *out_r1 and the low word in *out_r0.
 *
 * NOTE: this definition is currently compiled out by the enclosing "#if 0".
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/*
 * Unsigned multiplication using a umulh/mulq pair. Operands are
 * positional: %0 = r1, %1 = r0, %2 = a, %3 = b, with the destination
 * given last in each instruction. r1 is written before the second
 * instruction reads a and b, hence its early-clobber constraint.
 */
__asm__ ("umulh %2, %3, %0; mulq %2, %3, %1"
: "=&r"(r1), "=r"(r0)
: "r"(a), "r"(b));
*out_r1 = r1;
*out_r0 = r0;
}
#endif /* __GNUC__ */
#endif
#endif
#endif

View File

@@ -0,0 +1,165 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Add, z := x + y
// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
//
// extern uint64_t bignum_add
// (uint64_t p, uint64_t *z,
// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
//
// Does the z := x + y operation, truncating modulo p words in general and
// returning a top carry (0 or 1) in the p'th place, only adding the input
// words below p (as well as m and n respectively) to get the sum and carry.
//
// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
// Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add)
.text
// Register aliases. p, z, m, x, n and y name the registers in which the six
// arguments arrive under the standard ABI; i is the running word index and
// a / ashort are the 64-bit and 32-bit views of the accumulator, which also
// holds the return value.
#define p rdi
#define z rsi
#define m rdx
#define x rcx
#define n r8
#define y r9
#define i r10
#define a rax
#define ashort eax
S2N_BN_SYMBOL(bignum_add):
endbr64
// Under the Microsoft x64 ABI, save rdi and rsi (restored before every ret)
// and move the arguments into the standard-ABI registers. The two pushes
// shift the stack arguments from [RSP+40]/[RSP+48] to [rsp+56]/[rsp+64].
#if WINDOWS_ABI
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
mov rcx, r9
mov r8, [rsp+56]
mov r9, [rsp+64]
#endif
// Zero the main index counter for both branches
xor i, i
// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
// we'll never need words past the p'th. Can now assume m <= p and n <= p.
// Then compare the modified m and n and branch accordingly
cmp p, m
cmovc m, p
cmp p, n
cmovc n, p
cmp m, n
jc ylonger
// The case where x is longer or of the same size (p >= m >= n)
// Here p becomes the length of the tail beyond x, and m becomes (m - n) + 1;
// the extra 1 compensates for xtest decrementing before it tests.
// The "test" also clears CF so the first adc has no carry-in; the inc/dec
// instructions in the loops leave CF alone, so the carry chains through.
sub p, m
sub m, n
inc m
test n, n
jz xtest
// Words where both x and y contribute
xmainloop:
mov a, [x+8*i]
adc a, [y+8*i]
mov [z+8*i],a
inc i
dec n
jnz xmainloop
jmp xtest
// Remaining words of x, with only the carry added in
xtoploop:
mov a, [x+8*i]
adc a, 0
mov [z+8*i],a
inc i
xtest:
dec m
jnz xtoploop
// Materialize the final carry in a; mov (unlike xor) leaves CF intact
mov ashort, 0
adc a, 0
test p, p
jnz tails
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
// The case where y is longer (p >= n > m)
// Here p becomes the length of the tail beyond y and n the number of extra
// words in y, which is at least 1, so ytoploop always runs at least once.
// The "test" clears CF ready for the first adc.
ylonger:
sub p, n
sub n, m
test m, m
jz ytoploop
// Words where both x and y contribute
ymainloop:
mov a, [x+8*i]
adc a, [y+8*i]
mov [z+8*i],a
inc i
dec m
jnz ymainloop
// Remaining words of y, with only the carry added in
ytoploop:
mov a, [y+8*i]
adc a, 0
mov [z+8*i],a
inc i
dec n
jnz ytoploop
// Materialize the final carry in a; mov (unlike xor) leaves CF intact
mov ashort, 0
adc a, 0
test p, p
jnz tails
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
// Adding a non-trivial tail, when p > max(m,n)
// The carry is stored as the next output word, the remaining words are
// filled with zero, and the function returns zero in a.
tails:
mov [z+8*i],a
xor a, a
jmp tail
tailloop:
mov [z+8*i],a
tail:
inc i
dec p
jnz tailloop
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,155 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Multiply-add with single-word multiplier, z := z + c * y
// Inputs c, y[n]; outputs function return (carry-out) and z[k]
//
// extern uint64_t bignum_cmadd
// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
//
// Does the "z := z + c * y" operation where y is n digits, result z is k.
// Truncates the result in general. (The code below names the output size k
// "p" and the input pointer y "x".)
//
// The return value is a high/carry word that is meaningful when k = n + 1, or
// more generally when n <= k and the result fits in k + 1 digits. In these
// cases it gives the top digit of the (k + 1)-digit result.
//
// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
// Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmadd)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmadd)
.text
// Register aliases. p (rdi) is the output size called k in the prototype and
// x (r8) is the input pointer called y there. The multiplier arrives in rdx
// and is copied to c (r9) below. i is the word index, h the running high
// word / return value, and r a scratch register for a saved carry.
#define p rdi
#define z rsi
#define c r9
#define n rcx
#define x r8
#define i r10
#define h r11
#define r rbx
#define hshort r11d
#define ishort r10d
S2N_BN_SYMBOL(bignum_cmadd):
endbr64
// Under the Microsoft x64 ABI, save rdi and rsi and move the arguments into
// the standard-ABI registers. The two pushes shift the stack argument from
// [RSP+40] to [rsp+56].
#if WINDOWS_ABI
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
mov rcx, r9
mov r8, [rsp+56]
#endif
// Seems hard to avoid one more register
push rbx
// First clamp the input size n := min(p,n) since we can never need to read
// past the p'th term of the input to generate p-digit output.
// Subtract p := p - min(n,p) so it holds the size of the extra tail needed
cmp p, n
cmovc n, p
sub p, n
// Initialize high part h = 0; if n = 0 do nothing but return that zero
xor h, h
test n, n
jz end
// Move c into a safer register as multiplies overwrite rdx
mov c, rdx
// Initialization of the loop: 2^64 * CF + [h,z_0'] = z_0 + c * x_0
// (i is set with mov, which leaves the CF produced by the add untouched)
mov rax, [x]
mul c
add [z], rax
mov h, rdx
mov ishort, 1
dec n
jz hightail
// Main loop, where we always have CF + previous high part h to add in
// The carry out of "h + z_i + CF" is captured by sbb as r = 0 or -1 because
// mul overwrites the flags; "sub rdx, r" then adds it to the new high word.
// The final "add rax, h" sets the CF carried into the next iteration, since
// the mov/inc/dec instructions that follow do not change CF.
loop:
adc h, [z+8*i]
sbb r, r
mov rax, [x+8*i]
mul c
sub rdx, r
add rax, h
mov [z+8*i], rax
mov h, rdx
inc i
dec n
jnz loop
// Fold the outstanding carry into the high word
hightail:
adc h, 0
// Propagate the carry all the way to the end with h as extra carry word
// h is added to the first tail word, then zeroed with mov (which preserves
// CF) so that the adc instructions in tloop add only the carry.
tail:
test p, p
jz end
add [z+8*i], h
mov hshort, 0
inc i
dec p
jz highend
tloop:
adc [z+8*i], h
inc i
dec p
jnz tloop
// h is zero here, so this leaves just the final carry in h
highend:
adc h, 0
// Return the high/carry word
end:
mov rax, h
pop rbx
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,138 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Multiply by a single word, z := c * y
// Inputs c, y[n]; outputs function return (carry-out) and z[k]
//
// extern uint64_t bignum_cmul
// (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
//
// Does the "z := c * y" operation where y is n digits, result z is k.
// Truncates the result in general unless k >= n + 1. (The code below names
// the output size k "p" and the input pointer y "x".)
//
// The return value is a high/carry word that is meaningful when k >= n as
// giving the high part of the result. Since this is always zero if k > n,
// it is mainly of interest in the special case k = n, i.e. where the source
// and destination have the same nominal size, when it gives the extra word
// of the full result.
//
// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
// Microsoft x64 ABI: RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul)
.text
// Register aliases. p (rdi) is the output size called k in the prototype and
// x (r8) is the input pointer called y there. The multiplier arrives in rdx
// and is copied to c (r9) below. i is the word index and h the running high
// word, which becomes the return value.
#define p rdi
#define z rsi
#define c r9
#define n rcx
#define x r8
#define i r10
#define h r11
S2N_BN_SYMBOL(bignum_cmul):
endbr64
// Under the Microsoft x64 ABI, save rdi and rsi and move the arguments into
// the standard-ABI registers. The two pushes shift the stack argument from
// [RSP+40] to [rsp+56].
#if WINDOWS_ABI
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
mov rcx, r9
mov r8, [rsp+56]
#endif
// First clamp the input size n := min(p,n) since we can never need to read
// past the p'th term of the input to generate p-digit output. Now we can
// assume that n <= p
cmp p, n
cmovc n, p
// Initialize current input/output pointer offset i and high part h.
// But then if n = 0 skip the multiplication and go to the tail part
xor h, h
xor i, i
test n, n
jz tail
// Move c into a safer register as multiplies overwrite rdx
mov c, rdx
// Initialization of the loop: [h,l] = c * x_0
mov rax, [x]
mul c
mov [z], rax
mov h, rdx
inc i
cmp i, n
jz tail
// Main loop doing the multiplications
// Each step forms rdx:rax = c * x_i, adds the previous high word h to the
// low half, and propagates the carry into the new high half.
loop:
mov rax, [x+8*i]
mul c
add rax, h
adc rdx, 0
mov [z+8*i], rax
mov h, rdx
inc i
cmp i, n
jc loop
// Add a tail when the destination is longer
// If i < p the high word h is stored as the next output word and h is
// zeroed; any further output words are then filled with zero.
tail:
cmp i, p
jnc end
mov [z+8*i], h
xor h, h
inc i
cmp i, p
jnc end
tloop:
mov [z+8*i], h
inc i
cmp i, p
jc tloop
// Return the high/carry word
end:
mov rax, h
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,167 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Multiply z := x * y
// Inputs x[m], y[n]; output z[k]
//
// extern void bignum_mul
// (uint64_t k, uint64_t *z,
// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
//
// Does the "z := x * y" operation where x is m digits, y is n, result z is k.
// Truncates the result in general unless k >= m + n
//
// Standard x86-64 ABI: RDI = k, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y
// Microsoft x64 ABI: RCX = k, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul)
.text
// These are actually right
// (the registers in which these arguments arrive under the standard ABI;
// p is the output size called k in the prototype)
#define p rdi
#define z rsi
#define n r8
// These are not
// (they are working registers set up by the code below. In particular x and
// y are running pointers; the base pointers passed as arguments stay in rcx
// and r9. k here is the outer loop counter, not the prototype's k.)
#define c r15
#define h r14
#define l r13
#define x r12
#define y r11
#define i rbx
#define k r10
#define m rbp
// These are always local scratch since multiplier result is in these
#define a rax
#define d rdx
S2N_BN_SYMBOL(bignum_mul):
endbr64
// Under the Microsoft x64 ABI, save rdi and rsi and move the arguments into
// the standard-ABI registers. The two pushes shift the stack arguments from
// [RSP+40]/[RSP+48] to [rsp+56]/[rsp+64].
#if WINDOWS_ABI
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
mov rcx, r9
mov r8, [rsp+56]
mov r9, [rsp+64]
#endif
// We use too many registers, and also we need rax:rdx for multiplications
// (the argument m is copied out of rdx into rbp for that reason)
push rbx
push rbp
push r12
push r13
push r14
push r15
mov m, rdx
// If the result size is zero, do nothing
// Note that even if either or both inputs has size zero, we can't
// just give up because we at least need to zero the output array
// If we did a multiply-add variant, however, then we could
test p, p
jz end
// Set initial 2-part sum to zero (we zero c inside the body)
xor h,h
xor l,l
// Otherwise do outer loop k = 0 ... k = p - 1
xor k, k
outerloop:
// Zero our carry term first; we eventually want it and a zero is useful now
// Set a = max 0 (k + 1 - n), i = min (k + 1) m
// This defines the range a <= j < i for the inner summation
// Note that since k < p < 2^64 we can assume k + 1 doesn't overflow
// And since we want to increment it anyway, we might as well do it now
xor c, c // c = 0
inc k // k = k + 1
mov a, k // a = k + 1
sub a, n // a = k + 1 - n
cmovc a, c // a = max 0 (k + 1 - n)
mov i, m // i = m
cmp k, m // CF <=> k + 1 < m
cmovc i, k // i = min (k + 1) m
// Turn i into a loop count, and skip things if it's <= 0
// Otherwise set up initial pointers x -> x0[a] and y -> y0[k - a]
// and then launch into the main inner loop, postdecrementing i
// (y is biased by -8 bytes and indexed with i counting down to 1, so the
// y index falls as x advances, keeping the sum of the two indices constant)
mov d, k
sub d, i
sub i, a
jbe innerend
lea x,[rcx+8*a]
lea y,[r9+8*d-8]
// Accumulate each product into the three-word sum (c,h,l)
innerloop:
mov rax, [y+8*i]
mul QWORD PTR [x]
add x, 8
add l, rax
adc h, rdx
adc c, 0
dec i
jnz innerloop
// Store the finished low word as the next output digit and shift the
// accumulator down one word: (h,l) := (c,h). c is re-zeroed at outerloop.
innerend:
mov [z], l
mov l, h
mov h, c
add z, 8
cmp k, p
jc outerloop
end:
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,157 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Multiply z := x * y
// Inputs x[4], y[4]; output z[8]
//
// extern void bignum_mul_4_8_alt
// (uint64_t z[static 8], uint64_t x[static 4], uint64_t y[static 4]);
//
// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_4_8_alt)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_4_8_alt)
.text
// These are actually right
// (z and x stay in the registers in which they arrive under the standard ABI)
#define z rdi
#define x rsi
// This is moved from rdx to free it for muls
#define y rcx
// Other variables used as a rotating 3-word window to add terms to
// For each result term the three registers play the roles (c,h,l); once
// the low word l has been stored, the roles rotate for the next term.
#define t0 r8
#define t1 r9
#define t2 r10
// Macro for the key "multiply and add to (c,h,l)" step
// Computes rdx:rax = numa * numb and adds it into the 3-word sum (c,h,l)
#define combadd(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add l, rax; \
adc h, rdx; \
adc c, 0
// A minutely shorter form for when c = 0 initially
// With c = 0, "adc c, c" leaves just the carry in c
#define combadz(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add l, rax; \
adc h, rdx; \
adc c, c
// A short form where we don't expect a top carry
// Adds the product into the 2-word sum (h,l) only
#define combads(h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add l, rax; \
adc h, rdx
S2N_BN_SYMBOL(bignum_mul_4_8_alt):
endbr64
// Under the Microsoft x64 ABI, save rdi and rsi and move the arguments into
// the standard-ABI registers.
#if WINDOWS_ABI
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
#endif
// Copy y into a safe register to start with
mov y, rdx
// Each "result term j" below sums the products x[a] * y[b] with a + b = j
// (byte offsets 8*a and 8*b) and then stores output word j at [z+8*j].
// Result term 0
mov rax, [x]
mul QWORD PTR [y]
mov [z], rax
mov t0, rdx
xor t1, t1
// Result term 1
xor t2, t2
combads(t1,t0,[x],[y+8])
combadz(t2,t1,t0,[x+8],[y])
mov [z+8], t0
// Result term 2
xor t0, t0
combadz(t0,t2,t1,[x],[y+16])
combadd(t0,t2,t1,[x+8],[y+8])
combadd(t0,t2,t1,[x+16],[y])
mov [z+16], t1
// Result term 3
xor t1, t1
combadz(t1,t0,t2,[x],[y+24])
combadd(t1,t0,t2,[x+8],[y+16])
combadd(t1,t0,t2,[x+16],[y+8])
combadd(t1,t0,t2,[x+24],[y])
mov [z+24], t2
// Result term 4
xor t2, t2
combadz(t2,t1,t0,[x+8],[y+24])
combadd(t2,t1,t0,[x+16],[y+16])
combadd(t2,t1,t0,[x+24],[y+8])
mov [z+32], t0
// Result term 5
xor t0, t0
combadz(t0,t2,t1,[x+16],[y+24])
combadd(t0,t2,t1,[x+24],[y+16])
mov [z+40], t1
// Result term 6
// NOTE(review): t1 is zeroed here but is not read again before the return.
xor t1, t1
combads(t0,t2,[x+24],[y+24])
mov [z+48], t2
// Result term 7
mov [z+56], t0
// Return
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,244 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Multiply z := x * y
// Inputs x[8], y[8]; output z[16]
//
// extern void bignum_mul_8_16_alt
// (uint64_t z[static 16], uint64_t x[static 8], uint64_t y[static 8]);
//
// Standard x86-64 ABI: RDI = z, RSI = x, RDX = y
// Microsoft x64 ABI: RCX = z, RDX = x, R8 = y
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul_8_16_alt)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul_8_16_alt)
.text
// These are actually right
// (z and x stay in the registers in which they arrive under the standard ABI)
#define z rdi
#define x rsi
// This is moved from rdx to free it for muls
#define y rcx
// Other variables used as a rotating 3-word window to add terms to
// For each result term the three registers play the roles (c,h,l); once
// the low word l has been stored, the roles rotate for the next term.
#define t0 r8
#define t1 r9
#define t2 r10
// Macro for the key "multiply and add to (c,h,l)" step
// Computes rdx:rax = numa * numb and adds it into the 3-word sum (c,h,l)
#define combadd(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add l, rax; \
adc h, rdx; \
adc c, 0
// A minutely shorter form for when c = 0 initially
// With c = 0, "adc c, c" leaves just the carry in c
#define combadz(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add l, rax; \
adc h, rdx; \
adc c, c
// A short form where we don't expect a top carry
// Adds the product into the 2-word sum (h,l) only
#define combads(h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add l, rax; \
adc h, rdx
S2N_BN_SYMBOL(bignum_mul_8_16_alt):
endbr64
// Under the Microsoft x64 ABI, save rdi and rsi and move the arguments into
// the standard-ABI registers.
#if WINDOWS_ABI
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
#endif
// Copy y into a safe register to start with
mov y, rdx
// Each "result term j" below sums the products x[a] * y[b] with a + b = j
// (byte offsets 8*a and 8*b) and then stores output word j at [z+8*j].
// Result term 0
mov rax, [x]
mul QWORD PTR [y]
mov [z], rax
mov t0, rdx
xor t1, t1
// Result term 1
xor t2, t2
combads(t1,t0,[x],[y+8])
combadz(t2,t1,t0,[x+8],[y])
mov [z+8], t0
// Result term 2
xor t0, t0
combadz(t0,t2,t1,[x],[y+16])
combadd(t0,t2,t1,[x+8],[y+8])
combadd(t0,t2,t1,[x+16],[y])
mov [z+16], t1
// Result term 3
xor t1, t1
combadz(t1,t0,t2,[x],[y+24])
combadd(t1,t0,t2,[x+8],[y+16])
combadd(t1,t0,t2,[x+16],[y+8])
combadd(t1,t0,t2,[x+24],[y])
mov [z+24], t2
// Result term 4
xor t2, t2
combadz(t2,t1,t0,[x],[y+32])
combadd(t2,t1,t0,[x+8],[y+24])
combadd(t2,t1,t0,[x+16],[y+16])
combadd(t2,t1,t0,[x+24],[y+8])
combadd(t2,t1,t0,[x+32],[y])
mov [z+32], t0
// Result term 5
xor t0, t0
combadz(t0,t2,t1,[x],[y+40])
combadd(t0,t2,t1,[x+8],[y+32])
combadd(t0,t2,t1,[x+16],[y+24])
combadd(t0,t2,t1,[x+24],[y+16])
combadd(t0,t2,t1,[x+32],[y+8])
combadd(t0,t2,t1,[x+40],[y])
mov [z+40], t1
// Result term 6
xor t1, t1
combadz(t1,t0,t2,[x],[y+48])
combadd(t1,t0,t2,[x+8],[y+40])
combadd(t1,t0,t2,[x+16],[y+32])
combadd(t1,t0,t2,[x+24],[y+24])
combadd(t1,t0,t2,[x+32],[y+16])
combadd(t1,t0,t2,[x+40],[y+8])
combadd(t1,t0,t2,[x+48],[y])
mov [z+48], t2
// Result term 7
xor t2, t2
combadz(t2,t1,t0,[x],[y+56])
combadd(t2,t1,t0,[x+8],[y+48])
combadd(t2,t1,t0,[x+16],[y+40])
combadd(t2,t1,t0,[x+24],[y+32])
combadd(t2,t1,t0,[x+32],[y+24])
combadd(t2,t1,t0,[x+40],[y+16])
combadd(t2,t1,t0,[x+48],[y+8])
combadd(t2,t1,t0,[x+56],[y])
mov [z+56], t0
// Result term 8
xor t0, t0
combadz(t0,t2,t1,[x+8],[y+56])
combadd(t0,t2,t1,[x+16],[y+48])
combadd(t0,t2,t1,[x+24],[y+40])
combadd(t0,t2,t1,[x+32],[y+32])
combadd(t0,t2,t1,[x+40],[y+24])
combadd(t0,t2,t1,[x+48],[y+16])
combadd(t0,t2,t1,[x+56],[y+8])
mov [z+64], t1
// Result term 9
xor t1, t1
combadz(t1,t0,t2,[x+16],[y+56])
combadd(t1,t0,t2,[x+24],[y+48])
combadd(t1,t0,t2,[x+32],[y+40])
combadd(t1,t0,t2,[x+40],[y+32])
combadd(t1,t0,t2,[x+48],[y+24])
combadd(t1,t0,t2,[x+56],[y+16])
mov [z+72], t2
// Result term 10
xor t2, t2
combadz(t2,t1,t0,[x+24],[y+56])
combadd(t2,t1,t0,[x+32],[y+48])
combadd(t2,t1,t0,[x+40],[y+40])
combadd(t2,t1,t0,[x+48],[y+32])
combadd(t2,t1,t0,[x+56],[y+24])
mov [z+80], t0
// Result term 11
xor t0, t0
combadz(t0,t2,t1,[x+32],[y+56])
combadd(t0,t2,t1,[x+40],[y+48])
combadd(t0,t2,t1,[x+48],[y+40])
combadd(t0,t2,t1,[x+56],[y+32])
mov [z+88], t1
// Result term 12
xor t1, t1
combadz(t1,t0,t2,[x+40],[y+56])
combadd(t1,t0,t2,[x+48],[y+48])
combadd(t1,t0,t2,[x+56],[y+40])
mov [z+96], t2
// Result term 13
xor t2, t2
combadz(t2,t1,t0,[x+48],[y+56])
combadd(t2,t1,t0,[x+56],[y+48])
mov [z+104], t0
// Result term 14
combads(t2,t1,[x+56],[y+56])
mov [z+112], t1
// Result term 15
mov [z+120], t2
// Return
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,197 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Square z := x^2
// Input x[n]; output z[k]
//
// extern void bignum_sqr
// (uint64_t k, uint64_t *z, uint64_t n, uint64_t *x);
//
// Does the "z := x^2" operation where x is n digits and result z is k.
// Truncates the result in general unless k >= 2 * n
//
// Standard x86-64 ABI: RDI = k, RSI = z, RDX = n, RCX = x
// Microsoft x64 ABI: RCX = k, RDX = z, R8 = n, R9 = x
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr)
.text
// Register aliases for bignum_sqr.
// p is the number of result digits (called k in the C prototype comment);
// in this code k is the outer loop counter instead.
// First three are where arguments come in, but n is moved: it arrives in
// rdx and is copied to r8 on entry, since rdx is overwritten by every mul.
#define p rdi
#define z rsi
#define x rcx
#define n r8
// These are always local scratch since multiplier result is in these
#define a rax
#define d rdx
// Other variables: i is the inner index, k the index of the result digit
// being produced, y a moving pointer to x[k-i], htop the upper bound on i
// for the doubled part of the sum, (c,h,l) the three-word local sum for the
// current digit and (hh,ll) the two words carried over from earlier digits
#define i rbx
#define ll rbp
#define hh r9
#define k r10
#define y r11
#define htop r12
#define l r13
#define h r14
#define c r15
// Short versions (32-bit view of ll, used only to zero it)
#define llshort ebp
// bignum_sqr: write the low p digits of x^2 (x has n digits) to z, one
// result digit per outer loop iteration.
S2N_BN_SYMBOL(bignum_sqr):
endbr64
#if WINDOWS_ABI
// Microsoft x64 ABI: preserve rdi and rsi (popped again before ret) and
// move the arguments from rcx, rdx, r8, r9 into rdi, rsi, rdx, rcx so the
// body below only deals with the standard register assignment
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
mov rcx, r9
#endif
// We use too many registers, and also we need rax:rdx for multiplications
push rbx
push rbp
push r12
push r13
push r14
push r15
// Copy n out of rdx before any mul overwrites it
mov n, rdx
// If p = 0 the result is trivial and nothing needs doing
test p, p
jz end
// initialize (hh,ll) = 0
xor llshort, llshort
xor hh, hh
// Iterate outer loop from k = 0 ... k = p - 1 producing result digits
xor k, k
outerloop:
// First let bot = MAX 0 (k + 1 - n) and top = MIN (k + 1) n
// We want to accumulate all x[i] * x[k - i] for bot <= i < top
// For the optimization of squaring we avoid duplication and do
// 2 * x[i] * x[k - i] for i < htop, where htop = MIN ((k+1)/2) n
// Initialize i = bot; in fact just compute bot as i directly.
// c is zeroed first so that it doubles as the zero source for the cmovc
// that clamps i to 0 when k + 1 - n borrows.
xor c, c
lea i, [k+1]
mov htop, i
shr htop, 1
sub i, n
cmovc i, c
cmp htop, n
cmovnc htop, n
// Initialize the three-part local sum (c,h,l); c was already done above
xor l, l
xor h, h
// If htop <= bot then main doubled part of the sum is empty
cmp i, htop
jnc nosumming
// Use a moving pointer for [y] = x[k-i] for the cofactor
mov a, k
sub a, i
lea y, [x+8*a]
// Do the main part of the sum x[i] * x[k - i] for 2 * i < k
// Each pass adds the 128-bit product d:a into (c,h,l), then moves y down
// one word and i up one word so that y keeps pointing at x[k-i]
innerloop:
mov a, [x+8*i]
mul QWORD PTR [y]
add l, a
adc h, d
adc c, 0
sub y, 8
inc i
cmp i, htop
jc innerloop
// Now double it
add l, l
adc h, h
adc c, c
// If k is even (which means 2 * i = k) and i < n add the extra x[i]^2 term
nosumming:
test k, 1
jnz innerend
cmp i, n
jnc innerend
mov a, [x+8*i]
mul a
add l, a
adc h, d
adc c, 0
// Now add the local sum into the global sum, store and shift
// The low word becomes result digit k; the two upper words are kept in
// (hh,ll) and added into the next digit
innerend:
add l, ll
mov [z+8*k], l
adc h, hh
mov ll, h
adc c, 0
mov hh, c
inc k
cmp k, p
jc outerloop
// Restore registers and return
end:
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,145 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Square, z := x^2
// Input x[4]; output z[8]
//
// extern void bignum_sqr_4_8_alt
// (uint64_t z[static 8], uint64_t x[static 4]);
//
// Standard x86-64 ABI: RDI = z, RSI = x
// Microsoft x64 ABI: RCX = z, RDX = x
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8_alt)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8_alt)
.text
// Input arguments
#define z rdi
#define x rsi
// Other variables used as a rotating 3-word window to add terms to
#define t0 rcx
#define t1 r8
#define t2 r9
// All macros below load rax and execute mul, so every use overwrites
// rax and rdx. In each, (c,h,l) names the window registers from most to
// least significant word and numa/numb are memory operands.
// Macro for the key "multiply and add to (c,h,l)" step, for square term
// (c,h,l) += numa^2
#define combadd1(c,h,l,numa) \
mov rax, numa; \
mul rax; \
add l, rax; \
adc h, rdx; \
adc c, 0
// A short form where we don't expect a top carry
// (h,l) += numa^2, with any carry out of h discarded
#define combads(h,l,numa) \
mov rax, numa; \
mul rax; \
add l, rax; \
adc h, rdx
// A version doubling before adding, for non-square terms
// (c,h,l) += 2 * numa * numb; the bit shifted out of rdx by the doubling
// goes into c before the doubled product is added
#define combadd2(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add rax, rax; \
adc rdx, rdx; \
adc c, 0; \
add l, rax; \
adc h, rdx; \
adc c, 0
// bignum_sqr_4_8_alt: z[0..7] := x[0..3]^2, fully unrolled.
S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
endbr64
#if WINDOWS_ABI
// Microsoft x64 ABI: preserve rdi and rsi (popped again before ret) and
// move z and x from rcx and rdx into rdi and rsi
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
#endif
// Result word k sums the products x[i] * x[j] with i + j = k: cross
// products (i != j) are added doubled via combadd2, squares via combadd1.
// The roles (c,h,l) rotate through t0, t1, t2: once the low word has been
// stored, the other two words become (h,l) for the next term and the
// stored register is cleared and reused as the new top word.
// Result term 0
mov rax, [x]
mul rax
mov [z], rax
mov t0, rdx
xor t1, t1
// Result term 1
xor t2, t2
combadd2(t2,t1,t0,[x],[x+8])
mov [z+8], t0
// Result term 2
xor t0, t0
combadd1(t0,t2,t1,[x+8])
combadd2(t0,t2,t1,[x],[x+16])
mov [z+16], t1
// Result term 3
xor t1, t1
combadd2(t1,t0,t2,[x],[x+24])
combadd2(t1,t0,t2,[x+8],[x+16])
mov [z+24], t2
// Result term 4
xor t2, t2
combadd2(t2,t1,t0,[x+8],[x+24])
combadd1(t2,t1,t0,[x+16])
mov [z+32], t0
// Result term 5
xor t0, t0
combadd2(t0,t2,t1,[x+16],[x+24])
mov [z+40], t1
// Result term 6
// NOTE(review): t1 has already been stored to [z+40] and is not read
// again below, so this clear looks redundant - confirm before removing
xor t1, t1
combads(t0,t2,[x+24])
mov [z+48], t2
// Result term 7
mov [z+56], t0
// Return
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,242 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Square, z := x^2
// Input x[8]; output z[16]
//
// extern void bignum_sqr_8_16_alt (uint64_t z[static 16], uint64_t x[static 8]);
//
// Standard x86-64 ABI: RDI = z, RSI = x
// Microsoft x64 ABI: RCX = z, RDX = x
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_8_16_alt)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_8_16_alt)
.text
// Input arguments
#define z rdi
#define x rsi
// Other variables used as a rotating 3-word window to add terms to
#define t0 r8
#define t1 r9
#define t2 r10
// Additional temporaries for local windows to share doublings
#define u0 rcx
#define u1 r11
// Every macro below except doubladd loads rax and executes mul, so it
// overwrites rax and rdx. (c,h,l) names registers from most to least
// significant word; numa/numb are memory operands.
// Macro for the key "multiply and add to (c,h,l)" step
#define combadd(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add l, rax; \
adc h, rdx; \
adc c, 0
// Set up initial window (c,h,l) = numa * numb
#define combaddz(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
xor c, c; \
mov l, rax; \
mov h, rdx
// Doubling step (c,h,l) = 2 * (c,hh,ll) + (0,h,l)
// Note that hh and ll are doubled in place, so they are modified too
#define doubladd(c,h,l,hh,ll) \
add ll, ll; \
adc hh, hh; \
adc c, c; \
add l, ll; \
adc h, hh; \
adc c, 0
// Square term incorporation (c,h,l) += numa^2
#define combadd1(c,h,l,numa) \
mov rax, numa; \
mul rax; \
add l, rax; \
adc h, rdx; \
adc c, 0
// A short form where we don't expect a top carry
// (h,l) += numa^2, with any carry out of h discarded
#define combads(h,l,numa) \
mov rax, numa; \
mul rax; \
add l, rax; \
adc h, rdx
// A version doubling directly before adding, for single non-square terms
// (c,h,l) += 2 * numa * numb
#define combadd2(c,h,l,numa,numb) \
mov rax, numa; \
mul QWORD PTR numb; \
add rax, rax; \
adc rdx, rdx; \
adc c, 0; \
add l, rax; \
adc h, rdx; \
adc c, 0
// bignum_sqr_8_16_alt: z[0..15] := x[0..7]^2, fully unrolled.
S2N_BN_SYMBOL(bignum_sqr_8_16_alt):
endbr64
#if WINDOWS_ABI
// Microsoft x64 ABI: preserve rdi and rsi (popped again before ret) and
// move z and x from rcx and rdx into rdi and rsi
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
#endif
// Result word k sums the products x[i] * x[j] with i + j = k.
// Terms 1, 2, 12 and 13 have a single cross product, doubled directly by
// combadd2. Terms 3 to 11 have several: these are summed once into the
// local window (c,u1,u0), where c is the register that becomes the new top
// word, and doubled by doubladd while being added to the running window.
// Even-numbered terms then add the square of the middle word via combadd1.
// The roles (c,h,l) rotate through t0, t1, t2 from term to term.
// Result term 0
mov rax, [x]
mul rax
mov [z], rax
mov t0, rdx
xor t1, t1
// Result term 1
xor t2, t2
combadd2(t2,t1,t0,[x],[x+8])
mov [z+8], t0
// Result term 2
xor t0, t0
combadd1(t0,t2,t1,[x+8])
combadd2(t0,t2,t1,[x],[x+16])
mov [z+16], t1
// Result term 3
combaddz(t1,u1,u0,[x],[x+24])
combadd(t1,u1,u0,[x+8],[x+16])
doubladd(t1,t0,t2,u1,u0)
mov [z+24], t2
// Result term 4
combaddz(t2,u1,u0,[x],[x+32])
combadd(t2,u1,u0,[x+8],[x+24])
doubladd(t2,t1,t0,u1,u0)
combadd1(t2,t1,t0,[x+16])
mov [z+32], t0
// Result term 5
combaddz(t0,u1,u0,[x],[x+40])
combadd(t0,u1,u0,[x+8],[x+32])
combadd(t0,u1,u0,[x+16],[x+24])
doubladd(t0,t2,t1,u1,u0)
mov [z+40], t1
// Result term 6
combaddz(t1,u1,u0,[x],[x+48])
combadd(t1,u1,u0,[x+8],[x+40])
combadd(t1,u1,u0,[x+16],[x+32])
doubladd(t1,t0,t2,u1,u0)
combadd1(t1,t0,t2,[x+24])
mov [z+48], t2
// Result term 7
combaddz(t2,u1,u0,[x],[x+56])
combadd(t2,u1,u0,[x+8],[x+48])
combadd(t2,u1,u0,[x+16],[x+40])
combadd(t2,u1,u0,[x+24],[x+32])
doubladd(t2,t1,t0,u1,u0)
mov [z+56], t0
// Result term 8
combaddz(t0,u1,u0,[x+8],[x+56])
combadd(t0,u1,u0,[x+16],[x+48])
combadd(t0,u1,u0,[x+24],[x+40])
doubladd(t0,t2,t1,u1,u0)
combadd1(t0,t2,t1,[x+32])
mov [z+64], t1
// Result term 9
combaddz(t1,u1,u0,[x+16],[x+56])
combadd(t1,u1,u0,[x+24],[x+48])
combadd(t1,u1,u0,[x+32],[x+40])
doubladd(t1,t0,t2,u1,u0)
mov [z+72], t2
// Result term 10
combaddz(t2,u1,u0,[x+24],[x+56])
combadd(t2,u1,u0,[x+32],[x+48])
doubladd(t2,t1,t0,u1,u0)
combadd1(t2,t1,t0,[x+40])
mov [z+80], t0
// Result term 11
combaddz(t0,u1,u0,[x+32],[x+56])
combadd(t0,u1,u0,[x+40],[x+48])
doubladd(t0,t2,t1,u1,u0)
mov [z+88], t1
// Result term 12
xor t1, t1
combadd2(t1,t0,t2,[x+40],[x+56])
combadd1(t1,t0,t2,[x+48])
mov [z+96], t2
// Result term 13
xor t2, t2
combadd2(t2,t1,t0,[x+48],[x+56])
mov [z+104], t0
// Result term 14
combads(t2,t1,[x+56])
mov [z+112], t1
// Result term 15
mov [z+120], t2
// Return
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,153 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Subtract, z := x - y
// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
//
// extern uint64_t bignum_sub
// (uint64_t p, uint64_t *z,
// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
//
// Does the z := x - y operation, truncating modulo p words in general and
// returning a top borrow (0 or 1) in the p'th place, only subtracting input
// words below p (as well as m and n respectively) to get the diff and borrow.
//
// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
// Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
.text
// Register aliases for bignum_sub. p, z, m, x, n and y are the six
// arguments in standard ABI order; i is the word index shared by all loops
// and a is the scratch word, which also carries the return value.
#define p rdi
#define z rsi
#define m rdx
#define x rcx
#define n r8
#define y r9
#define i r10
#define a rax
// 32-bit view of a, used to zero it with a mov (which leaves flags alone)
#define ashort eax
// bignum_sub: z := x - y truncated to p words, returning the borrow (0 or 1)
// in rax. Throughout, the running borrow lives in the carry flag: the loops
// only use inc, dec, mov and jumps between one sbb and the next, none of
// which modify the carry flag.
S2N_BN_SYMBOL(bignum_sub):
endbr64
#if WINDOWS_ABI
// Microsoft x64 ABI: preserve rdi and rsi (popped again before ret) and
// move the arguments into the standard ABI registers. The header comment
// places n and y at [rsp+40] and [rsp+48] on entry; the two pushes above
// lower rsp by 16, hence the offsets 56 and 64 here.
push rdi
push rsi
mov rdi, rcx
mov rsi, rdx
mov rdx, r8
mov rcx, r9
mov r8, [rsp+56]
mov r9, [rsp+64]
#endif
// Zero the main index counter for both branches
xor i, i
// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
// we'll never need words past the p'th. Can now assume m <= p and n <= p.
// Then compare the modified m and n and branch accordingly
cmp p, m
cmovc m, p
cmp p, n
cmovc n, p
cmp m, n
jc ylonger
// The case where x is longer or of the same size (p >= m >= n)
// After this setup: p = words of pure padding above x, m = (words of x
// above y) + 1 because xtest decrements before testing, and n = words
// where both inputs are present. The test also clears the carry flag so
// that the first sbb starts with no borrow.
sub p, m
sub m, n
inc m
test n, n
jz xtest
// Words where both x and y are present
xmainloop:
mov a, [x+8*i]
sbb a, [y+8*i]
mov [z+8*i],a
inc i
dec n
jnz xmainloop
jmp xtest
// Words where only x is present: propagate the borrow through x alone
xtoploop:
mov a, [x+8*i]
sbb a, 0
mov [z+8*i],a
inc i
xtest:
dec m
jnz xtoploop
// Turn the final borrow into a mask: a = 0 if no borrow, all ones if borrow
sbb a, a
test p, p
jz tailskip
// Fill the remaining p words of z with the borrow mask
tailloop:
mov [z+8*i],a
inc i
dec p
jnz tailloop
// Convert the mask (0 or -1) into the returned borrow (0 or 1)
tailskip:
neg a
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
// The case where y is longer (p >= n > m)
// After this setup: p = words of pure padding above y, n = words of y
// above x (at least 1 here), m = words where both inputs are present.
// The test clears the carry flag so the first sbb starts with no borrow.
ylonger:
sub p, n
sub n, m
test m, m
jz ytoploop
// Words where both x and y are present
ymainloop:
mov a, [x+8*i]
sbb a, [y+8*i]
mov [z+8*i],a
inc i
dec m
jnz ymainloop
// Words where only y is present: compute 0 - y[i] - borrow. The zero is
// loaded with mov rather than xor so that the carry flag is preserved.
ytoploop:
mov ashort, 0
sbb a, [y+8*i]
mov [z+8*i],a
inc i
dec n
jnz ytoploop
// Borrow mask as above; share the padding loop and its exit if p is nonzero
sbb a, a
test p, p
jnz tailloop
neg a
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,131 @@
/* $OpenBSD: bn_arch.c,v 1.7 2023/06/24 16:01:44 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <openssl/bn.h>
#include "bn_arch.h"
#include "bn_local.h"
#include "s2n_bignum.h"
#ifdef HAVE_BN_ADD
/*
 * bn_add() implemented on top of s2n-bignum: the three length/pointer pairs
 * are handed to bignum_add() unchanged apart from the pointer casts, and its
 * return value is passed back to the caller.
 *
 * NOTE(review): bignum_add() itself is not visible here; presumably it
 * mirrors bignum_sub() (result truncated to r_len words, carry returned) -
 * confirm against s2n_bignum.h.
 */
BN_ULONG
bn_add(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
    int b_len)
{
	uint64_t *sum = (uint64_t *)r;
	uint64_t *lhs = (uint64_t *)a;
	uint64_t *rhs = (uint64_t *)b;
	BN_ULONG carry;

	carry = bignum_add(r_len, sum, a_len, lhs, b_len, rhs);

	return carry;
}
#endif
#ifdef HAVE_BN_ADD_WORDS
/*
 * bn_add_words() implemented on top of s2n-bignum: calls bignum_add() with
 * the same length n for the result and for both inputs, and returns its
 * result.
 *
 * NOTE(review): bignum_add() is not visible here; presumably the returned
 * value is the carry out of the top word - confirm against s2n_bignum.h.
 */
BN_ULONG
bn_add_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
{
	uint64_t *sum = (uint64_t *)rd;
	uint64_t *lhs = (uint64_t *)ad;
	uint64_t *rhs = (uint64_t *)bd;
	BN_ULONG carry;

	carry = bignum_add(n, sum, n, lhs, n, rhs);

	return carry;
}
#endif
#ifdef HAVE_BN_SUB
/*
 * bn_sub() implemented on top of s2n-bignum: r := a - b via bignum_sub(),
 * which truncates the difference to r_len words and returns the top borrow
 * (0 or 1). That borrow is passed back to the caller.
 */
BN_ULONG
bn_sub(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, const BN_ULONG *b,
    int b_len)
{
	uint64_t *diff = (uint64_t *)r;
	uint64_t *lhs = (uint64_t *)a;
	uint64_t *rhs = (uint64_t *)b;
	BN_ULONG borrow;

	borrow = bignum_sub(r_len, diff, a_len, lhs, b_len, rhs);

	return borrow;
}
#endif
#ifdef HAVE_BN_SUB_WORDS
/*
 * bn_sub_words() implemented on top of s2n-bignum: rd := ad - bd via
 * bignum_sub() with the same length n for the result and both inputs.
 * Returns the borrow (0 or 1) reported by bignum_sub().
 */
BN_ULONG
bn_sub_words(BN_ULONG *rd, const BN_ULONG *ad, const BN_ULONG *bd, int n)
{
	uint64_t *diff = (uint64_t *)rd;
	uint64_t *lhs = (uint64_t *)ad;
	uint64_t *rhs = (uint64_t *)bd;
	BN_ULONG borrow;

	borrow = bignum_sub(n, diff, n, lhs, n, rhs);

	return borrow;
}
#endif
#ifdef HAVE_BN_MUL_ADD_WORDS
/*
 * bn_mul_add_words() implemented on top of s2n-bignum: calls bignum_cmadd()
 * with num as both the result and input length and w as the single-word
 * multiplier, and returns its result.
 *
 * NOTE(review): bignum_cmadd() is not visible here; presumably it computes
 * rd += w * ad and returns the carry word - confirm against s2n_bignum.h.
 */
BN_ULONG
bn_mul_add_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
{
	uint64_t *acc = (uint64_t *)rd;
	uint64_t *src = (uint64_t *)ad;
	BN_ULONG carry;

	carry = bignum_cmadd(num, acc, w, num, src);

	return carry;
}
#endif
#ifdef HAVE_BN_MUL_WORDS
/*
 * bn_mul_words() implemented on top of s2n-bignum: calls bignum_cmul() with
 * num as both the result and input length and w as the single-word
 * multiplier, and returns its result.
 *
 * NOTE(review): bignum_cmul() is not visible here; presumably it computes
 * rd := w * ad and returns the carry word - confirm against s2n_bignum.h.
 */
BN_ULONG
bn_mul_words(BN_ULONG *rd, const BN_ULONG *ad, int num, BN_ULONG w)
{
	uint64_t *dst = (uint64_t *)rd;
	uint64_t *src = (uint64_t *)ad;
	BN_ULONG carry;

	carry = bignum_cmul(num, dst, w, num, src);

	return carry;
}
#endif
#ifdef HAVE_BN_MUL_COMBA4
/*
 * bn_mul_comba4() implemented on top of s2n-bignum: hands rd, ad and bd to
 * bignum_mul_4_8_alt().
 *
 * NOTE(review): going by the name and the sibling squaring routines this
 * multiplies two 4-word inputs into an 8-word result - confirm against
 * s2n_bignum.h.
 */
void
bn_mul_comba4(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
{
	uint64_t *product = (uint64_t *)rd;
	uint64_t *lhs = (uint64_t *)ad;
	uint64_t *rhs = (uint64_t *)bd;

	/* XXX - consider using non-alt on CPUs that have the ADX extension. */
	bignum_mul_4_8_alt(product, lhs, rhs);
}
#endif
#ifdef HAVE_BN_MUL_COMBA8
/*
 * bn_mul_comba8() implemented on top of s2n-bignum: hands rd, ad and bd to
 * bignum_mul_8_16_alt().
 *
 * NOTE(review): going by the name and the sibling squaring routines this
 * multiplies two 8-word inputs into a 16-word result - confirm against
 * s2n_bignum.h.
 */
void
bn_mul_comba8(BN_ULONG *rd, BN_ULONG *ad, BN_ULONG *bd)
{
	uint64_t *product = (uint64_t *)rd;
	uint64_t *lhs = (uint64_t *)ad;
	uint64_t *rhs = (uint64_t *)bd;

	/* XXX - consider using non-alt on CPUs that have the ADX extension. */
	bignum_mul_8_16_alt(product, lhs, rhs);
}
#endif
#ifdef HAVE_BN_SQR
/*
 * bn_sqr() implemented on top of s2n-bignum: squares the a->top words at
 * a->d into r_len words at r->d using bignum_sqr(), which truncates the
 * result unless r_len is at least twice the input length.
 *
 * Always returns 1; ctx is not used.
 */
int
bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
{
	uint64_t *square = (uint64_t *)r->d;
	uint64_t *input = (uint64_t *)a->d;

	bignum_sqr(r_len, square, a->top, input);

	return 1;
}
#endif
#ifdef HAVE_BN_SQR_COMBA4
/*
 * bn_sqr_comba4() implemented on top of s2n-bignum: squares the 4-word
 * input ad into the 8-word result rd using bignum_sqr_4_8_alt().
 */
void
bn_sqr_comba4(BN_ULONG *rd, const BN_ULONG *ad)
{
	uint64_t *square = (uint64_t *)rd;
	uint64_t *input = (uint64_t *)ad;

	/* XXX - consider using non-alt on CPUs that have the ADX extension. */
	bignum_sqr_4_8_alt(square, input);
}
#endif
#ifdef HAVE_BN_SQR_COMBA8
/*
 * bn_sqr_comba8() implemented on top of s2n-bignum: squares the 8-word
 * input ad into the 16-word result rd using bignum_sqr_8_16_alt().
 */
void
bn_sqr_comba8(BN_ULONG *rd, const BN_ULONG *ad)
{
	uint64_t *square = (uint64_t *)rd;
	uint64_t *input = (uint64_t *)ad;

	/* XXX - consider using non-alt on CPUs that have the ADX extension. */
	bignum_sqr_8_16_alt(square, input);
}
#endif
#ifdef HAVE_BN_WORD_CLZ
/*
 * bn_word_clz() implemented on top of s2n-bignum: returns the number of
 * leading zero bits in w as computed by word_clz(), which yields 64 when
 * w is zero.
 */
int
bn_word_clz(BN_ULONG w)
{
	const int leading_zeros = word_clz(w);

	return leading_zeros;
}
#endif

View File

@@ -0,0 +1,95 @@
/* $OpenBSD: bn_arch.h,v 1.13 2023/02/16 11:13:05 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <openssl/bn.h>
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
/* Assembly support is switched off unconditionally when targeting _WIN32. */
#ifdef _WIN32
#define OPENSSL_NO_ASM
#else
#ifndef OPENSSL_NO_ASM
/*
 * Primitives flagged as available when assembly is enabled. bn_arch.c keys
 * its s2n-bignum wrappers off these macros.
 * NOTE(review): presumably the generic bn code also tests them to skip its
 * portable fallbacks - confirm against the generic sources.
 */
#define HAVE_BN_ADD
#define HAVE_BN_ADD_WORDS
#define HAVE_BN_DIV_WORDS
#define HAVE_BN_MUL_ADD_WORDS
#define HAVE_BN_MUL_COMBA4
#define HAVE_BN_MUL_COMBA8
#define HAVE_BN_MUL_WORDS
#define HAVE_BN_SQR
#define HAVE_BN_SQR_COMBA4
#define HAVE_BN_SQR_COMBA8
#define HAVE_BN_SUB
#define HAVE_BN_SUB_WORDS
#define HAVE_BN_WORD_CLZ
#if defined(__GNUC__)
#define HAVE_BN_DIV_REM_WORDS_INLINE
/*
 * Divide the double-word value h:l (h being the high word) by d with a
 * single divq, storing the quotient in *out_q and the remainder in *out_r.
 *
 * No checks are made here: divq raises a divide error if d is zero or if
 * the quotient does not fit in one word, which is the case when h >= d.
 */
static inline void
bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
BN_ULONG *out_r)
{
BN_ULONG q, r;
/*
* Unsigned division of %rdx:%rax by d with quotient being stored in
* %rax and remainder in %rdx.
*/
/* Operand %4 is d; h and l are pinned to %rdx and %rax by constraint. */
__asm__ volatile ("divq %4"
: "=a"(q), "=d"(r)
: "d"(h), "a"(l), "rm"(d)
: "cc");
*out_q = q;
*out_r = r;
}
#endif /* __GNUC__ */
#if defined(__GNUC__)
#define HAVE_BN_MULW
/*
 * Multiply the words a and b with a single mulq, storing the high word of
 * the double-word product in *out_r1 and the low word in *out_r0.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/*
* Unsigned multiplication of %rax, with the double word result being
* stored in %rdx:%rax.
*/
/* Operand %3 is b; a is pinned to %rax by its constraint. */
__asm__ ("mulq %3"
: "=d"(r1), "=a"(r0)
: "a"(a), "rm"(b)
: "cc");
*out_r1 = r1;
*out_r0 = r0;
}
#endif /* __GNUC__ */
#endif /* !OPENSSL_NO_ASM */
#endif /* _WIN32 */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,60 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------------
// Count leading zero bits in a single word
// Input a; output function return
//
// extern uint64_t word_clz (uint64_t a);
//
// Standard x86-64 ABI: RDI = a, returns RAX
// Microsoft x64 ABI: RCX = a, returns RAX
// ----------------------------------------------------------------------------
#include "s2n_bignum_internal.h"
.intel_syntax noprefix
S2N_BN_SYM_VISIBILITY_DIRECTIVE(word_clz)
S2N_BN_SYM_PRIVACY_DIRECTIVE(word_clz)
.text
// word_clz: return in rax the number of leading zero bits of the 64-bit
// input, with 64 returned for a zero input.
S2N_BN_SYMBOL(word_clz):
endbr64
#if WINDOWS_ABI
// Microsoft x64 ABI: preserve rdi and rsi (popped again before ret) and
// move the argument from rcx into rdi.
// NOTE(review): rsi is not modified by this routine, so saving it looks
// like uniform boilerplate shared with the other routines - confirm.
push rdi
push rsi
mov rdi, rcx
#endif
// First do rax = 63 - bsr(a), which is right except (maybe) for zero inputs
// bsr yields a bit index in 0..63, for which xor with 63 equals 63 - index
bsr rax, rdi
xor rax, 63
// Force return of 64 in the zero-input case
// Writing edx zero-extends into rdx, so rdx = 64 for the cmove below
mov edx, 64
test rdi, rdi
cmove rax, rdx
#if WINDOWS_ABI
pop rsi
pop rdi
#endif
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,73 @@
/* $OpenBSD: bn_arch.h,v 1.2 2023/06/24 15:51:47 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <openssl/bn.h>
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
#if defined(__GNUC__)
#define HAVE_BN_SUBW
/*
 * Compute a - b as a single word, storing the difference in *out_r0 and the
 * borrow (0 or 1) in *out_borrow.
 *
 * NOTE(review): the mnemonics look like 32-bit ARM; the step comments below
 * assume ARM flag semantics, where subs leaves the carry flag clear exactly
 * when the subtraction borrowed.
 */
static inline void
bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
{
BN_ULONG borrow, r0;
/*
 * borrow is zeroed, subs sets the flags, sbc then subtracts the inverted
 * carry from borrow (leaving 0 or -1) and neg turns that into 0 or 1.
 * borrow is marked earlyclobber since it is written before a and b are read.
 */
__asm__ (
"mov %[borrow], #0 \n"
"subs %[r0], %[a], %[b] \n"
"sbc %[borrow], %[borrow], #0 \n"
"neg %[borrow], %[borrow] \n"
: [borrow]"=&r"(borrow), [r0]"=r"(r0)
: [a]"r"(a), [b]"r"(b)
: "cc");
*out_borrow = borrow;
*out_r0 = r0;
}
#define HAVE_BN_SUBW_SUBW
/*
 * Compute a - b - c as a single word, storing the difference in *out_r0 and
 * the accumulated borrow from the two subtractions in *out_borrow.
 *
 * NOTE(review): the mnemonics look like 32-bit ARM; the step comments below
 * assume ARM flag semantics, where subs leaves the carry flag clear exactly
 * when the subtraction borrowed.
 */
static inline void
bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
BN_ULONG *out_r0)
{
BN_ULONG borrow, r0;
/*
 * Each subs/sbc pair decrements borrow by one if that subtraction borrowed;
 * the final neg makes the count positive. Both outputs are earlyclobber
 * because they are written while inputs are still to be read (r0 is
 * written by the first subs before c is consumed).
 */
__asm__ (
"mov %[borrow], #0 \n"
"subs %[r0], %[a], %[b] \n"
"sbc %[borrow], %[borrow], #0 \n"
"subs %[r0], %[r0], %[c] \n"
"sbc %[borrow], %[borrow], #0 \n"
"neg %[borrow], %[borrow] \n"
: [borrow]"=&r"(borrow), [r0]"=&r"(r0)
: [a]"r"(a), [b]"r"(b), [c]"r"(c)
: "cc");
*out_borrow = borrow;
*out_r0 = r0;
}
#endif /* __GNUC__ */
#endif /* !OPENSSL_NO_ASM */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,24 @@
/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
/* No architecture-specific primitives are declared in this header. */
#endif /* !OPENSSL_NO_ASM */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,86 @@
/* $OpenBSD: bn_arch.h,v 1.9 2023/02/16 10:41:03 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <openssl/bn.h>
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
/*
 * Primitives flagged as available when assembly is enabled.
 * NOTE(review): presumably the generic bn code tests these HAVE_BN_* macros
 * to skip its portable fallbacks - confirm against the generic sources.
 */
#define HAVE_BN_ADD_WORDS
#define HAVE_BN_DIV_WORDS
#define HAVE_BN_MUL_ADD_WORDS
#define HAVE_BN_MUL_COMBA4
#define HAVE_BN_MUL_COMBA8
#define HAVE_BN_MUL_WORDS
#define HAVE_BN_SQR_COMBA4
#define HAVE_BN_SQR_COMBA8
#define HAVE_BN_SQR_WORDS
#define HAVE_BN_SUB_WORDS
#if defined(__GNUC__)
#define HAVE_BN_DIV_REM_WORDS_INLINE
/*
 * Divide the double-word value h:l (h being the high word) by d with a
 * single divl, storing the quotient in *out_q and the remainder in *out_r.
 *
 * No checks are made here: divl raises a divide error if d is zero or if
 * the quotient does not fit in one word, which is the case when h >= d.
 */
static inline void
bn_div_rem_words_inline(BN_ULONG h, BN_ULONG l, BN_ULONG d, BN_ULONG *out_q,
BN_ULONG *out_r)
{
BN_ULONG q, r;
/*
* Unsigned division of %edx:%eax by d with quotient being stored in
* %eax and remainder in %edx.
*/
/* Operand %4 is d; l and h are pinned to %eax and %edx by constraint. */
__asm__ volatile ("divl %4"
: "=a"(q), "=d"(r)
: "a"(l), "d"(h), "rm"(d)
: "cc");
*out_q = q;
*out_r = r;
}
#endif /* __GNUC__ */
#if defined(__GNUC__)
#define HAVE_BN_MULW
/*
 * Multiply the words a and b with a single mull, storing the high word of
 * the double-word product in *out_r1 and the low word in *out_r0.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/*
* Unsigned multiplication of %eax, with the double word result being
* stored in %edx:%eax.
*/
/* Operand %3 is b; a is pinned to %eax by its constraint. */
__asm__ ("mull %3"
: "=d"(r1), "=a"(r0)
: "a"(a), "rm"(b)
: "cc");
*out_r1 = r1;
*out_r0 = r0;
}
#endif /* __GNUC__ */
#endif /* !OPENSSL_NO_ASM */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,24 @@
/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
/* No architecture-specific primitives are declared in this header. */
#endif /* !OPENSSL_NO_ASM */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,40 @@
/* $OpenBSD: bn_arch.h,v 1.7 2023/01/23 12:17:58 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
/*
 * Primitives flagged as available when assembly is enabled.
 * NOTE(review): presumably the generic bn code tests these HAVE_BN_* macros
 * to skip its portable fallbacks - confirm against the generic sources.
 */
#define HAVE_BN_ADD_WORDS
#define HAVE_BN_DIV_WORDS
#define HAVE_BN_DIV_3_WORDS
#define HAVE_BN_MUL_ADD_WORDS
#define HAVE_BN_MUL_COMBA4
#define HAVE_BN_MUL_COMBA8
#define HAVE_BN_MUL_WORDS
#define HAVE_BN_SQR_COMBA4
#define HAVE_BN_SQR_COMBA8
#define HAVE_BN_SQR_WORDS
#define HAVE_BN_SUB_WORDS
#endif /* !OPENSSL_NO_ASM */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,39 @@
/* $OpenBSD: bn_arch.h,v 1.6 2023/01/23 12:17:58 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
/*
 * Primitives flagged as available when assembly is enabled.
 * NOTE(review): presumably the generic bn code tests these HAVE_BN_* macros
 * to skip its portable fallbacks - confirm against the generic sources.
 */
#define HAVE_BN_ADD_WORDS
#define HAVE_BN_DIV_WORDS
#define HAVE_BN_MUL_ADD_WORDS
#define HAVE_BN_MUL_COMBA4
#define HAVE_BN_MUL_COMBA8
#define HAVE_BN_MUL_WORDS
#define HAVE_BN_SQR_COMBA4
#define HAVE_BN_SQR_COMBA8
#define HAVE_BN_SQR_WORDS
#define HAVE_BN_SUB_WORDS
#endif /* !OPENSSL_NO_ASM */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,44 @@
/* $OpenBSD: bn_arch.h,v 1.4 2023/02/16 10:41:03 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
/* Everything below is compiled out until it has been tested. */
#if 0 /* Needs testing and enabling. */
#if defined(__GNUC__)
#define HAVE_BN_MULW
/*
 * Multiply the words a and b, storing the high word of the double-word
 * product in *out_r1 and the low word in *out_r0.
 *
 * NOTE(review): "mul" may not be a valid mnemonic for the low half on this
 * target (the usual low-doubleword multiply is spelled mulld) - verify with
 * the assembler before enabling this block.
 */
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/* Unsigned multiplication using a mulhdu/mul pair. */
/* r1 is earlyclobber: it is written before the second instruction reads a and b. */
__asm__ ("mulhdu %0, %2, %3; mul %1, %2, %3"
: "=&r"(r1), "=r"(r0)
: "r"(a), "r"(b));
*out_r1 = r1;
*out_r0 = r0;
}
#endif /* __GNUC__ */
#endif /* 0 */
#endif /* !OPENSSL_NO_ASM */
#endif /* HEADER_BN_ARCH_H */

View File

@@ -0,0 +1,86 @@
/* $OpenBSD: bn_arch.h,v 1.7 2023/07/09 10:37:32 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <openssl/bn.h>
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
#if defined(__GNUC__)
#define HAVE_BN_ADDW
/*
 * bn_addw() adds a and b, storing the (possibly wrapped) sum in *out_r0 and
 * the carry out of the addition (0 or 1) in *out_r1.
 */
static inline void
bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
	BN_ULONG cout, sum;

	/*
	 * The addition wrapped if and only if the sum is less than one of the
	 * addends, so sltu against a recovers the carry. The sum is
	 * earlyclobber since a is still read by the comparison.
	 */
	__asm__ (
	    "add   %[sum], %[a], %[b] \n"
	    "sltu  %[cout], %[sum], %[a] \n"
	    : [cout]"=r"(cout), [sum]"=&r"(sum)
	    : [a]"r"(a), [b]"r"(b));

	*out_r1 = cout;
	*out_r0 = sum;
}
#define HAVE_BN_MULW
/*
* bn_mulw() multiplies a by b as unsigned words, storing the high word of the
* double-width product in *out_r1 and the low word in *out_r0.
*/
static inline void
bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
{
BN_ULONG r1, r0;
/*
* Unsigned multiplication using a mulhu/mul pair. Note that the order
* of these instructions is important, as they can potentially be fused
* into a single operation.
*
* r1 is earlyclobber ("=&r") because it is written by mulhu while a and b
* are still needed as inputs to the following mul.
*/
__asm__ (
"mulhu %[r1], %[a], %[b] \n"
"mul %[r0], %[a], %[b] \n"
: [r1]"=&r"(r1), [r0]"=r"(r0)
: [a]"r"(a), [b]"r"(b));
*out_r1 = r1;
*out_r0 = r0;
}
#define HAVE_BN_SUBW
/*
 * bn_subw() subtracts b from a, storing the (possibly wrapped) difference in
 * *out_r0 and the borrow out of the subtraction (0 or 1) in *out_borrow.
 */
static inline void
bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
{
	BN_ULONG bout, diff;

	/*
	 * The subtraction wrapped if and only if the difference is greater
	 * than the minuend, so sltu of a against the difference recovers the
	 * borrow. The difference is earlyclobber since a is still read by the
	 * comparison.
	 */
	__asm__ (
	    "sub   %[diff], %[a], %[b] \n"
	    "sltu  %[bout], %[a], %[diff] \n"
	    : [bout]"=r"(bout), [diff]"=&r"(diff)
	    : [a]"r"(a), [b]"r"(b));

	*out_borrow = bout;
	*out_r0 = diff;
}
#endif /* __GNUC__ */
#endif
#endif

View File

@@ -0,0 +1,24 @@
/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
#endif
#endif

View File

@@ -0,0 +1,24 @@
/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */
/*
* Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef HEADER_BN_ARCH_H
#define HEADER_BN_ARCH_H
#ifndef OPENSSL_NO_ASM
#endif
#endif