/*
 * libressl-portable/crypto/bn/mont-mips64.S
 *
 * Web-viewer residue kept for reference: 286 lines, 3.9 KiB, labelled
 * "ArmAsm" by the viewer, dated 2023-11-06 23:46:37 -08:00.
 */
.text
.set	noat
.set	noreorder

/*
 * bn_mul_mont -- public entry point; a length gate in front of
 * bn_mul_mont_internal.
 *
 * In:   $9 = num (compared as a signed value); every other register is
 *       passed through untouched to bn_mul_mont_internal.
 * Out:  when num < 4 or num >= 17: returns to $31 with $2 = 0 and $4 = 0.
 *       when 4 <= num <= 16: branches (no link) to bn_mul_mont_internal
 *       with $2 = 0.
 * Clobbers: $1 (hence .set noat), $2, and $4 on the rejecting path.
 *
 * Assembled under .set noreorder: the instruction after each branch or
 * jump is its delay slot and executes whether or not the branch is taken.
 */
.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	slt	$1,$9,17		# on in-order CPU
	beqz	$1,.Lmont_reject	# num >= 17: not handled here
	li	$2,0			# delay slot: $2 = 0 on every path
	slt	$1,$9,4
	beqz	$1,bn_mul_mont_internal	# 4 <= num <= 16: do the work
	nop				# delay slot
.Lmont_reject:
	jr	$31
	li	$4,0			# delay slot
.end	bn_mul_mont
/*
 * bn_mul_mont_internal -- word-serial Montgomery multiplication on
 * 64-bit words.  The only visible caller is bn_mul_mont, which branches
 * here when 4 <= num <= 16.
 *
 * In:   $4 = rp   result vector (num words, written)
 *       $5 = ap   first operand  (num words, read)
 *       $6 = bp   second operand (num words, read)
 *       $7 = np   modulus        (num words, read)
 *       $8 = pointer to a one-word constant, dereferenced once; the
 *            product of that constant and the low accumulator word
 *            gives the per-row reduction multiplier kept in $23
 *       $9 = num  vector length in words (turned into a byte count)
 * Out:  $2 = 1 and $4 = 1; rp[0..num-1] holds the result.
 * Stack: a 14*8-byte frame holding $30 and $23..$16, and below it a
 *       scratch vector tp[] (num words plus two extra) whose base is
 *       rounded down to a multiple of 4096.  tp[0..num-1] is zeroed
 *       before returning.
 * Clobbers: $1, $2, $4, $5, $7..$15, $24, $25, HI/LO.
 *
 * Register roles inside the body:
 *       $13 = bp[i]            $23 = reduction multiplier for row i
 *       $12 = ap[j]            $14 = np[j]
 *       $16:$17 = lo:hi of ap[j]*bp[i]
 *       $18:$19 = lo:hi of np[j]*multiplier
 *       $10/$11 = running sum word / carry of the ap*bp chain
 *       $24/$25 = running sum word / carry of the np*m chain
 *       $15 = tp cursor, $20 = tp word being added (later &tp[num])
 *       $21 = i*8, $22 = j*8
 *
 * All address and stack arithmetic uses the non-trapping daddu/dsubu
 * forms; the trapping dadd/dsub forms raise an Integer Overflow
 * exception on signed wrap and give no benefit for pointers.
 *
 * The final selection between tp[] and rp[] is done by masking values,
 * not by choosing a source address, so the memory access pattern does
 * not depend on the borrow out of the trial subtraction.
 */
.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*8,$31
	.mask	0x40000000|16711680,-8
	dsubu	$29,14*8		# allocate the register save frame
	sd	$30,(14-1)*8($29)
	sd	$23,(14-2)*8($29)
	sd	$22,(14-3)*8($29)
	sd	$21,(14-4)*8($29)
	sd	$20,(14-5)*8($29)
	sd	$19,(14-6)*8($29)
	sd	$18,(14-7)*8($29)
	sd	$17,(14-8)*8($29)
	sd	$16,(14-9)*8($29)
	move	$30,$29			# $30 remembers the frame base for the epilogue

	.set	reorder
	ld	$8,0($8)		# replace the pointer by the constant it points to
	ld	$13,0($6)		# bp[0]
	ld	$12,0($5)		# ap[0]
	ld	$14,0($7)		# np[0]

	dsubu	$29,2*8			# place for two extra words
	sll	$9,3			# num becomes a byte count
	li	$1,-4096
	dsubu	$29,$9			# room for tp[num]
	and	$29,$1			# round tp down to a 4096-byte boundary

	/* First row (i = 0): tp = (ap*bp[0] + np*m) >> 64, nothing to add in yet. */
	dmultu	$12,$13			# ap[0]*bp[0]
	ld	$16,8($5)		# ap[1]
	ld	$18,8($7)		# np[1]
	mflo	$10
	mfhi	$11
	dmultu	$10,$8
	mflo	$23			# m = low word of (lo0 * constant)

	dmultu	$16,$13			# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	dmultu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25
	dmultu	$18,$23			# np[1]*m
	daddu	$24,$10			# low word of the sum is discarded,
	sltu	$1,$24,$10		# only its carry is kept
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp cursor = &tp[0]
	li	$22,2*8			# j = 2 (as a byte offset)
.align	4
.L1st:
	.set	noreorder
	daddu	$12,$5,$22		# &ap[j]
	daddu	$14,$7,$22		# &np[j]
	ld	$12,($12)
	ld	$14,($14)

	dmultu	$12,$13			# ap[j]*bp[0], overlapped with the adds below
	daddu	$10,$16,$11
	daddu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1
	daddu	$25,$19,$2
	mflo	$16
	mfhi	$17

	daddu	$24,$10
	sltu	$1,$24,$10
	dmultu	$14,$23			# np[j]*m
	daddu	$25,$1
	addu	$22,8			# j++
	sd	$24,($15)		# store one finished word of tp
	sltu	$2,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	daddu	$15,8			# delay slot: advance the tp cursor
	.set	reorder

	/* Drain the two products still in flight and store the top words. */
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1

	daddu	$24,$18,$25
	sltu	$2,$24,$25
	daddu	$25,$19,$2
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1

	sd	$24,($15)

	daddu	$25,$11
	sltu	$1,$25,$11
	sd	$25,8($15)
	sd	$1,2*8($15)		# carry out of the row

	li	$21,8			# i = 1 (as a byte offset)
.align	4
.Louter:
	/* Row i: same as the first row, but the previous tp is added in. */
	daddu	$13,$6,$21		# &bp[i]
	ld	$13,($13)
	ld	$12,($5)		# ap[0]
	ld	$16,8($5)		# ap[1]
	ld	$20,($29)		# tp[0]

	dmultu	$12,$13			# ap[0]*bp[i]
	ld	$14,($7)		# np[0]
	ld	$18,8($7)		# np[1]
	mflo	$10
	mfhi	$11
	daddu	$10,$20			# + tp[0]
	dmultu	$10,$8
	sltu	$1,$10,$20
	daddu	$11,$1
	mflo	$23			# m for this row

	dmultu	$16,$13			# ap[1]*bp[i]
	mflo	$16
	mfhi	$17

	dmultu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25

	dmultu	$18,$23			# np[1]*m
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp cursor = &tp[0]
	li	$22,2*8			# j = 2 (as a byte offset)
	ld	$20,8($15)		# tp[1]
.align	4
.Linner:
	.set	noreorder
	daddu	$12,$5,$22		# &ap[j]
	daddu	$14,$7,$22		# &np[j]
	ld	$12,($12)
	ld	$14,($14)

	dmultu	$12,$13			# ap[j]*bp[i]
	daddu	$10,$16,$11
	daddu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1
	daddu	$25,$19,$2
	mflo	$16
	mfhi	$17

	daddu	$10,$20			# add the old tp word
	addu	$22,8			# j++
	dmultu	$14,$23			# np[j]*m
	sltu	$1,$10,$20
	daddu	$24,$10
	daddu	$11,$1
	sltu	$2,$24,$10
	ld	$20,2*8($15)		# fetch the next old tp word
	daddu	$25,$2
	sltu	$1,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19
	sd	$24,($15)		# store one finished word of tp
	bnez	$1,.Linner
	daddu	$15,8			# delay slot: advance the tp cursor
	.set	reorder

	/* Drain the products in flight, fold in the saved carry word. */
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1
	daddu	$10,$20
	sltu	$2,$10,$20
	daddu	$11,$2

	ld	$20,2*8($15)		# carry word stored by the previous row
	daddu	$24,$18,$25
	sltu	$1,$24,$25
	daddu	$25,$19,$1
	daddu	$24,$10
	sltu	$2,$24,$10
	daddu	$25,$2
	sd	$24,($15)

	daddu	$24,$25,$11
	sltu	$25,$24,$11
	daddu	$24,$20
	sltu	$1,$24,$20
	daddu	$25,$1
	sd	$24,8($15)
	sd	$25,2*8($15)		# carry out of the row, also left in $25

	addu	$21,8			# i++
	sltu	$2,$21,$9		# i < num ?
	bnez	$2,.Louter

	/* Trial subtraction: rp = tp - np, borrow accumulated in $11. */
	.set	noreorder
	daddu	$20,$29,$9		# &tp[num]
	move	$15,$29
	li	$11,0			# clear borrow bit

.align	4
.Lsub:	ld	$10,($15)
	ld	$24,($7)
	daddu	$15,8
	daddu	$7,8
	dsubu	$24,$10,$24		# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow out of the subtraction
	dsubu	$10,$24,$11		# minus the incoming borrow
	sgtu	$11,$10,$24		# borrow out of that step
	sd	$10,($4)
	or	$11,$1
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	daddu	$4,8			# delay slot: advance rp

	dsubu	$11,$25,$11		# handle upmost overflow bit
	move	$15,$29
	dsubu	$4,$9			# restore rp
	not	$25,$11			# $11 = keep-tp mask, $25 = keep-rp mask

	/*
	 * Masked copy: rp[i] = (tp[i] & $11) | (rp[i] & $25), and tp[i] is
	 * wiped.  Both vectors are read on every iteration regardless of
	 * which one is kept.
	 */
.align	4
.Lcopy:	ld	$14,($15)		# tp[i]
	ld	$12,($4)		# rp[i]
	sd	$0,($15)		# zero the scratch word
	daddu	$15,8
	and	$14,$11
	and	$12,$25
	or	$12,$14
	sltu	$1,$15,$20
	sd	$12,($4)
	bnez	$1,.Lcopy
	daddu	$4,8			# delay slot: advance rp

	li	$4,1
	li	$2,1			# return 1

	.set	noreorder
	move	$29,$30			# drop tp[], back to the save frame
	ld	$30,(14-1)*8($29)
	ld	$23,(14-2)*8($29)
	ld	$22,(14-3)*8($29)
	ld	$21,(14-4)*8($29)
	ld	$20,(14-5)*8($29)
	ld	$19,(14-6)*8($29)
	ld	$18,(14-7)*8($29)
	ld	$17,(14-8)*8($29)
	ld	$16,(14-9)*8($29)
	jr	$31
	daddu	$29,14*8		# delay slot: release the frame
.end	bn_mul_mont_internal
/* Attribution string emitted into the read-only data section. */
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"

/*
 * Emit an empty .note.GNU-stack section when the build defines
 * HAVE_GNU_STACK.
 */
#ifdef HAVE_GNU_STACK
.section	.note.GNU-stack,"",%progbits
#endif