286 lines
3.9 KiB
ArmAsm
286 lines
3.9 KiB
ArmAsm
/*
 * bn_mul_mont -- exported entry stub for the Montgomery multiplication
 * routine in this file.
 *
 * Only $9 is examined here.  The worker treats $9 as a word count (it
 * scales it by 8 and uses it as the byte length of its arrays).
 *
 *   $9 < 4  (signed)   return 0 in $2 and $4
 *   4 <= $9 < 17       branch to bn_mul_mont_internal, which returns 1
 *   $9 >= 17           return 0 in $2 and $4
 *
 * All other argument registers are passed through untouched.
 * The code runs under .set noreorder, so the instruction after every
 * branch or jump is its delay slot and executes on both paths.
 */
	.text

	.set	noat			# $1 is used explicitly as scratch
	.set	noreorder		# delay slots are filled by hand

	.align	5
	.globl	bn_mul_mont
	.ent	bn_mul_mont
bn_mul_mont:
	slti	$1,$9,4			# $1 = ($9 < 4), signed compare
	bne	$1,$0,.Lmont_reject
	li	$2,0			# delay slot: preset return value 0
	slti	$1,$9,17		# on in-order CPU
	bne	$1,$0,bn_mul_mont_internal
	nop				# delay slot left empty
.Lmont_reject:
	jr	$31
	li	$4,0			# delay slot: $4 = 0 as well
	.end	bn_mul_mont
|
||
|
||
/*
 * bn_mul_mont_internal -- Montgomery multiplication worker.
 *
 * Not exported; reached only through the branch in bn_mul_mont.
 *
 * Inputs, as consumed by the code:
 *   $4  rp   destination array, num words are written
 *   $5  ap   multiplicand words
 *   $6  bp   multiplier words, one is consumed per outer pass
 *   $7  np   modulus words (multiplied by m, subtracted at the end)
 *   $8       address of one word; it is loaded once and the low word
 *            of each pass is multiplied by it to form m (presumably
 *            the Montgomery n0 constant -- TODO confirm with caller)
 *   $9  num  word count, turned into a byte count right away
 *
 * Result: rp[0..num-1]; the value 1 is returned in $2 and also in $4.
 *
 * Working registers:
 *   $10/$11  lo0/hi0   running low/high word of the ap[j]*bp[i] chain
 *   $24/$25  lo1/hi1   running low/high word of the np[j]*m chain
 *   $12 aj   $13 bi    $14 nj   $23 m
 *   $16/$17  product ap[j]*bi      $18/$19  product np[j]*m
 *   $15      cursor into tp        $20      tp word, later &tp[num]
 *   $21 i    $22 j     byte offsets into bp and into ap/np
 *   $1, $2   scratch (carry flags and loop conditions)
 *
 * Stack: a 14*8 byte frame holds $30 and $16-$23.  Below it a scratch
 * vector tp of num+2 words is carved out and the stack pointer is
 * rounded down to a 4096-byte boundary; $30 keeps the frame base so
 * the epilogue can undo both.  The first num words of tp are zeroed
 * before returning.
 *
 * Final selection: after rp = tp - np has been stored, each result
 * word is formed as (tp[i] & mask) | (rp[i] & ~mask).  Both arrays are
 * read on every iteration, so the addresses touched do not depend on
 * the borrow.  (Earlier code picked the source pointer from the borrow
 * and copied from it.)
 * NOTE(review): mask is 0 or all-ones only while the top word minus
 * the borrow is 0 or -1, i.e. the intermediate stays below 2*np.
 */
	.align	5
	.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*8,$31
	.mask	0x40000000|16711680,-8
	dsub	$29,14*8		# open the register save frame
	sd	$30,(14-1)*8($29)
	sd	$23,(14-2)*8($29)
	sd	$22,(14-3)*8($29)
	sd	$21,(14-4)*8($29)
	sd	$20,(14-5)*8($29)
	sd	$19,(14-6)*8($29)
	sd	$18,(14-7)*8($29)
	sd	$17,(14-8)*8($29)
	sd	$16,(14-9)*8($29)
	move	$30,$29			# frame base, survives the tp allocation

	.set	reorder
	ld	$8,0($8)		# replace the pointer by the word it addresses
	ld	$13,0($6)		# bp[0]
	ld	$12,0($5)		# ap[0]
	ld	$14,0($7)		# np[0]

	dsub	$29,2*8			# place for two extra words
	sll	$9,3			# num: words to bytes
	li	$1,-4096
	dsub	$29,$9			# room for tp[0..num-1]
	and	$29,$1			# round tp down to a 4096-byte boundary

	/* First pass (bp[0]): tp = ap*bp[0] + np*m, shifted down one word. */
	dmultu	$12,$13			# ap[0]*bp[0]
	ld	$16,8($5)		# ap[1]
	ld	$18,8($7)		# np[1]
	mflo	$10
	mfhi	$11
	dmultu	$10,$8			# low word times the word loaded via $8
	mflo	$23			# m

	dmultu	$16,$13			# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	dmultu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25
	dmultu	$18,$23			# np[1]*m
	daddu	$24,$10			# low words summed; only the carry is kept
	sltu	$1,$24,$10
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp cursor
	li	$22,2*8			# j starts at word 2
	.align	4
.L1st:
	.set	noreorder
	dadd	$12,$5,$22
	dadd	$14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu	$12,$13			# ap[j]*bp[0]
	daddu	$10,$16,$11		# lo0 = previous ap product + hi0
	daddu	$24,$18,$25		# lo1 = previous np product + hi1
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1		# hi0 = product high + carry
	daddu	$25,$19,$2		# hi1 = product high + carry
	mflo	$16
	mfhi	$17

	daddu	$24,$10			# lo1 += lo0
	sltu	$1,$24,$10
	dmultu	$14,$23			# np[j]*m
	daddu	$25,$1
	addu	$22,8			# j++
	sd	$24,($15)		# store tp word
	sltu	$2,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	dadd	$15,8			# delay slot: advance tp cursor
	.set	reorder

	/* Tail of the first pass: fold in the last products and carries. */
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1

	daddu	$24,$18,$25
	sltu	$2,$24,$25
	daddu	$25,$19,$2
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1

	sd	$24,($15)

	daddu	$25,$11			# hi1 += hi0
	sltu	$1,$25,$11
	sd	$25,8($15)
	sd	$1,2*8($15)		# topmost carry word

	li	$21,8			# i starts at word 1
	.align	4
.Louter:
	/* Outer pass i: tp = (tp + ap*bp[i] + np*m), shifted down one word. */
	dadd	$13,$6,$21
	ld	$13,($13)		# bp[i]
	ld	$12,($5)		# ap[0]
	ld	$16,8($5)		# ap[1]
	ld	$20,($29)		# tp[0]

	dmultu	$12,$13			# ap[0]*bp[i]
	ld	$14,($7)		# np[0]
	ld	$18,8($7)		# np[1]
	mflo	$10
	mfhi	$11
	daddu	$10,$20			# lo0 += tp[0]
	dmultu	$10,$8			# low word times the word loaded via $8
	sltu	$1,$10,$20
	daddu	$11,$1
	mflo	$23			# m

	dmultu	$16,$13			# ap[1]*bp[i]
	mflo	$16
	mfhi	$17

	dmultu	$14,$23			# np[0]*m
	mflo	$24
	mfhi	$25

	dmultu	$18,$23			# np[1]*m
	daddu	$24,$10			# low words summed; only the carry is kept
	sltu	$1,$24,$10
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29			# tp cursor
	li	$22,2*8			# j starts at word 2
	ld	$20,8($15)		# tp[1]
	.align	4
.Linner:
	.set	noreorder
	dadd	$12,$5,$22
	dadd	$14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu	$12,$13			# ap[j]*bp[i]
	daddu	$10,$16,$11		# lo0 = previous ap product + hi0
	daddu	$24,$18,$25		# lo1 = previous np product + hi1
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1
	daddu	$25,$19,$2
	mflo	$16
	mfhi	$17

	daddu	$10,$20			# lo0 += tp word
	addu	$22,8			# j++
	dmultu	$14,$23			# np[j]*m
	sltu	$1,$10,$20
	daddu	$24,$10			# lo1 += lo0
	daddu	$11,$1
	sltu	$2,$24,$10
	ld	$20,2*8($15)		# next tp word
	daddu	$25,$2
	sltu	$1,$22,$9		# j < num ?
	mflo	$18
	mfhi	$19
	sd	$24,($15)		# store tp word
	bnez	$1,.Linner
	dadd	$15,8			# delay slot: advance tp cursor
	.set	reorder

	/* Tail of the outer pass. */
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1
	daddu	$10,$20
	sltu	$2,$10,$20
	daddu	$11,$2

	ld	$20,2*8($15)
	daddu	$24,$18,$25
	sltu	$1,$24,$25
	daddu	$25,$19,$1
	daddu	$24,$10
	sltu	$2,$24,$10
	daddu	$25,$2
	sd	$24,($15)

	daddu	$24,$25,$11		# hi1 + hi0
	sltu	$25,$24,$11		# carry becomes the new top word
	daddu	$24,$20			# plus the old top word of tp
	sltu	$1,$24,$20
	daddu	$25,$1
	sd	$24,8($15)
	sd	$25,2*8($15)

	addu	$21,8			# i++
	sltu	$2,$21,$9		# i < num ?
	bnez	$2,.Louter

	/* rp = tp - np, borrow tracked in $11; $25 still holds the top word. */
	.set	noreorder
	dadd	$20,$29,$9		# &tp[num]
	move	$15,$29
	li	$11,0			# clear borrow bit

	.align	4
.Lsub:	ld	$10,($15)
	ld	$24,($7)
	dadd	$15,8
	dadd	$7,8
	dsubu	$24,$10,$24		# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow out of that subtraction
	dsubu	$10,$24,$11		# minus the incoming borrow
	sgtu	$11,$10,$24		# borrow out of this step
	sd	$10,($4)		# rp[i] = difference word
	or	$11,$1			# combined outgoing borrow
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	dadd	$4,8			# delay slot: rp++

	dsubu	$11,$25,$11		# handle upmost overflow bit
	move	$15,$29
	dsub	$4,$9			# restore rp
	not	$25,$11			# $11 = keep-tp mask, $25 = keep-rp mask

	/* rp[i] = (tp[i] & $11) | (rp[i] & $25); tp[i] is wiped. */
	.align	4
.Lcopy:	ld	$14,($15)		# tp[i]
	ld	$12,($4)		# rp[i], the difference stored by .Lsub
	sd	$0,($15)		# wipe tp[i]
	dadd	$15,8
	and	$14,$11
	and	$12,$25
	or	$12,$14
	sltu	$1,$15,$20
	sd	$12,($4)
	bnez	$1,.Lcopy
	dadd	$4,8			# delay slot: rp++

	li	$4,1
	li	$2,1			# return value

	.set	noreorder
	move	$29,$30			# drop tp, back to the frame base
	ld	$30,(14-1)*8($29)
	ld	$23,(14-2)*8($29)
	ld	$22,(14-3)*8($29)
	ld	$21,(14-4)*8($29)
	ld	$20,(14-5)*8($29)
	ld	$19,(14-6)*8($29)
	ld	$18,(14-7)*8($29)
	ld	$17,(14-8)*8($29)
	ld	$16,(14-9)*8($29)
	jr	$31
	dadd	$29,14*8		# delay slot: release the frame
	.end	bn_mul_mont_internal
|
||
/* Read-only data: NUL-terminated identification string for this module. */
.rdata
|
||
.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"
|
||
/*
 * When HAVE_GNU_STACK is defined, emit an empty .note.GNU-stack section
 * (conventionally read by GNU toolchains as: this object does not need
 * an executable stack).
 */
#if defined(HAVE_GNU_STACK)
|
||
.section .note.GNU-stack,"",%progbits
|
||
#endif
|