2023-11-06 23:58:23 -08:00

2162 lines
34 KiB
ArmAsm

# Identification strings placed in the read-only data section.
.rdata
.asciiz "mips3.s, Version 1.2"
.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
# Everything that follows is code.
.text
# The routines below name $1 (the assembler temporary) explicitly as a
# scratch register, hence noat.
.set noat
.align 5
# bn_mul_add_words: exported entry point.
# In:  $6 = word count, tested as a signed value.
#      $4, $5, $7 are not touched here; they are consumed by
#      bn_mul_add_words_internal.
# Out: when $6 <= 0 returns with $2 = 0 and $4 = 0.
#      Otherwise branches to bn_mul_add_words_internal with $2 = 0.
.globl bn_mul_add_words
.ent bn_mul_add_words
bn_mul_add_words:
.set noreorder
bgtz $6,bn_mul_add_words_internal
move $2,$0 # delay slot: runs on both paths, so $2 = 0 either way
jr $31
move $4,$2 # delay slot: result is also copied to $4 (reason not visible here)
.end bn_mul_add_words
.align 5
# bn_mul_add_words_internal: multiply a word vector by a scalar and add it
# into another vector.
# In:  $4 = pointer to 64-bit words that are read, updated and stored back
#      $5 = pointer to 64-bit source words (only read)
#      $6 = word count (positive when entered through bn_mul_add_words)
#      $7 = 64-bit multiplier
#      $2 = incoming carry word (0 when entered through bn_mul_add_words)
# Out: $2 = final carry word, also copied to $4 in the return delay slot.
# Per word:  [$4] = [$4] + carry + low([$5]*$7), and the new carry is
# high([$5]*$7) plus the overflow bits of the two additions.
# Scratch: $1, $3, $8-$15, HI/LO.  $4, $5 and $6 are advanced/consumed.
.ent bn_mul_add_words_internal
bn_mul_add_words_internal:
.set reorder
li $3,-4 # mask that keeps the multiple-of-four part of the count
and $8,$6,$3
beqz $8,.L_bn_mul_add_words_tail # fewer than four words: tail only
# Main loop: four words per pass; each dmultu is issued early so that it
# overlaps with the carry handling of the previous word.
.L_bn_mul_add_words_loop:
ld $12,0($5)
dmultu $12,$7
ld $13,0($4)
ld $14,8($5)
ld $15,8($4)
ld $8,2*8($5)
ld $9,2*8($4)
daddu $13,$2 # word 0: add incoming carry
sltu $2,$13,$2 # All manuals say it "compares 32-bit
# values", but it seems to work fine
# even on 64-bit registers.
mflo $1
mfhi $12
daddu $13,$1 # add low half of the product
daddu $2,$12 # carry += high half of the product
dmultu $14,$7 # start product for word 1
sltu $1,$13,$1 # overflow of the low-half addition
sd $13,0($4)
daddu $2,$1
ld $10,3*8($5)
ld $11,3*8($4)
daddu $15,$2 # word 1: same sequence as word 0
sltu $2,$15,$2
mflo $1
mfhi $14
daddu $15,$1
daddu $2,$14
dmultu $8,$7 # start product for word 2
sltu $1,$15,$1
sd $15,8($4)
daddu $2,$1
subu $6,4 # four words consumed
daddu $4,4*8 # pointers advance now; later stores use negative offsets
daddu $5,4*8
daddu $9,$2 # word 2
sltu $2,$9,$2
mflo $1
mfhi $8
daddu $9,$1
daddu $2,$8
dmultu $10,$7 # start product for word 3
sltu $1,$9,$1
sd $9,-2*8($4)
daddu $2,$1
and $8,$6,$3 # loop condition: remaining count with low two bits cleared
daddu $11,$2 # word 3
sltu $2,$11,$2
mflo $1
mfhi $10
daddu $11,$1
daddu $2,$10
sltu $1,$11,$1
sd $11,-8($4)
.set noreorder
bgtz $8,.L_bn_mul_add_words_loop
daddu $2,$1 # delay slot: finish the carry of word 3 on both paths
beqz $6,.L_bn_mul_add_words_return
nop
# Tail: handles the words left over after the four-word passes, at most
# three steps; the first two decrement $6 and leave early when it hits 0.
.L_bn_mul_add_words_tail:
.set reorder
ld $12,0($5)
dmultu $12,$7
ld $13,0($4)
subu $6,1
daddu $13,$2
sltu $2,$13,$2
mflo $1
mfhi $12
daddu $13,$1
daddu $2,$12
sltu $1,$13,$1
sd $13,0($4)
daddu $2,$1
beqz $6,.L_bn_mul_add_words_return
ld $12,8($5)
dmultu $12,$7
ld $13,8($4)
subu $6,1
daddu $13,$2
sltu $2,$13,$2
mflo $1
mfhi $12
daddu $13,$1
daddu $2,$12
sltu $1,$13,$1
sd $13,8($4)
daddu $2,$1
beqz $6,.L_bn_mul_add_words_return
# Third tail word: no count update, falls through to the return.
ld $12,2*8($5)
dmultu $12,$7
ld $13,2*8($4)
daddu $13,$2
sltu $2,$13,$2
mflo $1
mfhi $12
daddu $13,$1
daddu $2,$12
sltu $1,$13,$1
sd $13,2*8($4)
daddu $2,$1
.L_bn_mul_add_words_return:
.set noreorder
jr $31
move $4,$2 # delay slot: carry is returned in $2 and mirrored in $4
.end bn_mul_add_words_internal
.align 5
# bn_mul_words: exported entry point.
# In:  $6 = word count, tested as a signed value.
#      $4, $5, $7 are not touched here; they are consumed by
#      bn_mul_words_internal.
# Out: when $6 <= 0 returns with $2 = 0 and $4 = 0.
#      Otherwise branches to bn_mul_words_internal with $2 = 0.
.globl bn_mul_words
.ent bn_mul_words
bn_mul_words:
.set noreorder
bgtz $6,bn_mul_words_internal
move $2,$0 # delay slot: runs on both paths, so $2 = 0 either way
jr $31
move $4,$2 # delay slot: result is also copied to $4
.end bn_mul_words
.align 5
# bn_mul_words_internal: multiply a word vector by a scalar.
# In:  $4 = pointer to 64-bit destination words (only written)
#      $5 = pointer to 64-bit source words (only read)
#      $6 = word count (positive when entered through bn_mul_words)
#      $7 = 64-bit multiplier
#      $2 = incoming carry word (0 when entered through bn_mul_words)
# Out: $2 = final carry word, also copied to $4 in the return delay slot.
# Per word:  [$4] = carry + low([$5]*$7), and the new carry is
# high([$5]*$7) plus the overflow bit of that addition.
# Scratch: $1, $3, $8-$15, HI/LO.  $4, $5 and $6 are advanced/consumed.
.ent bn_mul_words_internal
bn_mul_words_internal:
.set reorder
li $3,-4 # mask that keeps the multiple-of-four part of the count
and $8,$6,$3
beqz $8,.L_bn_mul_words_tail # fewer than four words: tail only
# Main loop: four words per pass.
.L_bn_mul_words_loop:
ld $12,0($5)
dmultu $12,$7
ld $14,8($5)
ld $8,2*8($5)
ld $10,3*8($5)
mflo $1
mfhi $12
daddu $2,$1 # carry + low half of the product
sltu $13,$2,$1 # overflow of that addition
dmultu $14,$7 # start product for word 1
sd $2,0($4)
daddu $2,$13,$12 # new carry = overflow + high half
subu $6,4 # four words consumed
daddu $4,4*8 # pointers advance now; later stores use negative offsets
daddu $5,4*8
mflo $1
mfhi $14
daddu $2,$1
sltu $15,$2,$1
dmultu $8,$7 # start product for word 2
sd $2,-3*8($4)
daddu $2,$15,$14
mflo $1
mfhi $8
daddu $2,$1
sltu $9,$2,$1
dmultu $10,$7 # start product for word 3
sd $2,-2*8($4)
daddu $2,$9,$8
and $8,$6,$3 # loop condition: remaining count with low two bits cleared
mflo $1
mfhi $10
daddu $2,$1
sltu $11,$2,$1
sd $2,-8($4)
.set noreorder
bgtz $8,.L_bn_mul_words_loop
daddu $2,$11,$10 # delay slot: carry of word 3, computed on both paths
beqz $6,.L_bn_mul_words_return
nop
# Tail: at most three single-word steps; the first two decrement $6 and
# leave early when it reaches zero.
.L_bn_mul_words_tail:
.set reorder
ld $12,0($5)
dmultu $12,$7
subu $6,1
mflo $1
mfhi $12
daddu $2,$1
sltu $13,$2,$1
sd $2,0($4)
daddu $2,$13,$12
beqz $6,.L_bn_mul_words_return
ld $12,8($5)
dmultu $12,$7
subu $6,1
mflo $1
mfhi $12
daddu $2,$1
sltu $13,$2,$1
sd $2,8($4)
daddu $2,$13,$12
beqz $6,.L_bn_mul_words_return
# Third tail word: no count update, falls through to the return.
ld $12,2*8($5)
dmultu $12,$7
mflo $1
mfhi $12
daddu $2,$1
sltu $13,$2,$1
sd $2,2*8($4)
daddu $2,$13,$12
.L_bn_mul_words_return:
.set noreorder
jr $31
move $4,$2 # delay slot: carry is returned in $2 and mirrored in $4
.end bn_mul_words_internal
.align 5
# bn_sqr_words: exported entry point.
# In:  $6 = word count, tested as a signed value.
#      $4, $5 are not touched here; they are consumed by
#      bn_sqr_words_internal.
# Out: when $6 <= 0 returns with $2 = 0 and $4 = 0.
#      Otherwise branches to bn_sqr_words_internal with $2 = 0.
.globl bn_sqr_words
.ent bn_sqr_words
bn_sqr_words:
.set noreorder
bgtz $6,bn_sqr_words_internal
move $2,$0 # delay slot: runs on both paths, so $2 = 0 either way
jr $31
move $4,$2 # delay slot: result is also copied to $4
.end bn_sqr_words
.align 5
# bn_sqr_words_internal: square each word of a vector independently.
# In:  $4 = pointer to 64-bit destination words, two written per input word
#      $5 = pointer to 64-bit source words (only read)
#      $6 = word count (positive when entered through bn_sqr_words)
# Out: for every source word the low half of its square is stored first and
#      the high half right after it.  $2 is never written here, so the
#      value returned (and copied to $4) is whatever $2 held on entry,
#      which is 0 when entered through bn_sqr_words.
# Scratch: $3, $8-$15, HI/LO.  $4, $5 and $6 are advanced/consumed.
.ent bn_sqr_words_internal
bn_sqr_words_internal:
.set reorder
li $3,-4 # mask that keeps the multiple-of-four part of the count
and $8,$6,$3
beqz $8,.L_bn_sqr_words_tail # fewer than four words: tail only
# Main loop: four source words, eight destination words per pass.
.L_bn_sqr_words_loop:
ld $12,0($5)
dmultu $12,$12
ld $14,8($5)
ld $8,2*8($5)
ld $10,3*8($5)
mflo $13
mfhi $12
sd $13,0($4)
sd $12,8($4)
dmultu $14,$14
subu $6,4 # four source words consumed
daddu $4,8*8 # destination advances twice as fast as the source
daddu $5,4*8
mflo $15
mfhi $14
sd $15,-6*8($4)
sd $14,-5*8($4)
dmultu $8,$8
mflo $9
mfhi $8
sd $9,-4*8($4)
sd $8,-3*8($4)
dmultu $10,$10
and $8,$6,$3 # loop condition: remaining count with low two bits cleared
mflo $11
mfhi $10
sd $11,-2*8($4)
.set noreorder
bgtz $8,.L_bn_sqr_words_loop
sd $10,-8($4) # delay slot: last store of the pass, done on both paths
beqz $6,.L_bn_sqr_words_return
nop
# Tail: at most three single-word steps; the first two decrement $6 and
# leave early when it reaches zero.
.L_bn_sqr_words_tail:
.set reorder
ld $12,0($5)
dmultu $12,$12
subu $6,1
mflo $13
mfhi $12
sd $13,0($4)
sd $12,8($4)
beqz $6,.L_bn_sqr_words_return
ld $12,8($5)
dmultu $12,$12
subu $6,1
mflo $13
mfhi $12
sd $13,2*8($4)
sd $12,3*8($4)
beqz $6,.L_bn_sqr_words_return
# Third tail word: no count update, falls through to the return.
ld $12,2*8($5)
dmultu $12,$12
mflo $13
mfhi $12
sd $13,4*8($4)
sd $12,5*8($4)
.L_bn_sqr_words_return:
.set noreorder
jr $31
move $4,$2 # delay slot: $2 is copied to $4 as in the sibling routines
.end bn_sqr_words_internal
.align 5
# bn_add_words: exported entry point.
# In:  $7 = word count, tested as a signed value.
#      $4, $5, $6 are not touched here; they are consumed by
#      bn_add_words_internal.
# Out: when $7 <= 0 returns with $2 = 0 and $4 = 0.
#      Otherwise branches to bn_add_words_internal with $2 = 0.
.globl bn_add_words
.ent bn_add_words
bn_add_words:
.set noreorder
bgtz $7,bn_add_words_internal
move $2,$0 # delay slot: runs on both paths, so $2 = 0 either way
jr $31
move $4,$2 # delay slot: result is also copied to $4
.end bn_add_words
.align 5
# bn_add_words_internal: add two word vectors with carry propagation.
# In:  $4 = pointer to 64-bit destination words (only written)
#      $5 = pointer to the first operand words (only read)
#      $6 = pointer to the second operand words (only read)
#      $7 = word count (positive when entered through bn_add_words)
#      $2 = incoming carry (0 when entered through bn_add_words)
# Out: $2 = carry out of the last word, also copied to $4 on return.
# Per word:  t = [$6] + [$5];  [$4] = t + carry;  the new carry is the sum
# of the overflow bits of those two additions.
# Scratch: $1, $3, $8-$15, $24, $25.  $4-$7 are advanced/consumed.
.ent bn_add_words_internal
bn_add_words_internal:
.set reorder
li $3,-4 # mask that keeps the multiple-of-four part of the count
and $1,$7,$3
beqz $1,.L_bn_add_words_tail # fewer than four words: tail only
# Main loop: four words per pass.  Pointers and count are updated between
# the loads, so the later loads and all stores use negative offsets.
.L_bn_add_words_loop:
ld $12,0($5)
ld $8,0($6)
subu $7,4
ld $13,8($5)
and $1,$7,$3 # loop condition, evaluated early
ld $14,2*8($5)
daddu $6,4*8
ld $15,3*8($5)
daddu $4,4*8
ld $9,-3*8($6)
daddu $5,4*8
ld $10,-2*8($6)
ld $11,-8($6)
daddu $8,$12 # word 0: t = b + a
sltu $24,$8,$12 # overflow of a + b
daddu $12,$8,$2 # t + carry
sltu $2,$12,$8 # overflow of adding the carry
sd $12,-4*8($4)
daddu $2,$24 # carry out = sum of both overflow bits
daddu $9,$13 # word 1
sltu $25,$9,$13
daddu $13,$9,$2
sltu $2,$13,$9
sd $13,-3*8($4)
daddu $2,$25
daddu $10,$14 # word 2
sltu $24,$10,$14
daddu $14,$10,$2
sltu $2,$14,$10
sd $14,-2*8($4)
daddu $2,$24
daddu $11,$15 # word 3
sltu $25,$11,$15
daddu $15,$11,$2
sltu $2,$15,$11
sd $15,-8($4)
.set noreorder
bgtz $1,.L_bn_add_words_loop
daddu $2,$25 # delay slot: carry of word 3, computed on both paths
beqz $7,.L_bn_add_words_return
nop
# Tail: at most three single-word steps; the first two decrement $7 and
# leave early when it reaches zero.
.L_bn_add_words_tail:
.set reorder
ld $12,0($5)
ld $8,0($6)
daddu $8,$12
subu $7,1
sltu $24,$8,$12
daddu $12,$8,$2
sltu $2,$12,$8
sd $12,0($4)
daddu $2,$24
beqz $7,.L_bn_add_words_return
ld $13,8($5)
ld $9,8($6)
daddu $9,$13
subu $7,1
sltu $25,$9,$13
daddu $13,$9,$2
sltu $2,$13,$9
sd $13,8($4)
daddu $2,$25
beqz $7,.L_bn_add_words_return
# Third tail word: no count update, falls through to the return.
ld $14,2*8($5)
ld $10,2*8($6)
daddu $10,$14
sltu $24,$10,$14
daddu $14,$10,$2
sltu $2,$14,$10
sd $14,2*8($4)
daddu $2,$24
.L_bn_add_words_return:
.set noreorder
jr $31
move $4,$2 # delay slot: carry is returned in $2 and mirrored in $4
.end bn_add_words_internal
.align 5
# bn_sub_words: exported entry point.
# In:  $7 = word count, tested as a signed value.
#      $4, $5, $6 are not touched here; they are consumed by
#      bn_sub_words_internal.
# Out: when $7 <= 0 returns with $2 = 0 and $4 = 0.
#      Otherwise branches to bn_sub_words_internal with $2 = 0.
.globl bn_sub_words
.ent bn_sub_words
bn_sub_words:
.set noreorder
bgtz $7,bn_sub_words_internal
move $2,$0 # delay slot: runs on both paths, so $2 = 0 either way
jr $31
# NOTE(review): the sibling entry stubs use "move $4,$2" here; the result
# is the same because $2 was zeroed just above.
move $4,$0
.end bn_sub_words
.align 5
# bn_sub_words_internal: subtract two word vectors with borrow propagation.
# In:  $4 = pointer to 64-bit destination words (only written)
#      $5 = pointer to the minuend words (only read)
#      $6 = pointer to the subtrahend words (only read)
#      $7 = word count (positive when entered through bn_sub_words)
#      $2 = incoming borrow (0 when entered through bn_sub_words)
# Out: $2 = borrow out of the last word, also copied to $4 on return.
# Per word:  t = [$5] - [$6];  [$4] = t - borrow;  the new borrow is the
# sum of the two underflow bits ([$5] < [$6], and result > t).
# Scratch: $1, $3, $8-$15, $24, $25.  $4-$7 are advanced/consumed.
.ent bn_sub_words_internal
bn_sub_words_internal:
.set reorder
li $3,-4 # mask that keeps the multiple-of-four part of the count
and $1,$7,$3
beqz $1,.L_bn_sub_words_tail # fewer than four words: tail only
# Main loop: four words per pass.  Pointers and count are updated between
# the loads, so the later loads and all stores use negative offsets.
.L_bn_sub_words_loop:
ld $12,0($5)
ld $8,0($6)
subu $7,4
ld $13,8($5)
and $1,$7,$3 # loop condition, evaluated early
ld $14,2*8($5)
daddu $6,4*8
ld $15,3*8($5)
daddu $4,4*8
ld $9,-3*8($6)
daddu $5,4*8
ld $10,-2*8($6)
ld $11,-8($6)
sltu $24,$12,$8 # word 0: underflow of a - b
dsubu $8,$12,$8 # t = a - b
dsubu $12,$8,$2 # t - borrow
sgtu $2,$12,$8 # underflow of subtracting the borrow
sd $12,-4*8($4)
daddu $2,$24 # borrow out = sum of both underflow bits
sltu $25,$13,$9 # word 1
dsubu $9,$13,$9
dsubu $13,$9,$2
sgtu $2,$13,$9
sd $13,-3*8($4)
daddu $2,$25
sltu $24,$14,$10 # word 2
dsubu $10,$14,$10
dsubu $14,$10,$2
sgtu $2,$14,$10
sd $14,-2*8($4)
daddu $2,$24
sltu $25,$15,$11 # word 3
dsubu $11,$15,$11
dsubu $15,$11,$2
sgtu $2,$15,$11
sd $15,-8($4)
.set noreorder
bgtz $1,.L_bn_sub_words_loop
daddu $2,$25 # delay slot: borrow of word 3, computed on both paths
beqz $7,.L_bn_sub_words_return
nop
# Tail: at most three single-word steps; the first two decrement $7 and
# leave early when it reaches zero.
.L_bn_sub_words_tail:
.set reorder
ld $12,0($5)
ld $8,0($6)
subu $7,1
sltu $24,$12,$8
dsubu $8,$12,$8
dsubu $12,$8,$2
sgtu $2,$12,$8
sd $12,0($4)
daddu $2,$24
beqz $7,.L_bn_sub_words_return
ld $13,8($5)
subu $7,1
ld $9,8($6)
sltu $25,$13,$9
dsubu $9,$13,$9
dsubu $13,$9,$2
sgtu $2,$13,$9
sd $13,8($4)
daddu $2,$25
beqz $7,.L_bn_sub_words_return
# Third tail word: no count update, falls through to the return.
ld $14,2*8($5)
ld $10,2*8($6)
sltu $24,$14,$10
dsubu $10,$14,$10
dsubu $14,$10,$2
sgtu $2,$14,$10
sd $14,2*8($4)
daddu $2,$24
.L_bn_sub_words_return:
.set noreorder
jr $31
move $4,$2 # delay slot: borrow is returned in $2 and mirrored in $4
.end bn_sub_words_internal
.align 5
# bn_div_3_words: exported entry point.
# In:  $4 = pointer m; the words at 0($4) and -8($4) are read here and
#           bn_div_3_words_internal later reads -16 relative to it
#      $5 = a 64-bit word kept in $10 for bn_div_3_words_internal
#      $6 = a 64-bit word compared against 0($4) and later used as divisor
# Out: when 0($4) equals $6 returns $2 = -1 (all ones), also copied to $4.
#      Otherwise branches to bn_div_3_words_internal with
#      $4 = word at 0(m), $5 = word at -8(m), $7 = m, $10 = original $5.
.globl bn_div_3_words
.ent bn_div_3_words
bn_div_3_words:
.set noreorder
move $7,$4 # we know that bn_div_words does not
# touch $7, $10, $11 and preserves $6
# so that we can save two arguments
# and return address in registers
# instead of stack:-)
ld $4,($7)
move $10,$5
bne $4,$6,bn_div_3_words_internal
ld $5,-8($7) # delay slot: loaded on both paths
li $2,-1
jr $31
move $4,$2 # delay slot: result is also copied to $4
.end bn_div_3_words
.align 5
# bn_div_3_words_internal: quotient estimate followed by a correction loop.
# In (as set up by bn_div_3_words):
#      $4 = high dividend word, $5 = low dividend word, $6 = divisor word,
#      $7 = pointer whose word at -16 is read below, $10 = second word
#      that the estimate is checked against.
# Steps:
#   1. bn_div_words_internal is called; the code below uses $2 as the
#      quotient estimate q and $5 as the matching remainder.
#   2. t = $10 * q is formed as the pair $13 (high) : $12 (low).
#   3. Loop: leave as soon as high(t) < remainder, or the remainder has
#      wrapped around after adding $6.  Leave also when high(t) equals
#      the remainder and the word at -16($7) is >= low(t).  Otherwise
#      q is decremented, $6 is added to the remainder and $10 is
#      subtracted from t.
# Out: $2 = corrected quotient, also copied to $4 on return.
# The return address is parked in $11 across the call instead of on the
# stack.  Scratch: $1, $8, $12-$15, $24, $25, HI/LO plus whatever
# bn_div_words_internal changes.
.ent bn_div_3_words_internal
bn_div_3_words_internal:
.set reorder
move $11,$31 # save return address in a register
bal bn_div_words_internal
move $31,$11 # restore return address
dmultu $10,$2
ld $14,-2*8($7)
move $8,$0
mfhi $13
mflo $12
sltu $24,$13,$5 # done flag: high(t) < remainder
.L_bn_div_3_words_inner_loop:
bnez $24,.L_bn_div_3_words_inner_loop_done
sgeu $1,$14,$12 # third word >= low(t) ...
seq $25,$13,$5 # ... and high(t) == remainder
and $1,$25 # both true: the estimate is acceptable
sltu $15,$12,$10 # borrow for t -= $10
daddu $5,$6 # remainder += divisor
dsubu $13,$15
dsubu $12,$10
sltu $24,$13,$5 # recompute done flag for the next pass
sltu $8,$5,$6 # remainder wrapped around
or $24,$8
.set noreorder
beqz $1,.L_bn_div_3_words_inner_loop
dsubu $2,1 # delay slot: q-- happens on both paths
daddu $2,1 # estimate was acceptable: undo the decrement
.set reorder
.L_bn_div_3_words_inner_loop_done:
.set noreorder
jr $31
move $4,$2 # delay slot: result is also copied to $4
.end bn_div_3_words_internal
.align 5
# bn_div_words: exported entry point.
# In:  $6 = divisor word; $4, $5 are not touched here and are consumed by
#      bn_div_words_internal.
# Out: when $6 is zero returns $2 = -1 (all ones), also copied to $4.
#      Otherwise branches to bn_div_words_internal.
.globl bn_div_words
.ent bn_div_words
bn_div_words:
.set noreorder
bnez $6,bn_div_words_internal
li $2,-1 # I would rather signal div-by-zero
# which can be done with 'break 7'
jr $31
move $4,$2 # delay slot: result is also copied to $4
.end bn_div_words
.align 5
# bn_div_words_internal: divide the two-word value $4:$5 by the word $6.
# In:  $4 = high dividend word, $5 = low dividend word,
#      $6 = divisor (nonzero when entered through bn_div_words).
# Out: $2 = quotient (also copied to $4), $3 = $5 = remainder,
#      $6 shifted back to its entry value.
# Method as visible below:
#   1. Shift the divisor left until its top bit is set, counting the
#      shift in $25.  When a shift was needed, the dividend is shifted
#      by the same amount; a break 6 is raised when bits of $4 would be
#      shifted out.
#   2. The quotient is produced in two 32-bit halves.  Each half starts
#      from an estimate (0xffffffff, or $4 divided by the upper half of
#      the divisor) that is decremented while divisor*estimate exceeds
#      the current partial dividend.
# Still assembled under the noreorder mode left active by bn_div_words, so
# the instruction after each branch in the first few lines is a delay slot.
# Scratch: $1, $8, $9, $12-$15, $24, $25, HI/LO.  $7, $10, $11 are not used.
.ent bn_div_words_internal
bn_div_words_internal:
move $3,$0
bltz $6,.L_bn_div_words_body # top bit already set: no normalization
move $25,$3 # delay slot: shift count = 0
dsll $6,1
bgtz $6,.-4 # repeat the shift until the top bit is set
addu $25,1 # delay slot: count each shift
.set reorder
negu $13,$25 # variable shifts by $13 act as shifts by 64 - $25
li $14,-1
dsll $14,$13 # mask of the top $25 bits
and $14,$4 # bits of the high word that would be lost
dsrl $1,$5,$13 # bits moving from the low word into the high word
.set noreorder
beqz $14,.+12
nop
break 6 # signal overflow
.set reorder
dsll $4,$25
dsll $5,$25
or $4,$1
.L_bn_div_words_body:
dsrl $3,$6,4*8 # bits
sgeu $1,$4,$6
.set noreorder
beqz $1,.+12 # skip the subtraction when $4 < $6
nop
dsubu $4,$6
.set reorder
# First quotient half.
li $8,-1
dsrl $9,$4,4*8 # bits
dsrl $8,4*8 # q=0xffffffff
beq $3,$9,.L_bn_div_words_skip_div1
ddivu $0,$4,$3
mflo $8
.L_bn_div_words_skip_div1:
dmultu $6,$8
dsll $15,$4,4*8 # bits
dsrl $1,$5,4*8 # bits
or $15,$1 # partial dividend: low half of $4, high half of $5
mflo $12
mfhi $13
# Lower the estimate while the product $13:$12 is above $9:$15.
.L_bn_div_words_inner_loop1:
sltu $14,$15,$12
seq $24,$9,$13
sltu $1,$9,$13
and $14,$24
sltu $2,$12,$6 # borrow for the product update below
or $1,$14
.set noreorder
beqz $1,.L_bn_div_words_inner_loop1_done
dsubu $13,$2 # delay slot: runs on both paths
dsubu $12,$6
b .L_bn_div_words_inner_loop1
dsubu $8,1 # delay slot: estimate minus one
.set reorder
.L_bn_div_words_inner_loop1_done:
dsll $5,4*8 # bits
dsubu $4,$15,$12 # partial remainder
dsll $2,$8,4*8 # bits
# Second quotient half; $3 doubles as the borrow scratch once the
# division below has been issued.
li $8,-1
dsrl $9,$4,4*8 # bits
dsrl $8,4*8 # q=0xffffffff
beq $3,$9,.L_bn_div_words_skip_div2
ddivu $0,$4,$3
mflo $8
.L_bn_div_words_skip_div2:
dmultu $6,$8
dsll $15,$4,4*8 # bits
dsrl $1,$5,4*8 # bits
or $15,$1
mflo $12
mfhi $13
.L_bn_div_words_inner_loop2:
sltu $14,$15,$12
seq $24,$9,$13
sltu $1,$9,$13
and $14,$24
sltu $3,$12,$6
or $1,$14
.set noreorder
beqz $1,.L_bn_div_words_inner_loop2_done
dsubu $13,$3 # delay slot: runs on both paths
dsubu $12,$6
b .L_bn_div_words_inner_loop2
dsubu $8,1 # delay slot: estimate minus one
.set reorder
.L_bn_div_words_inner_loop2_done:
dsubu $4,$15,$12
or $2,$8 # combine both quotient halves
dsrl $3,$4,$25 # $3 contains remainder if anybody wants it
dsrl $6,$25 # restore $6
.set noreorder
move $5,$3 # remainder is also left in $5
jr $31
move $4,$2 # delay slot: quotient is also copied to $4
.end bn_div_words_internal
.align 5
# bn_mul_comba8: full product of two 8-word numbers, column by column.
# In:  $4 = pointer to 16 destination words (r[0] .. r[15] are stored)
#      $5 = pointer to 8 source words a[0] .. a[7]
#      $6 = pointer to 8 source words b[0] .. b[7]
# Register roles after the loads:
#      a[0..3] = $12-$15, a[4] = $16, a[5] = $18, a[6] = $20, a[7] = $5
#      b[0..3] = $8-$11,  b[4] = $17, b[5] = $19, b[6] = $21, b[7] = $6
#      $2, $3, $7 rotate as the three column accumulators (c1, c2, c3 in
#      the comments), $24/$25 receive LO/HI, $1 holds carry bits.
# Each mul_add_c step adds one 128-bit product into the accumulator
# triple; the dmultu for the following step is issued in the middle of
# the current one, which is why each comment sits next to the multiply
# that starts the step it names.
# $16-$21 are saved in a 48-byte stack frame and restored before return.
.globl bn_mul_comba8
.ent bn_mul_comba8
bn_mul_comba8:
.set noreorder
.frame $29,6*8,$31
.mask 0x003f0000,-8
dsubu $29,6*8
sd $21,5*8($29)
sd $20,4*8($29)
sd $19,3*8($29)
sd $18,2*8($29)
sd $17,1*8($29)
sd $16,0*8($29)
.set reorder
# NOTE(review): the spelling "1ine" in the comment below is presumably
# deliberate, so that the preprocessor does not see a directive name at
# the start of that comment line; confirm before correcting it.
ld $12,0($5) # If compiled with -mips3 option on
# R5000 box assembler barks on this
# 1ine with "should not have mult/div
# as last instruction in bb (R10K
# bug)" warning. If anybody out there
# has a clue about how to circumvent
# this do send me a note.
# <appro@fy.chalmers.se>
ld $8,0($6)
ld $13,8($5)
ld $14,2*8($5)
dmultu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3);
ld $15,3*8($5)
ld $9,8($6)
ld $10,2*8($6)
ld $11,3*8($6)
mflo $2
mfhi $3
ld $16,4*8($5)
ld $18,5*8($5)
dmultu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1);
ld $20,6*8($5)
ld $5,7*8($5)
ld $17,4*8($6)
ld $19,5*8($6)
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1);
daddu $7,$25,$1
ld $21,6*8($6)
ld $6,7*8($6)
sd $2,0($4) # r[0]=c1;
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $2,$7,$25
sd $3,8($4) # r[1]=c2;
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2);
daddu $25,$1
daddu $2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $3,$2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,2*8($4) # r[2]=c3;
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $7,$3,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
sd $2,3*8($4) # r[3]=c1;
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $2,$7,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,4*8($4) # r[4]=c2;
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $3,$2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,5*8($4) # r[5]=c3;
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $7,$3,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
sd $2,6*8($4) # r[6]=c1;
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $2,$7,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,7*8($4) # r[7]=c2;
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $3,$2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,8*8($4) # r[8]=c3;
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $7,$3,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
sd $2,9*8($4) # r[9]=c1;
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $2,$7,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,10*8($4) # r[10]=c2;
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $3,$2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,11*8($4) # r[11]=c3;
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $7,$3,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
sd $2,12*8($4) # r[12]=c1;
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $2,$7,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,13*8($4) # r[13]=c2;
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
daddu $25,$1
daddu $2,$25
sd $7,14*8($4) # r[14]=c3;
sd $2,15*8($4) # r[15]=c1;
# Epilogue: restore the saved registers and release the frame.
.set noreorder
ld $21,5*8($29)
ld $20,4*8($29)
ld $19,3*8($29)
ld $18,2*8($29)
ld $17,1*8($29)
ld $16,0*8($29)
jr $31
daddu $29,6*8 # delay slot: pop the 48-byte frame
.end bn_mul_comba8
.align 5
# bn_mul_comba4: full product of two 4-word numbers, column by column.
# In:  $4 = pointer to 8 destination words (offsets 0 .. 7*8 are stored)
#      $5 = pointer to 4 source words a[0] .. a[3], loaded into $12-$15
#      $6 = pointer to 4 source words b[0] .. b[3], loaded into $8-$11
# $2, $3, $7 rotate as the three column accumulators (c1, c2, c3 in the
# comments), $24/$25 receive LO/HI and $1 holds carry bits.  The dmultu
# for the following step is issued in the middle of the current one.
# No stack frame is used and nothing is placed in $2 for the caller.
.globl bn_mul_comba4
.ent bn_mul_comba4
bn_mul_comba4:
.set reorder
ld $12,0($5)
ld $8,0($6)
ld $13,8($5)
ld $14,2*8($5)
dmultu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3);
ld $15,3*8($5)
ld $9,8($6)
ld $10,2*8($6)
ld $11,3*8($6)
mflo $2
mfhi $3
sd $2,0($4)
dmultu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1);
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1);
daddu $7,$25,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $2,$7,$25
sd $3,8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2);
daddu $25,$1
daddu $2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $3,$2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,2*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $7,$3,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
sd $2,3*8($4)
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $2,$7,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,4*8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2);
daddu $25,$1
daddu $2,$25
sltu $3,$2,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,5*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
daddu $25,$1
daddu $3,$25
sd $2,6*8($4)
sd $3,7*8($4)
.set noreorder
jr $31
nop
.end bn_mul_comba4
.align 5
# bn_sqr_comba8: square of an 8-word number, column by column.
# In:  $4 = pointer to 16 destination words (offsets 0 .. 15*8 are stored)
#      $5 = pointer to 8 source words, loaded as
#           a[0..3] = $12-$15 and a[4..7] = $8-$11
# $2, $3, $7 rotate as the three column accumulators, $24/$25 receive
# LO/HI, $1 holds carry bits and $6 is used once as extra scratch.
# Two accumulate patterns appear below:
#   - a short one (four instructions after mflo/mfhi) that adds the
#     product once; it is used for the squares a[i]*a[i];
#   - a long one that adds the same LO and HI twice, used for the
#     products a[i]*a[j] with i != j.
# The very first cross product a[0]*a[1] is doubled by shifting instead.
# A dmultu marked "forward multiplication" belongs to the step after the
# one it is embedded in; its result is fetched by the next mflo/mfhi.
# The existing mul_add_c/mul_add_c2 comments name the second operand b[],
# although both operands come from the single source vector.
# No stack frame is used and nothing is placed in $2 for the caller.
.globl bn_sqr_comba8
.ent bn_sqr_comba8
bn_sqr_comba8:
.set reorder
ld $12,0($5)
ld $13,8($5)
ld $14,2*8($5)
ld $15,3*8($5)
dmultu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3);
ld $8,4*8($5)
ld $9,5*8($5)
ld $10,6*8($5)
ld $11,7*8($5)
mflo $2
mfhi $3
sd $2,0($4)
dmultu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1);
mflo $24
mfhi $25
slt $2,$25,$0 # top bit of HI becomes the carry into the third word
dsll $25,1
dmultu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2);
slt $6,$24,$0 # top bit of LO moves into the doubled HI
daddu $25,$6
dsll $24,1
daddu $3,$24
sltu $1,$3,$24
daddu $7,$25,$1
sd $3,8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $13,$13 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $3,$2,$1
daddu $2,$25
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,2*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $13,$14 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $7,$3,$1
daddu $3,$25
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $8,$12 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $1,$3,$1
daddu $3,$25
daddu $7,$1
sltu $25,$3,$25
daddu $7,$25
sd $2,3*8($4)
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $15,$13 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $2,$7,$1
daddu $7,$25
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$14 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $1,$7,$1
daddu $7,$25
daddu $2,$1
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,4*8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $13,$8 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $3,$2,$1
daddu $2,$25
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $14,$15 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $1,$2,$1
daddu $2,$25
daddu $3,$1
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $10,$12 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $1,$2,$1
daddu $2,$25
daddu $3,$1
sltu $25,$2,$25
daddu $3,$25
sd $7,5*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $9,$13 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $7,$3,$1
daddu $3,$25
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $8,$14 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $1,$3,$1
daddu $3,$25
daddu $7,$1
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$15 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $1,$3,$1
daddu $3,$25
daddu $7,$1
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
sd $2,6*8($4)
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $13,$10 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $2,$7,$1
daddu $7,$25
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$9 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $1,$7,$1
daddu $7,$25
daddu $2,$1
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $15,$8 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $1,$7,$1
daddu $7,$25
daddu $2,$1
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $11,$13 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $1,$7,$1
daddu $7,$25
daddu $2,$1
sltu $25,$7,$25
daddu $2,$25
sd $3,7*8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $10,$14 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $3,$2,$1
daddu $2,$25
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $9,$15 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $1,$2,$1
daddu $2,$25
daddu $3,$1
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $8,$8 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $1,$2,$1
daddu $2,$25
daddu $3,$1
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,8*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$10 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $7,$3,$1
daddu $3,$25
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $8,$9 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $1,$3,$1
daddu $3,$25
daddu $7,$1
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $11,$15 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $1,$3,$1
daddu $3,$25
daddu $7,$1
sltu $25,$3,$25
daddu $7,$25
sd $2,9*8($4)
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $10,$8 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $2,$7,$1
daddu $7,$25
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $9,$9 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $1,$7,$1
daddu $7,$25
daddu $2,$1
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,10*8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $9,$10 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $3,$2,$1
daddu $2,$25
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $11,$9 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $1,$2,$1
daddu $2,$25
daddu $3,$1
sltu $25,$2,$25
daddu $3,$25
sd $7,11*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $10,$10 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $7,$3,$1
daddu $3,$25
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1);
daddu $25,$1
daddu $3,$25
sltu $1,$3,$25
daddu $7,$1
sd $2,12*8($4)
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $11,$11 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $2,$7,$1
daddu $7,$25
sltu $25,$7,$25
daddu $2,$25
sd $3,13*8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
daddu $25,$1
daddu $2,$25
sd $7,14*8($4)
sd $2,15*8($4)
.set noreorder
jr $31
nop
.end bn_sqr_comba8
.align 5
# bn_sqr_comba4: square of a 4-word number, column by column.
# In:  $4 = pointer to 8 destination words (offsets 0 .. 7*8 are stored)
#      $5 = pointer to 4 source words, loaded as a[0..3] = $12-$15
# $2, $3, $7 rotate as the three column accumulators, $24/$25 receive
# LO/HI, $1 holds carry bits and $6 is used once as extra scratch.
# Squares a[i]*a[i] are added once (short four-instruction pattern);
# cross products a[i]*a[j] are added twice (long pattern), except the
# first one, a[0]*a[1], which is doubled by shifting.
# A dmultu marked "forward multiplication" belongs to the step after the
# one it is embedded in; its result is fetched by the next mflo/mfhi.
# No stack frame is used and nothing is placed in $2 for the caller.
.globl bn_sqr_comba4
.ent bn_sqr_comba4
bn_sqr_comba4:
.set reorder
ld $12,0($5)
ld $13,8($5)
dmultu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3);
ld $14,2*8($5)
ld $15,3*8($5)
mflo $2
mfhi $3
sd $2,0($4)
dmultu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1);
mflo $24
mfhi $25
slt $2,$25,$0 # top bit of HI becomes the carry into the third word
dsll $25,1
dmultu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2);
slt $6,$24,$0 # top bit of LO moves into the doubled HI
daddu $25,$6
dsll $24,1
daddu $3,$24
sltu $1,$3,$24
daddu $7,$25,$1
sd $3,8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $13,$13 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $3,$2,$1
daddu $2,$25
sltu $25,$2,$25
daddu $3,$25
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3);
daddu $25,$1
daddu $2,$25
sltu $1,$2,$25
daddu $3,$1
sd $7,2*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $13,$14 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $7,$3,$1
daddu $3,$25
sltu $25,$3,$25
daddu $7,$25
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
dmultu $15,$13 # forward multiplication
daddu $2,$24
daddu $1,$25
sltu $24,$2,$24
daddu $3,$1
daddu $25,$24
sltu $1,$3,$1
daddu $3,$25
daddu $7,$1
sltu $25,$3,$25
daddu $7,$25
sd $2,3*8($4)
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$14 # forward multiplication
daddu $3,$24
daddu $1,$25
sltu $24,$3,$24
daddu $7,$1
daddu $25,$24
sltu $2,$7,$1
daddu $7,$25
sltu $25,$7,$25
daddu $2,$25
mflo $24
mfhi $25
daddu $3,$24
sltu $1,$3,$24
dmultu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2);
daddu $25,$1
daddu $7,$25
sltu $1,$7,$25
daddu $2,$1
sd $3,4*8($4)
mflo $24
mfhi $25
daddu $7,$24
sltu $1,$7,$24
dmultu $15,$15 # forward multiplication
daddu $7,$24
daddu $1,$25
sltu $24,$7,$24
daddu $2,$1
daddu $25,$24
sltu $3,$2,$1
daddu $2,$25
sltu $25,$2,$25
daddu $3,$25
sd $7,5*8($4)
mflo $24
mfhi $25
daddu $2,$24
sltu $1,$2,$24
daddu $25,$1
daddu $3,$25
sd $2,6*8($4)
sd $3,7*8($4)
.set noreorder
jr $31
nop
.end bn_sqr_comba4
/* When HAVE_GNU_STACK is defined, emit an empty .note.GNU-stack section;
   GNU toolchains conventionally read this as "no executable stack needed". */
#if defined(HAVE_GNU_STACK)
.section .note.GNU-stack,"",%progbits
#endif